diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h index f6ac26332a..b3dc0cfb54 100644 --- a/include/pcmki/pcmki_sched_utils.h +++ b/include/pcmki/pcmki_sched_utils.h @@ -1,102 +1,102 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PENGINE_AUTILS__H # define PENGINE_AUTILS__H #include // bool #include // GList, GHashTable, gboolean, guint #include // GListPtr #include // lrmd_event_data_t #include // cib_t #include #include #include /* Constraint helper functions */ pcmk__colocation_t *invert_constraint(pcmk__colocation_t *constraint); pe__location_t *copy_constraint(pe__location_t *constraint); pe__location_t *rsc2node_new(const char *id, pe_resource_t *rsc, int weight, const char *discovery_mode, pe_node_t *node, pe_working_set_t *data_set); void pcmk__new_colocation(const char *id, const char *node_attr, int score, pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, const char *state_lh, const char *state_rh, bool influence, pe_working_set_t *data_set); extern gboolean rsc_ticket_new(const char *id, pe_resource_t * rsc_lh, pe_ticket_t * ticket, const char *state_lh, const char *loss_policy, pe_working_set_t * data_set); GHashTable *pcmk__copy_node_table(GHashTable *nodes); GList *pcmk__copy_node_list(const GList *list, bool reset); GList *sort_nodes_by_weight(GList *nodes, pe_node_t *active_node, pe_working_set_t *data_set); extern gboolean can_run_resources(const pe_node_t * node); -extern gboolean native_assign_node(pe_resource_t * rsc, GListPtr candidates, pe_node_t * chosen, +extern gboolean native_assign_node(pe_resource_t *rsc, pe_node_t *chosen, gboolean force); void native_deallocate(pe_resource_t * rsc); extern void log_action(unsigned int log_level, const char *pre_text, pe_action_t * action, gboolean details); gboolean can_run_any(GHashTable * nodes); pe_resource_t *find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set); pe_resource_t *find_compatible_child_by_node(pe_resource_t * local_child, pe_node_t * local_node, pe_resource_t * rsc, enum rsc_role_e filter, gboolean current); gboolean is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current); bool assign_node(pe_resource_t * rsc, pe_node_t * node, gboolean force); enum pe_action_flags summary_action_flags(pe_action_t * action, GListPtr children, pe_node_t * node); enum action_tasks clone_child_action(pe_action_t * action); int copies_per_node(pe_resource_t * rsc); enum filter_colocation_res { influence_nothing = 0, influence_rsc_location, influence_rsc_priority, }; extern enum filter_colocation_res filter_colocation_constraint(pe_resource_t * rsc_lh, pe_resource_t * rsc_rh, pcmk__colocation_t *constraint, gboolean preview); extern int compare_capacity(const pe_node_t * node1, const pe_node_t * node2); extern void calculate_utilization(GHashTable * current_utilization, GHashTable * utilization, gboolean plus); extern void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_working_set_t * data_set); pe_action_t *create_pseudo_resource_op(pe_resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set); pe_action_t *pe_cancel_op(pe_resource_t *rsc, const char *name, guint interval_ms, pe_node_t *node, pe_working_set_t *data_set); pe_action_t *sched_shutdown_op(pe_node_t *node, pe_working_set_t *data_set); xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event, const char *caller_version, int target_rc, const char *node, const char *origin, int level); # define LOAD_STOPPED "load_stopped" void modify_configuration( pe_working_set_t * data_set, cib_t *cib, const char *quorum, const char *watchdog, GListPtr node_up, GListPtr node_down, GListPtr node_fail, GListPtr op_inject, GListPtr ticket_grant, GListPtr ticket_revoke, GListPtr ticket_standby, GListPtr ticket_activate); int run_simulation(pe_working_set_t * data_set, cib_t *cib, GListPtr op_fail_list, bool quiet); #endif diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c index dd6ff489c6..1ba447a736 100644 --- a/lib/pacemaker/pcmk_sched_clone.c +++ b/lib/pacemaker/pcmk_sched_clone.c @@ -1,1508 +1,1508 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #define VARIANT_CLONE 1 #include gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all); static gint sort_rsc_id(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; long num1, num2; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* * Sort clone instances numerically by instance number, so instance :10 * comes after :9. */ num1 = strtol(strrchr(resource1->id, ':') + 1, NULL, 10); num2 = strtol(strrchr(resource2->id, ':') + 1, NULL, 10); if (num1 < num2) { return -1; } else if (num1 > num2) { return 1; } return 0; } static pe_node_t * parent_node_instance(const pe_resource_t * rsc, pe_node_t * node) { pe_node_t *ret = NULL; if (node != NULL && rsc->parent) { ret = pe_hash_table_lookup(rsc->parent->allowed_nodes, node->details->id); } else if(node != NULL) { ret = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); } return ret; } static gboolean did_fail(const pe_resource_t * rsc) { GListPtr gIter = rsc->children; if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (did_fail(child_rsc)) { return TRUE; } } return FALSE; } gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) { int rc = 0; pe_node_t *node1 = NULL; pe_node_t *node2 = NULL; pe_node_t *current_node1 = NULL; pe_node_t *current_node2 = NULL; unsigned int nnodes1 = 0; unsigned int nnodes2 = 0; gboolean can1 = TRUE; gboolean can2 = TRUE; const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* allocation order: * - active instances * - instances running on nodes with the least copies * - active instances on nodes that can't support them or are to be fenced * - failed instances * - inactive instances */ current_node1 = pe__find_active_on(resource1, &nnodes1, NULL); current_node2 = pe__find_active_on(resource2, &nnodes2, NULL); if (nnodes1 && nnodes2) { if (nnodes1 < nnodes2) { crm_trace("%s < %s: running_on", resource1->id, resource2->id); return -1; } else if (nnodes1 > nnodes2) { crm_trace("%s > %s: running_on", resource1->id, resource2->id); return 1; } } node1 = current_node1; node2 = current_node2; if (node1) { pe_node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id); if (match == NULL || match->weight < 0) { crm_trace("%s: current location is unavailable", resource1->id); node1 = NULL; can1 = FALSE; } } if (node2) { pe_node_t *match = pe_hash_table_lookup(resource2->allowed_nodes, node2->details->id); if (match == NULL || match->weight < 0) { crm_trace("%s: current location is unavailable", resource2->id); node2 = NULL; can2 = FALSE; } } if (can1 != can2) { if (can1) { crm_trace("%s < %s: availability of current location", resource1->id, resource2->id); return -1; } crm_trace("%s > %s: availability of current location", resource1->id, resource2->id); return 1; } if (resource1->priority < resource2->priority) { crm_trace("%s < %s: priority", resource1->id, resource2->id); return 1; } else if (resource1->priority > resource2->priority) { crm_trace("%s > %s: priority", resource1->id, resource2->id); return -1; } if (node1 == NULL && node2 == NULL) { crm_trace("%s == %s: not active", resource1->id, resource2->id); return 0; } if (node1 != node2) { if (node1 == NULL) { crm_trace("%s > %s: active", resource1->id, resource2->id); return 1; } else if (node2 == NULL) { crm_trace("%s < %s: active", resource1->id, resource2->id); return -1; } } can1 = can_run_resources(node1); can2 = can_run_resources(node2); if (can1 != can2) { if (can1) { crm_trace("%s < %s: can", resource1->id, resource2->id); return -1; } crm_trace("%s > %s: can", resource1->id, resource2->id); return 1; } node1 = parent_node_instance(resource1, node1); node2 = parent_node_instance(resource2, node2); if (node1 != NULL && node2 == NULL) { crm_trace("%s < %s: not allowed", resource1->id, resource2->id); return -1; } else if (node1 == NULL && node2 != NULL) { crm_trace("%s > %s: not allowed", resource1->id, resource2->id); return 1; } if (node1 == NULL || node2 == NULL) { crm_trace("%s == %s: not allowed", resource1->id, resource2->id); return 0; } if (node1->count < node2->count) { crm_trace("%s < %s: count", resource1->id, resource2->id); return -1; } else if (node1->count > node2->count) { crm_trace("%s > %s: count", resource1->id, resource2->id); return 1; } can1 = did_fail(resource1); can2 = did_fail(resource2); if (can1 != can2) { if (can1) { crm_trace("%s > %s: failed", resource1->id, resource2->id); return 1; } crm_trace("%s < %s: failed", resource1->id, resource2->id); return -1; } if (node1 && node2) { int lpc = 0; int max = 0; pe_node_t *n = NULL; GListPtr gIter = NULL; GListPtr list1 = NULL; GListPtr list2 = NULL; GHashTable *hash1 = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); GHashTable *hash2 = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); n = pe__copy_node(current_node1); g_hash_table_insert(hash1, (gpointer) n->details->id, n); n = pe__copy_node(current_node2); g_hash_table_insert(hash2, (gpointer) n->details->id, n); if(resource1->parent) { for (gIter = resource1->parent->rsc_cons; gIter; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; crm_trace("Applying %s to %s", constraint->id, resource1->id); hash1 = pcmk__native_merge_weights(constraint->rsc_rh, resource1->id, hash1, constraint->node_attribute, constraint->score / (float) INFINITY, 0); } for (gIter = resource1->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(constraint, resource1)) { continue; } crm_trace("Applying %s to %s", constraint->id, resource1->id); hash1 = pcmk__native_merge_weights(constraint->rsc_lh, resource1->id, hash1, constraint->node_attribute, constraint->score / (float) INFINITY, pe_weights_positive); } } if(resource2->parent) { for (gIter = resource2->parent->rsc_cons; gIter; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; crm_trace("Applying %s to %s", constraint->id, resource2->id); hash2 = pcmk__native_merge_weights(constraint->rsc_rh, resource2->id, hash2, constraint->node_attribute, constraint->score / (float) INFINITY, 0); } for (gIter = resource2->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(constraint, resource2)) { continue; } crm_trace("Applying %s to %s", constraint->id, resource2->id); hash2 = pcmk__native_merge_weights(constraint->rsc_lh, resource2->id, hash2, constraint->node_attribute, constraint->score / (float) INFINITY, pe_weights_positive); } } /* Current location score */ node1 = g_hash_table_lookup(hash1, current_node1->details->id); node2 = g_hash_table_lookup(hash2, current_node2->details->id); if (node1->weight < node2->weight) { if (node1->weight < 0) { crm_trace("%s > %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight); rc = -1; goto out; } else { crm_trace("%s < %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight); rc = 1; goto out; } } else if (node1->weight > node2->weight) { crm_trace("%s > %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight); rc = -1; goto out; } /* All location scores */ list1 = g_hash_table_get_values(hash1); list2 = g_hash_table_get_values(hash2); list1 = sort_nodes_by_weight(list1, current_node1, data_set); list2 = sort_nodes_by_weight(list2, current_node2, data_set); max = g_list_length(list1); if (max < g_list_length(list2)) { max = g_list_length(list2); } for (; lpc < max; lpc++) { node1 = g_list_nth_data(list1, lpc); node2 = g_list_nth_data(list2, lpc); if (node1 == NULL) { crm_trace("%s < %s: colocated score NULL", resource1->id, resource2->id); rc = 1; break; } else if (node2 == NULL) { crm_trace("%s > %s: colocated score NULL", resource1->id, resource2->id); rc = -1; break; } if (node1->weight < node2->weight) { crm_trace("%s < %s: colocated score", resource1->id, resource2->id); rc = 1; break; } else if (node1->weight > node2->weight) { crm_trace("%s > %s: colocated score", resource1->id, resource2->id); rc = -1; break; } } /* Order by reverse uname - same as sort_node_weight() does? */ out: g_hash_table_destroy(hash1); /* Free mem */ g_hash_table_destroy(hash2); /* Free mem */ g_list_free(list1); g_list_free(list2); if (rc != 0) { return rc; } } rc = strcmp(resource1->id, resource2->id); crm_trace("%s %c %s: default", resource1->id, rc < 0 ? '<' : '>', resource2->id); return rc; } static pe_node_t * can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit) { pe_node_t *local_node = NULL; if (node == NULL && rsc->allowed_nodes) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) { can_run_instance(rsc, local_node, limit); } return NULL; } if (!node) { /* make clang analyzer happy */ goto bail; } else if (can_run_resources(node) == FALSE) { goto bail; } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { goto bail; } local_node = parent_node_instance(rsc, node); if (local_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname); goto bail; } else if (local_node->weight < 0) { common_update_score(rsc, node->details->id, local_node->weight); pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.", rsc->id, node->details->uname); } else if (local_node->count < limit) { pe_rsc_trace(rsc, "%s can run on %s (already running %d)", rsc->id, node->details->uname, local_node->count); return local_node; } else { pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)", rsc->id, node->details->uname, local_node->count, limit); } bail: if (node) { common_update_score(rsc, node->details->id, -INFINITY); } return NULL; } static pe_node_t * allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc, int limit, pe_working_set_t *data_set) { pe_node_t *chosen = NULL; GHashTable *backup = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)", rsc->id, (prefer? prefer->details->uname: "none"), (all_coloc? "all" : "some")); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return rsc->fns->location(rsc, NULL, FALSE); } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } /* Only include positive colocation preferences of dependent resources * if not every node will get a copy of the clone */ append_parent_colocation(rsc->parent, rsc, all_coloc); if (prefer) { pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (local_prefer == NULL || local_prefer->weight < 0) { pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id, prefer->details->uname); return NULL; } } can_run_instance(rsc, NULL, limit); backup = pcmk__copy_node_table(rsc->allowed_nodes); pe_rsc_trace(rsc, "Allocating instance %s", rsc->id); chosen = rsc->cmds->allocate(rsc, prefer, data_set); if (chosen && prefer && (chosen->details != prefer->details)) { crm_info("Not pre-allocating %s to %s because %s is better", rsc->id, prefer->details->uname, chosen->details->uname); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = backup; native_deallocate(rsc); chosen = NULL; backup = NULL; } if (chosen) { pe_node_t *local_node = parent_node_instance(rsc, chosen); if (local_node) { local_node->count++; } else if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { /* what to do? we can't enforce per-node limits in this case */ pcmk__config_err("%s not found in %s (list of %d)", chosen->details->id, rsc->parent->id, g_hash_table_size(rsc->parent->allowed_nodes)); } } if(backup) { g_hash_table_destroy(backup); } return chosen; } static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all) { GListPtr gIter = NULL; gIter = rsc->rsc_cons; for (; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; if (all || cons->score < 0 || cons->score == INFINITY) { child->rsc_cons = g_list_prepend(child->rsc_cons, cons); } } gIter = rsc->rsc_cons_lhs; for (; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(cons, child)) { continue; } if (all || cons->score < 0) { child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons); } } } void distribute_children(pe_resource_t *rsc, GListPtr children, GListPtr nodes, int max, int per_host_max, pe_working_set_t * data_set); void distribute_children(pe_resource_t *rsc, GListPtr children, GListPtr nodes, int max, int per_host_max, pe_working_set_t * data_set) { int loop_max = 0; int allocated = 0; int available_nodes = 0; /* count now tracks the number of clones currently allocated */ for(GListPtr nIter = nodes; nIter != NULL; nIter = nIter->next) { pe_node_t *node = nIter->data; node->count = 0; if (can_run_resources(node)) { available_nodes++; } } if(available_nodes) { loop_max = max / available_nodes; } if (loop_max < 1) { loop_max = 1; } pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)", max, rsc->id, available_nodes, per_host_max, loop_max); /* Pre-allocate as many instances as we can to their current location */ for (GListPtr gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (child->running_on && pcmk_is_set(child->flags, pe_rsc_provisional) && !pcmk_is_set(child->flags, pe_rsc_failed)) { pe_node_t *child_node = pe__current_node(child); pe_node_t *local_node = parent_node_instance(child, child_node); pe_rsc_trace(rsc, "Checking pre-allocation of %s to %s (%d remaining of %d)", child->id, child_node->details->uname, max - allocated, max); if (can_run_resources(child_node) == FALSE || child_node->weight < 0) { pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s", child_node->details->uname, child->id); } else if(local_node && local_node->count >= loop_max) { pe_rsc_trace(rsc, "Not pre-allocating because %s already allocated optimal instances", child_node->details->uname); } else if (allocate_instance(child, child_node, max < available_nodes, per_host_max, data_set)) { pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id, child_node->details->uname); allocated++; } } } pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max); for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (child->running_on != NULL) { pe_node_t *child_node = pe__current_node(child); pe_node_t *local_node = parent_node_instance(child, child_node); if (local_node == NULL) { crm_err("%s is running on %s which isn't allowed", child->id, child_node->details->uname); } } if (!pcmk_is_set(child->flags, pe_rsc_provisional)) { } else if (allocated >= max) { pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max); resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set); } else { if (allocate_instance(child, NULL, max < available_nodes, per_host_max, data_set)) { allocated++; } } } pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d", allocated, rsc->id, max); } pe_node_t * pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { GListPtr nodes = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return NULL; } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { apply_master_prefs(rsc); } pe__set_resource_flags(rsc, pe_rsc_allocating); /* this information is used by sort_clone_instance() when deciding in which * order to allocate clone instances */ for (GListPtr gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; pe_rsc_trace(rsc, "%s: Allocating %s first", rsc->id, constraint->rsc_rh->id); constraint->rsc_rh->cmds->allocate(constraint->rsc_rh, prefer, data_set); } for (GListPtr gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(constraint, NULL)) { continue; } rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY, (pe_weights_rollback | pe_weights_positive)); } pe__show_node_weights(!show_scores, rsc, __func__, rsc->allowed_nodes); nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set); distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, data_set); g_list_free(nodes); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__set_instance_roles(rsc, data_set); } pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating); pe_rsc_trace(rsc, "Done allocating %s", rsc->id); return NULL; } static void clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting, gboolean * active) { GListPtr gIter = NULL; if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; clone_update_pseudo_status(child, stopping, starting, active); } return; } CRM_ASSERT(active != NULL); CRM_ASSERT(starting != NULL); CRM_ASSERT(stopping != NULL); if (rsc->running_on) { *active = TRUE; } gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (*starting && *stopping) { return; } else if (pcmk_is_set(action->flags, pe_action_optional)) { pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid); continue; } else if (!pcmk_any_flags_set(action->flags, pe_action_pseudo|pe_action_runnable)) { pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid); continue; } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) { pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid); *stopping = TRUE; } else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) { if (!pcmk_is_set(action->flags, pe_action_runnable)) { pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d", action->uuid, pcmk_is_set(action->flags, pe_action_runnable), pcmk_is_set(action->flags, pe_action_pseudo)); } else { pe_rsc_trace(rsc, "Starting due to: %s", action->uuid); pe_rsc_trace(rsc, "%s run=%d, pseudo=%d", action->uuid, pcmk_is_set(action->flags, pe_action_runnable), pcmk_is_set(action->flags, pe_action_pseudo)); *starting = TRUE; } } } } static pe_action_t * find_rsc_action(pe_resource_t *rsc, const char *task, gboolean active_only, GList **list) { pe_action_t *match = NULL; GListPtr possible = NULL; GListPtr active = NULL; possible = pe__resource_actions(rsc, NULL, task, FALSE); if (active_only) { GListPtr gIter = possible; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; if (!pcmk_is_set(op->flags, pe_action_optional)) { active = g_list_prepend(active, op); } } if (active && pcmk__list_of_1(active)) { match = g_list_nth_data(active, 0); } if (list) { *list = active; active = NULL; } } else if (possible && pcmk__list_of_1(possible)) { match = g_list_nth_data(possible, 0); } if (list) { *list = possible; possible = NULL; } if (possible) { g_list_free(possible); } if (active) { g_list_free(active); } return match; } static void child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *stop = NULL; pe_action_t *start = NULL; pe_action_t *last_stop = NULL; pe_action_t *last_start = NULL; GListPtr gIter = NULL; gboolean active_only = TRUE; /* change to false to get the old behavior */ clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (clone_data->ordered == FALSE) { return; } /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, sort_rsc_id); for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; stop = find_rsc_action(child, RSC_STOP, active_only, NULL); if (stop) { if (last_stop) { /* child/child relative stop */ order_actions(stop, last_stop, pe_order_optional); } last_stop = stop; } start = find_rsc_action(child, RSC_START, active_only, NULL); if (start) { if (last_start) { /* child/child relative start */ order_actions(last_start, start, pe_order_optional); } last_start = start; } } } void clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify,data_set); child_ordering_constraints(rsc, data_set); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { create_promotable_actions(rsc, data_set); } } void clone_create_pseudo_actions( pe_resource_t * rsc, GListPtr children, notify_data_t **start_notify, notify_data_t **stop_notify, pe_working_set_t * data_set) { gboolean child_active = FALSE; gboolean child_starting = FALSE; gboolean child_stopping = FALSE; gboolean allow_dependent_migrations = TRUE; pe_action_t *stop = NULL; pe_action_t *stopped = NULL; pe_action_t *start = NULL; pe_action_t *started = NULL; pe_rsc_trace(rsc, "Creating actions for %s", rsc->id); for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean starting = FALSE; gboolean stopping = FALSE; child_rsc->cmds->create_actions(child_rsc, data_set); clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active); if (stopping && starting) { allow_dependent_migrations = FALSE; } child_stopping |= stopping; child_starting |= starting; } /* start */ start = create_pseudo_resource_op(rsc, RSC_START, !child_starting, TRUE, data_set); started = create_pseudo_resource_op(rsc, RSC_STARTED, !child_starting, FALSE, data_set); started->priority = INFINITY; if (child_active || child_starting) { update_action_flags(started, pe_action_runnable, __func__, __LINE__); } if (start_notify != NULL && *start_notify == NULL) { *start_notify = create_notification_boundaries(rsc, RSC_START, start, started, data_set); } /* stop */ stop = create_pseudo_resource_op(rsc, RSC_STOP, !child_stopping, TRUE, data_set); stopped = create_pseudo_resource_op(rsc, RSC_STOPPED, !child_stopping, TRUE, data_set); stopped->priority = INFINITY; if (allow_dependent_migrations) { update_action_flags(stop, pe_action_migrate_runnable, __func__, __LINE__); } if (stop_notify != NULL && *stop_notify == NULL) { *stop_notify = create_notification_boundaries(rsc, RSC_STOP, stop, stopped, data_set); if (start_notify && *start_notify && *stop_notify) { order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional); } } } void clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set) { pe_resource_t *last_rsc = NULL; GListPtr gIter; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id); new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { new_rsc_order(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional, data_set); new_rsc_order(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left, data_set); } if (clone_data->ordered) { /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, sort_rsc_id); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->internal_constraints(child_rsc, data_set); order_start_start(rsc, child_rsc, pe_order_runnable_left | pe_order_implies_first_printed); new_rsc_order(child_rsc, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { order_start_start(last_rsc, child_rsc, pe_order_optional); } order_stop_stop(rsc, child_rsc, pe_order_implies_first_printed); new_rsc_order(child_rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { order_stop_stop(child_rsc, last_rsc, pe_order_optional); } last_rsc = child_rsc; } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { promotable_constraints(rsc, data_set); } } bool assign_node(pe_resource_t * rsc, pe_node_t * node, gboolean force) { bool changed = FALSE; if (rsc->children) { for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; changed |= assign_node(child_rsc, node, force); } return changed; } if (rsc->allocated_to != NULL) { changed = true; } - native_assign_node(rsc, NULL, node, force); + native_assign_node(rsc, node, force); return changed; } gboolean is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current) { pe_node_t *node = NULL; enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); CRM_CHECK(child_rsc && local_node, return FALSE); if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { /* We only want instances that haven't failed */ node = child_rsc->fns->location(child_rsc, NULL, current); } if (filter != RSC_ROLE_UNKNOWN && next_role != filter) { crm_trace("Filtered %s", child_rsc->id); return FALSE; } if (node && (node->details == local_node->details)) { return TRUE; } else if (node) { crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname, local_node->details->uname); } else { crm_trace("%s - not allocated %d", child_rsc->id, current); } return FALSE; } pe_resource_t * find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { pe_resource_t *pair = NULL; GListPtr gIter = NULL; GListPtr scratch = NULL; pe_node_t *local_node = NULL; local_node = local_child->fns->location(local_child, NULL, current); if (local_node) { return find_compatible_child_by_node(local_child, local_node, rsc, filter, current); } scratch = g_hash_table_get_values(local_child->allowed_nodes); scratch = sort_nodes_by_weight(scratch, NULL, data_set); gIter = scratch; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = find_compatible_child_by_node(local_child, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id); done: g_list_free(scratch); return pair; } void clone_rsc_colocation_lh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { /* -- Never called -- * * Instead we add the colocation constraints to the child and call from there */ CRM_ASSERT(FALSE); } void clone_rsc_colocation_rh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { GListPtr gIter = NULL; gboolean do_interleave = FALSE; const char *interleave_s = NULL; CRM_CHECK(constraint != NULL, return); CRM_CHECK(rsc_lh != NULL, pe_err("rsc_lh was NULL for %s", constraint->id); return); CRM_CHECK(rsc_rh != NULL, pe_err("rsc_rh was NULL for %s", constraint->id); return); CRM_CHECK(rsc_lh->variant == pe_native, return); pe_rsc_trace(rsc_rh, "Processing constraint %s: %s -> %s %d", constraint->id, rsc_lh->id, rsc_rh->id, constraint->score); if (pcmk_is_set(rsc_rh->flags, pe_rsc_promotable)) { if (pcmk_is_set(rsc_rh->flags, pe_rsc_provisional)) { pe_rsc_trace(rsc_rh, "%s is still provisional", rsc_rh->id); return; } else if (constraint->role_rh == RSC_ROLE_UNKNOWN) { pe_rsc_trace(rsc_rh, "Handling %s as a clone colocation", constraint->id); } else { promotable_colocation_rh(rsc_lh, rsc_rh, constraint, data_set); return; } } /* only the LHS side needs to be labeled as interleave */ interleave_s = g_hash_table_lookup(constraint->rsc_lh->meta, XML_RSC_ATTR_INTERLEAVE); if(crm_is_true(interleave_s) && constraint->rsc_lh->variant > pe_group) { // TODO: Do we actually care about multiple RH copies sharing a LH copy anymore? if (copies_per_node(constraint->rsc_lh) != copies_per_node(constraint->rsc_rh)) { pcmk__config_err("Cannot interleave %s and %s because they do not " "support the same number of instances per node", constraint->rsc_lh->id, constraint->rsc_rh->id); } else { do_interleave = TRUE; } } if (pcmk_is_set(rsc_rh->flags, pe_rsc_provisional)) { pe_rsc_trace(rsc_rh, "%s is still provisional", rsc_rh->id); return; } else if (do_interleave) { pe_resource_t *rh_child = NULL; rh_child = find_compatible_child(rsc_lh, rsc_rh, RSC_ROLE_UNKNOWN, FALSE, data_set); if (rh_child) { pe_rsc_debug(rsc_rh, "Pairing %s with %s", rsc_lh->id, rh_child->id); rsc_lh->cmds->rsc_colocation_lh(rsc_lh, rh_child, constraint, data_set); } else if (constraint->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id); assign_node(rsc_lh, NULL, TRUE); } else { pe_rsc_debug(rsc_rh, "Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id); } return; } else if (constraint->score >= INFINITY) { GListPtr rhs = NULL; gIter = rsc_rh->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { pe_rsc_trace(rsc_rh, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); rhs = g_list_prepend(rhs, chosen); } } node_list_exclude(rsc_lh->allowed_nodes, rhs, FALSE); g_list_free(rhs); return; } gIter = rsc_rh->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint, data_set); } } enum action_tasks clone_child_action(pe_action_t * action) { enum action_tasks result = no_action; pe_resource_t *child = (pe_resource_t *) action->rsc->children->data; if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) { /* Find the action we're notifying about instead */ int stop = 0; char *key = action->uuid; int lpc = strlen(key); for (; lpc > 0; lpc--) { if (key[lpc] == '_' && stop == 0) { stop = lpc; } else if (key[lpc] == '_') { char *task_mutable = NULL; lpc++; task_mutable = strdup(key + lpc); task_mutable[stop - lpc] = 0; crm_trace("Extracted action '%s' from '%s'", task_mutable, key); result = get_complex_task(child, task_mutable, TRUE); free(task_mutable); break; } } } else { result = get_complex_task(child, action->task, TRUE); } return result; } #define pe__clear_action_summary_flags(flags, action, flag) do { \ flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Action summary", action->rsc->id, \ flags, flag, #flag); \ } while (0) enum pe_action_flags summary_action_flags(pe_action_t * action, GListPtr children, pe_node_t * node) { GListPtr gIter = NULL; gboolean any_runnable = FALSE; gboolean check_runnable = TRUE; enum action_tasks task = clone_child_action(action); enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); const char *task_s = task2text(task); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_action_t *child_action = NULL; pe_resource_t *child = (pe_resource_t *) gIter->data; child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node); pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id, node ? node->details->uname : "none", child_action?child_action->uuid:"NA"); if (child_action) { enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); if (pcmk_is_set(flags, pe_action_optional) && !pcmk_is_set(child_flags, pe_action_optional)) { pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid, child_action->uuid); pe__clear_action_summary_flags(flags, action, pe_action_optional); pe__clear_action_flags(action, pe_action_optional); } if (pcmk_is_set(child_flags, pe_action_runnable)) { any_runnable = TRUE; } } } if (check_runnable && any_runnable == FALSE) { pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid); pe__clear_action_summary_flags(flags, action, pe_action_runnable); if (node == NULL) { pe__clear_action_flags(action, pe_action_runnable); } } return flags; } enum pe_action_flags clone_action_flags(pe_action_t * action, pe_node_t * node) { return summary_action_flags(action, action->rsc->children, node); } void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { GListPtr gIter = rsc->children; pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_location(child_rsc, constraint); } } void clone_expand(pe_resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; rsc->cmds->action_flags(op, NULL); } if (clone_data->start_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->start_notify); pcmk__create_notification_keys(rsc, clone_data->start_notify, data_set); create_notifications(rsc, clone_data->start_notify, data_set); } if (clone_data->stop_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->stop_notify); pcmk__create_notification_keys(rsc, clone_data->stop_notify, data_set); create_notifications(rsc, clone_data->stop_notify, data_set); } if (clone_data->promote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->promote_notify); pcmk__create_notification_keys(rsc, clone_data->promote_notify, data_set); create_notifications(rsc, clone_data->promote_notify, data_set); } if (clone_data->demote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->demote_notify); pcmk__create_notification_keys(rsc, clone_data->demote_notify, data_set); create_notifications(rsc, clone_data->demote_notify, data_set); } /* Now that the notifcations have been created we can expand the children */ gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } native_expand(rsc, data_set); /* The notifications are in the graph now, we can destroy the notify_data */ free_notification_data(clone_data->demote_notify); clone_data->demote_notify = NULL; free_notification_data(clone_data->stop_notify); clone_data->stop_notify = NULL; free_notification_data(clone_data->start_notify); clone_data->start_notify = NULL; free_notification_data(clone_data->promote_notify); clone_data->promote_notify = NULL; } // Check whether a resource or any of its children is known on node static bool rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node) { if (rsc->children) { for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; if (rsc_known_on(child, node)) { return TRUE; } } } else if (rsc->known_on) { GHashTableIter iter; pe_node_t *known_node = NULL; g_hash_table_iter_init(&iter, rsc->known_on); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) { if (node->details == known_node->details) { return TRUE; } } } return FALSE; } // Look for an instance of clone that is known on node static pe_resource_t * find_instance_on(const pe_resource_t *clone, const pe_node_t *node) { for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (rsc_known_on(child, node)) { return child; } } return NULL; } // For unique clones, probe each instance separately static gboolean probe_unique_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set) { gboolean any_created = FALSE; for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; any_created |= child->cmds->create_probe(child, node, complete, force, data_set); } return any_created; } // For anonymous clones, only a single instance needs to be probed static gboolean probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set) { // First, check if we probed an instance on this node last time pe_resource_t *child = find_instance_on(rsc, node); // Otherwise, check if we plan to start an instance on this node if (child == NULL) { for (GList *child_iter = rsc->children; child_iter && !child; child_iter = child_iter->next) { pe_node_t *local_node = NULL; pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data; if (child_rsc) { /* make clang analyzer happy */ local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); if (local_node && (local_node->details == node->details)) { child = child_rsc; } } } } // Otherwise, use the first clone instance if (child == NULL) { child = rsc->children->data; } CRM_ASSERT(child); return child->cmds->create_probe(child, node, complete, force, data_set); } gboolean clone_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete, gboolean force, pe_working_set_t * data_set) { gboolean any_created = FALSE; CRM_ASSERT(rsc); rsc->children = g_list_sort(rsc->children, sort_rsc_id); if (rsc->children == NULL) { pe_warn("Clone %s has no children", rsc->id); return FALSE; } if (rsc->exclusive_discover) { pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on * * remove the node from allowed_nodes so that the * notification contains only nodes that we might ever run * on */ g_hash_table_remove(rsc->allowed_nodes, node->details->id); /* Bit of a shortcut - might as well take it */ return FALSE; } } if (pcmk_is_set(rsc->flags, pe_rsc_unique)) { any_created = probe_unique_clone(rsc, node, complete, force, data_set); } else { any_created = probe_anonymous_clone(rsc, node, complete, force, data_set); } return any_created; } void clone_append_meta(pe_resource_t * rsc, xmlNode * xml) { char *name = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); name = crm_meta_name(XML_RSC_ATTR_UNIQUE); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique)); free(name); name = crm_meta_name(XML_RSC_ATTR_NOTIFY); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify)); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX); crm_xml_add_int(xml, name, clone_data->clone_max); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX); crm_xml_add_int(xml, name, clone_data->clone_node_max); free(name); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX); crm_xml_add_int(xml, name, clone_data->promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX); crm_xml_add_int(xml, name, clone_data->promoted_node_max); free(name); /* @COMPAT Maintain backward compatibility with resource agents that * expect the old names (deprecated since 2.0.0). */ name = crm_meta_name(XML_RSC_ATTR_MASTER_MAX); crm_xml_add_int(xml, name, clone_data->promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_MASTER_NODEMAX); crm_xml_add_int(xml, name, clone_data->promoted_node_max); free(name); } } diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c index 1fd7c4f6b8..b47cbb5fcc 100644 --- a/lib/pacemaker/pcmk_sched_native.c +++ b/lib/pacemaker/pcmk_sched_native.c @@ -1,3484 +1,3484 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // The controller removes the resource from the CIB, making this redundant // #define DELETE_THEN_REFRESH 1 #define INFINITY_HACK (INFINITY * -100) #define VARIANT_NATIVE 1 #include static void Recurring(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, pe_working_set_t *data_set); static void RecurringOp(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, xmlNode *operation, pe_working_set_t *data_set); static void Recurring_Stopped(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, pe_working_set_t *data_set); static void RecurringOp_Stopped(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, xmlNode *operation, pe_working_set_t *data_set); void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set); gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set); gboolean StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); /* This array says what the *next* role should be when transitioning from one * role to another. For example going from Stopped to Master, the next role is * RSC_ROLE_SLAVE, because the resource must be started before being promoted. * The current state then becomes Started, which is fed into this array again, * giving a next role of RSC_ROLE_MASTER. */ static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current state Next state*/ /* Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; typedef gboolean (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *next, gboolean optional, pe_working_set_t *data_set); // This array picks the function needed to transition from one role to another static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current state Next state */ /* Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, }, /* Master */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp , }, }; #define clear_node_weights_flags(nw_flags, nw_rsc, flags_to_clear) do { \ flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Node weight", (nw_rsc)->id, (flags), \ (flags_to_clear), #flags_to_clear); \ } while (0) static gboolean native_choose_node(pe_resource_t * rsc, pe_node_t * prefer, pe_working_set_t * data_set) { GListPtr nodes = NULL; pe_node_t *chosen = NULL; pe_node_t *best = NULL; int multiple = 1; int length = 0; gboolean result = FALSE; process_utilization(rsc, &prefer, data_set); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to ? TRUE : FALSE; } // Sort allowed nodes by weight if (rsc->allowed_nodes) { length = g_hash_table_size(rsc->allowed_nodes); } if (length > 0) { nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = sort_nodes_by_weight(nodes, pe__current_node(rsc), data_set); // First node in sorted list has the best score best = g_list_nth_data(nodes, 0); } if (prefer && nodes) { chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (chosen == NULL) { pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", prefer->details->uname, rsc->id); /* Favor the preferred node as long as its weight is at least as good as * the best allowed node's. * * An alternative would be to favor the preferred node even if the best * node is better, when the best node's weight is less than INFINITY. */ } else if ((chosen->weight < 0) || (chosen->weight < best->weight)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", chosen->details->uname, rsc->id); chosen = NULL; } else if (!can_run_resources(chosen)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", chosen->details->uname, rsc->id); chosen = NULL; } else { pe_rsc_trace(rsc, "Chose preferred node %s for %s (ignoring %d candidates)", chosen->details->uname, rsc->id, length); } } if ((chosen == NULL) && nodes) { /* Either there is no preferred node, or the preferred node is not * available, but there are other nodes allowed to run the resource. */ chosen = best; pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates", chosen ? chosen->details->uname : "", rsc->id, length); if (!pe_rsc_is_unique_clone(rsc->parent) && chosen && (chosen->weight > 0) && can_run_resources(chosen)) { /* If the resource is already running on a node, prefer that node if * it is just as good as the chosen node. * * We don't do this for unique clone instances, because * distribute_children() has already assigned instances to their * running nodes when appropriate, and if we get here, we don't want * remaining unallocated instances to prefer a node that's already * running another instance. */ pe_node_t *running = pe__current_node(rsc); if (running && (can_run_resources(running) == FALSE)) { pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", rsc->id, running->details->uname); } else if (running) { for (GList *iter = nodes->next; iter; iter = iter->next) { pe_node_t *tmp = (pe_node_t *) iter->data; if (tmp->weight != chosen->weight) { // The nodes are sorted by weight, so no more are equal break; } if (tmp->details == running->details) { // Scores are equal, so prefer the current node chosen = tmp; } multiple++; } } } } if (multiple > 1) { static char score[33]; int log_level = (chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO; score2char_stack(chosen->weight, score, sizeof(score)); do_crm_log(log_level, "Chose node %s for %s from %d nodes with score %s", chosen->details->uname, rsc->id, multiple, score); } - result = native_assign_node(rsc, nodes, chosen, FALSE); + result = native_assign_node(rsc, chosen, FALSE); g_list_free(nodes); return result; } /*! * \internal * \brief Find score of highest-scored node that matches colocation attribute * * \param[in] rsc Resource whose allowed nodes should be searched * \param[in] attr Colocation attribute name (must not be NULL) * \param[in] value Colocation attribute value to require */ static int best_node_score_matching_attr(const pe_resource_t *rsc, const char *attr, const char *value) { GHashTableIter iter; pe_node_t *node = NULL; int best_score = -INFINITY; const char *best_node = NULL; // Find best allowed node with matching attribute g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if ((node->weight > best_score) && can_run_resources(node) && pcmk__str_eq(value, pe_node_attribute_raw(node, attr), pcmk__str_casei)) { best_score = node->weight; best_node = node->details->uname; } } if (!pcmk__str_eq(attr, CRM_ATTR_UNAME, pcmk__str_casei)) { if (best_node == NULL) { crm_info("No allowed node for %s matches node attribute %s=%s", rsc->id, attr, value); } else { crm_info("Allowed node %s for %s had best score (%d) " "of those matching node attribute %s=%s", best_node, rsc->id, best_score, attr, value); } } return best_score; } /*! * \internal * \brief Add resource's colocation matches to current node allocation scores * * For each node in a given table, if any of a given resource's allowed nodes * have a matching value for the colocation attribute, add the highest of those * nodes' scores to the node's score. * * \param[in,out] nodes Hash table of nodes with allocation scores so far * \param[in] rsc Resource whose allowed nodes should be compared * \param[in] attr Colocation attribute that must match (NULL for default) * \param[in] factor Factor by which to multiply scores being added * \param[in] only_positive Whether to add only positive scores */ static void add_node_scores_matching_attr(GHashTable *nodes, const pe_resource_t *rsc, const char *attr, float factor, bool only_positive) { GHashTableIter iter; pe_node_t *node = NULL; if (attr == NULL) { attr = CRM_ATTR_UNAME; } // Iterate through each node g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { float weight_f = 0; int weight = 0; int score = 0; int new_score = 0; score = best_node_score_matching_attr(rsc, attr, pe_node_attribute_raw(node, attr)); if ((factor < 0) && (score < 0)) { /* Negative preference for a node with a negative score * should not become a positive preference. * * @TODO Consider filtering only if weight is -INFINITY */ crm_trace("%s: Filtering %d + %f * %d (double negative disallowed)", node->details->uname, node->weight, factor, score); continue; } if (node->weight == INFINITY_HACK) { crm_trace("%s: Filtering %d + %f * %d (node was marked unusable)", node->details->uname, node->weight, factor, score); continue; } weight_f = factor * score; // Round the number; see http://c-faq.com/fp/round.html weight = (int) ((weight_f < 0)? (weight_f - 0.5) : (weight_f + 0.5)); /* Small factors can obliterate the small scores that are often actually * used in configurations. If the score and factor are nonzero, ensure * that the result is nonzero as well. */ if ((weight == 0) && (score != 0)) { if (factor > 0.0) { weight = 1; } else if (factor < 0.0) { weight = -1; } } new_score = pe__add_scores(weight, node->weight); if (only_positive && (new_score < 0) && (node->weight > 0)) { crm_trace("%s: Filtering %d + %f * %d = %d " "(negative disallowed, marking node unusable)", node->details->uname, node->weight, factor, score, new_score); node->weight = INFINITY_HACK; continue; } if (only_positive && (new_score < 0) && (node->weight == 0)) { crm_trace("%s: Filtering %d + %f * %d = %d (negative disallowed)", node->details->uname, node->weight, factor, score, new_score); continue; } crm_trace("%s: %d + %f * %d = %d", node->details->uname, node->weight, factor, score, new_score); node->weight = new_score; } } static inline bool is_nonempty_group(pe_resource_t *rsc) { return rsc && (rsc->variant == pe_group) && (rsc->children != NULL); } /*! * \internal * \brief Incorporate colocation constraint scores into node weights * * \param[in,out] rsc Resource being placed * \param[in] rhs ID of 'with' resource * \param[in,out] nodes Nodes, with scores as of this point * \param[in] attr Colocation attribute (ID by default) * \param[in] factor Incorporate scores multiplied by this factor * \param[in] flags Bitmask of enum pe_weights values * * \return Nodes, with scores modified by this constraint * \note This function assumes ownership of the nodes argument. The caller * should free the returned copy rather than the original. */ GHashTable * pcmk__native_merge_weights(pe_resource_t *rsc, const char *rhs, GHashTable *nodes, const char *attr, float factor, uint32_t flags) { GHashTable *work = NULL; // Avoid infinite recursion if (pcmk_is_set(rsc->flags, pe_rsc_merging)) { pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", rhs, rsc->id); return nodes; } pe__set_resource_flags(rsc, pe_rsc_merging); if (pcmk_is_set(flags, pe_weights_init)) { if (is_nonempty_group(rsc)) { GList *last = g_list_last(rsc->children); pe_resource_t *last_rsc = last->data; pe_rsc_trace(rsc, "%s: Merging scores from group %s " "using last member %s (at %.6f)", rhs, rsc->id, last_rsc->id, factor); work = pcmk__native_merge_weights(last_rsc, rhs, NULL, attr, factor, flags); } else { work = pcmk__copy_node_table(rsc->allowed_nodes); } clear_node_weights_flags(flags, rsc, pe_weights_init); } else if (is_nonempty_group(rsc)) { /* The first member of the group will recursively incorporate any * constraints involving other members (including the group internal * colocation). * * @TODO The indirect colocations from the dependent group's other * members will be incorporated at full strength rather than by * factor, so the group's combined stickiness will be treated as * (factor + (#members - 1)) * stickiness. It is questionable what * the right approach should be. */ pe_rsc_trace(rsc, "%s: Merging scores from first member of group %s " "(at %.6f)", rhs, rsc->id, factor); work = pcmk__copy_node_table(nodes); work = pcmk__native_merge_weights(rsc->children->data, rhs, work, attr, factor, flags); } else { pe_rsc_trace(rsc, "%s: Merging scores from %s (at %.6f)", rhs, rsc->id, factor); work = pcmk__copy_node_table(nodes); add_node_scores_matching_attr(work, rsc, attr, factor, pcmk_is_set(flags, pe_weights_positive)); } if (can_run_any(work)) { GListPtr gIter = NULL; int multiplier = (factor < 0)? -1 : 1; if (pcmk_is_set(flags, pe_weights_forward)) { gIter = rsc->rsc_cons; pe_rsc_trace(rsc, "Checking additional %d optional '%s with' constraints", g_list_length(gIter), rsc->id); } else if (is_nonempty_group(rsc)) { pe_resource_t *last_rsc = g_list_last(rsc->children)->data; gIter = last_rsc->rsc_cons_lhs; pe_rsc_trace(rsc, "Checking additional %d optional 'with group %s' " "constraints using last member %s", g_list_length(gIter), rsc->id, last_rsc->id); } else { gIter = rsc->rsc_cons_lhs; pe_rsc_trace(rsc, "Checking additional %d optional 'with %s' constraints", g_list_length(gIter), rsc->id); } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *other = NULL; pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (pcmk_is_set(flags, pe_weights_forward)) { other = constraint->rsc_rh; } else if (!pcmk__colocation_has_influence(constraint, NULL)) { continue; } else { other = constraint->rsc_lh; } pe_rsc_trace(rsc, "Optionally merging score of '%s' constraint (%s with %s)", constraint->id, constraint->rsc_lh->id, constraint->rsc_rh->id); work = pcmk__native_merge_weights(other, rhs, work, constraint->node_attribute, multiplier * constraint->score / (float) INFINITY, flags|pe_weights_rollback); pe__show_node_weights(true, NULL, rhs, work); } } else if (pcmk_is_set(flags, pe_weights_rollback)) { pe_rsc_info(rsc, "%s: Rolling back optional scores from %s", rhs, rsc->id); g_hash_table_destroy(work); pe__clear_resource_flags(rsc, pe_rsc_merging); return nodes; } if (pcmk_is_set(flags, pe_weights_positive)) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->weight == INFINITY_HACK) { node->weight = 1; } } } if (nodes) { g_hash_table_destroy(nodes); } pe__clear_resource_flags(rsc, pe_rsc_merging); return work; } static inline bool node_has_been_unfenced(pe_node_t *node) { const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED); return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches); } static inline bool is_unfence_device(pe_resource_t *rsc, pe_working_set_t *data_set) { return pcmk_is_set(rsc->flags, pe_rsc_fence_device) && pcmk_is_set(data_set->flags, pe_flag_enable_unfencing); } pe_node_t * pcmk__native_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { GListPtr gIter = NULL; if (rsc->parent && !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->allocate(rsc->parent, prefer, data_set); } if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } pe__set_resource_flags(rsc, pe_rsc_allocating); pe__show_node_weights(true, rsc, "Pre-alloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; GHashTable *archive = NULL; pe_resource_t *rsc_rh = constraint->rsc_rh; if (constraint->role_lh >= RSC_ROLE_MASTER || (constraint->score < 0 && constraint->score > -INFINITY)) { archive = pcmk__copy_node_table(rsc->allowed_nodes); } pe_rsc_trace(rsc, "%s: Allocating %s first (constraint=%s score=%d role=%s)", rsc->id, rsc_rh->id, constraint->id, constraint->score, role2text(constraint->role_lh)); rsc_rh->cmds->allocate(rsc_rh, NULL, data_set); rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint, data_set); if (archive && can_run_any(rsc->allowed_nodes) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rsc->id, rsc_rh->id); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = archive; archive = NULL; } if (archive) { g_hash_table_destroy(archive); } } pe__show_node_weights(true, rsc, "Post-coloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(constraint, NULL)) { continue; } pe_rsc_trace(rsc, "Merging score of '%s' constraint (%s with %s)", constraint->id, constraint->rsc_lh->id, constraint->rsc_rh->id); rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY, pe_weights_rollback); } if (rsc->next_role == RSC_ROLE_STOPPED) { pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id); /* make sure it doesn't come up again */ resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set); } else if(rsc->next_role > rsc->role && !pcmk_is_set(data_set->flags, pe_flag_have_quorum) && data_set->no_quorum_policy == no_quorum_freeze) { crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze"); } pe__show_node_weights(!show_scores, rsc, __func__, rsc->allowed_nodes); if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled) && !pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) { pe__clear_resource_flags(rsc, pe_rsc_managed); } if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { const char *reason = NULL; pe_node_t *assign_to = NULL; pe__set_next_role(rsc, rsc->role, "unmanaged"); assign_to = pe__current_node(rsc); if (assign_to == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_MASTER) { reason = "master"; } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { reason = "failed"; } else { reason = "active"; } pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id, (assign_to? assign_to->details->uname : "no node"), reason); - native_assign_node(rsc, NULL, assign_to, TRUE); + native_assign_node(rsc, assign_to, TRUE); } else if (pcmk_is_set(data_set->flags, pe_flag_stop_everything)) { pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id); - native_assign_node(rsc, NULL, NULL, TRUE); + native_assign_node(rsc, NULL, TRUE); } else if (pcmk_is_set(rsc->flags, pe_rsc_provisional) && native_choose_node(rsc, prefer, data_set)) { pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if (rsc->allocated_to == NULL) { if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); } else if (rsc->running_on != NULL) { pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); } } else { pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } pe__clear_resource_flags(rsc, pe_rsc_allocating); if (rsc->is_remote_node) { pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); CRM_ASSERT(remote_node != NULL); if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) { crm_trace("Setting Pacemaker Remote node %s to ONLINE", remote_node->details->id); remote_node->details->online = TRUE; /* We shouldn't consider an unseen remote-node unclean if we are going * to try and connect to it. Otherwise we get an unnecessary fence */ if (remote_node->details->unseen == TRUE) { remote_node->details->unclean = FALSE; } } else { crm_trace("Setting Pacemaker Remote node %s to SHUTDOWN (next role %s, %sallocated)", remote_node->details->id, role2text(rsc->next_role), (rsc->allocated_to? "" : "un")); remote_node->details->shutdown = TRUE; } } return rsc->allocated_to; } static gboolean is_op_dup(pe_resource_t *rsc, const char *name, guint interval_ms) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xmlNode *operation = NULL; guint interval2_ms = 0; CRM_ASSERT(rsc); for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { value = crm_element_value(operation, "name"); if (!pcmk__str_eq(value, name, pcmk__str_casei)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval2_ms = crm_parse_interval_spec(value); if (interval_ms != interval2_ms) { continue; } if (id == NULL) { id = ID(operation); } else { pcmk__config_err("Operation %s is duplicate of %s (do not use " "same name and interval combination more " "than once per resource)", ID(operation), id); dup = TRUE; } } } return dup; } static bool op_cannot_recur(const char *name) { return pcmk__strcase_any_of(name, RSC_STOP, RSC_START, RSC_DEMOTE, RSC_PROMOTE, NULL); } static void RecurringOp(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval_spec = NULL; const char *node_uname = node? node->details->uname : "n/a"; guint interval_ms = 0; pe_action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; CRM_ASSERT(rsc); /* Only process for the operations without role="Stopped" */ role = crm_element_value(operation, "role"); if (role && text2role(role) == RSC_ROLE_STOPPED) { return; } interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval_ms)) { crm_trace("Not creating duplicate recurring action %s for %dms %s", ID(operation), interval_ms, name); return; } if (op_cannot_recur(name)) { pcmk__config_err("Ignoring %s because action '%s' cannot be recurring", ID(operation), name); return; } key = pcmk__op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { crm_trace("Not creating recurring action %s for disabled resource %s", ID(operation), rsc->id); free(key); return; } pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s", ID(operation), rsc->id, role2text(rsc->next_role), node_uname); if (start != NULL) { pe_rsc_trace(rsc, "Marking %s %s due to %s", key, pcmk_is_set(start->flags, pe_action_optional)? "optional" : "mandatory", start->uuid); is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional); } else { pe_rsc_trace(rsc, "Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches == NULL) { is_optional = FALSE; pe_rsc_trace(rsc, "Marking %s mandatory: not active", key); } else { GListPtr gIter = NULL; for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; if (pcmk_is_set(op->flags, pe_action_reschedule)) { is_optional = FALSE; break; } } g_list_free(possible_matches); } if ((rsc->next_role == RSC_ROLE_MASTER && role == NULL) || (role != NULL && text2role(role) != rsc->next_role)) { int log_level = LOG_TRACE; const char *result = "Ignoring"; if (is_optional) { char *after_key = NULL; pe_action_t *cancel_op = NULL; // It's running, so cancel it log_level = LOG_INFO; result = "Cancelling"; cancel_op = pe_cancel_op(rsc, name, interval_ms, node, data_set); switch (rsc->role) { case RSC_ROLE_SLAVE: case RSC_ROLE_STARTED: if (rsc->next_role == RSC_ROLE_MASTER) { after_key = promote_key(rsc); } else if (rsc->next_role == RSC_ROLE_STOPPED) { after_key = stop_key(rsc); } break; case RSC_ROLE_MASTER: after_key = demote_key(rsc); break; default: break; } if (after_key) { custom_action_order(rsc, NULL, cancel_op, rsc, after_key, NULL, pe_order_runnable_left, data_set); } } do_crm_log(log_level, "%s action %s (%s vs. %s)", result, key, role ? role : role2text(RSC_ROLE_SLAVE), role2text(rsc->next_role)); free(key); return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if (is_optional) { pe_rsc_trace(rsc, "%s\t %s (optional)", node_uname, mon->uuid); } if ((start == NULL) || !pcmk_is_set(start->flags, pe_action_runnable)) { pe_rsc_debug(rsc, "%s\t %s (cancelled : start un-runnable)", node_uname, mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear, __func__, __LINE__); } else if (node == NULL || node->details->online == FALSE || node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", node_uname, mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear, __func__, __LINE__); } else if (!pcmk_is_set(mon->flags, pe_action_optional)) { pe_rsc_info(rsc, " Start recurring %s (%us) for %s on %s", mon->task, interval_ms / 1000, rsc->id, node_uname); } if (rsc->next_role == RSC_ROLE_MASTER) { char *running_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); free(running_master); } if ((node == NULL) || pcmk_is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, start_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); custom_action_order(rsc, reload_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); if (rsc->next_role == RSC_ROLE_MASTER) { custom_action_order(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } else if (rsc->role == RSC_ROLE_MASTER) { custom_action_order(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } } } static void Recurring(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, pe_working_set_t * data_set) { if (!pcmk_is_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { RecurringOp(rsc, start, node, operation, data_set); } } } } static void RecurringOp_Stopped(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval_spec = NULL; const char *node_uname = node? node->details->uname : "n/a"; guint interval_ms = 0; GListPtr possible_matches = NULL; GListPtr gIter = NULL; /* Only process for the operations with role="Stopped" */ role = crm_element_value(operation, "role"); if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) { return; } interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval_ms)) { crm_trace("Not creating duplicate recurring action %s for %dms %s", ID(operation), interval_ms, name); return; } if (op_cannot_recur(name)) { pcmk__config_err("Ignoring %s because action '%s' cannot be recurring", ID(operation), name); return; } key = pcmk__op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { crm_trace("Not creating recurring action %s for disabled resource %s", ID(operation), rsc->id); free(key); return; } // @TODO add support if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { crm_notice("Ignoring %s (recurring monitors for Stopped role are " "not supported for anonymous clones)", ID(operation)); return; } pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on nodes where it should not be running", ID(operation), rsc->id, role2text(rsc->next_role)); /* if the monitor exists on the node where the resource will be running, cancel it */ if (node != NULL) { possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches) { pe_action_t *cancel_op = NULL; g_list_free(possible_matches); cancel_op = pe_cancel_op(rsc, name, interval_ms, node, data_set); if (rsc->next_role == RSC_ROLE_STARTED || rsc->next_role == RSC_ROLE_SLAVE) { /* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */ /* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */ custom_action_order(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pe_order_runnable_left, data_set); } pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s", key, role, role2text(rsc->next_role), node_uname); } } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *stop_node = (pe_node_t *) gIter->data; const char *stop_node_uname = stop_node->details->uname; gboolean is_optional = TRUE; gboolean probe_is_optional = TRUE; gboolean stop_is_optional = TRUE; pe_action_t *stopped_mon = NULL; char *rc_inactive = NULL; GListPtr probe_complete_ops = NULL; GListPtr stop_ops = NULL; GListPtr local_gIter = NULL; if (node && pcmk__str_eq(stop_node_uname, node_uname, pcmk__str_casei)) { continue; } pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s", ID(operation), rsc->id, crm_str(stop_node_uname)); /* start a monitor for an already stopped resource */ possible_matches = find_actions_exact(rsc->actions, key, stop_node); if (possible_matches == NULL) { pe_rsc_trace(rsc, "Marking %s mandatory on %s: not active", key, crm_str(stop_node_uname)); is_optional = FALSE; } else { pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key, crm_str(stop_node_uname)); is_optional = TRUE; g_list_free(possible_matches); } stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set); rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); free(rc_inactive); if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { GList *probes = pe__resource_actions(rsc, stop_node, RSC_STATUS, FALSE); GListPtr pIter = NULL; for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; order_actions(probe, stopped_mon, pe_order_runnable_left); crm_trace("%s then %s on %s", probe->uuid, stopped_mon->uuid, stop_node->details->uname); } g_list_free(probes); } if (probe_complete_ops) { g_list_free(probe_complete_ops); } stop_ops = pe__resource_actions(rsc, stop_node, RSC_STOP, TRUE); for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) { pe_action_t *stop = (pe_action_t *) local_gIter->data; if (!pcmk_is_set(stop->flags, pe_action_optional)) { stop_is_optional = FALSE; } if (!pcmk_is_set(stop->flags, pe_action_runnable)) { crm_debug("%s\t %s (cancelled : stop un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear, __func__, __LINE__); } if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, stop_key(rsc), stop, NULL, strdup(key), stopped_mon, pe_order_implies_then | pe_order_runnable_left, data_set); } } if (stop_ops) { g_list_free(stop_ops); } if (is_optional == FALSE && probe_is_optional && stop_is_optional && !pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged", key, crm_str(stop_node_uname)); update_action_flags(stopped_mon, pe_action_optional, __func__, __LINE__); } if (pcmk_is_set(stopped_mon->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(stop_node_uname), stopped_mon->uuid); } if (stop_node->details->online == FALSE || stop_node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear, __func__, __LINE__); } if (pcmk_is_set(stopped_mon->flags, pe_action_runnable) && !pcmk_is_set(stopped_mon->flags, pe_action_optional)) { crm_notice(" Start recurring %s (%us) for %s on %s", stopped_mon->task, interval_ms / 1000, rsc->id, crm_str(stop_node_uname)); } } free(key); } static void Recurring_Stopped(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, pe_working_set_t * data_set) { if (!pcmk_is_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { RecurringOp_Stopped(rsc, start, node, operation, data_set); } } } } static void handle_migration_actions(pe_resource_t * rsc, pe_node_t *current, pe_node_t *chosen, pe_working_set_t * data_set) { pe_action_t *migrate_to = NULL; pe_action_t *migrate_from = NULL; pe_action_t *start = NULL; pe_action_t *stop = NULL; gboolean partial = rsc->partial_migration_target ? TRUE : FALSE; pe_rsc_trace(rsc, "Processing migration actions %s moving from %s to %s . partial migration = %s", rsc->id, current->details->id, chosen->details->id, partial ? "TRUE" : "FALSE"); start = start_action(rsc, chosen, TRUE); stop = stop_action(rsc, current, TRUE); if (partial == FALSE) { migrate_to = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), RSC_MIGRATE, current, TRUE, TRUE, data_set); } migrate_from = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), RSC_MIGRATED, chosen, TRUE, TRUE, data_set); if ((migrate_to && migrate_from) || (migrate_from && partial)) { pe__set_action_flags(start, pe_action_migrate_runnable); pe__set_action_flags(stop, pe_action_migrate_runnable); // This is easier than trying to delete it from the graph update_action_flags(start, pe_action_pseudo, __func__, __LINE__); /* order probes before migrations */ if (partial) { pe__set_action_flags(migrate_from, pe_action_migrate_runnable); migrate_from->needs = start->needs; custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional, data_set); } else { pe__set_action_flags(migrate_from, pe_action_migrate_runnable); pe__set_action_flags(migrate_to, pe_action_migrate_runnable); migrate_to->needs = start->needs; custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_optional, data_set); custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional|pe_order_implies_first_migratable, data_set); } custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_optional|pe_order_implies_first_migratable, data_set); custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional|pe_order_implies_first_migratable|pe_order_pseudo_left, data_set); } if (migrate_to) { add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); /* Pacemaker Remote connections don't require pending to be recorded in * the CIB. We can reduce CIB writes by not setting PENDING for them. */ if (rsc->is_remote_node == FALSE) { /* migrate_to takes place on the source node, but can * have an effect on the target node depending on how * the agent is written. Because of this, we have to maintain * a record that the migrate_to occurred, in case the source node * loses membership while the migrate_to action is still in-flight. */ add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true"); } } if (migrate_from) { add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); } } void native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *start = NULL; pe_node_t *chosen = NULL; pe_node_t *current = NULL; gboolean need_stop = FALSE; bool need_promote = FALSE; gboolean is_moving = FALSE; gboolean allow_migrate = pcmk_is_set(rsc->flags, pe_rsc_allow_migrate)? TRUE : FALSE; GListPtr gIter = NULL; unsigned int num_all_active = 0; unsigned int num_clean_active = 0; bool multiply_active = FALSE; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; CRM_ASSERT(rsc); chosen = rsc->allocated_to; next_role = rsc->next_role; if (next_role == RSC_ROLE_UNKNOWN) { pe__set_next_role(rsc, (chosen == NULL)? RSC_ROLE_STOPPED : RSC_ROLE_STARTED, "allocation"); } pe_rsc_trace(rsc, "Creating all actions for %s transition from %s to %s (%s) on %s", rsc->id, role2text(rsc->role), role2text(rsc->next_role), ((next_role == RSC_ROLE_UNKNOWN)? "implicit" : "explicit"), ((chosen == NULL)? "no node" : chosen->details->uname)); current = pe__find_active_on(rsc, &num_all_active, &num_clean_active); for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { pe_node_t *dangling_source = (pe_node_t *) gIter->data; pe_action_t *stop = NULL; pe_rsc_trace(rsc, "Creating stop action %sfor %s on %s due to dangling migration", pcmk_is_set(data_set->flags, pe_flag_remove_after_stop)? "and cleanup " : "", rsc->id, dangling_source->details->uname); stop = stop_action(rsc, dangling_source, FALSE); pe__set_action_flags(stop, pe_action_dangle); if (pcmk_is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, dangling_source, FALSE, data_set); } } if ((num_all_active == 2) && (num_clean_active == 2) && chosen && rsc->partial_migration_source && rsc->partial_migration_target && (current->details == rsc->partial_migration_source->details) && (chosen->details == rsc->partial_migration_target->details)) { /* The chosen node is still the migration target from a partial * migration. Attempt to continue the migration instead of recovering * by stopping the resource everywhere and starting it on a single node. */ pe_rsc_trace(rsc, "Will attempt to continue with partial migration " "to target %s from %s", rsc->partial_migration_target->details->id, rsc->partial_migration_source->details->id); } else if (!pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) { /* If a resource has "requires" set to nothing or quorum, don't consider * it active on unclean nodes (similar to how all resources behave when * stonith-enabled is false). We can start such resources elsewhere * before fencing completes, and if we considered the resource active on * the failed node, we would attempt recovery for being active on * multiple nodes. */ multiply_active = (num_clean_active > 1); } else { multiply_active = (num_all_active > 1); } if (multiply_active) { if (rsc->partial_migration_target && rsc->partial_migration_source) { // Migration was in progress, but we've chosen a different target crm_notice("Resource %s can no longer migrate from %s to %s " "(will stop on both nodes)", rsc->id, rsc->partial_migration_source->details->uname, rsc->partial_migration_target->details->uname); } else { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); // Resource was incorrectly multiply active pe_proc_err("%s resource %s is active on %u nodes (%s)", crm_str(class), rsc->id, num_all_active, recovery2text(rsc->recovery_type)); crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information"); } if (rsc->recovery_type == recovery_stop_start) { need_stop = TRUE; } /* If by chance a partial migration is in process, but the migration * target is not chosen still, clear all partial migration data. */ rsc->partial_migration_source = rsc->partial_migration_target = NULL; allow_migrate = FALSE; } if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) { pe_rsc_trace(rsc, "Creating start action for %s to represent already pending start", rsc->id); start = start_action(rsc, chosen, TRUE); pe__set_action_flags(start, pe_action_print_always); } if (current && chosen && current->details != chosen->details) { pe_rsc_trace(rsc, "Moving %s from %s to %s", rsc->id, crm_str(current->details->uname), crm_str(chosen->details->uname)); is_moving = TRUE; need_stop = TRUE; } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { if (pcmk_is_set(rsc->flags, pe_rsc_stop)) { need_stop = TRUE; pe_rsc_trace(rsc, "Recovering %s", rsc->id); } else { pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id); if (rsc->next_role == RSC_ROLE_MASTER) { need_promote = TRUE; } } } else if (pcmk_is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id); need_stop = TRUE; } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { pe_rsc_trace(rsc, "Creating start action for promoted resource %s", rsc->id); start = start_action(rsc, chosen, TRUE); if (!pcmk_is_set(start->flags, pe_action_optional)) { // Recovery of a promoted resource pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id); need_stop = TRUE; } } /* Create any additional actions required when bringing resource down and * back up to same level. */ role = rsc->role; while (role != RSC_ROLE_STOPPED) { next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s", (need_stop? "required" : "optional"), rsc->id, role2text(role), role2text(next_role)); if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) { break; } role = next_role; } while ((rsc->role <= rsc->next_role) && (role != rsc->role) && !pcmk_is_set(rsc->flags, pe_rsc_block)) { bool required = need_stop; next_role = rsc_state_matrix[role][rsc->role]; if ((next_role == RSC_ROLE_MASTER) && need_promote) { required = true; } pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s", (required? "required" : "optional"), rsc->id, role2text(role), role2text(next_role)); if (rsc_action_matrix[role][next_role](rsc, chosen, !required, data_set) == FALSE) { break; } role = next_role; } role = rsc->role; /* Required steps from this role to the next */ while (role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; pe_rsc_trace(rsc, "Creating action to take %s from %s to %s (ending at %s)", rsc->id, role2text(role), role2text(next_role), role2text(rsc->next_role)); if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } if (pcmk_is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Not creating recurring monitors for blocked resource %s", rsc->id); } else if ((rsc->next_role != RSC_ROLE_STOPPED) || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Creating recurring monitors for %s resource %s", ((rsc->next_role == RSC_ROLE_STOPPED)? "unmanaged" : "active"), rsc->id); start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); Recurring_Stopped(rsc, start, chosen, data_set); } else { pe_rsc_trace(rsc, "Creating recurring monitors for inactive resource %s", rsc->id); Recurring_Stopped(rsc, NULL, NULL, data_set); } /* if we are stuck in a partial migration, where the target * of the partial migration no longer matches the chosen target. * A full stop/start is required */ if (rsc->partial_migration_target && (chosen == NULL || rsc->partial_migration_target->details != chosen->details)) { pe_rsc_trace(rsc, "Not allowing partial migration of %s to continue", rsc->id); allow_migrate = FALSE; } else if (!is_moving || !pcmk_is_set(rsc->flags, pe_rsc_managed) || pcmk_any_flags_set(rsc->flags, pe_rsc_failed|pe_rsc_start_pending) || (current && current->details->unclean) || rsc->next_role < RSC_ROLE_STARTED) { allow_migrate = FALSE; } if (allow_migrate) { handle_migration_actions(rsc, current, chosen, data_set); } } static void rsc_avoids_remote_nodes(pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc) { node->weight = -INFINITY; } } } /*! * \internal * \brief Return allowed nodes as (possibly sorted) list * * Convert a resource's hash table of allowed nodes to a list. If printing to * stdout, sort the list, to keep action ID numbers consistent for regression * test output (while avoiding the performance hit on a live cluster). * * \param[in] rsc Resource to check for allowed nodes * \param[in] data_set Cluster working set * * \return List of resource's allowed nodes * \note Callers should take care not to rely on the list being sorted. */ static GList * allowed_nodes_as_list(pe_resource_t *rsc, pe_working_set_t *data_set) { GList *allowed_nodes = NULL; if (rsc->allowed_nodes) { allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes); } if (pcmk_is_set(data_set->flags, pe_flag_stdout)) { allowed_nodes = g_list_sort(allowed_nodes, sort_node_uname); } return allowed_nodes; } void native_internal_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { /* This function is on the critical path and worth optimizing as much as possible */ pe_resource_t *top = NULL; GList *allowed_nodes = NULL; bool check_unfencing = FALSE; bool check_utilization = FALSE; if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping native constraints for unmanaged resource: %s", rsc->id); return; } top = uber_parent(rsc); // Whether resource requires unfencing check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device) && pcmk_is_set(data_set->flags, pe_flag_enable_unfencing) && pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing); // Whether a non-default placement strategy is used check_utilization = (g_hash_table_size(rsc->utilization) > 0) && !pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei); // Order stops before starts (i.e. restart) custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional|pe_order_implies_then|pe_order_restart, data_set); // Promotable ordering: demote before stop, start before promote if (pcmk_is_set(top->flags, pe_rsc_promotable) || (rsc->role > RSC_ROLE_SLAVE)) { custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_implies_first_master, data_set); custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL, pe_order_runnable_left, data_set); } // Don't clear resource history if probing on same node custom_action_order(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, pe_order_same_node|pe_order_then_cancels_first, data_set); // Certain checks need allowed nodes if (check_unfencing || check_utilization || rsc->container) { allowed_nodes = allowed_nodes_as_list(rsc, data_set); } if (check_unfencing) { /* Check if the node needs to be unfenced first */ for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *node = item->data; pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, data_set); crm_debug("Ordering any stops of %s before %s, and any starts after", rsc->id, unfence->uuid); /* * It would be more efficient to order clone resources once, * rather than order each instance, but ordering the instance * allows us to avoid unnecessary dependencies that might conflict * with user constraints. * * @TODO: This constraint can still produce a transition loop if the * resource has a stop scheduled on the node being unfenced, and * there is a user ordering constraint to start some other resource * (which will be ordered after the unfence) before stopping this * resource. An example is "start some slow-starting cloned service * before stopping an associated virtual IP that may be moving to * it": * stop this -> unfencing -> start that -> stop this */ custom_action_order(rsc, stop_key(rsc), NULL, NULL, strdup(unfence->uuid), unfence, pe_order_optional|pe_order_same_node, data_set); custom_action_order(NULL, strdup(unfence->uuid), unfence, rsc, start_key(rsc), NULL, pe_order_implies_then_on_node|pe_order_same_node, data_set); } } if (check_utilization) { GListPtr gIter = NULL; pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s", rsc->id, data_set->placement_strategy); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; char *load_stopped_task = crm_strdup_printf(LOAD_STOPPED "_%s", current->details->uname); pe_action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = pe__copy_node(current); update_action_flags(load_stopped, pe_action_optional | pe_action_clear, __func__, __LINE__); } custom_action_order(rsc, stop_key(rsc), NULL, NULL, load_stopped_task, load_stopped, pe_order_load, data_set); } for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *next = item->data; char *load_stopped_task = crm_strdup_printf(LOAD_STOPPED "_%s", next->details->uname); pe_action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = pe__copy_node(next); update_action_flags(load_stopped, pe_action_optional | pe_action_clear, __func__, __LINE__); } custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, start_key(rsc), NULL, pe_order_load, data_set); custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_load, data_set); free(load_stopped_task); } } if (rsc->container) { pe_resource_t *remote_rsc = NULL; if (rsc->is_remote_node) { // rsc is the implicit remote connection for a guest or bundle node /* Do not allow a guest resource to live on a Pacemaker Remote node, * to avoid nesting remotes. However, allow bundles to run on remote * nodes. */ if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { rsc_avoids_remote_nodes(rsc->container); } /* If someone cleans up a guest or bundle node's container, we will * likely schedule a (re-)probe of the container and recovery of the * connection. Order the connection stop after the container probe, * so that if we detect the container running, we will trigger a new * transition and avoid the unnecessary recovery. */ new_rsc_order(rsc->container, RSC_STATUS, rsc, RSC_STOP, pe_order_optional, data_set); /* A user can specify that a resource must start on a Pacemaker Remote * node by explicitly configuring it with the container=NODENAME * meta-attribute. This is of questionable merit, since location * constraints can accomplish the same thing. But we support it, so here * we check whether a resource (that is not itself a remote connection) * has container set to a remote node or guest node resource. */ } else if (rsc->container->is_remote_node) { remote_rsc = rsc->container; } else { remote_rsc = pe__resource_contains_guest_node(data_set, rsc->container); } if (remote_rsc) { /* Force the resource on the Pacemaker Remote node instead of * colocating the resource with the container resource. */ for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *node = item->data; if (node->details->remote_rsc != remote_rsc) { node->weight = -INFINITY; } } } else { /* This resource is either a filler for a container that does NOT * represent a Pacemaker Remote node, or a Pacemaker Remote * connection resource for a guest node or bundle. */ int score; crm_trace("Order and colocate %s relative to its container %s", rsc->id, rsc->container->id); custom_action_order(rsc->container, pcmk__op_key(rsc->container->id, RSC_START, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_implies_then|pe_order_runnable_left, data_set); custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, rsc->container, pcmk__op_key(rsc->container->id, RSC_STOP, 0), NULL, pe_order_implies_first, data_set); if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { score = 10000; /* Highly preferred but not essential */ } else { score = INFINITY; /* Force them to run on the same host */ } pcmk__new_colocation("resource-with-container", NULL, score, rsc, rsc->container, NULL, NULL, true, data_set); } } if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) { /* don't allow remote nodes to run stonith devices * or remote connection resources.*/ rsc_avoids_remote_nodes(rsc); } g_list_free(allowed_nodes); } void native_rsc_colocation_lh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if (constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } pe_rsc_trace(rsc_lh, "Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint, data_set); } enum filter_colocation_res filter_colocation_constraint(pe_resource_t * rsc_lh, pe_resource_t * rsc_rh, pcmk__colocation_t *constraint, gboolean preview) { /* rh side must be allocated before we can process constraint */ if (!preview && pcmk_is_set(rsc_rh->flags, pe_rsc_provisional)) { return influence_nothing; } if ((constraint->role_lh >= RSC_ROLE_SLAVE) && rsc_lh->parent && pcmk_is_set(rsc_lh->parent->flags, pe_rsc_promotable) && !pcmk_is_set(rsc_lh->flags, pe_rsc_provisional)) { /* LH and RH resources have already been allocated, place the correct * priority on LH rsc for the given promotable clone resource role */ return influence_rsc_priority; } if (!preview && !pcmk_is_set(rsc_lh->flags, pe_rsc_provisional)) { // Log an error if we violated a mandatory colocation constraint const pe_node_t *rh_node = rsc_rh->allocated_to; if (rsc_lh->allocated_to == NULL) { // Dependent resource isn't allocated, so constraint doesn't matter return influence_nothing; } if (constraint->score >= INFINITY) { // Dependent resource must colocate with rh_node if ((rh_node == NULL) || (rh_node->details != rsc_lh->allocated_to->details)) { crm_err("%s must be colocated with %s but is not (%s vs. %s)", rsc_lh->id, rsc_rh->id, rsc_lh->allocated_to->details->uname, (rh_node? rh_node->details->uname : "unallocated")); } } else if (constraint->score <= -INFINITY) { // Dependent resource must anti-colocate with rh_node if ((rh_node != NULL) && (rsc_lh->allocated_to->details == rh_node->details)) { crm_err("%s and %s must be anti-colocated but are allocated " "to the same node (%s)", rsc_lh->id, rsc_rh->id, rh_node->details->uname); } } return influence_nothing; } if (constraint->score > 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { crm_trace("LH: Skipping constraint: \"%s\" state filter nextrole is %s", role2text(constraint->role_lh), role2text(rsc_lh->next_role)); return influence_nothing; } if (constraint->score > 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { crm_trace("RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return influence_nothing; } if (constraint->score < 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) { crm_trace("LH: Skipping negative constraint: \"%s\" state filter", role2text(constraint->role_lh)); return influence_nothing; } if (constraint->score < 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) { crm_trace("RH: Skipping negative constraint: \"%s\" state filter", role2text(constraint->role_rh)); return influence_nothing; } return influence_rsc_location; } static void influence_priority(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, pcmk__colocation_t *constraint) { const char *rh_value = NULL; const char *lh_value = NULL; const char *attribute = CRM_ATTR_ID; int score_multiplier = 1; if (!rsc_rh->allocated_to || !rsc_lh->allocated_to) { return; } if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } lh_value = pe_node_attribute_raw(rsc_lh->allocated_to, attribute); rh_value = pe_node_attribute_raw(rsc_rh->allocated_to, attribute); if (!pcmk__str_eq(lh_value, rh_value, pcmk__str_casei)) { if(constraint->score == INFINITY && constraint->role_lh == RSC_ROLE_MASTER) { rsc_lh->priority = -INFINITY; } return; } if (constraint->role_rh && (constraint->role_rh != rsc_rh->next_role)) { return; } if (constraint->role_lh == RSC_ROLE_SLAVE) { score_multiplier = -1; } rsc_lh->priority = pe__add_scores(score_multiplier * constraint->score, rsc_lh->priority); } static void colocation_match(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, pcmk__colocation_t *constraint) { const char *attribute = CRM_ATTR_ID; const char *value = NULL; GHashTable *work = NULL; GHashTableIter iter; pe_node_t *node = NULL; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (rsc_rh->allocated_to) { value = pe_node_attribute_raw(rsc_rh->allocated_to, attribute); } else if (constraint->score < 0) { // Nothing to do (anti-colocation with something that is not running) return; } work = pcmk__copy_node_table(rsc_lh->allowed_nodes); g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (rsc_rh->allocated_to == NULL) { pe_rsc_trace(rsc_lh, "%s: %s@%s -= %d (%s inactive)", constraint->id, rsc_lh->id, node->details->uname, constraint->score, rsc_rh->id); node->weight = pe__add_scores(-constraint->score, node->weight); } else if (pcmk__str_eq(pe_node_attribute_raw(node, attribute), value, pcmk__str_casei)) { if (constraint->score < CRM_SCORE_INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s@%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = pe__add_scores(constraint->score, node->weight); } } else if (constraint->score >= CRM_SCORE_INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s@%s -= %d (%s mismatch)", constraint->id, rsc_lh->id, node->details->uname, constraint->score, attribute); node->weight = pe__add_scores(-constraint->score, node->weight); } } if (can_run_any(work) || constraint->score <= -INFINITY || constraint->score >= INFINITY) { g_hash_table_destroy(rsc_lh->allowed_nodes); rsc_lh->allowed_nodes = work; work = NULL; } else { pe_rsc_info(rsc_lh, "%s: Rolling back scores from %s (no available nodes)", rsc_lh->id, rsc_rh->id); } if (work) { g_hash_table_destroy(work); } } void native_rsc_colocation_rh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { enum filter_colocation_res filter_results; CRM_ASSERT(rsc_lh); CRM_ASSERT(rsc_rh); filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint, FALSE); pe_rsc_trace(rsc_lh, "%s %s with %s (%s, score=%d, filter=%d)", ((constraint->score > 0)? "Colocating" : "Anti-colocating"), rsc_lh->id, rsc_rh->id, constraint->id, constraint->score, filter_results); switch (filter_results) { case influence_rsc_priority: influence_priority(rsc_lh, rsc_rh, constraint); break; case influence_rsc_location: colocation_match(rsc_lh, rsc_rh, constraint); break; case influence_nothing: default: return; } } static gboolean filter_rsc_ticket(pe_resource_t * rsc_lh, rsc_ticket_t * rsc_ticket) { if (rsc_ticket->role_lh != RSC_ROLE_UNKNOWN && rsc_ticket->role_lh != rsc_lh->role) { pe_rsc_trace(rsc_lh, "LH: Skipping constraint: \"%s\" state filter", role2text(rsc_ticket->role_lh)); return FALSE; } return TRUE; } void rsc_ticket_constraint(pe_resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working_set_t * data_set) { if (rsc_ticket == NULL) { pe_err("rsc_ticket was NULL"); return; } if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", rsc_ticket->id); return; } if (rsc_ticket->ticket->granted && rsc_ticket->ticket->standby == FALSE) { return; } if (rsc_lh->children) { GListPtr gIter = rsc_lh->children; pe_rsc_trace(rsc_lh, "Processing ticket dependencies from %s", rsc_lh->id); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; rsc_ticket_constraint(child_rsc, rsc_ticket, data_set); } return; } pe_rsc_trace(rsc_lh, "%s: Processing ticket dependency on %s (%s, %s)", rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id, role2text(rsc_ticket->role_lh)); if ((rsc_ticket->ticket->granted == FALSE) && (rsc_lh->running_on != NULL)) { GListPtr gIter = NULL; switch (rsc_ticket->loss_policy) { case loss_ticket_stop: resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); break; case loss_ticket_demote: // Promotion score will be set to -INFINITY in promotion_order() if (rsc_ticket->role_lh != RSC_ROLE_MASTER) { resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); } break; case loss_ticket_fence: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); for (gIter = rsc_lh->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pe_fence_node(data_set, node, "deadman ticket was lost", FALSE); } break; case loss_ticket_freeze: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } if (rsc_lh->running_on != NULL) { pe__clear_resource_flags(rsc_lh, pe_rsc_managed); pe__set_resource_flags(rsc_lh, pe_rsc_block); } break; } } else if (rsc_ticket->ticket->granted == FALSE) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__no_ticket__", data_set); } } else if (rsc_ticket->ticket->standby) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__ticket_standby__", data_set); } } } enum pe_action_flags native_action_flags(pe_action_t * action, pe_node_t * node) { return action->flags; } static inline bool is_primitive_action(pe_action_t *action) { return action && action->rsc && (action->rsc->variant == pe_native); } /*! * \internal * \brief Set action bits appropriately when pe_restart_order is used * * \param[in] first 'First' action in an ordering with pe_restart_order * \param[in] then 'Then' action in an ordering with pe_restart_order * \param[in] filter What ordering flags to care about * * \note pe_restart_order is set for "stop resource before starting it" and * "stop later group member before stopping earlier group member" */ static void handle_restart_ordering(pe_action_t *first, pe_action_t *then, enum pe_action_flags filter) { const char *reason = NULL; CRM_ASSERT(is_primitive_action(first)); CRM_ASSERT(is_primitive_action(then)); // We need to update the action in two cases: // ... if 'then' is required if (pcmk_is_set(filter, pe_action_optional) && !pcmk_is_set(then->flags, pe_action_optional)) { reason = "restart"; } /* ... if 'then' is unrunnable start of managed resource (if a resource * should restart but can't start, we still want to stop) */ if (pcmk_is_set(filter, pe_action_runnable) && !pcmk_is_set(then->flags, pe_action_runnable) && pcmk_is_set(then->rsc->flags, pe_rsc_managed) && pcmk__str_eq(then->task, RSC_START, pcmk__str_casei)) { reason = "stop"; } if (reason == NULL) { return; } pe_rsc_trace(first->rsc, "Handling %s -> %s for %s", first->uuid, then->uuid, reason); // Make 'first' required if it is runnable if (pcmk_is_set(first->flags, pe_action_runnable)) { pe_action_implies(first, then, pe_action_optional); } // Make 'first' required if 'then' is required if (!pcmk_is_set(then->flags, pe_action_optional)) { pe_action_implies(first, then, pe_action_optional); } // Make 'first' unmigratable if 'then' is unmigratable if (!pcmk_is_set(then->flags, pe_action_migrate_runnable)) { pe_action_implies(first, then, pe_action_migrate_runnable); } // Make 'then' unrunnable if 'first' is required but unrunnable if (!pcmk_is_set(first->flags, pe_action_optional) && !pcmk_is_set(first->flags, pe_action_runnable)) { pe_action_implies(then, first, pe_action_runnable); } } enum pe_graph_flags native_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set) { /* flags == get_action_flags(first, then_node) called from update_action() */ enum pe_graph_flags changed = pe_graph_none; enum pe_action_flags then_flags = then->flags; enum pe_action_flags first_flags = first->flags; crm_trace( "Testing %s on %s (0x%.6x) with %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, then->uuid, then->flags); if (type & pe_order_asymmetrical) { pe_resource_t *then_rsc = then->rsc; enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0; if (!then_rsc) { /* ignore */ } else if ((then_rsc_role == RSC_ROLE_STOPPED) && pcmk__str_eq(then->task, RSC_STOP, pcmk__str_casei)) { /* ignore... if 'then' is supposed to be stopped after 'first', but * then is already stopped, there is nothing to be done when non-symmetrical. */ } else if ((then_rsc_role >= RSC_ROLE_STARTED) && pcmk__str_eq(then->task, RSC_START, pcmk__str_casei) && pcmk_is_set(then->flags, pe_action_optional) && then->node && pcmk__list_of_1(then_rsc->running_on) && then->node->details == ((pe_node_t *) then_rsc->running_on->data)->details) { /* Ignore. If 'then' is supposed to be started after 'first', but * 'then' is already started, there is nothing to be done when * asymmetrical -- unless the start is mandatory, which indicates * the resource is restarting, and the ordering is still needed. */ } else if (!(first->flags & pe_action_runnable)) { /* prevent 'then' action from happening if 'first' is not runnable and * 'then' has not yet occurred. */ pe_action_implies(then, first, pe_action_optional); pe_action_implies(then, first, pe_action_runnable); pe_rsc_trace(then->rsc, "Unset optional and runnable on %s", then->uuid); } else { /* ignore... then is allowed to start/stop if it wants to. */ } } if (type & pe_order_implies_first) { if (pcmk_is_set(filter, pe_action_optional) && !pcmk_is_set(flags /* Should be then_flags? */, pe_action_optional)) { // Needs pcmk_is_set(first_flags, pe_action_optional) too? pe_rsc_trace(first->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); pe_action_implies(first, then, pe_action_optional); } if (pcmk_is_set(flags, pe_action_migrate_runnable) && !pcmk_is_set(then->flags, pe_action_migrate_runnable) && !pcmk_is_set(then->flags, pe_action_optional)) { pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid); pe_action_implies(first, then, pe_action_migrate_runnable); } } if (type & pe_order_implies_first_master) { if ((filter & pe_action_optional) && ((then->flags & pe_action_optional) == FALSE) && then->rsc && (then->rsc->role == RSC_ROLE_MASTER)) { pe_action_implies(first, then, pe_action_optional); if (pcmk_is_set(first->flags, pe_action_migrate_runnable) && !pcmk_is_set(then->flags, pe_action_migrate_runnable)) { pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid); pe_action_implies(first, then, pe_action_migrate_runnable); } pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); } } if ((type & pe_order_implies_first_migratable) && pcmk_is_set(filter, pe_action_optional)) { if (((then->flags & pe_action_migrate_runnable) == FALSE) || ((then->flags & pe_action_runnable) == FALSE)) { pe_rsc_trace(then->rsc, "Unset runnable on %s because %s is neither runnable or migratable", first->uuid, then->uuid); pe_action_implies(first, then, pe_action_runnable); } if ((then->flags & pe_action_optional) == 0) { pe_rsc_trace(then->rsc, "Unset optional on %s because %s is not optional", first->uuid, then->uuid); pe_action_implies(first, then, pe_action_optional); } } if ((type & pe_order_pseudo_left) && pcmk_is_set(filter, pe_action_optional)) { if ((first->flags & pe_action_runnable) == FALSE) { pe_action_implies(then, first, pe_action_migrate_runnable); pe__clear_action_flags(then, pe_action_pseudo); pe_rsc_trace(then->rsc, "Unset pseudo on %s because %s is not runnable", then->uuid, first->uuid); } } if (pcmk_is_set(type, pe_order_runnable_left) && pcmk_is_set(filter, pe_action_runnable) && pcmk_is_set(then->flags, pe_action_runnable) && !pcmk_is_set(flags, pe_action_runnable)) { pe_rsc_trace(then->rsc, "Unset runnable on %s because of %s", then->uuid, first->uuid); pe_action_implies(then, first, pe_action_runnable); pe_action_implies(then, first, pe_action_migrate_runnable); } if (pcmk_is_set(type, pe_order_implies_then) && pcmk_is_set(filter, pe_action_optional) && pcmk_is_set(then->flags, pe_action_optional) && !pcmk_is_set(flags, pe_action_optional)) { /* in this case, treat migrate_runnable as if first is optional */ if (!pcmk_is_set(first->flags, pe_action_migrate_runnable)) { pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", then->uuid, first->uuid); pe_action_implies(then, first, pe_action_optional); } } if (pcmk_is_set(type, pe_order_restart)) { handle_restart_ordering(first, then, filter); } if (then_flags != then->flags) { pe__set_graph_flags(changed, first, pe_graph_updated_then); pe_rsc_trace(then->rsc, "Then: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", then->uuid, then->node ? then->node->details->uname : "[none]", then->flags, then_flags, first->uuid, first->flags); if(then->rsc && then->rsc->parent) { /* "X_stop then X_start" doesn't get handled for cloned groups unless we do this */ update_action(then, data_set); } } if (first_flags != first->flags) { pe__set_graph_flags(changed, first, pe_graph_updated_first); pe_rsc_trace(first->rsc, "First: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, first_flags, then->uuid, then->flags); } return changed; } void native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { GListPtr gIter = NULL; bool need_role = false; CRM_CHECK((constraint != NULL) && (rsc != NULL), return); // If a role was specified, ensure constraint is applicable need_role = (constraint->role_filter > RSC_ROLE_UNKNOWN); if (need_role && (constraint->role_filter != rsc->next_role)) { pe_rsc_trace(rsc, "Not applying %s to %s because role will be %s not %s", constraint->id, rsc->id, role2text(rsc->next_role), role2text(constraint->role_filter)); return; } if (constraint->node_list_rh == NULL) { pe_rsc_trace(rsc, "Not applying %s to %s because no nodes match", constraint->id, rsc->id); return; } pe_rsc_trace(rsc, "Applying %s%s%s to %s", constraint->id, (need_role? " for role " : ""), (need_role? role2text(constraint->role_filter) : ""), rsc->id); for (gIter = constraint->node_list_rh; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pe_node_t *other_node = NULL; other_node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (other_node != NULL) { pe_rsc_trace(rsc, "* + %d on %s", node->weight, node->details->uname); other_node->weight = pe__add_scores(other_node->weight, node->weight); } else { pe_rsc_trace(rsc, "* = %d on %s", node->weight, node->details->uname); other_node = pe__copy_node(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) other_node->details->id, other_node); } if (other_node->rsc_discover_mode < constraint->discover_mode) { if (constraint->discover_mode == pe_discover_exclusive) { rsc->exclusive_discover = TRUE; } /* exclusive > never > always... always is default */ other_node->rsc_discover_mode = constraint->discover_mode; } } } void native_expand(pe_resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Processing actions from %s", rsc->id); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; crm_trace("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } } #define log_change(a, fmt, args...) do { \ if(a && a->reason && terminal) { \ printf(" * "fmt" \tdue to %s\n", ##args, a->reason); \ } else if(a && a->reason) { \ crm_notice(fmt" \tdue to %s", ##args, a->reason); \ } else if(terminal) { \ printf(" * "fmt"\n", ##args); \ } else { \ crm_notice(fmt, ##args); \ } \ } while(0) #define STOP_SANITY_ASSERT(lineno) do { \ if(current && current->details->unclean) { \ /* It will be a pseudo op */ \ } else if(stop == NULL) { \ crm_err("%s:%d: No stop action exists for %s", \ __func__, lineno, rsc->id); \ CRM_ASSERT(stop != NULL); \ } else if (pcmk_is_set(stop->flags, pe_action_optional)) { \ crm_err("%s:%d: Action %s is still optional", \ __func__, lineno, stop->uuid); \ CRM_ASSERT(!pcmk_is_set(stop->flags, pe_action_optional)); \ } \ } while(0) static void LogAction(const char *change, pe_resource_t *rsc, pe_node_t *origin, pe_node_t *destination, pe_action_t *action, pe_action_t *source, gboolean terminal) { int len = 0; char *reason = NULL; char *details = NULL; bool same_host = FALSE; bool same_role = FALSE; bool need_role = FALSE; static int rsc_width = 5; static int detail_width = 5; CRM_ASSERT(action); CRM_ASSERT(destination != NULL || origin != NULL); if(source == NULL) { source = action; } len = strlen(rsc->id); if(len > rsc_width) { rsc_width = len + 2; } if(rsc->role > RSC_ROLE_STARTED || rsc->next_role > RSC_ROLE_SLAVE) { need_role = TRUE; } if(origin != NULL && destination != NULL && origin->details == destination->details) { same_host = TRUE; } if(rsc->role == rsc->next_role) { same_role = TRUE; } if (need_role && (origin == NULL)) { /* Starting and promoting a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), role2text(rsc->next_role), destination->details->uname); } else if (origin == NULL) { /* Starting a resource */ details = crm_strdup_printf("%s", destination->details->uname); } else if (need_role && (destination == NULL)) { /* Stopping a promotable clone instance */ details = crm_strdup_printf("%s %s", role2text(rsc->role), origin->details->uname); } else if (destination == NULL) { /* Stopping a resource */ details = crm_strdup_printf("%s", origin->details->uname); } else if (need_role && same_role && same_host) { /* Recovering, restarting or re-promoting a promotable clone instance */ details = crm_strdup_printf("%s %s", role2text(rsc->role), origin->details->uname); } else if (same_role && same_host) { /* Recovering or Restarting a normal resource */ details = crm_strdup_printf("%s", origin->details->uname); } else if (need_role && same_role) { /* Moving a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", origin->details->uname, destination->details->uname, role2text(rsc->role)); } else if (same_role) { /* Moving a normal resource */ details = crm_strdup_printf("%s -> %s", origin->details->uname, destination->details->uname); } else if (same_host) { /* Promoting or demoting a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), role2text(rsc->next_role), origin->details->uname); } else { /* Moving and promoting/demoting */ details = crm_strdup_printf("%s %s -> %s %s", role2text(rsc->role), origin->details->uname, role2text(rsc->next_role), destination->details->uname); } len = strlen(details); if(len > detail_width) { detail_width = len; } if(source->reason && !pcmk_is_set(action->flags, pe_action_runnable)) { reason = crm_strdup_printf(" due to %s (blocked)", source->reason); } else if(source->reason) { reason = crm_strdup_printf(" due to %s", source->reason); } else if (!pcmk_is_set(action->flags, pe_action_runnable)) { reason = strdup(" blocked"); } else { reason = strdup(""); } if(terminal) { printf(" * %-8s %-*s ( %*s ) %s\n", change, rsc_width, rsc->id, detail_width, details, reason); } else { crm_notice(" * %-8s %-*s ( %*s ) %s", change, rsc_width, rsc->id, detail_width, details, reason); } free(details); free(reason); } void LogActions(pe_resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) { pe_node_t *next = NULL; pe_node_t *current = NULL; pe_node_t *start_node = NULL; pe_action_t *stop = NULL; pe_action_t *start = NULL; pe_action_t *demote = NULL; pe_action_t *promote = NULL; char *key = NULL; gboolean moving = FALSE; GListPtr possible_matches = NULL; if(rsc->variant == pe_container) { pcmk__bundle_log_actions(rsc, data_set, terminal); return; } if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; LogActions(child_rsc, data_set, terminal); } return; } next = rsc->allocated_to; if (rsc->running_on) { current = pe__current_node(rsc); if (rsc->role == RSC_ROLE_STOPPED) { /* * This can occur when resources are being recovered * We fiddle with the current role in native_create_actions() */ rsc->role = RSC_ROLE_STARTED; } } if ((current == NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan)) { /* Don't log stopped orphans */ return; } if (!pcmk_is_set(rsc->flags, pe_rsc_managed) || (current == NULL && next == NULL)) { pe_rsc_info(rsc, "Leave %s\t(%s%s)", rsc->id, role2text(rsc->role), !pcmk_is_set(rsc->flags, pe_rsc_managed)? " unmanaged" : ""); return; } if (current != NULL && next != NULL && !pcmk__str_eq(current->details->id, next->details->id, pcmk__str_casei)) { moving = TRUE; } possible_matches = pe__resource_actions(rsc, next, RSC_START, FALSE); if (possible_matches) { start = possible_matches->data; g_list_free(possible_matches); } if ((start == NULL) || !pcmk_is_set(start->flags, pe_action_runnable)) { start_node = NULL; } else { start_node = current; } possible_matches = pe__resource_actions(rsc, start_node, RSC_STOP, FALSE); if (possible_matches) { stop = possible_matches->data; g_list_free(possible_matches); } possible_matches = pe__resource_actions(rsc, next, RSC_PROMOTE, FALSE); if (possible_matches) { promote = possible_matches->data; g_list_free(possible_matches); } possible_matches = pe__resource_actions(rsc, next, RSC_DEMOTE, FALSE); if (possible_matches) { demote = possible_matches->data; g_list_free(possible_matches); } if (rsc->role == rsc->next_role) { pe_action_t *migrate_op = NULL; possible_matches = pe__resource_actions(rsc, next, RSC_MIGRATED, FALSE); if (possible_matches) { migrate_op = possible_matches->data; } CRM_CHECK(next != NULL,); if (next == NULL) { } else if ((migrate_op != NULL) && (current != NULL) && pcmk_is_set(migrate_op->flags, pe_action_runnable)) { LogAction("Migrate", rsc, current, next, start, NULL, terminal); } else if (pcmk_is_set(rsc->flags, pe_rsc_reload)) { LogAction("Reload", rsc, current, next, start, NULL, terminal); } else if (start == NULL || pcmk_is_set(start->flags, pe_action_optional)) { if ((demote != NULL) && (promote != NULL) && !pcmk_is_set(demote->flags, pe_action_optional) && !pcmk_is_set(promote->flags, pe_action_optional)) { LogAction("Re-promote", rsc, current, next, promote, demote, terminal); } else { pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } } else if (!pcmk_is_set(start->flags, pe_action_runnable)) { LogAction("Stop", rsc, current, NULL, stop, (stop && stop->reason)? stop : start, terminal); STOP_SANITY_ASSERT(__LINE__); } else if (moving && current) { LogAction(pcmk_is_set(rsc->flags, pe_rsc_failed)? "Recover" : "Move", rsc, current, next, stop, NULL, terminal); } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { LogAction("Recover", rsc, current, NULL, stop, NULL, terminal); STOP_SANITY_ASSERT(__LINE__); } else { LogAction("Restart", rsc, current, next, start, NULL, terminal); /* STOP_SANITY_ASSERT(__LINE__); False positive for migrate-fail-7 */ } g_list_free(possible_matches); return; } if(stop && (rsc->next_role == RSC_ROLE_STOPPED || (start && !pcmk_is_set(start->flags, pe_action_runnable)))) { GListPtr gIter = NULL; key = stop_key(rsc); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pe_action_t *stop_op = NULL; possible_matches = find_actions(rsc->actions, key, node); if (possible_matches) { stop_op = possible_matches->data; g_list_free(possible_matches); } if (stop_op && (stop_op->flags & pe_action_runnable)) { STOP_SANITY_ASSERT(__LINE__); } LogAction("Stop", rsc, node, NULL, stop_op, (stop_op && stop_op->reason)? stop_op : start, terminal); } free(key); } else if ((stop != NULL) && pcmk_all_flags_set(rsc->flags, pe_rsc_failed|pe_rsc_stop)) { /* 'stop' may be NULL if the failure was ignored */ LogAction("Recover", rsc, current, next, stop, start, terminal); STOP_SANITY_ASSERT(__LINE__); } else if (moving) { LogAction("Move", rsc, current, next, stop, NULL, terminal); STOP_SANITY_ASSERT(__LINE__); } else if (pcmk_is_set(rsc->flags, pe_rsc_reload)) { LogAction("Reload", rsc, current, next, start, NULL, terminal); } else if (stop != NULL && !pcmk_is_set(stop->flags, pe_action_optional)) { LogAction("Restart", rsc, current, next, start, NULL, terminal); STOP_SANITY_ASSERT(__LINE__); } else if (rsc->role == RSC_ROLE_MASTER) { CRM_LOG_ASSERT(current != NULL); LogAction("Demote", rsc, current, next, demote, NULL, terminal); } else if(rsc->next_role == RSC_ROLE_MASTER) { CRM_LOG_ASSERT(next); LogAction("Promote", rsc, current, next, promote, NULL, terminal); } else if (rsc->role == RSC_ROLE_STOPPED && rsc->next_role > RSC_ROLE_STOPPED) { LogAction("Start", rsc, current, next, start, NULL, terminal); } } gboolean StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; pe_action_t *stop; if (rsc->partial_migration_target) { if (rsc->partial_migration_target->details == current->details) { pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname, next->details->uname, rsc->id); continue; } else { pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id); optional = FALSE; } } pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname); stop = stop_action(rsc, current, optional); if(rsc->allocated_to == NULL) { pe_action_set_reason(stop, "node availability", TRUE); } if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { update_action_flags(stop, pe_action_runnable | pe_action_clear, __func__, __LINE__); } if (pcmk_is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set); } if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) { pe_action_t *unfence = pe_fence_op(current, "on", TRUE, NULL, FALSE, data_set); order_actions(stop, unfence, pe_order_implies_first); if (!node_has_been_unfenced(current)) { pe_proc_err("Stopping %s until %s can be unfenced", rsc->id, current->details->uname); } } } return TRUE; } static void order_after_unfencing(pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order, pe_working_set_t *data_set) { /* When unfencing is in use, we order unfence actions before any probe or * start of resources that require unfencing, and also of fence devices. * * This might seem to violate the principle that fence devices require * only quorum. However, fence agents that unfence often don't have enough * information to even probe or start unless the node is first unfenced. */ if (is_unfence_device(rsc, data_set) || pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) { /* Start with an optional ordering. Requiring unfencing would result in * the node being unfenced, and all its resources being stopped, * whenever a new resource is added -- which would be highly suboptimal. */ pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, data_set); order_actions(unfence, action, order); if (!node_has_been_unfenced(node)) { // But unfencing is required if it has never been done char *reason = crm_strdup_printf("required by %s %s", rsc->id, action->task); trigger_unfencing(NULL, node, reason, NULL, data_set); free(reason); } } } gboolean StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { pe_action_t *start = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s on %s %d %d", rsc->id, next ? next->details->uname : "N/A", optional, next ? next->weight : 0); start = start_action(rsc, next, TRUE); order_after_unfencing(rsc, next, start, pe_order_implies_then, data_set); if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) { update_action_flags(start, pe_action_optional | pe_action_clear, __func__, __LINE__); } return TRUE; } gboolean PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; CRM_ASSERT(rsc); CRM_CHECK(next != NULL, return FALSE); pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname); action_list = pe__resource_actions(rsc, next, RSC_START, TRUE); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *start = (pe_action_t *) gIter->data; if (!pcmk_is_set(start->flags, pe_action_runnable)) { runnable = FALSE; } } g_list_free(action_list); if (runnable) { promote_action(rsc, next, optional); return TRUE; } pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id); action_list = pe__resource_actions(rsc, next, RSC_PROMOTE, TRUE); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *promote = (pe_action_t *) gIter->data; update_action_flags(promote, pe_action_runnable | pe_action_clear, __func__, __LINE__); } g_list_free(action_list); return TRUE; } gboolean DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A"); demote_action(rsc, current, optional); } return TRUE; } gboolean RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); crm_err("%s on %s", rsc->id, next ? next->details->uname : "N/A"); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); return FALSE; } gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set) { if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if (node == NULL) { pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if (node->details->unclean || node->details->online == FALSE) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete_action(rsc, node, optional); new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, optional ? pe_order_implies_then : pe_order_optional, data_set); new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, optional ? pe_order_implies_then : pe_order_optional, data_set); return TRUE; } gboolean native_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete, gboolean force, pe_working_set_t * data_set) { enum pe_ordering flags = pe_order_optional; char *key = NULL; pe_action_t *probe = NULL; pe_node_t *running = NULL; pe_node_t *allowed = NULL; pe_resource_t *top = uber_parent(rsc); static const char *rc_master = NULL; static const char *rc_inactive = NULL; if (rc_inactive == NULL) { rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); rc_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); } CRM_CHECK(node != NULL, return FALSE); if (!force && !pcmk_is_set(data_set->flags, pe_flag_startup_probes)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id); return FALSE; } if (pe__is_guest_or_remote_node(node)) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because Pacemaker Remote nodes cannot run stonith agents", rsc->id, node->details->id); return FALSE; } else if (pe__is_guest_node(node) && pe__resource_contains_guest_node(data_set, rsc)) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because guest nodes cannot run resources containing guest nodes", rsc->id, node->details->id); return FALSE; } else if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because Pacemaker Remote nodes cannot host remote connections", rsc->id, node->details->id); return FALSE; } } if (rsc->children) { GListPtr gIter = NULL; gboolean any_created = FALSE; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set) || any_created; } return any_created; } else if ((rsc->container) && (!rsc->is_remote_node)) { pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id); return FALSE; } if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id); return FALSE; } // Check whether resource is already known on node if (!force && g_hash_table_lookup(rsc->known_on, node->details->id)) { pe_rsc_trace(rsc, "Skipping known: %s on %s", rsc->id, node->details->uname); return FALSE; } allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (rsc->exclusive_discover || top->exclusive_discover) { if (allowed == NULL) { /* exclusive discover is enabled and this node is not in the allowed list. */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, A", rsc->id, node->details->id); return FALSE; } else if (allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, B", rsc->id, node->details->id); return FALSE; } } if(allowed == NULL && node->rsc_discover_mode == pe_discover_never) { /* If this node was allowed to host this resource it would * have been explicitly added to the 'allowed_nodes' list. * However it wasn't and the node has discovery disabled, so * no need to probe for this resource. */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, C", rsc->id, node->details->id); return FALSE; } if (allowed && allowed->rsc_discover_mode == pe_discover_never) { /* this resource is marked as not needing to be discovered on this node */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, discovery mode", rsc->id, node->details->id); return FALSE; } if (pe__is_guest_node(node)) { pe_resource_t *remote = node->details->remote_rsc->container; if(remote->role == RSC_ROLE_STOPPED) { /* If the container is stopped, then we know anything that * might have been inside it is also stopped and there is * no need to probe. * * If we don't know the container's state on the target * either: * * - the container is running, the transition will abort * and we'll end up in a different case next time, or * * - the container is stopped * * Either way there is no need to probe. * */ if(remote->allocated_to && g_hash_table_lookup(remote->known_on, remote->allocated_to->details->id) == NULL) { /* For safety, we order the 'rsc' start after 'remote' * has been probed. * * Using 'top' helps for groups, but we may need to * follow the start's ordering chain backwards. */ custom_action_order(remote, pcmk__op_key(remote->id, RSC_STATUS, 0), NULL, top, pcmk__op_key(top->id, RSC_START, 0), NULL, pe_order_optional, data_set); } pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopped", rsc->id, node->details->id, remote->id); return FALSE; /* Here we really we want to check if remote->stop is required, * but that information doesn't exist yet */ } else if(node->details->remote_requires_reset || node->details->unclean || pcmk_is_set(remote->flags, pe_rsc_failed) || remote->next_role == RSC_ROLE_STOPPED || (remote->allocated_to && pe_find_node(remote->running_on, remote->allocated_to->details->uname) == NULL) ) { /* The container is stopping or restarting, don't start * 'rsc' until 'remote' stops as this also implies that * 'rsc' is stopped - avoiding the need to probe */ custom_action_order(remote, pcmk__op_key(remote->id, RSC_STOP, 0), NULL, top, pcmk__op_key(top->id, RSC_START, 0), NULL, pe_order_optional, data_set); pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopping, restarting or moving", rsc->id, node->details->id, remote->id); return FALSE; /* } else { * The container is running so there is no problem probing it */ } } key = pcmk__op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); update_action_flags(probe, pe_action_optional | pe_action_clear, __func__, __LINE__); order_after_unfencing(rsc, node, probe, pe_order_optional, data_set); /* * We need to know if it's running_on (not just known_on) this node * to correctly determine the target rc. */ running = pe_find_node_id(rsc->running_on, node->details->id); if (running == NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); } else if (rsc->role == RSC_ROLE_MASTER) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_master); } crm_debug("Probing %s on %s (%s) %d %p", rsc->id, node->details->uname, role2text(rsc->role), pcmk_is_set(probe->flags, pe_action_runnable), rsc->running_on); if (is_unfence_device(rsc, data_set) || !pe_rsc_is_clone(top)) { top = rsc; } else { crm_trace("Probing %s on %s (%s) as %s", rsc->id, node->details->uname, role2text(rsc->role), top->id); } if (!pcmk_is_set(probe->flags, pe_action_runnable) && (rsc->running_on == NULL)) { /* Prevent the start from occurring if rsc isn't active, but * don't cause it to stop if it was active already */ pe__set_order_flags(flags, pe_order_runnable_left); } custom_action_order(rsc, NULL, probe, top, pcmk__op_key(top->id, RSC_START, 0), NULL, flags, data_set); /* Before any reloads, if they exist */ custom_action_order(rsc, NULL, probe, top, reload_key(rsc), NULL, pe_order_optional, data_set); #if 0 // complete is always null currently if (!is_unfence_device(rsc, data_set)) { /* Normally rsc.start depends on probe complete which depends * on rsc.probe. But this can't be the case for fence devices * with unfencing, as it would create graph loops. * * So instead we explicitly order 'rsc.probe then rsc.start' */ order_actions(probe, complete, pe_order_implies_then); } #endif return TRUE; } /*! * \internal * \brief Check whether a resource is known on a particular node * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return TRUE if resource (or parent if an anonymous clone) is known */ static bool rsc_is_known_on(pe_resource_t *rsc, const pe_node_t *node) { if (pe_hash_table_lookup(rsc->known_on, node->details->id)) { return TRUE; } else if ((rsc->variant == pe_native) && pe_rsc_is_anon_clone(rsc->parent) && pe_hash_table_lookup(rsc->parent->known_on, node->details->id)) { /* We check only the parent, not the uber-parent, because we cannot * assume that the resource is known if it is in an anonymously cloned * group (which may be only partially known). */ return TRUE; } return FALSE; } /*! * \internal * \brief Order a resource's start and promote actions relative to fencing * * \param[in] rsc Resource to be ordered * \param[in] stonith_op Fence action * \param[in] data_set Cluster information */ static void native_start_constraints(pe_resource_t * rsc, pe_action_t * stonith_op, pe_working_set_t * data_set) { pe_node_t *target; GListPtr gIter = NULL; CRM_CHECK(stonith_op && stonith_op->node, return); target = stonith_op->node; for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; switch (action->needs) { case rsc_req_nothing: // Anything other than start or promote requires nothing break; case rsc_req_stonith: order_actions(stonith_op, action, pe_order_optional); break; case rsc_req_quorum: if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei) && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id) && !rsc_is_known_on(rsc, target)) { /* If we don't know the status of the resource on the node * we're about to shoot, we have to assume it may be active * there. Order the resource start after the fencing. This * is analogous to waiting for all the probes for a resource * to complete before starting it. * * The most likely explanation is that the DC died and took * its status with it. */ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, target->details->uname); order_actions(stonith_op, action, pe_order_optional | pe_order_runnable_left); } break; } } } static void native_stop_constraints(pe_resource_t * rsc, pe_action_t * stonith_op, pe_working_set_t * data_set) { GListPtr gIter = NULL; GListPtr action_list = NULL; bool order_implicit = false; pe_resource_t *top = uber_parent(rsc); pe_action_t *parent_stop = NULL; pe_node_t *target; CRM_CHECK(stonith_op && stonith_op->node, return); target = stonith_op->node; /* Get a list of stop actions potentially implied by the fencing */ action_list = pe__resource_actions(rsc, target, RSC_STOP, FALSE); /* If resource requires fencing, implicit actions must occur after fencing. * * Implied stops and demotes of resources running on guest nodes are always * ordered after fencing, even if the resource does not require fencing, * because guest node "fencing" is actually just a resource stop. */ if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing) || pe__is_guest_node(target)) { order_implicit = true; } if (action_list && order_implicit) { parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); } for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; // The stop would never complete, so convert it into a pseudo-action. update_action_flags(action, pe_action_pseudo|pe_action_runnable, __func__, __LINE__); if (order_implicit) { update_action_flags(action, pe_action_implied_by_stonith, __func__, __LINE__); /* Order the stonith before the parent stop (if any). * * Also order the stonith before the resource stop, unless the * resource is inside a bundle -- that would cause a graph loop. * We can rely on the parent stop's ordering instead. * * User constraints must not order a resource in a guest node * relative to the guest node container resource. The * pe_order_preserve flag marks constraints as generated by the * cluster and thus immune to that check (and is irrelevant if * target is not a guest). */ if (!pe_rsc_is_bundled(rsc)) { order_actions(stonith_op, action, pe_order_preserve); } order_actions(stonith_op, parent_stop, pe_order_preserve); } if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { crm_notice("Stop of failed resource %s is implicit %s %s is fenced", rsc->id, (order_implicit? "after" : "because"), target->details->uname); } else { crm_info("%s is implicit %s %s is fenced", action->uuid, (order_implicit? "after" : "because"), target->details->uname); } if (pcmk_is_set(rsc->flags, pe_rsc_notify)) { /* Create a second notification that will be delivered * immediately after the node is fenced * * Basic problem: * - C is a clone active on the node to be shot and stopping on another * - R is a resource that depends on C * * + C.stop depends on R.stop * + C.stopped depends on STONITH * + C.notify depends on C.stopped * + C.healthy depends on C.notify * + R.stop depends on C.healthy * * The extra notification here changes * + C.healthy depends on C.notify * into: * + C.healthy depends on C.notify' * + C.notify' depends on STONITH' * thus breaking the loop */ create_secondary_notification(action, rsc, stonith_op, data_set); } /* From Bug #1601, successful fencing must be an input to a failed resources stop action. However given group(rA, rB) running on nodeX and B.stop has failed, A := stop healthy resource (rA.stop) B := stop failed resource (pseudo operation B.stop) C := stonith nodeX A requires B, B requires C, C requires A This loop would prevent the cluster from making progress. This block creates the "C requires A" dependency and therefore must (at least for now) be disabled. Instead, run the block above and treat all resources on nodeX as B would be (marked as a pseudo op depending on the STONITH). TODO: Break the "A requires B" dependency in update_action() and re-enable this block } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); * stop healthy resources before the * stonith op * custom_action_order( rsc, stop_key(rsc), NULL, NULL,strdup(CRM_OP_FENCE),stonith_op, pe_order_optional, data_set); */ } g_list_free(action_list); /* Get a list of demote actions potentially implied by the fencing */ action_list = pe__resource_actions(rsc, target, RSC_DEMOTE, FALSE); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (action->node->details->online == FALSE || action->node->details->unclean == TRUE || pcmk_is_set(rsc->flags, pe_rsc_failed)) { if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_info(rsc, "Demote of failed resource %s is implicit after %s is fenced", rsc->id, target->details->uname); } else { pe_rsc_info(rsc, "%s is implicit after %s is fenced", action->uuid, target->details->uname); } /* The demote would never complete and is now implied by the * fencing, so convert it into a pseudo-action. */ update_action_flags(action, pe_action_pseudo|pe_action_runnable, __func__, __LINE__); if (pe_rsc_is_bundled(rsc)) { /* Do nothing, let the recovery be ordered after the parent's implied stop */ } else if (order_implicit) { order_actions(stonith_op, action, pe_order_preserve|pe_order_optional); } } } g_list_free(action_list); } void rsc_stonith_ordering(pe_resource_t * rsc, pe_action_t * stonith_op, pe_working_set_t * data_set) { if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; rsc_stonith_ordering(child_rsc, stonith_op, data_set); } } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); } else { native_start_constraints(rsc, stonith_op, data_set); native_stop_constraints(rsc, stonith_op, data_set); } } void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set) { GListPtr gIter = NULL; pe_action_t *reload = NULL; if (rsc->children) { for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; ReloadRsc(child_rsc, node, data_set); } return; } else if (rsc->variant > pe_native) { /* Complex resource with no children */ return; } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "%s: unmanaged", rsc->id); return; } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { /* We don't need to specify any particular actions here, normal failure * recovery will apply. */ pe_rsc_trace(rsc, "%s: preventing reload because failed", rsc->id); return; } else if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) { /* If a resource's configuration changed while a start was pending, * force a full restart. */ pe_rsc_trace(rsc, "%s: preventing reload because start pending", rsc->id); stop_action(rsc, node, FALSE); return; } else if (node == NULL) { pe_rsc_trace(rsc, "%s: not active", rsc->id); return; } pe_rsc_trace(rsc, "Processing %s", rsc->id); pe__set_resource_flags(rsc, pe_rsc_reload); reload = custom_action( rsc, reload_key(rsc), CRMD_ACTION_RELOAD, node, FALSE, TRUE, data_set); pe_action_set_reason(reload, "resource definition change", FALSE); custom_action_order(NULL, NULL, reload, rsc, stop_key(rsc), NULL, pe_order_optional|pe_order_then_cancels_first, data_set); custom_action_order(NULL, NULL, reload, rsc, demote_key(rsc), NULL, pe_order_optional|pe_order_then_cancels_first, data_set); } void native_append_meta(pe_resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); pe_resource_t *parent; if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE); if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE); crm_xml_add(xml, name, value); free(name); } for (parent = rsc; parent != NULL; parent = parent->parent) { if (parent->container) { crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, parent->container->id); } } } diff --git a/lib/pacemaker/pcmk_sched_utils.c b/lib/pacemaker/pcmk_sched_utils.c index 177f43eb48..6a0b5bc337 100644 --- a/lib/pacemaker/pcmk_sched_utils.c +++ b/lib/pacemaker/pcmk_sched_utils.c @@ -1,767 +1,767 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // lrmd_event_data_t #include #include pe__location_t * rsc2node_new(const char *id, pe_resource_t *rsc, int node_weight, const char *discover_mode, pe_node_t *foo_node, pe_working_set_t *data_set) { pe__location_t *new_con = NULL; if (rsc == NULL || id == NULL) { pe_err("Invalid constraint %s for rsc=%p", crm_str(id), rsc); return NULL; } else if (foo_node == NULL) { CRM_CHECK(node_weight == 0, return NULL); } new_con = calloc(1, sizeof(pe__location_t)); if (new_con != NULL) { new_con->id = strdup(id); new_con->rsc_lh = rsc; new_con->node_list_rh = NULL; new_con->role_filter = RSC_ROLE_UNKNOWN; if (pcmk__str_eq(discover_mode, "always", pcmk__str_null_matches | pcmk__str_casei)) { new_con->discover_mode = pe_discover_always; } else if (pcmk__str_eq(discover_mode, "never", pcmk__str_casei)) { new_con->discover_mode = pe_discover_never; } else if (pcmk__str_eq(discover_mode, "exclusive", pcmk__str_casei)) { new_con->discover_mode = pe_discover_exclusive; rsc->exclusive_discover = TRUE; } else { pe_err("Invalid %s value %s in location constraint", XML_LOCATION_ATTR_DISCOVERY, discover_mode); } if (foo_node != NULL) { pe_node_t *copy = pe__copy_node(foo_node); copy->weight = node_weight; new_con->node_list_rh = g_list_prepend(NULL, copy); } data_set->placement_constraints = g_list_prepend(data_set->placement_constraints, new_con); rsc->rsc_location = g_list_prepend(rsc->rsc_location, new_con); } return new_con; } gboolean can_run_resources(const pe_node_t * node) { if (node == NULL) { return FALSE; } #if 0 if (node->weight < 0) { return FALSE; } #endif if (node->details->online == FALSE || node->details->shutdown || node->details->unclean || node->details->standby || node->details->maintenance) { crm_trace("%s: online=%d, unclean=%d, standby=%d, maintenance=%d", node->details->uname, node->details->online, node->details->unclean, node->details->standby, node->details->maintenance); return FALSE; } return TRUE; } /*! * \internal * \brief Copy a hash table of node objects * * \param[in] nodes Hash table to copy * * \return New copy of nodes (or NULL if nodes is NULL) */ GHashTable * pcmk__copy_node_table(GHashTable *nodes) { GHashTable *new_table = NULL; GHashTableIter iter; pe_node_t *node = NULL; if (nodes == NULL) { return NULL; } new_table = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { pe_node_t *new_node = pe__copy_node(node); g_hash_table_insert(new_table, (gpointer) new_node->details->id, new_node); } return new_table; } /*! * \internal * \brief Copy a list of node objects * * \param[in] list List to copy * \param[in] reset Set copies' scores to 0 * * \return New list of shallow copies of nodes in original list */ GList * pcmk__copy_node_list(const GList *list, bool reset) { GList *result = NULL; for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *new_node = NULL; pe_node_t *this_node = (pe_node_t *) gIter->data; new_node = pe__copy_node(this_node); if (reset) { new_node->weight = 0; } result = g_list_prepend(result, new_node); } return result; } struct node_weight_s { pe_node_t *active; pe_working_set_t *data_set; }; /* return -1 if 'a' is more preferred * return 1 if 'b' is more preferred */ static gint sort_node_weight(gconstpointer a, gconstpointer b, gpointer data) { const pe_node_t *node1 = (const pe_node_t *)a; const pe_node_t *node2 = (const pe_node_t *)b; struct node_weight_s *nw = data; int node1_weight = 0; int node2_weight = 0; int result = 0; if (a == NULL) { return 1; } if (b == NULL) { return -1; } node1_weight = node1->weight; node2_weight = node2->weight; if (can_run_resources(node1) == FALSE) { node1_weight = -INFINITY; } if (can_run_resources(node2) == FALSE) { node2_weight = -INFINITY; } if (node1_weight > node2_weight) { crm_trace("%s (%d) > %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return -1; } if (node1_weight < node2_weight) { crm_trace("%s (%d) < %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return 1; } crm_trace("%s (%d) == %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); if (pcmk__str_eq(nw->data_set->placement_strategy, "minimal", pcmk__str_casei)) { goto equal; } if (pcmk__str_eq(nw->data_set->placement_strategy, "balanced", pcmk__str_casei)) { result = compare_capacity(node1, node2); if (result < 0) { crm_trace("%s > %s : capacity (%d)", node1->details->uname, node2->details->uname, result); return -1; } else if (result > 0) { crm_trace("%s < %s : capacity (%d)", node1->details->uname, node2->details->uname, result); return 1; } } /* now try to balance resources across the cluster */ if (node1->details->num_resources < node2->details->num_resources) { crm_trace("%s (%d) > %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if (node1->details->num_resources > node2->details->num_resources) { crm_trace("%s (%d) < %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } if (nw->active && nw->active->details == node1->details) { crm_trace("%s (%d) > %s (%d) : active", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if (nw->active && nw->active->details == node2->details) { crm_trace("%s (%d) < %s (%d) : active", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } equal: crm_trace("%s = %s", node1->details->uname, node2->details->uname); return strcmp(node1->details->uname, node2->details->uname); } GList * sort_nodes_by_weight(GList *nodes, pe_node_t *active_node, pe_working_set_t *data_set) { struct node_weight_s nw = { active_node, data_set }; return g_list_sort_with_data(nodes, sort_node_weight, &nw); } void native_deallocate(pe_resource_t * rsc) { if (rsc->allocated_to) { pe_node_t *old = rsc->allocated_to; crm_info("Deallocating %s from %s", rsc->id, old->details->uname); pe__set_resource_flags(rsc, pe_rsc_provisional); rsc->allocated_to = NULL; old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, rsc); old->details->num_resources--; /* old->count--; */ calculate_utilization(old->details->utilization, rsc->utilization, TRUE); free(old); } } gboolean -native_assign_node(pe_resource_t * rsc, GListPtr nodes, pe_node_t * chosen, gboolean force) +native_assign_node(pe_resource_t *rsc, pe_node_t *chosen, gboolean force) { CRM_ASSERT(rsc->variant == pe_native); if (force == FALSE && chosen != NULL) { bool unset = FALSE; if(chosen->weight < 0) { unset = TRUE; // Allow the graph to assume that the remote resource will come up } else if (!can_run_resources(chosen) && !pe__is_guest_node(chosen)) { unset = TRUE; } if(unset) { crm_debug("All nodes for resource %s are unavailable" ", unclean or shutting down (%s: %d, %d)", rsc->id, chosen->details->uname, can_run_resources(chosen), chosen->weight); pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability"); chosen = NULL; } } /* todo: update the old node for each resource to reflect its * new resource count */ native_deallocate(rsc); pe__clear_resource_flags(rsc, pe_rsc_provisional); if (chosen == NULL) { GListPtr gIter = NULL; char *rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); crm_debug("Could not allocate a node for %s", rsc->id); pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate"); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; const char *interval_ms_s = g_hash_table_lookup(op->meta, XML_LRM_ATTR_INTERVAL_MS); crm_debug("Processing %s", op->uuid); if(pcmk__str_eq(RSC_STOP, op->task, pcmk__str_casei)) { update_action_flags(op, pe_action_optional | pe_action_clear, __func__, __LINE__); } else if(pcmk__str_eq(RSC_START, op->task, pcmk__str_casei)) { update_action_flags(op, pe_action_runnable | pe_action_clear, __func__, __LINE__); //pe__set_resource_flags(rsc, pe_rsc_block); } else if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { if(pcmk__str_eq(rc_inactive, g_hash_table_lookup(op->meta, XML_ATTR_TE_TARGET_RC), pcmk__str_casei)) { /* This is a recurring monitor for the stopped state, leave it alone */ } else { /* Normal monitor operation, cancel it */ update_action_flags(op, pe_action_runnable | pe_action_clear, __func__, __LINE__); } } } free(rc_inactive); return FALSE; } crm_debug("Assigning %s to %s", chosen->details->uname, rsc->id); rsc->allocated_to = pe__copy_node(chosen); chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, rsc); chosen->details->num_resources++; chosen->count++; calculate_utilization(chosen->details->utilization, rsc->utilization, FALSE); dump_rsc_utilization((show_utilization? LOG_STDOUT : LOG_TRACE), __func__, rsc, chosen); return TRUE; } void log_action(unsigned int log_level, const char *pre_text, pe_action_t * action, gboolean details) { const char *node_uname = NULL; const char *node_uuid = NULL; const char *desc = NULL; if (action == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (pcmk_is_set(action->flags, pe_action_pseudo)) { node_uname = NULL; node_uuid = NULL; } else if (action->node != NULL) { node_uname = action->node->details->uname; node_uuid = action->node->details->id; } else { node_uname = ""; node_uuid = NULL; } switch (text2task(action->task)) { case stonith_node: case shutdown_crm: if (pcmk_is_set(action->flags, pe_action_pseudo)) { desc = "Pseudo "; } else if (pcmk_is_set(action->flags, pe_action_optional)) { desc = "Optional "; } else if (!pcmk_is_set(action->flags, pe_action_runnable)) { desc = "!!Non-Startable!! "; } else if (pcmk_is_set(action->flags, pe_action_processed)) { desc = ""; } else { desc = "(Provisional) "; } crm_trace("%s%s%sAction %d: %s%s%s%s%s%s", ((pre_text == NULL)? "" : pre_text), ((pre_text == NULL)? "" : ": "), desc, action->id, action->uuid, (node_uname? "\ton " : ""), (node_uname? node_uname : ""), (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), (node_uuid? ")" : "")); break; default: if (pcmk_is_set(action->flags, pe_action_optional)) { desc = "Optional "; } else if (pcmk_is_set(action->flags, pe_action_pseudo)) { desc = "Pseudo "; } else if (!pcmk_is_set(action->flags, pe_action_runnable)) { desc = "!!Non-Startable!! "; } else if (pcmk_is_set(action->flags, pe_action_processed)) { desc = ""; } else { desc = "(Provisional) "; } crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s", ((pre_text == NULL)? "" : pre_text), ((pre_text == NULL)? "" : ": "), desc, action->id, action->uuid, (action->rsc? action->rsc->id : ""), (node_uname? "\ton " : ""), (node_uname? node_uname : ""), (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), (node_uuid? ")" : "")); break; } if (details) { GListPtr gIter = NULL; crm_trace("\t\t====== Preceding Actions"); gIter = action->actions_before; for (; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *other = (pe_action_wrapper_t *) gIter->data; log_action(log_level + 1, "\t\t", other->action, FALSE); } crm_trace("\t\t====== Subsequent Actions"); gIter = action->actions_after; for (; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *other = (pe_action_wrapper_t *) gIter->data; log_action(log_level + 1, "\t\t", other->action, FALSE); } crm_trace("\t\t====== End"); } else { crm_trace("\t\t(before=%d, after=%d)", g_list_length(action->actions_before), g_list_length(action->actions_after)); } } gboolean can_run_any(GHashTable * nodes) { GHashTableIter iter; pe_node_t *node = NULL; if (nodes == NULL) { return FALSE; } g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (can_run_resources(node) && node->weight >= 0) { return TRUE; } } return FALSE; } pe_action_t * create_pseudo_resource_op(pe_resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set) { pe_action_t *action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0), task, NULL, optional, TRUE, data_set); update_action_flags(action, pe_action_pseudo, __func__, __LINE__); update_action_flags(action, pe_action_runnable, __func__, __LINE__); if(runnable) { update_action_flags(action, pe_action_runnable, __func__, __LINE__); } return action; } /*! * \internal * \brief Create an executor cancel op * * \param[in] rsc Resource of action to cancel * \param[in] task Name of action to cancel * \param[in] interval_ms Interval of action to cancel * \param[in] node Node of action to cancel * \param[in] data_set Working set of cluster * * \return Created op */ pe_action_t * pe_cancel_op(pe_resource_t *rsc, const char *task, guint interval_ms, pe_node_t *node, pe_working_set_t *data_set) { pe_action_t *cancel_op; char *interval_ms_s = crm_strdup_printf("%u", interval_ms); // @TODO dangerous if possible to schedule another action with this key char *key = pcmk__op_key(rsc->id, task, interval_ms); cancel_op = custom_action(rsc, key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(cancel_op->task); cancel_op->task = strdup(RSC_CANCEL); free(cancel_op->cancel_task); cancel_op->cancel_task = strdup(task); add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task); add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL_MS, interval_ms_s); free(interval_ms_s); return cancel_op; } /*! * \internal * \brief Create a shutdown op for a scheduler transition * * \param[in] node Node being shut down * \param[in] data_set Working set of cluster * * \return Created op */ pe_action_t * sched_shutdown_op(pe_node_t *node, pe_working_set_t *data_set) { char *shutdown_id = crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN, node->details->uname); pe_action_t *shutdown_op = custom_action(NULL, shutdown_id, CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set); crm_notice("Scheduling shutdown of node %s", node->details->uname); shutdown_constraints(node, shutdown_op, data_set); add_hash_param(shutdown_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); return shutdown_op; } static char * generate_transition_magic(const char *transition_key, int op_status, int op_rc) { CRM_CHECK(transition_key != NULL, return NULL); return crm_strdup_printf("%d:%d;%s", op_status, op_rc, transition_key); } static void append_digest(lrmd_event_data_t *op, xmlNode *update, const char *version, const char *magic, int level) { /* this will enable us to later determine that the * resource's parameters have changed and we should force * a restart */ char *digest = NULL; xmlNode *args_xml = NULL; if (op->params == NULL) { return; } args_xml = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); pcmk__filter_op_for_digest(args_xml); digest = calculate_operation_digest(args_xml, version); #if 0 if (level < get_crm_log_level() && op->interval_ms == 0 && pcmk__str_eq(op->op_type, CRMD_ACTION_START, pcmk__str_none)) { char *digest_source = dump_xml_unformatted(args_xml); do_crm_log(level, "Calculated digest %s for %s (%s). Source: %s\n", digest, ID(update), magic, digest_source); free(digest_source); } #endif crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest); free_xml(args_xml); free(digest); } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" /*! * \internal * \brief Create XML for resource operation history update * * \param[in,out] parent Parent XML node to add to * \param[in,out] op Operation event data * \param[in] caller_version DC feature set * \param[in] target_rc Expected result of operation * \param[in] node Name of node on which operation was performed * \param[in] origin Arbitrary description of update source * \param[in] level A log message will be logged at this level * * \return Newly created XML node for history update */ xmlNode * pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op, const char *caller_version, int target_rc, const char *node, const char *origin, int level) { char *key = NULL; char *magic = NULL; char *op_id = NULL; char *op_id_additional = NULL; char *local_user_data = NULL; const char *exit_reason = NULL; xmlNode *xml_op = NULL; const char *task = NULL; CRM_CHECK(op != NULL, return NULL); do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%u)", origin, op->rsc_id, op->op_type, services_lrm_status_str(op->op_status), op->interval_ms); crm_trace("DC version: %s", caller_version); task = op->op_type; /* Record a successful reload as a start, and a failed reload as a monitor, * to make life easier for the scheduler when determining the current state. */ if (pcmk__str_eq(task, "reload", pcmk__str_none)) { if (op->op_status == PCMK_LRM_OP_DONE) { task = CRMD_ACTION_START; } else { task = CRMD_ACTION_STATUS; } } key = pcmk__op_key(op->rsc_id, task, op->interval_ms); if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = pcmk__notify_key(op->rsc_id, n_type, n_task); if (op->op_status != PCMK_LRM_OP_PENDING) { /* Ignore notify errors. * * @TODO It might be better to keep the correct result here, and * ignore it in process_graph_event(). */ op->op_status = PCMK_LRM_OP_DONE; op->rc = 0; } } else if (did_rsc_op_fail(op, target_rc)) { op_id = pcmk__op_key(op->rsc_id, "last_failure", 0); if (op->interval_ms == 0) { // Ensure 'last' gets updated, in case record-pending is true op_id_additional = pcmk__op_key(op->rsc_id, "last", 0); } exit_reason = op->exit_reason; } else if (op->interval_ms > 0) { op_id = strdup(key); } else { op_id = pcmk__op_key(op->rsc_id, "last", 0); } again: xml_op = pcmk__xe_match(parent, XML_LRM_TAG_RSC_OP, XML_ATTR_ID, op_id); if (xml_op == NULL) { xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP); } if (op->user_data == NULL) { crm_debug("Generating fake transition key for: " PCMK__OP_FMT " %d from %s", op->rsc_id, op->op_type, op->interval_ms, op->call_id, origin); local_user_data = pcmk__transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } if(magic == NULL) { magic = generate_transition_magic(op->user_data, op->op_status, op->rc); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason == NULL ? "" : exit_reason); crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */ crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms); if (compare_version("2.1", caller_version) <= 0) { if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { crm_trace("Timing data (" PCMK__OP_FMT "): last=%u change=%u exec=%u queue=%u", op->rsc_id, op->op_type, op->interval_ms, op->t_run, op->t_rcchange, op->exec_time, op->queue_time); if (op->interval_ms == 0) { crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE, (long long) op->t_run); // @COMPAT last-run is deprecated crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_RUN, (long long) op->t_run); } else if(op->t_rcchange) { /* last-run is not accurate for recurring ops */ crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE, (long long) op->t_rcchange); } else { /* ...but is better than nothing otherwise */ crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE, (long long) op->t_run); } crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time); crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time); } } if (pcmk__str_any_of(op->op_type, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { /* * Record migrate_source and migrate_target always for migrate ops. */ const char *name = XML_LRM_ATTR_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = XML_LRM_ATTR_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } append_digest(op, xml_op, caller_version, magic, LOG_DEBUG); if (op_id_additional) { free(op_id); op_id = op_id_additional; op_id_additional = NULL; goto again; } if (local_user_data) { free(local_user_data); op->user_data = NULL; } free(magic); free(op_id); free(key); return xml_op; }