Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F2020062
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
125 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h
index cd5a27bbff..853d0712a7 100644
--- a/include/crm/pengine/pe_types.h
+++ b/include/crm/pengine/pe_types.h
@@ -1,521 +1,521 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PE_TYPES__H
# define PE_TYPES__H
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \file
* \brief Data types for cluster status
* \ingroup pengine
*/
# include <stdbool.h> // bool
# include <sys/types.h> // time_t
# include <glib.h> // gboolean, guint, GList, GHashTable
# include <crm/crm.h> // GListPtr
# include <crm/common/iso8601.h>
# include <crm/pengine/common.h>
typedef struct pe_node_s pe_node_t;
typedef struct pe_action_s pe_action_t;
typedef struct pe_resource_s pe_resource_t;
typedef struct pe_working_set_s pe_working_set_t;
enum pe_obj_types {
pe_unknown = -1,
pe_native = 0,
pe_group = 1,
pe_clone = 2,
pe_container = 3,
};
typedef struct resource_object_functions_s {
gboolean (*unpack) (pe_resource_t*, pe_working_set_t*);
pe_resource_t *(*find_rsc) (pe_resource_t *parent, const char *search,
const pe_node_t *node, int flags);
/* parameter result must be free'd */
char *(*parameter) (pe_resource_t*, pe_node_t*, gboolean, const char*,
pe_working_set_t*);
void (*print) (pe_resource_t*, const char*, long, void*);
gboolean (*active) (pe_resource_t*, gboolean);
enum rsc_role_e (*state) (const pe_resource_t*, gboolean);
pe_node_t *(*location) (const pe_resource_t*, GList**, int);
void (*free) (pe_resource_t*);
void (*count) (pe_resource_t*);
} resource_object_functions_t;
typedef struct resource_alloc_functions_s resource_alloc_functions_t;
enum pe_quorum_policy {
no_quorum_freeze,
no_quorum_stop,
no_quorum_ignore,
no_quorum_suicide
};
enum node_type {
node_ping,
node_member,
node_remote
};
//! \deprecated will be removed in a future release
enum pe_restart {
pe_restart_restart,
pe_restart_ignore
};
//! Determine behavior of pe_find_resource_with_flags()
enum pe_find {
pe_find_renamed = 0x001, //!< match resource ID or LRM history ID
pe_find_anon = 0x002, //!< match base name of anonymous clone instances
pe_find_clone = 0x004, //!< match only clone instances
pe_find_current = 0x008, //!< match resource active on specified node
pe_find_inactive = 0x010, //!< match resource not running anywhere
pe_find_any = 0x020, //!< match base name of any clone instance
};
// @TODO Make these an enum
# define pe_flag_have_quorum 0x00000001ULL
# define pe_flag_symmetric_cluster 0x00000002ULL
# define pe_flag_maintenance_mode 0x00000008ULL
# define pe_flag_stonith_enabled 0x00000010ULL
# define pe_flag_have_stonith_resource 0x00000020ULL
# define pe_flag_enable_unfencing 0x00000040ULL
# define pe_flag_concurrent_fencing 0x00000080ULL
# define pe_flag_stop_rsc_orphans 0x00000100ULL
# define pe_flag_stop_action_orphans 0x00000200ULL
# define pe_flag_stop_everything 0x00000400ULL
# define pe_flag_start_failure_fatal 0x00001000ULL
# define pe_flag_remove_after_stop 0x00002000ULL
# define pe_flag_startup_fencing 0x00004000ULL
# define pe_flag_shutdown_lock 0x00008000ULL
# define pe_flag_startup_probes 0x00010000ULL
# define pe_flag_have_status 0x00020000ULL
# define pe_flag_have_remote_nodes 0x00040000ULL
# define pe_flag_quick_location 0x00100000ULL
# define pe_flag_sanitized 0x00200000ULL
# define pe_flag_stdout 0x00400000ULL
//! Don't count total, disabled and blocked resource instances
# define pe_flag_no_counts 0x00800000ULL
/*! Skip deprecated code that is kept solely for backward API compatibility.
* (Internal code should always set this.)
*/
# define pe_flag_no_compat 0x01000000ULL
struct pe_working_set_s {
xmlNode *input;
crm_time_t *now;
/* options extracted from the input */
char *dc_uuid;
pe_node_t *dc_node;
const char *stonith_action;
const char *placement_strategy;
unsigned long long flags;
int stonith_timeout;
enum pe_quorum_policy no_quorum_policy;
GHashTable *config_hash;
GHashTable *tickets;
// Actions for which there can be only one (e.g. fence nodeX)
GHashTable *singletons;
GListPtr nodes;
GListPtr resources;
GListPtr placement_constraints;
GListPtr ordering_constraints;
GListPtr colocation_constraints;
GListPtr ticket_constraints;
GListPtr actions;
xmlNode *failed;
xmlNode *op_defaults;
xmlNode *rsc_defaults;
/* stats */
int num_synapse;
- int max_valid_nodes;
+ int max_valid_nodes; //! Deprecated (will be removed in a future release)
int order_id;
int action_id;
/* final output */
xmlNode *graph;
GHashTable *template_rsc_sets;
const char *localhost;
GHashTable *tags;
int blocked_resources;
int disabled_resources;
GList *param_check; // History entries that need to be checked
GList *stop_needed; // Containers that need stop actions
time_t recheck_by; // Hint to controller to re-run scheduler by this time
int ninstances; // Total number of resource instances
guint shutdown_lock;// How long (seconds) to lock resources to shutdown node
};
enum pe_check_parameters {
/* Clear fail count if parameters changed for un-expired start or monitor
* last_failure.
*/
pe_check_last_failure,
/* Clear fail count if parameters changed for start, monitor, promote, or
* migrate_from actions for active resources.
*/
pe_check_active,
};
struct pe_node_shared_s {
const char *id;
const char *uname;
enum node_type type;
/* @TODO convert these flags into a bitfield */
gboolean online;
gboolean standby;
gboolean standby_onfail;
gboolean pending;
gboolean unclean;
gboolean unseen;
gboolean shutdown;
gboolean expected_up;
gboolean is_dc;
gboolean maintenance;
gboolean rsc_discovery_enabled;
gboolean remote_requires_reset;
gboolean remote_was_fenced;
gboolean remote_maintenance; /* what the remote-rsc is thinking */
gboolean unpacked;
int num_resources;
pe_resource_t *remote_rsc;
GListPtr running_rsc; /* pe_resource_t* */
GListPtr allocated_rsc; /* pe_resource_t* */
GHashTable *attrs; /* char* => char* */
GHashTable *utilization;
GHashTable *digest_cache; //!< cache of calculated resource digests
};
struct pe_node_s {
int weight;
gboolean fixed;
int count;
struct pe_node_shared_s *details;
int rsc_discover_mode;
};
# define pe_rsc_orphan 0x00000001ULL
# define pe_rsc_managed 0x00000002ULL
# define pe_rsc_block 0x00000004ULL
# define pe_rsc_orphan_container_filler 0x00000008ULL
# define pe_rsc_notify 0x00000010ULL
# define pe_rsc_unique 0x00000020ULL
# define pe_rsc_fence_device 0x00000040ULL
# define pe_rsc_promotable 0x00000080ULL
# define pe_rsc_provisional 0x00000100ULL
# define pe_rsc_allocating 0x00000200ULL
# define pe_rsc_merging 0x00000400ULL
# define pe_rsc_reload 0x00002000ULL
# define pe_rsc_allow_remote_remotes 0x00004000ULL
# define pe_rsc_failed 0x00010000ULL
# define pe_rsc_runnable 0x00040000ULL
# define pe_rsc_start_pending 0x00080000ULL
# define pe_rsc_starting 0x00100000ULL
# define pe_rsc_stopping 0x00200000ULL
# define pe_rsc_allow_migrate 0x00800000ULL
# define pe_rsc_failure_ignored 0x01000000ULL
# define pe_rsc_maintenance 0x04000000ULL
# define pe_rsc_is_container 0x08000000ULL
# define pe_rsc_needs_quorum 0x10000000ULL
# define pe_rsc_needs_fencing 0x20000000ULL
# define pe_rsc_needs_unfencing 0x40000000ULL
enum pe_graph_flags {
pe_graph_none = 0x00000,
pe_graph_updated_first = 0x00001,
pe_graph_updated_then = 0x00002,
pe_graph_disable = 0x00004,
};
/* *INDENT-OFF* */
enum pe_action_flags {
pe_action_pseudo = 0x00001,
pe_action_runnable = 0x00002,
pe_action_optional = 0x00004,
pe_action_print_always = 0x00008,
pe_action_have_node_attrs = 0x00010,
pe_action_implied_by_stonith = 0x00040,
pe_action_migrate_runnable = 0x00080,
pe_action_dumped = 0x00100,
pe_action_processed = 0x00200,
pe_action_clear = 0x00400,
pe_action_dangle = 0x00800,
/* This action requires one or more of its dependencies to be runnable.
* We use this to clear the runnable flag before checking dependencies.
*/
pe_action_requires_any = 0x01000,
pe_action_reschedule = 0x02000,
pe_action_tracking = 0x04000,
pe_action_dedup = 0x08000, //! Internal state tracking when creating graph
pe_action_dc = 0x10000, //! Action may run on DC instead of target
};
/* *INDENT-ON* */
struct pe_resource_s {
char *id;
char *clone_name;
xmlNode *xml;
xmlNode *orig_xml;
xmlNode *ops_xml;
pe_working_set_t *cluster;
pe_resource_t *parent;
enum pe_obj_types variant;
void *variant_opaque;
resource_object_functions_t *fns;
resource_alloc_functions_t *cmds;
enum rsc_recovery_type recovery_type;
// @TODO only pe_restart_restart is of interest, so merge into flags
enum pe_restart restart_type; //!< \deprecated will be removed in future release
int priority;
int stickiness;
int sort_index;
int failure_timeout;
int migration_threshold;
guint remote_reconnect_ms;
char *pending_task;
unsigned long long flags;
// @TODO merge these into flags
gboolean is_remote_node;
gboolean exclusive_discover;
//!@{
//! This field should be treated as internal to Pacemaker
GListPtr rsc_cons_lhs; // List of rsc_colocation_t*
GListPtr rsc_cons; // List of rsc_colocation_t*
GListPtr rsc_location; // List of pe__location_t*
GListPtr actions; // List of pe_action_t*
GListPtr rsc_tickets; // List of rsc_ticket*
//!@}
pe_node_t *allocated_to;
pe_node_t *partial_migration_target;
pe_node_t *partial_migration_source;
GListPtr running_on; /* pe_node_t* */
GHashTable *known_on; /* pe_node_t* */
GHashTable *allowed_nodes; /* pe_node_t* */
enum rsc_role_e role;
enum rsc_role_e next_role;
GHashTable *meta;
GHashTable *parameters;
GHashTable *utilization;
GListPtr children; /* pe_resource_t* */
GListPtr dangling_migrations; /* pe_node_t* */
pe_resource_t *container;
GListPtr fillers;
pe_node_t *pending_node; // Node on which pending_task is happening
pe_node_t *lock_node; // Resource is shutdown-locked to this node
time_t lock_time; // When shutdown lock started
#if ENABLE_VERSIONED_ATTRS
xmlNode *versioned_parameters;
#endif
};
#if ENABLE_VERSIONED_ATTRS
// Used as action->action_details if action->rsc is not NULL
typedef struct pe_rsc_action_details_s {
xmlNode *versioned_parameters;
xmlNode *versioned_meta;
} pe_rsc_action_details_t;
#endif
struct pe_action_s {
int id;
int priority;
pe_resource_t *rsc;
pe_node_t *node;
xmlNode *op_entry;
char *task;
char *uuid;
char *cancel_task;
char *reason;
enum pe_action_flags flags;
enum rsc_start_requirement needs;
enum action_fail_response on_fail;
enum rsc_role_e fail_role;
GHashTable *meta;
GHashTable *extra;
/*
* These two varables are associated with the constraint logic
* that involves first having one or more actions runnable before
* then allowing this action to execute.
*
* These varables are used with features such as 'clone-min' which
* requires at minimum X number of cloned instances to be running
* before an order dependency can run. Another option that uses
* this is 'require-all=false' in ordering constrants. This option
* says "only require one instance of a resource to start before
* allowing dependencies to start" -- basically, require-all=false is
* the same as clone-min=1.
*/
/* current number of known runnable actions in the before list. */
int runnable_before;
/* the number of "before" runnable actions required for this action
* to be considered runnable */
int required_runnable_before;
GListPtr actions_before; /* pe_action_wrapper_t* */
GListPtr actions_after; /* pe_action_wrapper_t* */
/* Some of the above fields could be moved to the details,
* except for API backward compatibility.
*/
void *action_details; // varies by type of action
};
typedef struct pe_ticket_s {
char *id;
gboolean granted;
time_t last_granted;
gboolean standby;
GHashTable *state;
} pe_ticket_t;
typedef struct pe_tag_s {
char *id;
GListPtr refs;
} pe_tag_t;
//! Internal tracking for transition graph creation
enum pe_link_state {
pe_link_not_dumped, //! Internal tracking for transition graph creation
pe_link_dumped, //! Internal tracking for transition graph creation
pe_link_dup, //! \deprecated No longer used by Pacemaker
};
enum pe_discover_e {
pe_discover_always = 0,
pe_discover_never,
pe_discover_exclusive,
};
/* *INDENT-OFF* */
enum pe_ordering {
pe_order_none = 0x0, /* deleted */
pe_order_optional = 0x1, /* pure ordering, nothing implied */
pe_order_apply_first_non_migratable = 0x2, /* Only apply this constraint's ordering if first is not migratable. */
pe_order_implies_first = 0x10, /* If 'then' is required, ensure 'first' is too */
pe_order_implies_then = 0x20, /* If 'first' is required, ensure 'then' is too */
pe_order_implies_first_master = 0x40, /* Imply 'first' is required when 'then' is required and then's rsc holds Master role. */
/* first requires then to be both runnable and migrate runnable. */
pe_order_implies_first_migratable = 0x80,
pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */
pe_order_pseudo_left = 0x200, /* 'then' can only be pseudo if 'first' is runnable */
pe_order_implies_then_on_node = 0x400, /* If 'first' is required on 'nodeX',
* ensure instances of 'then' on 'nodeX' are too.
* Only really useful if 'then' is a clone and 'first' is not
*/
pe_order_probe = 0x800, /* If 'first->rsc' is
* - running but about to stop, ignore the constraint
* - otherwise, behave as runnable_left
*/
pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */
pe_order_stonith_stop = 0x2000, /* only applies if the action is non-pseudo */
pe_order_serialize_only = 0x4000, /* serialize */
pe_order_same_node = 0x8000, /* applies only if 'first' and 'then' are on same node */
pe_order_implies_first_printed = 0x10000, /* Like ..implies_first but only ensures 'first' is printed, not mandatory */
pe_order_implies_then_printed = 0x20000, /* Like ..implies_then but only ensures 'then' is printed, not mandatory */
pe_order_asymmetrical = 0x100000, /* Indicates asymmetrical one way ordering constraint. */
pe_order_load = 0x200000, /* Only relevant if... */
pe_order_one_or_more = 0x400000, /* 'then' is runnable only if one or more of its dependencies are too */
pe_order_anti_colocation = 0x800000,
pe_order_preserve = 0x1000000, /* Hack for breaking user ordering constraints with container resources */
pe_order_then_cancels_first = 0x2000000, // if 'then' becomes required, 'first' becomes optional
pe_order_trace = 0x4000000, /* test marker */
};
/* *INDENT-ON* */
typedef struct pe_action_wrapper_s {
enum pe_ordering type;
enum pe_link_state state;
pe_action_t *action;
} pe_action_wrapper_t;
// Deprecated type aliases
typedef struct pe_action_s action_t; //!< \deprecated Use pe_action_t instead
typedef struct pe_action_wrapper_s action_wrapper_t; //!< \deprecated Use pe_action_wrapper_t instead
typedef struct pe_node_s node_t; //!< \deprecated Use pe_node_t instead
typedef struct pe_resource_s resource_t; //!< \deprecated Use pe_resource_t instead
typedef struct pe_tag_s tag_t; //!< \deprecated Use pe_tag_t instead
typedef struct pe_ticket_s ticket_t; //!< \deprecated Use pe_ticket_t instead
typedef enum pe_quorum_policy no_quorum_policy_t; //!< \deprecated Use enum pe_quorum_policy instead
#ifdef __cplusplus
}
#endif
#endif // PE_TYPES__H
diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c
index 57e9942194..736a404d07 100644
--- a/lib/pacemaker/pcmk_sched_allocate.c
+++ b/lib/pacemaker/pcmk_sched_allocate.c
@@ -1,3072 +1,3073 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <glib.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
CRM_TRACE_INIT_DATA(pacemaker);
void set_alloc_actions(pe_working_set_t * data_set);
extern void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set);
extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set);
static void apply_remote_node_ordering(pe_working_set_t *data_set);
static enum remote_connection_state get_remote_node_state(pe_node_t *node);
enum remote_connection_state {
remote_state_unknown = 0,
remote_state_alive = 1,
remote_state_resting = 2,
remote_state_failed = 3,
remote_state_stopped = 4
};
static const char *
state2text(enum remote_connection_state state)
{
switch (state) {
case remote_state_unknown:
return "unknown";
case remote_state_alive:
return "alive";
case remote_state_resting:
return "resting";
case remote_state_failed:
return "failed";
case remote_state_stopped:
return "stopped";
}
return "impossible";
}
resource_alloc_functions_t resource_class_alloc_functions[] = {
{
pcmk__native_merge_weights,
pcmk__native_allocate,
native_create_actions,
native_create_probe,
native_internal_constraints,
native_rsc_colocation_lh,
native_rsc_colocation_rh,
native_rsc_location,
native_action_flags,
native_update_actions,
native_expand,
native_append_meta,
},
{
pcmk__group_merge_weights,
pcmk__group_allocate,
group_create_actions,
native_create_probe,
group_internal_constraints,
group_rsc_colocation_lh,
group_rsc_colocation_rh,
group_rsc_location,
group_action_flags,
group_update_actions,
group_expand,
group_append_meta,
},
{
pcmk__native_merge_weights,
pcmk__clone_allocate,
clone_create_actions,
clone_create_probe,
clone_internal_constraints,
clone_rsc_colocation_lh,
clone_rsc_colocation_rh,
clone_rsc_location,
clone_action_flags,
pcmk__multi_update_actions,
clone_expand,
clone_append_meta,
},
{
pcmk__native_merge_weights,
pcmk__bundle_allocate,
pcmk__bundle_create_actions,
pcmk__bundle_create_probe,
pcmk__bundle_internal_constraints,
pcmk__bundle_rsc_colocation_lh,
pcmk__bundle_rsc_colocation_rh,
pcmk__bundle_rsc_location,
pcmk__bundle_action_flags,
pcmk__multi_update_actions,
pcmk__bundle_expand,
pcmk__bundle_append_meta,
}
};
gboolean
update_action_flags(action_t * action, enum pe_action_flags flags, const char *source, int line)
{
static unsigned long calls = 0;
gboolean changed = FALSE;
gboolean clear = is_set(flags, pe_action_clear);
enum pe_action_flags last = action->flags;
if (clear) {
action->flags = crm_clear_bit(source, line, action->uuid, action->flags, flags);
} else {
action->flags = crm_set_bit(source, line, action->uuid, action->flags, flags);
}
if (last != action->flags) {
calls++;
changed = TRUE;
/* Useful for tracking down _who_ changed a specific flag */
/* CRM_ASSERT(calls != 534); */
clear_bit(flags, pe_action_clear);
crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)",
action->uuid, action->node ? action->node->details->uname : "[none]",
clear ? "un-" : "", flags, last, action->flags, calls, source);
}
return changed;
}
static gboolean
check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry,
gboolean active_here, pe_working_set_t * data_set)
{
int attr_lpc = 0;
gboolean force_restart = FALSE;
gboolean delete_resource = FALSE;
gboolean changed = FALSE;
const char *value = NULL;
const char *old_value = NULL;
const char *attr_list[] = {
XML_ATTR_TYPE,
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER
};
for (; attr_lpc < DIMOF(attr_list); attr_lpc++) {
value = crm_element_value(rsc->xml, attr_list[attr_lpc]);
old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]);
if (value == old_value /* i.e. NULL */
|| crm_str_eq(value, old_value, TRUE)) {
continue;
}
changed = TRUE;
trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set);
if (active_here) {
force_restart = TRUE;
crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",
rsc->id, node->details->uname, attr_list[attr_lpc],
crm_str(old_value), crm_str(value));
}
}
if (force_restart) {
/* make sure the restart happens */
stop_action(rsc, node, FALSE);
set_bit(rsc->flags, pe_rsc_start_pending);
delete_resource = TRUE;
} else if (changed) {
delete_resource = TRUE;
}
return delete_resource;
}
static void
CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node,
const char *reason, pe_working_set_t * data_set)
{
guint interval_ms = 0;
action_t *cancel = NULL;
const char *task = NULL;
const char *call_id = NULL;
const char *interval_ms_s = NULL;
CRM_CHECK(xml_op != NULL, return);
CRM_CHECK(active_node != NULL, return);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS);
interval_ms = crm_parse_ms(interval_ms_s);
crm_info("Action " CRM_OP_FMT " on %s will be stopped: %s",
rsc->id, task, interval_ms,
active_node->details->uname, (reason? reason : "unknown"));
cancel = pe_cancel_op(rsc, task, interval_ms, active_node, data_set);
add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set);
}
static gboolean
check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op,
pe_working_set_t * data_set)
{
char *key = NULL;
guint interval_ms = 0;
const char *interval_ms_s = NULL;
const op_digest_cache_t *digest_data = NULL;
gboolean did_change = FALSE;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *digest_secure = NULL;
CRM_CHECK(active_node != NULL, return FALSE);
interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS);
interval_ms = crm_parse_ms(interval_ms_s);
if (interval_ms > 0) {
xmlNode *op_match = NULL;
/* we need to reconstruct the key because of the way we used to construct resource IDs */
key = pcmk__op_key(rsc->id, task, interval_ms);
pe_rsc_trace(rsc, "Checking parameters for %s", key);
op_match = find_rsc_op_entry(rsc, key);
if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) {
CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set);
free(key);
return TRUE;
} else if (op_match == NULL) {
pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname);
free(key);
return TRUE;
}
free(key);
key = NULL;
}
crm_trace("Testing " CRM_OP_FMT " on %s",
rsc->id, task, interval_ms, active_node->details->uname);
if ((interval_ms == 0) && safe_str_eq(task, RSC_STATUS)) {
/* Reload based on the start action not a probe */
task = RSC_START;
} else if ((interval_ms == 0) && safe_str_eq(task, RSC_MIGRATED)) {
/* Reload based on the start action not a migrate */
task = RSC_START;
} else if ((interval_ms == 0) && safe_str_eq(task, RSC_PROMOTE)) {
/* Reload based on the start action not a promote */
task = RSC_START;
}
digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set);
if(is_set(data_set->flags, pe_flag_sanitized)) {
digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
}
if(digest_data->rc != RSC_DIGEST_MATCH
&& digest_secure
&& digest_data->digest_secure_calc
&& strcmp(digest_data->digest_secure_calc, digest_secure) == 0) {
if (is_set(data_set->flags, pe_flag_stdout)) {
printf("Only 'private' parameters to " CRM_OP_FMT " on %s changed: %s\n",
rsc->id, task, interval_ms, active_node->details->uname,
crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
}
} else if (digest_data->rc == RSC_DIGEST_RESTART) {
/* Changes that force a restart */
pe_action_t *required = NULL;
did_change = TRUE;
key = pcmk__op_key(rsc->id, task, interval_ms);
crm_log_xml_info(digest_data->params_restart, "params:restart");
required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL,
"resource definition change", pe_action_optional, TRUE);
trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set);
} else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) {
/* Changes that can potentially be handled by a reload */
const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
did_change = TRUE;
trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set);
crm_log_xml_info(digest_data->params_all, "params:reload");
key = pcmk__op_key(rsc->id, task, interval_ms);
if (interval_ms > 0) {
action_t *op = NULL;
#if 0
/* Always reload/restart the entire resource */
ReloadRsc(rsc, active_node, data_set);
#else
/* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */
op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set);
set_bit(op->flags, pe_action_reschedule);
#endif
} else if (digest_restart) {
pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id);
/* Reload this resource */
ReloadRsc(rsc, active_node, data_set);
free(key);
} else {
pe_action_t *required = NULL;
pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id);
/* Re-send the start/demote/promote op
* Recurring ops will be detected independently
*/
required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL,
"resource definition change", pe_action_optional, TRUE);
}
}
return did_change;
}
/*!
* \internal
* \brief Do deferred action checks after allocation
*
* \param[in] data_set Working set for cluster
*/
static void
check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op,
enum pe_check_parameters check, pe_working_set_t *data_set)
{
const char *reason = NULL;
op_digest_cache_t *digest_data = NULL;
switch (check) {
case pe_check_active:
if (check_action_definition(rsc, node, rsc_op, data_set)
&& pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
data_set)) {
reason = "action definition changed";
}
break;
case pe_check_last_failure:
digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set);
switch (digest_data->rc) {
case RSC_DIGEST_UNKNOWN:
crm_trace("Resource %s history entry %s on %s has no digest to compare",
rsc->id, ID(rsc_op), node->details->id);
break;
case RSC_DIGEST_MATCH:
break;
default:
reason = "resource parameters have changed";
break;
}
break;
}
if (reason) {
pe__clear_failcount(rsc, node, reason, data_set);
}
}
static void
check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
int offset = -1;
guint interval_ms = 0;
int stop_index = 0;
int start_index = 0;
const char *task = NULL;
const char *interval_ms_s = NULL;
xmlNode *rsc_op = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
CRM_CHECK(node != NULL, return);
if (is_set(rsc->flags, pe_rsc_orphan)) {
resource_t *parent = uber_parent(rsc);
if(parent == NULL
|| pe_rsc_is_clone(parent) == FALSE
|| is_set(parent->flags, pe_rsc_unique)) {
pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id);
DeleteRsc(rsc, node, FALSE, data_set);
} else {
pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id);
}
return;
} else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) {
DeleteRsc(rsc, node, FALSE, data_set);
}
pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s",
rsc->id, node->details->uname);
return;
}
pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname);
if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) {
DeleteRsc(rsc, node, FALSE, data_set);
}
for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
rsc_op = __xml_next_element(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
op_list = g_list_prepend(op_list, rsc_op);
}
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
offset++;
if (start_index < stop_index) {
/* stopped */
continue;
} else if (offset < start_index) {
/* action occurred prior to a start */
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS);
interval_ms = crm_parse_ms(interval_ms_s);
if ((interval_ms > 0) &&
(is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) {
// Maintenance mode cancels recurring operations
CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set);
} else if ((interval_ms > 0)
|| safe_str_eq(task, RSC_STATUS)
|| safe_str_eq(task, RSC_START)
|| safe_str_eq(task, RSC_PROMOTE)
|| safe_str_eq(task, RSC_MIGRATED)) {
/* If a resource operation failed, and the operation's definition
* has changed, clear any fail count so they can be retried fresh.
*/
if (pe__bundle_needs_remote_name(rsc)) {
/* We haven't allocated resources to nodes yet, so if the
* REMOTE_CONTAINER_HACK is used, we may calculate the digest
* based on the literal "#uname" value rather than the properly
* substituted value. That would mistakenly make the action
* definition appear to have been changed. Defer the check until
* later in this case.
*/
pe__add_param_check(rsc_op, rsc, node, pe_check_active,
data_set);
} else if (check_action_definition(rsc, node, rsc_op, data_set)
&& pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
data_set)) {
pe__clear_failcount(rsc, node, "action definition changed",
data_set);
}
}
}
g_list_free(sorted_op_list);
}
static GListPtr
find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones,
gboolean partial, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
gboolean match = FALSE;
if (id == NULL) {
return NULL;
}
if (rsc == NULL) {
if (data_set == NULL) {
return NULL;
}
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
result = find_rsc_list(result, child, id, renamed_clones, partial,
NULL);
}
return result;
}
if (partial) {
if (strstr(rsc->id, id)) {
match = TRUE;
} else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) {
match = TRUE;
}
} else {
if (strcmp(rsc->id, id) == 0) {
match = TRUE;
} else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
match = TRUE;
}
}
if (match) {
result = g_list_prepend(result, rsc);
}
if (rsc->children) {
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t *) gIter->data;
result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
}
}
return result;
}
static void
check_actions(pe_working_set_t * data_set)
{
const char *id = NULL;
node_t *node = NULL;
xmlNode *lrm_rscs = NULL;
xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
xmlNode *node_state = NULL;
for (node_state = __xml_first_child_element(status); node_state != NULL;
node_state = __xml_next_element(node_state)) {
if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
id = crm_element_value(node_state, XML_ATTR_ID);
lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE);
node = pe_find_node_id(data_set->nodes, id);
if (node == NULL) {
continue;
/* Still need to check actions for a maintenance node to cancel existing monitor operations */
} else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) {
crm_trace("Skipping param check for %s: can't run resources",
node->details->uname);
continue;
}
crm_trace("Processing node %s", node->details->uname);
if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
xmlNode *rsc_entry = NULL;
for (rsc_entry = __xml_first_child_element(lrm_rscs);
rsc_entry != NULL;
rsc_entry = __xml_next_element(rsc_entry)) {
if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
if (xml_has_children(rsc_entry)) {
GListPtr gIter = NULL;
GListPtr result = NULL;
const char *rsc_id = ID(rsc_entry);
CRM_CHECK(rsc_id != NULL, return);
result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set);
for (gIter = result; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (rsc->variant != pe_native) {
continue;
}
check_actions_for(rsc_entry, rsc, node, data_set);
}
g_list_free(result);
}
}
}
}
}
}
}
static void
apply_placement_constraints(pe_working_set_t * data_set)
{
for (GList *gIter = data_set->placement_constraints;
gIter != NULL; gIter = gIter->next) {
pe__location_t *cons = gIter->data;
cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
}
}
static gboolean
failcount_clear_action_exists(node_t * node, resource_t * rsc)
{
gboolean rc = FALSE;
GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE);
if (list) {
rc = TRUE;
}
g_list_free(list);
return rc;
}
/*!
* \internal
* \brief Force resource away if failures hit migration threshold
*
* \param[in,out] rsc Resource to check for failures
* \param[in,out] node Node to check for failures
* \param[in,out] data_set Cluster working set to update
*/
static void
check_migration_threshold(resource_t *rsc, node_t *node,
pe_working_set_t *data_set)
{
int fail_count, countdown;
resource_t *failed;
/* Migration threshold of 0 means never force away */
if (rsc->migration_threshold == 0) {
return;
}
// If we're ignoring failures, also ignore the migration threshold
if (is_set(rsc->flags, pe_rsc_failure_ignored)) {
return;
}
/* If there are no failures, there's no need to force away */
fail_count = pe_get_failcount(node, rsc, NULL,
pe_fc_effective|pe_fc_fillers, NULL,
data_set);
if (fail_count <= 0) {
return;
}
/* How many more times recovery will be tried on this node */
countdown = QB_MAX(rsc->migration_threshold - fail_count, 0);
/* If failed resource has a parent, we'll force the parent away */
failed = rsc;
if (is_not_set(rsc->flags, pe_rsc_unique)) {
failed = uber_parent(rsc);
}
if (countdown == 0) {
resource_location(failed, node, -INFINITY, "__fail_limit__", data_set);
crm_warn("Forcing %s away from %s after %d failures (max=%d)",
failed->id, node->details->uname, fail_count,
rsc->migration_threshold);
} else {
crm_info("%s can fail %d more times on %s before being forced off",
failed->id, countdown, node->details->uname);
}
}
static void
common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
{
if (rsc->children) {
GListPtr gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
common_apply_stickiness(child_rsc, node, data_set);
}
return;
}
if (is_set(rsc->flags, pe_rsc_managed)
&& rsc->stickiness != 0 && pcmk__list_of_1(rsc->running_on)) {
node_t *current = pe_find_node_id(rsc->running_on, node->details->id);
node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (current == NULL) {
} else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
resource_t *sticky_rsc = rsc;
resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set);
pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location"
" (node=%s, weight=%d)", sticky_rsc->id,
node->details->uname, rsc->stickiness);
} else {
GHashTableIter iter;
node_t *nIter = NULL;
pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric"
" and node %s is not explicitly allowed", rsc->id, node->details->uname);
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) {
crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight);
}
}
}
/* Check the migration threshold only if a failcount clear action
* has not already been placed for this resource on the node.
* There is no sense in potentially forcing the resource from this
* node if the failcount is being reset anyway.
*
* @TODO A clear_failcount operation can be scheduled in stage4() via
* check_actions_for(), or in stage5() via check_params(). This runs in
* stage2(), so it cannot detect those, meaning we might check the migration
* threshold when we shouldn't -- worst case, we stop or move the resource,
* then move it back next transition.
*/
if (failcount_clear_action_exists(node, rsc) == FALSE) {
check_migration_threshold(rsc, node, data_set);
}
}
void
complex_set_cmds(resource_t * rsc)
{
GListPtr gIter = rsc->children;
rsc->cmds = &resource_class_alloc_functions[rsc->variant];
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
complex_set_cmds(child_rsc);
}
}
void
set_alloc_actions(pe_working_set_t * data_set)
{
GListPtr gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
complex_set_cmds(rsc);
}
}
static void
calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data)
{
const char *key = (const char *)gKey;
const char *value = (const char *)gValue;
int *system_health = (int *)user_data;
if (!gKey || !gValue || !user_data) {
return;
}
if (crm_starts_with(key, "#health")) {
int score;
/* Convert the value into an integer */
score = char2score(value);
/* Add it to the running total */
*system_health = merge_weights(score, *system_health);
}
}
static gboolean
apply_system_health(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy");
int base_health = 0;
if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) {
/* Prevent any accidental health -> score translation */
node_score_red = 0;
node_score_yellow = 0;
node_score_green = 0;
return TRUE;
} else if (safe_str_eq(health_strategy, "migrate-on-red")) {
/* Resources on nodes which have health values of red are
* weighted away from that node.
*/
node_score_red = -INFINITY;
node_score_yellow = 0;
node_score_green = 0;
} else if (safe_str_eq(health_strategy, "only-green")) {
/* Resources on nodes which have health values of red or yellow
* are forced away from that node.
*/
node_score_red = -INFINITY;
node_score_yellow = -INFINITY;
node_score_green = 0;
} else if (safe_str_eq(health_strategy, "progressive")) {
/* Same as the above, but use the r/y/g scores provided by the user
* Defaults are provided by the pe_prefs table
* Also, custom health "base score" can be used
*/
base_health = crm_parse_int(pe_pref(data_set->config_hash, "node-health-base"), "0");
} else if (safe_str_eq(health_strategy, "custom")) {
/* Requires the admin to configure the rsc_location constaints for
* processing the stored health scores
*/
/* TODO: Check for the existence of appropriate node health constraints */
return TRUE;
} else {
crm_err("Unknown node health strategy: %s", health_strategy);
return FALSE;
}
crm_info("Applying automated node health strategy: %s", health_strategy);
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
int system_health = base_health;
node_t *node = (node_t *) gIter->data;
/* Search through the node hash table for system health entries. */
g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health);
crm_info(" Node %s has an combined system health of %d",
node->details->uname, system_health);
/* If the health is non-zero, then create a new rsc2node so that the
* weight will be added later on.
*/
if (system_health != 0) {
GListPtr gIter2 = data_set->resources;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set);
}
}
}
return TRUE;
}
gboolean
stage0(pe_working_set_t * data_set)
{
xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);
if (data_set->input == NULL) {
return FALSE;
}
if (is_set(data_set->flags, pe_flag_have_status) == FALSE) {
crm_trace("Calculating status");
cluster_status(data_set);
}
set_alloc_actions(data_set);
apply_system_health(data_set);
unpack_constraints(cib_constraints, data_set);
return TRUE;
}
/*
* Check nodes for resources started outside of the LRM
*/
gboolean
probe_resources(pe_working_set_t * data_set)
{
action_t *probe_node_complete = NULL;
for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
if (node->details->online == FALSE) {
if (pe__is_remote_node(node) && node->details->remote_rsc
&& (get_remote_node_state(node) == remote_state_failed)) {
pe_fence_node(data_set, node, "the connection is unrecoverable");
}
continue;
} else if (node->details->unclean) {
continue;
} else if (node->details->rsc_discovery_enabled == FALSE) {
/* resource discovery is disabled for this node */
continue;
}
if (probed != NULL && crm_is_true(probed) == FALSE) {
action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname),
CRM_OP_REPROBE, node, FALSE, TRUE, data_set);
add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
continue;
}
for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set);
}
}
return TRUE;
}
static void
rsc_discover_filter(resource_t *rsc, node_t *node)
{
GListPtr gIter = rsc->children;
resource_t *top = uber_parent(rsc);
node_t *match;
if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) {
return;
}
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
rsc_discover_filter(child_rsc, node);
}
match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (match && match->rsc_discover_mode != pe_discover_exclusive) {
match->weight = -INFINITY;
}
}
static time_t
shutdown_time(pe_node_t *node, pe_working_set_t *data_set)
{
const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
time_t result = 0;
if (shutdown) {
errno = 0;
result = (time_t) crm_int_helper(shutdown, NULL);
if (errno != 0) {
result = 0;
}
}
return result? result : get_effective_time(data_set);
}
static void
apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set)
{
const char *class;
// Only primitives and (uncloned) groups may be locked
if (rsc->variant == pe_group) {
for (GList *item = rsc->children; item != NULL;
item = item->next) {
apply_shutdown_lock((pe_resource_t *) item->data, data_set);
}
} else if (rsc->variant != pe_native) {
return;
}
// Fence devices and remote connections can't be locked
class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if ((class == NULL) || !strcmp(class, PCMK_RESOURCE_CLASS_STONITH)
|| pe__resource_is_remote_conn(rsc, data_set)) {
return;
}
if (rsc->lock_node != NULL) {
// The lock was obtained from resource history
if (rsc->running_on != NULL) {
/* The resource was started elsewhere even though it is now
* considered locked. This shouldn't be possible, but as a
* failsafe, we don't want to disturb the resource now.
*/
pe_rsc_info(rsc,
"Cancelling shutdown lock because %s is already active",
rsc->id);
pe__clear_resource_history(rsc, rsc->lock_node, data_set);
rsc->lock_node = NULL;
rsc->lock_time = 0;
}
// Only a resource active on exactly one node can be locked
} else if (pcmk__list_of_1(rsc->running_on)) {
pe_node_t *node = rsc->running_on->data;
if (node->details->shutdown) {
if (node->details->unclean) {
pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
rsc->id, node->details->uname);
} else {
rsc->lock_node = node;
rsc->lock_time = shutdown_time(node, data_set);
}
}
}
if (rsc->lock_node == NULL) {
// No lock needed
return;
}
if (data_set->shutdown_lock > 0) {
time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock;
pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
rsc->id, rsc->lock_node->details->uname,
(long long) lock_expiration);
pe__update_recheck_time(++lock_expiration, data_set);
} else {
pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
rsc->id, rsc->lock_node->details->uname);
}
// If resource is locked to one node, ban it from all other nodes
for (GList *item = data_set->nodes; item != NULL; item = item->next) {
pe_node_t *node = item->data;
if (strcmp(node->details->uname, rsc->lock_node->details->uname)) {
resource_location(rsc, node, -CRM_SCORE_INFINITY,
XML_CONFIG_ATTR_SHUTDOWN_LOCK, data_set);
}
}
}
/*
* \internal
* \brief Stage 2 of cluster status: apply node-specific criteria
*
* Count known nodes, and apply location constraints, stickiness, and exclusive
* resource discovery.
*/
gboolean
stage2(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
apply_shutdown_lock((pe_resource_t *) gIter->data, data_set);
}
}
- for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
- node_t *node = (node_t *) gIter->data;
-
- if (node == NULL) {
- /* error */
-
- } else if (node->weight >= 0.0 /* global weight */
- && node->details->online && node->details->type != node_ping) {
- data_set->max_valid_nodes++;
+ if (is_not_set(data_set->flags, pe_flag_no_compat)) {
+ // @COMPAT API backward compatibility
+ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
+ pe_node_t *node = (pe_node_t *) gIter->data;
+
+ if (node && (node->weight >= 0) && node->details->online
+ && (node->details->type != node_ping)) {
+ data_set->max_valid_nodes++;
+ }
}
}
+ crm_trace("Applying placement constraints");
apply_placement_constraints(data_set);
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
GListPtr gIter2 = NULL;
node_t *node = (node_t *) gIter->data;
gIter2 = data_set->resources;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
common_apply_stickiness(rsc, node, data_set);
rsc_discover_filter(rsc, node);
}
}
return TRUE;
}
/*
* Create internal resource constraints before allocation
*/
gboolean
stage3(pe_working_set_t * data_set)
{
GListPtr gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
rsc->cmds->internal_constraints(rsc, data_set);
}
return TRUE;
}
/*
* Check for orphaned or redefined actions
*/
gboolean
stage4(pe_working_set_t * data_set)
{
check_actions(data_set);
return TRUE;
}
static void *
convert_const_pointer(const void *ptr)
{
/* Worst function ever */
return (void *)ptr;
}
static gint
sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data)
{
int rc = 0;
int r1_weight = -INFINITY;
int r2_weight = -INFINITY;
const char *reason = "existence";
const GListPtr nodes = (GListPtr) data;
const resource_t *resource1 = a;
const resource_t *resource2 = b;
node_t *r1_node = NULL;
node_t *r2_node = NULL;
GListPtr gIter = NULL;
GHashTable *r1_nodes = NULL;
GHashTable *r2_nodes = NULL;
if (a == NULL && b == NULL) {
goto done;
}
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
reason = "priority";
r1_weight = resource1->priority;
r2_weight = resource2->priority;
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
reason = "no node list";
if (nodes == NULL) {
goto done;
}
r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1),
resource1->id, NULL, NULL, 1,
pe_weights_forward | pe_weights_init);
pe__show_node_weights(true, NULL, resource1->id, r1_nodes);
r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2),
resource2->id, NULL, NULL, 1,
pe_weights_forward | pe_weights_init);
pe__show_node_weights(true, NULL, resource2->id, r2_nodes);
/* Current location score */
reason = "current location";
r1_weight = -INFINITY;
r2_weight = -INFINITY;
if (resource1->running_on) {
r1_node = pe__current_node(resource1);
r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id);
if (r1_node != NULL) {
r1_weight = r1_node->weight;
}
}
if (resource2->running_on) {
r2_node = pe__current_node(resource2);
r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id);
if (r2_node != NULL) {
r2_weight = r2_node->weight;
}
}
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
reason = "score";
for (gIter = nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
r1_node = NULL;
r2_node = NULL;
r1_weight = -INFINITY;
if (r1_nodes) {
r1_node = g_hash_table_lookup(r1_nodes, node->details->id);
}
if (r1_node) {
r1_weight = r1_node->weight;
}
r2_weight = -INFINITY;
if (r2_nodes) {
r2_node = g_hash_table_lookup(r2_nodes, node->details->id);
}
if (r2_node) {
r2_weight = r2_node->weight;
}
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
}
done:
crm_trace("%s (%d) on %s %c %s (%d) on %s: %s",
resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a",
rc < 0 ? '>' : rc > 0 ? '<' : '=',
resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason);
if (r1_nodes) {
g_hash_table_destroy(r1_nodes);
}
if (r2_nodes) {
g_hash_table_destroy(r2_nodes);
}
return rc;
}
static void
allocate_resources(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
/* Allocate remote connection resources first (which will also allocate
* any colocation dependencies). If the connection is migrating, always
* prefer the partial migration target.
*/
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (rsc->is_remote_node == FALSE) {
continue;
}
pe_rsc_trace(rsc, "Allocating remote connection resource '%s'",
rsc->id);
rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set);
}
}
/* now do the rest of the resources */
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (rsc->is_remote_node == TRUE) {
continue;
}
pe_rsc_trace(rsc, "Allocating resource '%s'", rsc->id);
rsc->cmds->allocate(rsc, NULL, data_set);
}
}
/* We always use pe_order_preserve with these convenience functions to exempt
* internally generated constraints from the prohibition of user constraints
* involving remote connection resources.
*
* The start ordering additionally uses pe_order_runnable_left so that the
* specified action is not runnable if the start is not runnable.
*/
static inline void
order_start_then_action(resource_t *lh_rsc, action_t *rh_action,
enum pe_ordering extra, pe_working_set_t *data_set)
{
if (lh_rsc && rh_action && data_set) {
custom_action_order(lh_rsc, start_key(lh_rsc), NULL,
rh_action->rsc, NULL, rh_action,
pe_order_preserve | pe_order_runnable_left | extra,
data_set);
}
}
static inline void
order_action_then_stop(action_t *lh_action, resource_t *rh_rsc,
enum pe_ordering extra, pe_working_set_t *data_set)
{
if (lh_action && rh_rsc && data_set) {
custom_action_order(lh_action->rsc, NULL, lh_action,
rh_rsc, stop_key(rh_rsc), NULL,
pe_order_preserve | extra, data_set);
}
}
// Clear fail counts for orphaned rsc on all online nodes
static void
cleanup_orphans(resource_t * rsc, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
if (node->details->online
&& pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
data_set)) {
pe_action_t *clear_op = NULL;
clear_op = pe__clear_failcount(rsc, node, "it is orphaned",
data_set);
/* We can't use order_action_then_stop() here because its
* pe_order_preserve breaks things
*/
custom_action_order(clear_op->rsc, NULL, clear_op,
rsc, stop_key(rsc), NULL,
pe_order_optional, data_set);
}
}
}
gboolean
stage5(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
int log_prio = show_utilization? LOG_STDOUT : utilization_log_level;
if (safe_str_neq(data_set->placement_strategy, "default")) {
GListPtr nodes = g_list_copy(data_set->nodes);
nodes = sort_nodes_by_weight(nodes, NULL, data_set);
data_set->resources =
g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes);
g_list_free(nodes);
}
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
dump_node_capacity(log_prio, "Original", node);
}
crm_trace("Allocating services");
/* Take (next) highest resource, assign it and create its actions */
allocate_resources(data_set);
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
dump_node_capacity(log_prio, "Remaining", node);
}
// Process deferred action checks
pe__foreach_param_check(data_set, check_params);
pe__free_param_checks(data_set);
if (is_set(data_set->flags, pe_flag_startup_probes)) {
crm_trace("Calculating needed probes");
/* This code probably needs optimization
* ptest -x with 100 nodes, 100 clones and clone-max=100:
With probes:
ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions
ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services
ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes
ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions
ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done
ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
36s
ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
Without probes:
ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions
ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services
ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
*/
probe_resources(data_set);
}
crm_trace("Handle orphans");
if (is_set(data_set->flags, pe_flag_stop_rsc_orphans)) {
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
pe_resource_t *rsc = (pe_resource_t *) gIter->data;
/* There's no need to recurse into rsc->children because those
* should just be unallocated clone instances.
*/
if (is_set(rsc->flags, pe_rsc_orphan)) {
cleanup_orphans(rsc, data_set);
}
}
}
crm_trace("Creating actions");
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
rsc->cmds->create_actions(rsc, data_set);
}
crm_trace("Creating done");
return TRUE;
}
static gboolean
is_managed(const resource_t * rsc)
{
GListPtr gIter = rsc->children;
if (is_set(rsc->flags, pe_rsc_managed)) {
return TRUE;
}
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
if (is_managed(child_rsc)) {
return TRUE;
}
}
return FALSE;
}
static gboolean
any_managed_resources(pe_working_set_t * data_set)
{
GListPtr gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (is_managed(rsc)) {
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Create pseudo-op for guest node fence, and order relative to it
*
* \param[in] node Guest node to fence
* \param[in] data_set Working set of CIB state
*/
static void
fence_guest(pe_node_t *node, pe_working_set_t *data_set)
{
resource_t *container = node->details->remote_rsc->container;
pe_action_t *stop = NULL;
pe_action_t *stonith_op = NULL;
/* The fence action is just a label; we don't do anything differently for
* off vs. reboot. We specify it explicitly, rather than let it default to
* cluster's default action, because we are not _initiating_ fencing -- we
* are creating a pseudo-event to describe fencing that is already occurring
* by other means (container recovery).
*/
const char *fence_action = "off";
/* Check whether guest's container resource has any explicit stop or
* start (the stop may be implied by fencing of the guest's host).
*/
if (container) {
stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL);
if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) {
fence_action = "reboot";
}
}
/* Create a fence pseudo-event, so we have an event to order actions
* against, and the controller can always detect it.
*/
stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", data_set);
update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable,
__FUNCTION__, __LINE__);
/* We want to imply stops/demotes after the guest is stopped, not wait until
* it is restarted, so we always order pseudo-fencing after stop, not start
* (even though start might be closer to what is done for a real reboot).
*/
if(stop && is_set(stop->flags, pe_action_pseudo)) {
pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, data_set);
crm_info("Implying guest node %s is down (action %d) after %s fencing",
node->details->uname, stonith_op->id, stop->node->details->uname);
order_actions(parent_stonith_op, stonith_op,
pe_order_runnable_left|pe_order_implies_then);
} else if (stop) {
order_actions(stop, stonith_op,
pe_order_runnable_left|pe_order_implies_then);
crm_info("Implying guest node %s is down (action %d) "
"after container %s is stopped (action %d)",
node->details->uname, stonith_op->id,
container->id, stop->id);
} else {
/* If we're fencing the guest node but there's no stop for the guest
* resource, we must think the guest is already stopped. However, we may
* think so because its resource history was just cleaned. To avoid
* unnecessarily considering the guest node down if it's really up,
* order the pseudo-fencing after any stop of the connection resource,
* which will be ordered after any container (re-)probe.
*/
stop = find_first_action(node->details->remote_rsc->actions, NULL,
RSC_STOP, NULL);
if (stop) {
order_actions(stop, stonith_op, pe_order_optional);
crm_info("Implying guest node %s is down (action %d) "
"after connection is stopped (action %d)",
node->details->uname, stonith_op->id, stop->id);
} else {
/* Not sure why we're fencing, but everything must already be
* cleanly stopped.
*/
crm_info("Implying guest node %s is down (action %d) ",
node->details->uname, stonith_op->id);
}
}
/* Order/imply other actions relative to pseudo-fence as with real fence */
pcmk__order_vs_fence(stonith_op, data_set);
}
/*
* Create dependencies for stonith and shutdown operations
*/
gboolean
stage6(pe_working_set_t * data_set)
{
action_t *dc_down = NULL;
action_t *stonith_op = NULL;
gboolean integrity_lost = FALSE;
gboolean need_stonith = TRUE;
GListPtr gIter;
GListPtr stonith_ops = NULL;
GList *shutdown_ops = NULL;
/* Remote ordering constraints need to happen prior to calculating fencing
* because it is one more place we will mark the node as dirty.
*
* A nice side effect of doing them early is that apply_*_ordering() can be
* simpler because pe_fence_node() has already done some of the work.
*/
crm_trace("Creating remote ordering constraints");
apply_remote_node_ordering(data_set);
crm_trace("Processing fencing and shutdown cases");
if (any_managed_resources(data_set) == FALSE) {
crm_notice("Delaying fencing operations until there are resources to manage");
need_stonith = FALSE;
}
/* Check each node for stonith/shutdown */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
/* Guest nodes are "fenced" by recovering their container resource,
* so handle them separately.
*/
if (pe__is_guest_node(node)) {
if (node->details->remote_requires_reset && need_stonith
&& pe_can_fence(data_set, node)) {
fence_guest(node, data_set);
}
continue;
}
stonith_op = NULL;
if (node->details->unclean
&& need_stonith && pe_can_fence(data_set, node)) {
stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", data_set);
pe_warn("Scheduling Node %s for STONITH", node->details->uname);
pcmk__order_vs_fence(stonith_op, data_set);
if (node->details->is_dc) {
// Remember if the DC is being fenced
dc_down = stonith_op;
} else {
if (is_not_set(data_set->flags, pe_flag_concurrent_fencing)
&& (stonith_ops != NULL)) {
/* Concurrent fencing is disabled, so order each non-DC
* fencing in a chain. If there is any DC fencing or
* shutdown, it will be ordered after the last action in the
* chain later.
*/
order_actions((pe_action_t *) stonith_ops->data,
stonith_op, pe_order_optional);
}
// Remember all non-DC fencing actions in a separate list
stonith_ops = g_list_prepend(stonith_ops, stonith_op);
}
} else if (node->details->online && node->details->shutdown &&
/* TODO define what a shutdown op means for a remote node.
* For now we do not send shutdown operations for remote nodes, but
* if we can come up with a good use for this in the future, we will. */
pe__is_guest_or_remote_node(node) == FALSE) {
action_t *down_op = sched_shutdown_op(node, data_set);
if (node->details->is_dc) {
// Remember if the DC is being shut down
dc_down = down_op;
} else {
// Remember non-DC shutdowns for later ordering
shutdown_ops = g_list_prepend(shutdown_ops, down_op);
}
}
if (node->details->unclean && stonith_op == NULL) {
integrity_lost = TRUE;
pe_warn("Node %s is unclean!", node->details->uname);
}
}
if (integrity_lost) {
if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");
} else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) {
crm_notice("Cannot fence unclean nodes until quorum is"
" attained (or no-quorum-policy is set to ignore)");
}
}
if (dc_down != NULL) {
/* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
* DC elections. However, we don't want to order non-DC shutdowns before
* a DC *fencing*, because even though we don't want a node that's
* shutting down to become DC, the DC fencing could be ordered before a
* clone stop that's also ordered before the shutdowns, thus leading to
* a graph loop.
*/
if (safe_str_eq(dc_down->task, CRM_OP_SHUTDOWN)) {
for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) {
action_t *node_stop = (action_t *) gIter->data;
crm_debug("Ordering shutdown on %s before %s on DC %s",
node_stop->node->details->uname,
dc_down->task, dc_down->node->details->uname);
order_actions(node_stop, dc_down, pe_order_optional);
}
}
// Order any non-DC fencing before any DC fencing or shutdown
if (is_set(data_set->flags, pe_flag_concurrent_fencing)) {
/* With concurrent fencing, order each non-DC fencing action
* separately before any DC fencing or shutdown.
*/
for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) {
order_actions((pe_action_t *) gIter->data, dc_down,
pe_order_optional);
}
} else if (stonith_ops) {
/* Without concurrent fencing, the non-DC fencing actions are
* already ordered relative to each other, so we just need to order
* the DC fencing after the last action in the chain (which is the
* first item in the list).
*/
order_actions((pe_action_t *) stonith_ops->data, dc_down,
pe_order_optional);
}
}
g_list_free(stonith_ops);
g_list_free(shutdown_ops);
return TRUE;
}
/*
* Determine the sets of independent actions and the correct order for the
* actions in each set.
*
* Mark dependencies of un-runnable actions un-runnable
*
*/
static GListPtr
find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key)
{
GListPtr list = NULL;
list = find_actions(actions, original_key, NULL);
if (list == NULL) {
/* we're potentially searching a child of the original resource */
char *key = NULL;
char *task = NULL;
guint interval_ms = 0;
if (parse_op_key(original_key, NULL, &task, &interval_ms)) {
key = pcmk__op_key(rsc->id, task, interval_ms);
list = find_actions(actions, key, NULL);
} else {
crm_err("search key: %s", original_key);
}
free(key);
free(task);
}
return list;
}
static void
rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc,
pe__ordering_t *order)
{
GListPtr gIter = NULL;
GListPtr rh_actions = NULL;
action_t *rh_action = NULL;
enum pe_ordering type;
CRM_CHECK(rsc != NULL, return);
CRM_CHECK(order != NULL, return);
type = order->type;
rh_action = order->rh_action;
crm_trace("Processing RH of ordering constraint %d", order->id);
if (rh_action != NULL) {
rh_actions = g_list_prepend(NULL, rh_action);
} else if (rsc != NULL) {
rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task);
}
if (rh_actions == NULL) {
pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..."
" ignoring", rsc->id, order->rh_action_task);
if (lh_action) {
pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid);
}
return;
}
if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) {
pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid,
order->rh_action_task);
clear_bit(type, pe_order_implies_then);
}
gIter = rh_actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *rh_action_iter = (action_t *) gIter->data;
if (lh_action) {
order_actions(lh_action, rh_action_iter, type);
} else if (type & pe_order_implies_then) {
update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type);
} else {
crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type);
}
}
g_list_free(rh_actions);
}
static void
rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order,
pe_working_set_t *data_set)
{
GListPtr gIter = NULL;
GListPtr lh_actions = NULL;
action_t *lh_action = order->lh_action;
resource_t *rh_rsc = order->rh_rsc;
crm_trace("Processing LH of ordering constraint %d", order->id);
CRM_ASSERT(lh_rsc != NULL);
if (lh_action != NULL) {
lh_actions = g_list_prepend(NULL, lh_action);
} else {
lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task);
}
if (lh_actions == NULL && lh_rsc != rh_rsc) {
char *key = NULL;
char *op_type = NULL;
guint interval_ms = 0;
parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms);
key = pcmk__op_key(lh_rsc->id, op_type, interval_ms);
if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) {
free(key);
pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
} else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(op_type, RSC_DEMOTE)) {
free(key);
pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
} else {
pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating",
lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set);
lh_actions = g_list_prepend(NULL, lh_action);
}
free(op_type);
}
gIter = lh_actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *lh_action_iter = (action_t *) gIter->data;
if (rh_rsc == NULL && order->rh_action) {
rh_rsc = order->rh_action->rsc;
}
if (rh_rsc) {
rsc_order_then(lh_action_iter, rh_rsc, order);
} else if (order->rh_action) {
order_actions(lh_action_iter, order->rh_action, order->type);
}
}
g_list_free(lh_actions);
}
extern void update_colo_start_chain(pe_action_t *action,
pe_working_set_t *data_set);
static int
is_recurring_action(action_t *action)
{
const char *interval_ms_s = g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS);
guint interval_ms = crm_parse_ms(interval_ms_s);
return (interval_ms > 0);
}
static void
apply_container_ordering(action_t *action, pe_working_set_t *data_set)
{
/* VMs are also classified as containers for these purposes... in
* that they both involve a 'thing' running on a real or remote
* cluster node.
*
* This allows us to be smarter about the type and extent of
* recovery actions required in various scenarios
*/
resource_t *remote_rsc = NULL;
resource_t *container = NULL;
enum action_tasks task = text2task(action->task);
CRM_ASSERT(action->rsc);
CRM_ASSERT(action->node);
CRM_ASSERT(pe__is_guest_or_remote_node(action->node));
remote_rsc = action->node->details->remote_rsc;
CRM_ASSERT(remote_rsc);
container = remote_rsc->container;
CRM_ASSERT(container);
if(is_set(container->flags, pe_rsc_failed)) {
pe_fence_node(data_set, action->node, "container failed");
}
crm_trace("Order %s action %s relative to %s%s for %s%s",
action->task, action->uuid,
is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
remote_rsc->id,
is_set(container->flags, pe_rsc_failed)? "failed " : "",
container->id);
if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
|| safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) {
/* Migration ops map to "no_action", but we need to apply the same
* ordering as for stop or demote (see get_router_node()).
*/
task = stop_rsc;
}
switch (task) {
case start_rsc:
case action_promote:
/* Force resource recovery if the container is recovered */
order_start_then_action(container, action, pe_order_implies_then,
data_set);
/* Wait for the connection resource to be up too */
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
break;
case stop_rsc:
case action_demote:
if (is_set(container->flags, pe_rsc_failed)) {
/* When the container representing a guest node fails, any stop
* or demote actions for resources running on the guest node
* are implied by the container stopping. This is similar to
* how fencing operations work for cluster nodes and remote
* nodes.
*/
} else {
/* Ensure the operation happens before the connection is brought
* down.
*
* If we really wanted to, we could order these after the
* connection start, IFF the container's current role was
* stopped (otherwise we re-introduce an ordering loop when the
* connection is restarting).
*/
order_action_then_stop(action, remote_rsc, pe_order_none,
data_set);
}
break;
default:
/* Wait for the connection resource to be up */
if (is_recurring_action(action)) {
/* In case we ever get the recovery logic wrong, force
* recurring monitors to be restarted, even if just
* the connection was re-established
*/
if(task != no_action) {
order_start_then_action(remote_rsc, action,
pe_order_implies_then, data_set);
}
} else {
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
}
break;
}
}
static enum remote_connection_state
get_remote_node_state(pe_node_t *node)
{
resource_t *remote_rsc = NULL;
node_t *cluster_node = NULL;
CRM_ASSERT(node);
remote_rsc = node->details->remote_rsc;
CRM_ASSERT(remote_rsc);
cluster_node = pe__current_node(remote_rsc);
/* If the cluster node the remote connection resource resides on
* is unclean or went offline, we can't process any operations
* on that remote node until after it starts elsewhere.
*/
if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) {
/* The connection resource is not going to run anywhere */
if (cluster_node && cluster_node->details->unclean) {
/* The remote connection is failed because its resource is on a
* failed node and can't be recovered elsewhere, so we must fence.
*/
return remote_state_failed;
}
if (is_not_set(remote_rsc->flags, pe_rsc_failed)) {
/* Connection resource is cleanly stopped */
return remote_state_stopped;
}
/* Connection resource is failed */
if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
&& remote_rsc->remote_reconnect_ms
&& node->details->remote_was_fenced
&& !pe__shutdown_requested(node)) {
/* We won't know whether the connection is recoverable until the
* reconnect interval expires and we reattempt connection.
*/
return remote_state_unknown;
}
/* The remote connection is in a failed state. If there are any
* resources known to be active on it (stop) or in an unknown state
* (probe), we must assume the worst and fence it.
*/
return remote_state_failed;
} else if (cluster_node == NULL) {
/* Connection is recoverable but not currently running anywhere, see if we can recover it first */
return remote_state_unknown;
} else if(cluster_node->details->unclean == TRUE
|| cluster_node->details->online == FALSE) {
/* Connection is running on a dead node, see if we can recover it first */
return remote_state_resting;
} else if (pcmk__list_of_multiple(remote_rsc->running_on)
&& remote_rsc->partial_migration_source
&& remote_rsc->partial_migration_target) {
/* We're in the middle of migrating a connection resource,
* wait until after the resource migrates before performing
* any actions.
*/
return remote_state_resting;
}
return remote_state_alive;
}
/*!
* \internal
* \brief Order actions on remote node relative to actions for the connection
*/
static void
apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
{
resource_t *remote_rsc = NULL;
enum action_tasks task = text2task(action->task);
enum remote_connection_state state = get_remote_node_state(action->node);
enum pe_ordering order_opts = pe_order_none;
if (action->rsc == NULL) {
return;
}
CRM_ASSERT(action->node);
CRM_ASSERT(pe__is_guest_or_remote_node(action->node));
remote_rsc = action->node->details->remote_rsc;
CRM_ASSERT(remote_rsc);
crm_trace("Order %s action %s relative to %s%s (state: %s)",
action->task, action->uuid,
is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
remote_rsc->id, state2text(state));
if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
|| safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) {
/* Migration ops map to "no_action", but we need to apply the same
* ordering as for stop or demote (see get_router_node()).
*/
task = stop_rsc;
}
switch (task) {
case start_rsc:
case action_promote:
order_opts = pe_order_none;
if (state == remote_state_failed) {
/* Force recovery, by making this action required */
order_opts |= pe_order_implies_then;
}
/* Ensure connection is up before running this action */
order_start_then_action(remote_rsc, action, order_opts, data_set);
break;
case stop_rsc:
if(state == remote_state_alive) {
order_action_then_stop(action, remote_rsc,
pe_order_implies_first, data_set);
} else if(state == remote_state_failed) {
/* The resource is active on the node, but since we don't have a
* valid connection, the only way to stop the resource is by
* fencing the node. There is no need to order the stop relative
* to the remote connection, since the stop will become implied
* by the fencing.
*/
pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable");
} else if(remote_rsc->next_role == RSC_ROLE_STOPPED) {
/* State must be remote_state_unknown or remote_state_stopped.
* Since the connection is not coming back up in this
* transition, stop this resource first.
*/
order_action_then_stop(action, remote_rsc,
pe_order_implies_first, data_set);
} else {
/* The connection is going to be started somewhere else, so
* stop this resource after that completes.
*/
order_start_then_action(remote_rsc, action, pe_order_none, data_set);
}
break;
case action_demote:
/* Only order this demote relative to the connection start if the
* connection isn't being torn down. Otherwise, the demote would be
* blocked because the connection start would not be allowed.
*/
if(state == remote_state_resting || state == remote_state_unknown) {
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
} /* Otherwise we can rely on the stop ordering */
break;
default:
/* Wait for the connection resource to be up */
if (is_recurring_action(action)) {
/* In case we ever get the recovery logic wrong, force
* recurring monitors to be restarted, even if just
* the connection was re-established
*/
order_start_then_action(remote_rsc, action,
pe_order_implies_then, data_set);
} else {
node_t *cluster_node = pe__current_node(remote_rsc);
if(task == monitor_rsc && state == remote_state_failed) {
/* We would only be here if we do not know the
* state of the resource on the remote node.
* Since we have no way to find out, it is
* necessary to fence the node.
*/
pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable");
}
if(cluster_node && state == remote_state_stopped) {
/* The connection is currently up, but is going
* down permanently.
*
* Make sure we check services are actually
* stopped _before_ we let the connection get
* closed
*/
order_action_then_stop(action, remote_rsc,
pe_order_runnable_left, data_set);
} else {
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
}
}
break;
}
}
static void
apply_remote_node_ordering(pe_working_set_t *data_set)
{
if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
return;
}
for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
resource_t *remote = NULL;
// We are only interested in resource actions
if (action->rsc == NULL) {
continue;
}
/* Special case: If we are clearing the failcount of an actual
* remote connection resource, then make sure this happens before
* any start of the resource in this transition.
*/
if (action->rsc->is_remote_node &&
safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) {
custom_action_order(action->rsc,
NULL,
action,
action->rsc,
pcmk__op_key(action->rsc->id, RSC_START, 0),
NULL,
pe_order_optional,
data_set);
continue;
}
// We are only interested in actions allocated to a node
if (action->node == NULL) {
continue;
}
if (!pe__is_guest_or_remote_node(action->node)) {
continue;
}
/* We are only interested in real actions.
*
* @TODO This is probably wrong; pseudo-actions might be converted to
* real actions and vice versa later in update_actions() at the end of
* stage7().
*/
if (is_set(action->flags, pe_action_pseudo)) {
continue;
}
remote = action->node->details->remote_rsc;
if (remote == NULL) {
// Orphaned
continue;
}
/* Another special case: if a resource is moving to a Pacemaker Remote
* node, order the stop on the original node after any start of the
* remote connection. This ensures that if the connection fails to
* start, we leave the resource running on the original node.
*/
if (safe_str_eq(action->task, RSC_START)) {
for (GList *item = action->rsc->actions; item != NULL;
item = item->next) {
pe_action_t *rsc_action = item->data;
if ((rsc_action->node->details != action->node->details)
&& safe_str_eq(rsc_action->task, RSC_STOP)) {
custom_action_order(remote, start_key(remote), NULL,
action->rsc, NULL, rsc_action,
pe_order_optional, data_set);
}
}
}
/* The action occurs across a remote connection, so create
* ordering constraints that guarantee the action occurs while the node
* is active (after start, before stop ... things like that).
*
* This is somewhat brittle in that we need to make sure the results of
* this ordering are compatible with the result of get_router_node().
* It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
* of this logic rather than action2xml().
*/
if (remote->container) {
crm_trace("Container ordering for %s", action->uuid);
apply_container_ordering(action, data_set);
} else {
crm_trace("Remote ordering for %s", action->uuid);
apply_remote_ordering(action, data_set);
}
}
}
static gboolean
order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action)
{
/* No need to probe the resource on the node that is being
* unfenced. Otherwise it might introduce transition loop
* since probe will be performed after the node is
* unfenced.
*/
if (safe_str_eq(rh_action->task, CRM_OP_FENCE)
&& probe->node && rh_action->node
&& probe->node->details == rh_action->node->details) {
const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action");
if (safe_str_eq(op, "on")) {
return TRUE;
}
}
// Shutdown waits for probe to complete only if it's on the same node
if ((safe_str_eq(rh_action->task, CRM_OP_SHUTDOWN))
&& probe->node && rh_action->node
&& probe->node->details != rh_action->node->details) {
return TRUE;
}
return FALSE;
}
static void
order_first_probes_imply_stops(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
pe__ordering_t *order = gIter->data;
enum pe_ordering order_type = pe_order_optional;
pe_resource_t *lh_rsc = order->lh_rsc;
pe_resource_t *rh_rsc = order->rh_rsc;
pe_action_t *lh_action = order->lh_action;
pe_action_t *rh_action = order->rh_action;
const char *lh_action_task = order->lh_action_task;
const char *rh_action_task = order->rh_action_task;
GListPtr probes = NULL;
GListPtr rh_actions = NULL;
GListPtr pIter = NULL;
if (lh_rsc == NULL) {
continue;
} else if (rh_rsc && lh_rsc == rh_rsc) {
continue;
}
if (lh_action == NULL && lh_action_task == NULL) {
continue;
}
if (rh_action == NULL && rh_action_task == NULL) {
continue;
}
/* Technically probe is expected to return "not running", which could be
* the alternative of stop action if the status of the resource is
* unknown yet.
*/
if (lh_action && safe_str_neq(lh_action->task, RSC_STOP)) {
continue;
} else if (lh_action == NULL
&& lh_action_task
&& crm_ends_with(lh_action_task, "_" RSC_STOP "_0") == FALSE) {
continue;
}
/* Do not probe the resource inside of a stopping container. Otherwise
* it might introduce transition loop since probe will be performed
* after the container starts again.
*/
if (rh_rsc && lh_rsc->container == rh_rsc) {
if (rh_action && safe_str_eq(rh_action->task, RSC_STOP)) {
continue;
} else if (rh_action == NULL && rh_action_task
&& crm_ends_with(rh_action_task,"_" RSC_STOP "_0")) {
continue;
}
}
if (order->type == pe_order_none) {
continue;
}
// Preserve the order options for future filtering
if (is_set(order->type, pe_order_apply_first_non_migratable)) {
set_bit(order_type, pe_order_apply_first_non_migratable);
}
if (is_set(order->type, pe_order_same_node)) {
set_bit(order_type, pe_order_same_node);
}
// Keep the order types for future filtering
if (order->type == pe_order_anti_colocation
|| order->type == pe_order_load) {
order_type = order->type;
}
probes = pe__resource_actions(lh_rsc, NULL, RSC_STATUS, FALSE);
if (probes == NULL) {
continue;
}
if (rh_action) {
rh_actions = g_list_prepend(rh_actions, rh_action);
} else if (rh_rsc && rh_action_task) {
rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL);
}
if (rh_actions == NULL) {
g_list_free(probes);
continue;
}
crm_trace("Processing for LH probe based on ordering constraint %s -> %s"
" (id=%d, type=%.6x)",
lh_action ? lh_action->uuid : lh_action_task,
rh_action ? rh_action->uuid : rh_action_task,
order->id, order->type);
for (pIter = probes; pIter != NULL; pIter = pIter->next) {
pe_action_t *probe = (pe_action_t *) pIter->data;
GListPtr rIter = NULL;
for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) {
pe_action_t *rh_action_iter = (pe_action_t *) rIter->data;
if (order_first_probe_unneeded(probe, rh_action_iter)) {
continue;
}
order_actions(probe, rh_action_iter, order_type);
}
}
g_list_free(rh_actions);
g_list_free(probes);
}
}
static void
order_first_probe_then_restart_repromote(pe_action_t * probe,
pe_action_t * after,
pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
bool interleave = FALSE;
pe_resource_t *compatible_rsc = NULL;
if (probe == NULL
|| probe->rsc == NULL
|| probe->rsc->variant != pe_native) {
return;
}
if (after == NULL
// Avoid running into any possible loop
|| is_set(after->flags, pe_action_tracking)) {
return;
}
if (safe_str_neq(probe->task, RSC_STATUS)) {
return;
}
pe_set_action_bit(after, pe_action_tracking);
crm_trace("Processing based on %s %s -> %s %s",
probe->uuid,
probe->node ? probe->node->details->uname: "",
after->uuid,
after->node ? after->node->details->uname : "");
if (after->rsc
/* Better not build a dependency directly with a clone/group.
* We are going to proceed through the ordering chain and build
* dependencies with its children.
*/
&& after->rsc->variant == pe_native
&& probe->rsc != after->rsc) {
GListPtr then_actions = NULL;
enum pe_ordering probe_order_type = pe_order_optional;
if (safe_str_eq(after->task, RSC_START)) {
then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP, FALSE);
} else if (safe_str_eq(after->task, RSC_PROMOTE)) {
then_actions = pe__resource_actions(after->rsc, NULL, RSC_DEMOTE, FALSE);
}
for (gIter = then_actions; gIter != NULL; gIter = gIter->next) {
pe_action_t *then = (pe_action_t *) gIter->data;
// Skip any pseudo action which for example is implied by fencing
if (is_set(then->flags, pe_action_pseudo)) {
continue;
}
order_actions(probe, then, probe_order_type);
}
g_list_free(then_actions);
}
if (after->rsc
&& after->rsc->variant > pe_group) {
const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
XML_RSC_ATTR_INTERLEAVE);
interleave = crm_is_true(interleave_s);
if (interleave) {
/* For an interleaved clone, we should build a dependency only
* with the relevant clone child.
*/
compatible_rsc = find_compatible_child(probe->rsc,
after->rsc,
RSC_ROLE_UNKNOWN,
FALSE, data_set);
}
}
for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) {
pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) gIter->data;
/* pe_order_implies_then is the reason why a required A.start
* implies/enforces B.start to be required too, which is the cause of
* B.restart/re-promote.
*
* Not sure about pe_order_implies_then_on_node though. It's now only
* used for unfencing case, which tends to introduce transition
* loops...
*/
if (is_not_set(after_wrapper->type, pe_order_implies_then)) {
/* The order type between a group/clone and its child such as
* B.start-> B_child.start is:
* pe_order_implies_first_printed | pe_order_runnable_left
*
* Proceed through the ordering chain and build dependencies with
* its children.
*/
if (after->rsc == NULL
|| after->rsc->variant < pe_group
|| probe->rsc->parent == after->rsc
|| after_wrapper->action->rsc == NULL
|| after_wrapper->action->rsc->variant > pe_group
|| after->rsc != after_wrapper->action->rsc->parent) {
continue;
}
/* Proceed to the children of a group or a non-interleaved clone.
* For an interleaved clone, proceed only to the relevant child.
*/
if (after->rsc->variant > pe_group
&& interleave == TRUE
&& (compatible_rsc == NULL
|| compatible_rsc != after_wrapper->action->rsc)) {
continue;
}
}
crm_trace("Proceeding through %s %s -> %s %s (type=0x%.6x)",
after->uuid,
after->node ? after->node->details->uname: "",
after_wrapper->action->uuid,
after_wrapper->action->node ? after_wrapper->action->node->details->uname : "",
after_wrapper->type);
order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
}
}
static void clear_actions_tracking_flag(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (is_set(action->flags, pe_action_tracking)) {
pe_clear_action_bit(action, pe_action_tracking);
}
}
}
static void
order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
GListPtr probes = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t * child = (pe_resource_t *) gIter->data;
order_first_rsc_probes(child, data_set);
}
if (rsc->variant != pe_native) {
return;
}
probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
for (gIter = probes; gIter != NULL; gIter= gIter->next) {
pe_action_t *probe = (pe_action_t *) gIter->data;
GListPtr aIter = NULL;
for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) {
pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) aIter->data;
order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
clear_actions_tracking_flag(data_set);
}
}
g_list_free(probes);
}
static void
order_first_probes(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
pe_resource_t *rsc = (pe_resource_t *) gIter->data;
order_first_rsc_probes(rsc, data_set);
}
order_first_probes_imply_stops(data_set);
}
static void
order_then_probes(pe_working_set_t * data_set)
{
#if 0
GListPtr gIter = NULL;
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
/* Given "A then B", we would prefer to wait for A to be
* started before probing B.
*
* If A was a filesystem on which the binaries and data for B
* lived, it would have been useful if the author of B's agent
* could assume that A is running before B.monitor will be
* called.
*
* However we can't _only_ probe once A is running, otherwise
* we'd not detect the state of B if A could not be started
* for some reason.
*
* In practice however, we cannot even do an opportunistic
* version of this because B may be moving:
*
* B.probe -> B.start
* B.probe -> B.stop
* B.stop -> B.start
* A.stop -> A.start
* A.start -> B.probe
*
* So far so good, but if we add the result of this code:
*
* B.stop -> A.stop
*
* Then we get a loop:
*
* B.probe -> B.stop -> A.stop -> A.start -> B.probe
*
* We could kill the 'B.probe -> B.stop' dependency, but that
* could mean stopping B "too" soon, because B.start must wait
* for the probes to complete.
*
* Another option is to allow it only if A is a non-unique
* clone with clone-max == node-max (since we'll never be
* moving it). However, we could still be stopping one
* instance at the same time as starting another.
* The complexity of checking for allowed conditions combined
* with the ever narrowing usecase suggests that this code
* should remain disabled until someone gets smarter.
*/
action_t *start = NULL;
GListPtr actions = NULL;
GListPtr probes = NULL;
actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE);
if (actions) {
start = actions->data;
g_list_free(actions);
}
if(start == NULL) {
crm_err("No start action for %s", rsc->id);
continue;
}
probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
for (actions = start->actions_before; actions != NULL; actions = actions->next) {
action_wrapper_t *before = (action_wrapper_t *) actions->data;
GListPtr pIter = NULL;
action_t *first = before->action;
resource_t *first_rsc = first->rsc;
if(first->required_runnable_before) {
GListPtr clone_actions = NULL;
for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) {
before = (action_wrapper_t *) clone_actions->data;
crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid);
CRM_ASSERT(before->action->rsc);
first_rsc = before->action->rsc;
break;
}
} else if(safe_str_neq(first->task, RSC_START)) {
crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
}
if(first_rsc == NULL) {
continue;
} else if(uber_parent(first_rsc) == uber_parent(start->rsc)) {
crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
continue;
} else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) {
crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
continue;
}
crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant);
for (pIter = probes; pIter != NULL; pIter = pIter->next) {
action_t *probe = (action_t *) pIter->data;
crm_err("Ordering %s before %s", first->uuid, probe->uuid);
order_actions(first, probe, pe_order_optional);
}
}
}
#endif
}
static void
order_probes(pe_working_set_t * data_set)
{
order_first_probes(data_set);
order_then_probes(data_set);
}
gboolean
stage7(pe_working_set_t * data_set)
{
GList *gIter = NULL;
crm_trace("Applying ordering constraints");
/* Don't ask me why, but apparently they need to be processed in
* the order they were created in... go figure
*
* Also g_list_append() has horrendous performance characteristics
* So we need to use g_list_prepend() and then reverse the list here
*/
data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);
for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
pe__ordering_t *order = gIter->data;
resource_t *rsc = order->lh_rsc;
crm_trace("Applying ordering constraint: %d", order->id);
if (rsc != NULL) {
crm_trace("rsc_action-to-*");
rsc_order_first(rsc, order, data_set);
continue;
}
rsc = order->rh_rsc;
if (rsc != NULL) {
crm_trace("action-to-rsc_action");
rsc_order_then(order->lh_action, rsc, order);
} else {
crm_trace("action-to-action");
order_actions(order->lh_action, order->rh_action, order->type);
}
}
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
update_colo_start_chain(action, data_set);
}
crm_trace("Ordering probes");
order_probes(data_set);
crm_trace("Updating %d actions", g_list_length(data_set->actions));
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
update_action(action, data_set);
}
// Check for invalid orderings
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
pe_action_wrapper_t *input = NULL;
for (GList *input_iter = action->actions_before;
input_iter != NULL; input_iter = input_iter->next) {
input = (pe_action_wrapper_t *) input_iter->data;
if (pcmk__ordering_is_invalid(action, input)) {
input->type = pe_order_none;
}
}
}
LogNodeActions(data_set, FALSE);
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
LogActions(rsc, data_set, FALSE);
}
return TRUE;
}
static int transition_id = -1;
/*!
* \internal
* \brief Log a message after calculating a transition
*
* \param[in] filename Where transition input is stored
*/
void
pcmk__log_transition_summary(const char *filename)
{
if (was_processing_error) {
crm_err("Calculated transition %d (with errors), saving inputs in %s",
transition_id, filename);
} else if (was_processing_warning) {
crm_warn("Calculated transition %d (with warnings), saving inputs in %s",
transition_id, filename);
} else {
crm_notice("Calculated transition %d, saving inputs in %s",
transition_id, filename);
}
if (crm_config_error) {
crm_notice("Configuration errors found during scheduler processing,"
" please run \"crm_verify -L\" to identify issues");
}
}
/*
* Create a dependency graph to send to the transitioner (via the controller)
*/
gboolean
stage8(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
const char *value = NULL;
transition_id++;
crm_trace("Creating transition graph %d.", transition_id);
data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
value = pe_pref(data_set->config_hash, "cluster-delay");
crm_xml_add(data_set->graph, "cluster-delay", value);
value = pe_pref(data_set->config_hash, "stonith-timeout");
crm_xml_add(data_set->graph, "stonith-timeout", value);
crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
if (is_set(data_set->flags, pe_flag_start_failure_fatal)) {
crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
} else {
crm_xml_add(data_set->graph, "failed-start-offset", "1");
}
value = pe_pref(data_set->config_hash, "batch-limit");
crm_xml_add(data_set->graph, "batch-limit", value);
crm_xml_add_int(data_set->graph, "transition_id", transition_id);
value = pe_pref(data_set->config_hash, "migration-limit");
if (crm_int_helper(value, NULL) > 0) {
crm_xml_add(data_set->graph, "migration-limit", value);
}
if (data_set->recheck_by > 0) {
char *recheck_epoch = NULL;
recheck_epoch = crm_strdup_printf("%llu",
(long long) data_set->recheck_by);
crm_xml_add(data_set->graph, "recheck-by", recheck_epoch);
free(recheck_epoch);
}
/* errors...
slist_iter(action, action_t, action_list, lpc,
if(action->optional == FALSE && action->runnable == FALSE) {
print_action("Ignoring", action, TRUE);
}
);
*/
/* The following code will de-duplicate action inputs, so nothing past this
* should rely on the action input type flags retaining their original
* values.
*/
gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id);
rsc->cmds->expand(rsc, data_set);
}
crm_log_xml_trace(data_set->graph, "created resource-driven action list");
/* pseudo action to distribute list of nodes with maintenance state update */
add_maintenance_update(data_set);
/* catch any non-resource specific actions */
crm_trace("processing non-resource actions");
gIter = data_set->actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
if (action->rsc
&& action->node
&& action->node->details->shutdown
&& is_not_set(action->rsc->flags, pe_rsc_maintenance)
&& is_not_set(action->flags, pe_action_optional)
&& is_not_set(action->flags, pe_action_runnable)
&& crm_str_eq(action->task, RSC_STOP, TRUE)
) {
/* Eventually we should just ignore the 'fence' case
* But for now it's the best way to detect (in CTS) when
* CIB resource updates are being lost
*/
if (is_set(data_set->flags, pe_flag_have_quorum)
|| data_set->no_quorum_policy == no_quorum_ignore) {
crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)",
action->node->details->unclean ? "fence" : "shut down",
action->node->details->uname, action->rsc->id,
is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked",
is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "",
action->uuid);
}
}
graph_element_from_action(action, data_set);
}
crm_log_xml_trace(data_set->graph, "created generic action list");
crm_trace("Created transition graph %d.", transition_id);
return TRUE;
}
void
LogNodeActions(pe_working_set_t * data_set, gboolean terminal)
{
GListPtr gIter = NULL;
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
char *node_name = NULL;
char *task = NULL;
action_t *action = (action_t *) gIter->data;
if (action->rsc != NULL) {
continue;
} else if (is_set(action->flags, pe_action_optional)) {
continue;
}
if (pe__is_guest_node(action->node)) {
node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id);
} else if(action->node) {
node_name = crm_strdup_printf("%s", action->node->details->uname);
}
if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
task = strdup("Shutdown");
} else if (safe_str_eq(action->task, CRM_OP_FENCE)) {
const char *op = g_hash_table_lookup(action->meta, "stonith_action");
task = crm_strdup_printf("Fence (%s)", op);
}
if(task == NULL) {
/* Nothing to report */
} else if(terminal && action->reason) {
printf(" * %s %s '%s'\n", task, node_name, action->reason);
} else if(terminal) {
printf(" * %s %s\n", task, node_name);
} else if(action->reason) {
crm_notice(" * %s %s '%s'\n", task, node_name, action->reason);
} else {
crm_notice(" * %s %s\n", task, node_name);
}
free(node_name);
free(task);
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Dec 23, 12:23 PM (1 d, 13 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1128331
Default Alt Text
(125 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment