Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h
index b6052254b6..3569adb537 100644
--- a/include/crm/pengine/pe_types.h
+++ b/include/crm/pengine/pe_types.h
@@ -1,501 +1,503 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PE_TYPES__H
# define PE_TYPES__H
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \file
* \brief Data types for cluster status
* \ingroup pengine
*/
# include <stdbool.h> // bool
# include <sys/types.h> // time_t
# include <glib.h> // gboolean, guint, GList, GHashTable
# include <crm/crm.h> // GListPtr
# include <crm/common/iso8601.h>
# include <crm/pengine/common.h>
typedef struct pe_node_s pe_node_t;
typedef struct pe_action_s pe_action_t;
typedef struct pe_resource_s pe_resource_t;
typedef struct pe_working_set_s pe_working_set_t;
enum pe_obj_types {
pe_unknown = -1,
pe_native = 0,
pe_group = 1,
pe_clone = 2,
pe_container = 3,
};
typedef struct resource_object_functions_s {
gboolean (*unpack) (pe_resource_t*, pe_working_set_t*);
pe_resource_t *(*find_rsc) (pe_resource_t *parent, const char *search,
const pe_node_t *node, int flags);
/* parameter result must be free'd */
char *(*parameter) (pe_resource_t*, pe_node_t*, gboolean, const char*,
pe_working_set_t*);
void (*print) (pe_resource_t*, const char*, long, void*);
gboolean (*active) (pe_resource_t*, gboolean);
enum rsc_role_e (*state) (const pe_resource_t*, gboolean);
pe_node_t *(*location) (const pe_resource_t*, GList**, int);
void (*free) (pe_resource_t*);
} resource_object_functions_t;
typedef struct resource_alloc_functions_s resource_alloc_functions_t;
enum pe_quorum_policy {
no_quorum_freeze,
no_quorum_stop,
no_quorum_ignore,
no_quorum_suicide
};
enum node_type {
node_ping,
node_member,
node_remote
};
//! \deprecated will be removed in a future release
enum pe_restart {
pe_restart_restart,
pe_restart_ignore
};
//! Determine behavior of pe_find_resource_with_flags()
enum pe_find {
pe_find_renamed = 0x001, //!< match resource ID or LRM history ID
pe_find_anon = 0x002, //!< match base name of anonymous clone instances
pe_find_clone = 0x004, //!< match only clone instances
pe_find_current = 0x008, //!< match resource active on specified node
pe_find_inactive = 0x010, //!< match resource not running anywhere
pe_find_any = 0x020, //!< match base name of any clone instance
};
# define pe_flag_have_quorum 0x00000001ULL
# define pe_flag_symmetric_cluster 0x00000002ULL
# define pe_flag_maintenance_mode 0x00000008ULL
# define pe_flag_stonith_enabled 0x00000010ULL
# define pe_flag_have_stonith_resource 0x00000020ULL
# define pe_flag_enable_unfencing 0x00000040ULL
# define pe_flag_concurrent_fencing 0x00000080ULL
# define pe_flag_stop_rsc_orphans 0x00000100ULL
# define pe_flag_stop_action_orphans 0x00000200ULL
# define pe_flag_stop_everything 0x00000400ULL
# define pe_flag_start_failure_fatal 0x00001000ULL
# define pe_flag_remove_after_stop 0x00002000ULL
# define pe_flag_startup_fencing 0x00004000ULL
# define pe_flag_startup_probes 0x00010000ULL
# define pe_flag_have_status 0x00020000ULL
# define pe_flag_have_remote_nodes 0x00040000ULL
# define pe_flag_quick_location 0x00100000ULL
# define pe_flag_sanitized 0x00200000ULL
# define pe_flag_stdout 0x00400000ULL
struct pe_working_set_s {
xmlNode *input;
crm_time_t *now;
/* options extracted from the input */
char *dc_uuid;
pe_node_t *dc_node;
const char *stonith_action;
const char *placement_strategy;
unsigned long long flags;
int stonith_timeout;
enum pe_quorum_policy no_quorum_policy;
GHashTable *config_hash;
GHashTable *tickets;
// Actions for which there can be only one (e.g. fence nodeX)
GHashTable *singletons;
GListPtr nodes;
GListPtr resources;
GListPtr placement_constraints;
GListPtr ordering_constraints;
GListPtr colocation_constraints;
GListPtr ticket_constraints;
GListPtr actions;
xmlNode *failed;
xmlNode *op_defaults;
xmlNode *rsc_defaults;
/* stats */
int num_synapse;
int max_valid_nodes;
int order_id;
int action_id;
/* final output */
xmlNode *graph;
GHashTable *template_rsc_sets;
const char *localhost;
GHashTable *tags;
int blocked_resources;
int disabled_resources;
GList *param_check; // History entries that need to be checked
GList *stop_needed; // Containers that need stop actions
time_t recheck_by; // Hint to controller to re-run scheduler by this time
};
enum pe_check_parameters {
/* Clear fail count if parameters changed for un-expired start or monitor
* last_failure.
*/
pe_check_last_failure,
/* Clear fail count if parameters changed for start, monitor, promote, or
* migrate_from actions for active resources.
*/
pe_check_active,
};
struct pe_node_shared_s {
const char *id;
const char *uname;
enum node_type type;
/* @TODO convert these flags into a bitfield */
gboolean online;
gboolean standby;
gboolean standby_onfail;
gboolean pending;
gboolean unclean;
gboolean unseen;
gboolean shutdown;
gboolean expected_up;
gboolean is_dc;
gboolean maintenance;
gboolean rsc_discovery_enabled;
gboolean remote_requires_reset;
gboolean remote_was_fenced;
gboolean remote_maintenance; /* what the remote-rsc is thinking */
gboolean unpacked;
int num_resources;
pe_resource_t *remote_rsc;
GListPtr running_rsc; /* pe_resource_t* */
GListPtr allocated_rsc; /* pe_resource_t* */
GHashTable *attrs; /* char* => char* */
GHashTable *utilization;
GHashTable *digest_cache; //!< cache of calculated resource digests
};
struct pe_node_s {
int weight;
gboolean fixed;
int count;
struct pe_node_shared_s *details;
int rsc_discover_mode;
};
# define pe_rsc_orphan 0x00000001ULL
# define pe_rsc_managed 0x00000002ULL
# define pe_rsc_block 0x00000004ULL
# define pe_rsc_orphan_container_filler 0x00000008ULL
# define pe_rsc_notify 0x00000010ULL
# define pe_rsc_unique 0x00000020ULL
# define pe_rsc_fence_device 0x00000040ULL
# define pe_rsc_promotable 0x00000080ULL
# define pe_rsc_provisional 0x00000100ULL
# define pe_rsc_allocating 0x00000200ULL
# define pe_rsc_merging 0x00000400ULL
# define pe_rsc_reload 0x00002000ULL
# define pe_rsc_allow_remote_remotes 0x00004000ULL
# define pe_rsc_failed 0x00010000ULL
# define pe_rsc_runnable 0x00040000ULL
# define pe_rsc_start_pending 0x00080000ULL
# define pe_rsc_starting 0x00100000ULL
# define pe_rsc_stopping 0x00200000ULL
# define pe_rsc_allow_migrate 0x00800000ULL
# define pe_rsc_failure_ignored 0x01000000ULL
# define pe_rsc_maintenance 0x04000000ULL
# define pe_rsc_is_container 0x08000000ULL
# define pe_rsc_needs_quorum 0x10000000ULL
# define pe_rsc_needs_fencing 0x20000000ULL
# define pe_rsc_needs_unfencing 0x40000000ULL
enum pe_graph_flags {
pe_graph_none = 0x00000,
pe_graph_updated_first = 0x00001,
pe_graph_updated_then = 0x00002,
pe_graph_disable = 0x00004,
};
/* *INDENT-OFF* */
enum pe_action_flags {
pe_action_pseudo = 0x00001,
pe_action_runnable = 0x00002,
pe_action_optional = 0x00004,
pe_action_print_always = 0x00008,
pe_action_have_node_attrs = 0x00010,
pe_action_implied_by_stonith = 0x00040,
pe_action_migrate_runnable = 0x00080,
pe_action_dumped = 0x00100,
pe_action_processed = 0x00200,
pe_action_clear = 0x00400,
pe_action_dangle = 0x00800,
/* This action requires one or more of its dependencies to be runnable.
* We use this to clear the runnable flag before checking dependencies.
*/
pe_action_requires_any = 0x01000,
pe_action_reschedule = 0x02000,
pe_action_tracking = 0x04000,
+ pe_action_dedup = 0x08000, //! Internal state tracking when creating graph
};
/* *INDENT-ON* */
struct pe_resource_s {
char *id;
char *clone_name;
xmlNode *xml;
xmlNode *orig_xml;
xmlNode *ops_xml;
pe_working_set_t *cluster;
pe_resource_t *parent;
enum pe_obj_types variant;
void *variant_opaque;
resource_object_functions_t *fns;
resource_alloc_functions_t *cmds;
enum rsc_recovery_type recovery_type;
// @TODO only pe_restart_restart is of interest, so merge into flags
enum pe_restart restart_type; //!< \deprecated will be removed in future release
int priority;
int stickiness;
int sort_index;
int failure_timeout;
int migration_threshold;
guint remote_reconnect_ms;
char *pending_task;
unsigned long long flags;
// @TODO merge these into flags
gboolean is_remote_node;
gboolean exclusive_discover;
//!@{
//! This field should be treated as internal to Pacemaker
GListPtr rsc_cons_lhs; // List of rsc_colocation_t*
GListPtr rsc_cons; // List of rsc_colocation_t*
GListPtr rsc_location; // List of pe__location_t*
GListPtr actions; // List of pe_action_t*
GListPtr rsc_tickets; // List of rsc_ticket*
//!@}
pe_node_t *allocated_to;
pe_node_t *partial_migration_target;
pe_node_t *partial_migration_source;
GListPtr running_on; /* pe_node_t* */
GHashTable *known_on; /* pe_node_t* */
GHashTable *allowed_nodes; /* pe_node_t* */
enum rsc_role_e role;
enum rsc_role_e next_role;
GHashTable *meta;
GHashTable *parameters;
GHashTable *utilization;
GListPtr children; /* pe_resource_t* */
GListPtr dangling_migrations; /* pe_node_t* */
pe_resource_t *container;
GListPtr fillers;
pe_node_t *pending_node; // Node on which pending_task is happening
#if ENABLE_VERSIONED_ATTRS
xmlNode *versioned_parameters;
#endif
};
#if ENABLE_VERSIONED_ATTRS
// Used as action->action_details if action->rsc is not NULL
typedef struct pe_rsc_action_details_s {
xmlNode *versioned_parameters;
xmlNode *versioned_meta;
} pe_rsc_action_details_t;
#endif
struct pe_action_s {
int id;
int priority;
pe_resource_t *rsc;
pe_node_t *node;
xmlNode *op_entry;
char *task;
char *uuid;
char *cancel_task;
char *reason;
enum pe_action_flags flags;
enum rsc_start_requirement needs;
enum action_fail_response on_fail;
enum rsc_role_e fail_role;
GHashTable *meta;
GHashTable *extra;
/*
* These two varables are associated with the constraint logic
* that involves first having one or more actions runnable before
* then allowing this action to execute.
*
* These varables are used with features such as 'clone-min' which
* requires at minimum X number of cloned instances to be running
* before an order dependency can run. Another option that uses
* this is 'require-all=false' in ordering constrants. This option
* says "only require one instance of a resource to start before
* allowing dependencies to start" -- basically, require-all=false is
* the same as clone-min=1.
*/
/* current number of known runnable actions in the before list. */
int runnable_before;
/* the number of "before" runnable actions required for this action
* to be considered runnable */
int required_runnable_before;
GListPtr actions_before; /* pe_action_wrapper_t* */
GListPtr actions_after; /* pe_action_wrapper_t* */
/* Some of the above fields could be moved to the details,
* except for API backward compatibility.
*/
void *action_details; // varies by type of action
};
typedef struct pe_ticket_s {
char *id;
gboolean granted;
time_t last_granted;
gboolean standby;
GHashTable *state;
} pe_ticket_t;
typedef struct pe_tag_s {
char *id;
GListPtr refs;
} pe_tag_t;
+//! Internal tracking for transition graph creation
enum pe_link_state {
- pe_link_not_dumped,
- pe_link_dumped,
- pe_link_dup,
+ pe_link_not_dumped, //! Internal tracking for transition graph creation
+ pe_link_dumped, //! Internal tracking for transition graph creation
+ pe_link_dup, //! \deprecated No longer used by Pacemaker
};
enum pe_discover_e {
pe_discover_always = 0,
pe_discover_never,
pe_discover_exclusive,
};
/* *INDENT-OFF* */
enum pe_ordering {
pe_order_none = 0x0, /* deleted */
pe_order_optional = 0x1, /* pure ordering, nothing implied */
pe_order_apply_first_non_migratable = 0x2, /* Only apply this constraint's ordering if first is not migratable. */
pe_order_implies_first = 0x10, /* If 'then' is required, ensure 'first' is too */
pe_order_implies_then = 0x20, /* If 'first' is required, ensure 'then' is too */
pe_order_implies_first_master = 0x40, /* Imply 'first' is required when 'then' is required and then's rsc holds Master role. */
/* first requires then to be both runnable and migrate runnable. */
pe_order_implies_first_migratable = 0x80,
pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */
pe_order_pseudo_left = 0x200, /* 'then' can only be pseudo if 'first' is runnable */
pe_order_implies_then_on_node = 0x400, /* If 'first' is required on 'nodeX',
* ensure instances of 'then' on 'nodeX' are too.
* Only really useful if 'then' is a clone and 'first' is not
*/
pe_order_probe = 0x800, /* If 'first->rsc' is
* - running but about to stop, ignore the constraint
* - otherwise, behave as runnable_left
*/
pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */
pe_order_stonith_stop = 0x2000, /* only applies if the action is non-pseudo */
pe_order_serialize_only = 0x4000, /* serialize */
pe_order_same_node = 0x8000, /* applies only if 'first' and 'then' are on same node */
pe_order_implies_first_printed = 0x10000, /* Like ..implies_first but only ensures 'first' is printed, not mandatory */
pe_order_implies_then_printed = 0x20000, /* Like ..implies_then but only ensures 'then' is printed, not mandatory */
pe_order_asymmetrical = 0x100000, /* Indicates asymmetrical one way ordering constraint. */
pe_order_load = 0x200000, /* Only relevant if... */
pe_order_one_or_more = 0x400000, /* 'then' is runnable only if one or more of its dependencies are too */
pe_order_anti_colocation = 0x800000,
pe_order_preserve = 0x1000000, /* Hack for breaking user ordering constraints with container resources */
pe_order_then_cancels_first = 0x2000000, // if 'then' becomes required, 'first' becomes optional
pe_order_trace = 0x4000000, /* test marker */
};
/* *INDENT-ON* */
typedef struct pe_action_wrapper_s {
enum pe_ordering type;
enum pe_link_state state;
pe_action_t *action;
} pe_action_wrapper_t;
// Deprecated type aliases
typedef struct pe_action_s action_t; //!< \deprecated Use pe_action_t instead
typedef struct pe_action_wrapper_s action_wrapper_t; //!< \deprecated Use pe_action_wrapper_t instead
typedef struct pe_node_s node_t; //!< \deprecated Use pe_node_t instead
typedef struct pe_resource_s resource_t; //!< \deprecated Use pe_resource_t instead
typedef struct pe_tag_s tag_t; //!< \deprecated Use pe_tag_t instead
typedef struct pe_ticket_s ticket_t; //!< \deprecated Use pe_ticket_t instead
typedef enum pe_quorum_policy no_quorum_policy_t; //!< \deprecated Use enum pe_quorum_policy instead
#ifdef __cplusplus
}
#endif
#endif // PE_TYPES__H
diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c
index 3cee904811..c5a6f4cbeb 100644
--- a/lib/pacemaker/pcmk_sched_allocate.c
+++ b/lib/pacemaker/pcmk_sched_allocate.c
@@ -1,2971 +1,2976 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <glib.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
CRM_TRACE_INIT_DATA(pe_allocate);
void set_alloc_actions(pe_working_set_t * data_set);
extern void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set);
extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set);
static void apply_remote_node_ordering(pe_working_set_t *data_set);
static enum remote_connection_state get_remote_node_state(pe_node_t *node);
enum remote_connection_state {
remote_state_unknown = 0,
remote_state_alive = 1,
remote_state_resting = 2,
remote_state_failed = 3,
remote_state_stopped = 4
};
static const char *
state2text(enum remote_connection_state state)
{
switch (state) {
case remote_state_unknown:
return "unknown";
case remote_state_alive:
return "alive";
case remote_state_resting:
return "resting";
case remote_state_failed:
return "failed";
case remote_state_stopped:
return "stopped";
}
return "impossible";
}
resource_alloc_functions_t resource_class_alloc_functions[] = {
{
native_merge_weights,
native_color,
native_create_actions,
native_create_probe,
native_internal_constraints,
native_rsc_colocation_lh,
native_rsc_colocation_rh,
native_rsc_location,
native_action_flags,
native_update_actions,
native_expand,
native_append_meta,
},
{
group_merge_weights,
group_color,
group_create_actions,
native_create_probe,
group_internal_constraints,
group_rsc_colocation_lh,
group_rsc_colocation_rh,
group_rsc_location,
group_action_flags,
group_update_actions,
group_expand,
group_append_meta,
},
{
clone_merge_weights,
clone_color,
clone_create_actions,
clone_create_probe,
clone_internal_constraints,
clone_rsc_colocation_lh,
clone_rsc_colocation_rh,
clone_rsc_location,
clone_action_flags,
pcmk__multi_update_actions,
clone_expand,
clone_append_meta,
},
{
pcmk__bundle_merge_weights,
pcmk__bundle_color,
pcmk__bundle_create_actions,
pcmk__bundle_create_probe,
pcmk__bundle_internal_constraints,
pcmk__bundle_rsc_colocation_lh,
pcmk__bundle_rsc_colocation_rh,
pcmk__bundle_rsc_location,
pcmk__bundle_action_flags,
pcmk__multi_update_actions,
pcmk__bundle_expand,
pcmk__bundle_append_meta,
}
};
gboolean
update_action_flags(action_t * action, enum pe_action_flags flags, const char *source, int line)
{
static unsigned long calls = 0;
gboolean changed = FALSE;
gboolean clear = is_set(flags, pe_action_clear);
enum pe_action_flags last = action->flags;
if (clear) {
action->flags = crm_clear_bit(source, line, action->uuid, action->flags, flags);
} else {
action->flags = crm_set_bit(source, line, action->uuid, action->flags, flags);
}
if (last != action->flags) {
calls++;
changed = TRUE;
/* Useful for tracking down _who_ changed a specific flag */
/* CRM_ASSERT(calls != 534); */
clear_bit(flags, pe_action_clear);
crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)",
action->uuid, action->node ? action->node->details->uname : "[none]",
clear ? "un-" : "", flags, last, action->flags, calls, source);
}
return changed;
}
static gboolean
check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry,
gboolean active_here, pe_working_set_t * data_set)
{
int attr_lpc = 0;
gboolean force_restart = FALSE;
gboolean delete_resource = FALSE;
gboolean changed = FALSE;
const char *value = NULL;
const char *old_value = NULL;
const char *attr_list[] = {
XML_ATTR_TYPE,
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER
};
for (; attr_lpc < DIMOF(attr_list); attr_lpc++) {
value = crm_element_value(rsc->xml, attr_list[attr_lpc]);
old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]);
if (value == old_value /* i.e. NULL */
|| crm_str_eq(value, old_value, TRUE)) {
continue;
}
changed = TRUE;
trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set);
if (active_here) {
force_restart = TRUE;
crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",
rsc->id, node->details->uname, attr_list[attr_lpc],
crm_str(old_value), crm_str(value));
}
}
if (force_restart) {
/* make sure the restart happens */
stop_action(rsc, node, FALSE);
set_bit(rsc->flags, pe_rsc_start_pending);
delete_resource = TRUE;
} else if (changed) {
delete_resource = TRUE;
}
return delete_resource;
}
static void
CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node,
const char *reason, pe_working_set_t * data_set)
{
guint interval_ms = 0;
action_t *cancel = NULL;
const char *task = NULL;
const char *call_id = NULL;
const char *interval_ms_s = NULL;
CRM_CHECK(xml_op != NULL, return);
CRM_CHECK(active_node != NULL, return);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS);
interval_ms = crm_parse_ms(interval_ms_s);
crm_info("Action " CRM_OP_FMT " on %s will be stopped: %s",
rsc->id, task, interval_ms,
active_node->details->uname, (reason? reason : "unknown"));
cancel = pe_cancel_op(rsc, task, interval_ms, active_node, data_set);
add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set);
}
static gboolean
check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op,
pe_working_set_t * data_set)
{
char *key = NULL;
guint interval_ms = 0;
const char *interval_ms_s = NULL;
const op_digest_cache_t *digest_data = NULL;
gboolean did_change = FALSE;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *digest_secure = NULL;
CRM_CHECK(active_node != NULL, return FALSE);
interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS);
interval_ms = crm_parse_ms(interval_ms_s);
if (interval_ms > 0) {
xmlNode *op_match = NULL;
/* we need to reconstruct the key because of the way we used to construct resource IDs */
key = generate_op_key(rsc->id, task, interval_ms);
pe_rsc_trace(rsc, "Checking parameters for %s", key);
op_match = find_rsc_op_entry(rsc, key);
if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) {
CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set);
free(key);
return TRUE;
} else if (op_match == NULL) {
pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname);
free(key);
return TRUE;
}
free(key);
key = NULL;
}
crm_trace("Testing " CRM_OP_FMT " on %s",
rsc->id, task, interval_ms, active_node->details->uname);
if ((interval_ms == 0) && safe_str_eq(task, RSC_STATUS)) {
/* Reload based on the start action not a probe */
task = RSC_START;
} else if ((interval_ms == 0) && safe_str_eq(task, RSC_MIGRATED)) {
/* Reload based on the start action not a migrate */
task = RSC_START;
} else if ((interval_ms == 0) && safe_str_eq(task, RSC_PROMOTE)) {
/* Reload based on the start action not a promote */
task = RSC_START;
}
digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set);
if(is_set(data_set->flags, pe_flag_sanitized)) {
digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
}
if(digest_data->rc != RSC_DIGEST_MATCH
&& digest_secure
&& digest_data->digest_secure_calc
&& strcmp(digest_data->digest_secure_calc, digest_secure) == 0) {
if (is_set(data_set->flags, pe_flag_stdout)) {
printf("Only 'private' parameters to " CRM_OP_FMT " on %s changed: %s\n",
rsc->id, task, interval_ms, active_node->details->uname,
crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
}
} else if (digest_data->rc == RSC_DIGEST_RESTART) {
/* Changes that force a restart */
pe_action_t *required = NULL;
did_change = TRUE;
key = generate_op_key(rsc->id, task, interval_ms);
crm_log_xml_info(digest_data->params_restart, "params:restart");
required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL,
"resource definition change", pe_action_optional, TRUE);
trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set);
} else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) {
/* Changes that can potentially be handled by a reload */
const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
did_change = TRUE;
trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set);
crm_log_xml_info(digest_data->params_all, "params:reload");
key = generate_op_key(rsc->id, task, interval_ms);
if (interval_ms > 0) {
action_t *op = NULL;
#if 0
/* Always reload/restart the entire resource */
ReloadRsc(rsc, active_node, data_set);
#else
/* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */
op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set);
set_bit(op->flags, pe_action_reschedule);
#endif
} else if (digest_restart) {
pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id);
/* Reload this resource */
ReloadRsc(rsc, active_node, data_set);
free(key);
} else {
pe_action_t *required = NULL;
pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id);
/* Re-send the start/demote/promote op
* Recurring ops will be detected independently
*/
required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL,
"resource definition change", pe_action_optional, TRUE);
}
}
return did_change;
}
/*!
* \internal
* \brief Do deferred action checks after allocation
*
* \param[in] data_set Working set for cluster
*/
static void
check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op,
enum pe_check_parameters check, pe_working_set_t *data_set)
{
const char *reason = NULL;
op_digest_cache_t *digest_data = NULL;
switch (check) {
case pe_check_active:
if (check_action_definition(rsc, node, rsc_op, data_set)
&& pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
data_set)) {
reason = "action definition changed";
}
break;
case pe_check_last_failure:
digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set);
switch (digest_data->rc) {
case RSC_DIGEST_UNKNOWN:
crm_trace("Resource %s history entry %s on %s has no digest to compare",
rsc->id, ID(rsc_op), node->details->id);
break;
case RSC_DIGEST_MATCH:
break;
default:
reason = "resource parameters have changed";
break;
}
break;
}
if (reason) {
pe__clear_failcount(rsc, node, reason, data_set);
}
}
static void
check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
int offset = -1;
guint interval_ms = 0;
int stop_index = 0;
int start_index = 0;
const char *task = NULL;
const char *interval_ms_s = NULL;
xmlNode *rsc_op = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
CRM_CHECK(node != NULL, return);
if (is_set(rsc->flags, pe_rsc_orphan)) {
resource_t *parent = uber_parent(rsc);
if(parent == NULL
|| pe_rsc_is_clone(parent) == FALSE
|| is_set(parent->flags, pe_rsc_unique)) {
pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id);
DeleteRsc(rsc, node, FALSE, data_set);
} else {
pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id);
}
return;
} else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) {
DeleteRsc(rsc, node, FALSE, data_set);
}
pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s",
rsc->id, node->details->uname);
return;
}
pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname);
if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) {
DeleteRsc(rsc, node, FALSE, data_set);
}
for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
rsc_op = __xml_next_element(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
op_list = g_list_prepend(op_list, rsc_op);
}
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
offset++;
if (start_index < stop_index) {
/* stopped */
continue;
} else if (offset < start_index) {
/* action occurred prior to a start */
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS);
interval_ms = crm_parse_ms(interval_ms_s);
if ((interval_ms > 0) &&
(is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) {
// Maintenance mode cancels recurring operations
CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set);
} else if ((interval_ms > 0)
|| safe_str_eq(task, RSC_STATUS)
|| safe_str_eq(task, RSC_START)
|| safe_str_eq(task, RSC_PROMOTE)
|| safe_str_eq(task, RSC_MIGRATED)) {
/* If a resource operation failed, and the operation's definition
* has changed, clear any fail count so they can be retried fresh.
*/
if (pe__bundle_needs_remote_name(rsc)) {
/* We haven't allocated resources to nodes yet, so if the
* REMOTE_CONTAINER_HACK is used, we may calculate the digest
* based on the literal "#uname" value rather than the properly
* substituted value. That would mistakenly make the action
* definition appear to have been changed. Defer the check until
* later in this case.
*/
pe__add_param_check(rsc_op, rsc, node, pe_check_active,
data_set);
} else if (check_action_definition(rsc, node, rsc_op, data_set)
&& pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
data_set)) {
pe__clear_failcount(rsc, node, "action definition changed",
data_set);
}
}
}
g_list_free(sorted_op_list);
}
static GListPtr
find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones,
gboolean partial, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
gboolean match = FALSE;
if (id == NULL) {
return NULL;
}
if (rsc == NULL) {
if (data_set == NULL) {
return NULL;
}
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
result = find_rsc_list(result, child, id, renamed_clones, partial,
NULL);
}
return result;
}
if (partial) {
if (strstr(rsc->id, id)) {
match = TRUE;
} else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) {
match = TRUE;
}
} else {
if (strcmp(rsc->id, id) == 0) {
match = TRUE;
} else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
match = TRUE;
}
}
if (match) {
result = g_list_prepend(result, rsc);
}
if (rsc->children) {
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t *) gIter->data;
result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
}
}
return result;
}
static void
check_actions(pe_working_set_t * data_set)
{
const char *id = NULL;
node_t *node = NULL;
xmlNode *lrm_rscs = NULL;
xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
xmlNode *node_state = NULL;
for (node_state = __xml_first_child_element(status); node_state != NULL;
node_state = __xml_next_element(node_state)) {
if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
id = crm_element_value(node_state, XML_ATTR_ID);
lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE);
node = pe_find_node_id(data_set->nodes, id);
if (node == NULL) {
continue;
/* Still need to check actions for a maintenance node to cancel existing monitor operations */
} else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) {
crm_trace("Skipping param check for %s: can't run resources",
node->details->uname);
continue;
}
crm_trace("Processing node %s", node->details->uname);
if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
xmlNode *rsc_entry = NULL;
for (rsc_entry = __xml_first_child_element(lrm_rscs);
rsc_entry != NULL;
rsc_entry = __xml_next_element(rsc_entry)) {
if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
if (xml_has_children(rsc_entry)) {
GListPtr gIter = NULL;
GListPtr result = NULL;
const char *rsc_id = ID(rsc_entry);
CRM_CHECK(rsc_id != NULL, return);
result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set);
for (gIter = result; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (rsc->variant != pe_native) {
continue;
}
check_actions_for(rsc_entry, rsc, node, data_set);
}
g_list_free(result);
}
}
}
}
}
}
}
static gboolean
apply_placement_constraints(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
crm_trace("Applying constraints...");
for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) {
pe__location_t *cons = gIter->data;
cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
}
return TRUE;
}
static gboolean
failcount_clear_action_exists(node_t * node, resource_t * rsc)
{
gboolean rc = FALSE;
GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE);
if (list) {
rc = TRUE;
}
g_list_free(list);
return rc;
}
/*!
* \internal
* \brief Force resource away if failures hit migration threshold
*
* \param[in,out] rsc Resource to check for failures
* \param[in,out] node Node to check for failures
* \param[in,out] data_set Cluster working set to update
*/
static void
check_migration_threshold(resource_t *rsc, node_t *node,
pe_working_set_t *data_set)
{
int fail_count, countdown;
resource_t *failed;
/* Migration threshold of 0 means never force away */
if (rsc->migration_threshold == 0) {
return;
}
// If we're ignoring failures, also ignore the migration threshold
if (is_set(rsc->flags, pe_rsc_failure_ignored)) {
return;
}
/* If there are no failures, there's no need to force away */
fail_count = pe_get_failcount(node, rsc, NULL,
pe_fc_effective|pe_fc_fillers, NULL,
data_set);
if (fail_count <= 0) {
return;
}
/* How many more times recovery will be tried on this node */
countdown = QB_MAX(rsc->migration_threshold - fail_count, 0);
/* If failed resource has a parent, we'll force the parent away */
failed = rsc;
if (is_not_set(rsc->flags, pe_rsc_unique)) {
failed = uber_parent(rsc);
}
if (countdown == 0) {
resource_location(failed, node, -INFINITY, "__fail_limit__", data_set);
crm_warn("Forcing %s away from %s after %d failures (max=%d)",
failed->id, node->details->uname, fail_count,
rsc->migration_threshold);
} else {
crm_info("%s can fail %d more times on %s before being forced off",
failed->id, countdown, node->details->uname);
}
}
static void
common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
{
if (rsc->children) {
GListPtr gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
common_apply_stickiness(child_rsc, node, data_set);
}
return;
}
if (is_set(rsc->flags, pe_rsc_managed)
&& rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) {
node_t *current = pe_find_node_id(rsc->running_on, node->details->id);
node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (current == NULL) {
} else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
resource_t *sticky_rsc = rsc;
resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set);
pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location"
" (node=%s, weight=%d)", sticky_rsc->id,
node->details->uname, rsc->stickiness);
} else {
GHashTableIter iter;
node_t *nIter = NULL;
pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric"
" and node %s is not explicitly allowed", rsc->id, node->details->uname);
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) {
crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight);
}
}
}
/* Check the migration threshold only if a failcount clear action
* has not already been placed for this resource on the node.
* There is no sense in potentially forcing the resource from this
* node if the failcount is being reset anyway.
*
* @TODO A clear_failcount operation can be scheduled in stage4() via
* check_actions_for(), or in stage5() via check_params(). This runs in
* stage2(), so it cannot detect those, meaning we might check the migration
* threshold when we shouldn't -- worst case, we stop or move the resource,
* then move it back next transition.
*/
if (failcount_clear_action_exists(node, rsc) == FALSE) {
check_migration_threshold(rsc, node, data_set);
}
}
void
complex_set_cmds(resource_t * rsc)
{
GListPtr gIter = rsc->children;
rsc->cmds = &resource_class_alloc_functions[rsc->variant];
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
complex_set_cmds(child_rsc);
}
}
void
set_alloc_actions(pe_working_set_t * data_set)
{
GListPtr gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
complex_set_cmds(rsc);
}
}
static void
calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data)
{
const char *key = (const char *)gKey;
const char *value = (const char *)gValue;
int *system_health = (int *)user_data;
if (!gKey || !gValue || !user_data) {
return;
}
if (crm_starts_with(key, "#health")) {
int score;
/* Convert the value into an integer */
score = char2score(value);
/* Add it to the running total */
*system_health = merge_weights(score, *system_health);
}
}
static gboolean
apply_system_health(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy");
int base_health = 0;
if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) {
/* Prevent any accidental health -> score translation */
node_score_red = 0;
node_score_yellow = 0;
node_score_green = 0;
return TRUE;
} else if (safe_str_eq(health_strategy, "migrate-on-red")) {
/* Resources on nodes which have health values of red are
* weighted away from that node.
*/
node_score_red = -INFINITY;
node_score_yellow = 0;
node_score_green = 0;
} else if (safe_str_eq(health_strategy, "only-green")) {
/* Resources on nodes which have health values of red or yellow
* are forced away from that node.
*/
node_score_red = -INFINITY;
node_score_yellow = -INFINITY;
node_score_green = 0;
} else if (safe_str_eq(health_strategy, "progressive")) {
/* Same as the above, but use the r/y/g scores provided by the user
* Defaults are provided by the pe_prefs table
* Also, custom health "base score" can be used
*/
base_health = crm_parse_int(pe_pref(data_set->config_hash, "node-health-base"), "0");
} else if (safe_str_eq(health_strategy, "custom")) {
/* Requires the admin to configure the rsc_location constaints for
* processing the stored health scores
*/
/* TODO: Check for the existence of appropriate node health constraints */
return TRUE;
} else {
crm_err("Unknown node health strategy: %s", health_strategy);
return FALSE;
}
crm_info("Applying automated node health strategy: %s", health_strategy);
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
int system_health = base_health;
node_t *node = (node_t *) gIter->data;
/* Search through the node hash table for system health entries. */
g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health);
crm_info(" Node %s has an combined system health of %d",
node->details->uname, system_health);
/* If the health is non-zero, then create a new rsc2node so that the
* weight will be added later on.
*/
if (system_health != 0) {
GListPtr gIter2 = data_set->resources;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set);
}
}
}
return TRUE;
}
gboolean
stage0(pe_working_set_t * data_set)
{
xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);
if (data_set->input == NULL) {
return FALSE;
}
if (is_set(data_set->flags, pe_flag_have_status) == FALSE) {
crm_trace("Calculating status");
cluster_status(data_set);
}
set_alloc_actions(data_set);
apply_system_health(data_set);
unpack_constraints(cib_constraints, data_set);
return TRUE;
}
/*
* Check nodes for resources started outside of the LRM
*/
gboolean
probe_resources(pe_working_set_t * data_set)
{
action_t *probe_node_complete = NULL;
for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
if (node->details->online == FALSE) {
if (pe__is_remote_node(node) && node->details->remote_rsc
&& (get_remote_node_state(node) == remote_state_failed)) {
pe_fence_node(data_set, node, "the connection is unrecoverable");
}
continue;
} else if (node->details->unclean) {
continue;
} else if (node->details->rsc_discovery_enabled == FALSE) {
/* resource discovery is disabled for this node */
continue;
}
if (probed != NULL && crm_is_true(probed) == FALSE) {
action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname),
CRM_OP_REPROBE, node, FALSE, TRUE, data_set);
add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
continue;
}
for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set);
}
}
return TRUE;
}
static void
rsc_discover_filter(resource_t *rsc, node_t *node)
{
GListPtr gIter = rsc->children;
resource_t *top = uber_parent(rsc);
node_t *match;
if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) {
return;
}
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
rsc_discover_filter(child_rsc, node);
}
match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (match && match->rsc_discover_mode != pe_discover_exclusive) {
match->weight = -INFINITY;
}
}
/*
* Count how many valid nodes we have (so we know the maximum number of
* colors we can resolve).
*
* Apply node constraints (i.e. filter the "allowed_nodes" part of resources)
*/
gboolean
stage2(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
crm_trace("Applying placement constraints");
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
if (node == NULL) {
/* error */
} else if (node->weight >= 0.0 /* global weight */
&& node->details->online && node->details->type != node_ping) {
data_set->max_valid_nodes++;
}
}
apply_placement_constraints(data_set);
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
GListPtr gIter2 = NULL;
node_t *node = (node_t *) gIter->data;
gIter2 = data_set->resources;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
common_apply_stickiness(rsc, node, data_set);
rsc_discover_filter(rsc, node);
}
}
return TRUE;
}
/*
* Create internal resource constraints before allocation
*/
gboolean
stage3(pe_working_set_t * data_set)
{
GListPtr gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
rsc->cmds->internal_constraints(rsc, data_set);
}
return TRUE;
}
/*
* Check for orphaned or redefined actions
*/
gboolean
stage4(pe_working_set_t * data_set)
{
check_actions(data_set);
return TRUE;
}
static void *
convert_const_pointer(const void *ptr)
{
/* Worst function ever */
return (void *)ptr;
}
static gint
sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data)
{
int rc = 0;
int r1_weight = -INFINITY;
int r2_weight = -INFINITY;
const char *reason = "existence";
const GListPtr nodes = (GListPtr) data;
const resource_t *resource1 = a;
const resource_t *resource2 = b;
node_t *r1_node = NULL;
node_t *r2_node = NULL;
GListPtr gIter = NULL;
GHashTable *r1_nodes = NULL;
GHashTable *r2_nodes = NULL;
if (a == NULL && b == NULL) {
goto done;
}
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
reason = "priority";
r1_weight = resource1->priority;
r2_weight = resource2->priority;
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
reason = "no node list";
if (nodes == NULL) {
goto done;
}
r1_nodes = rsc_merge_weights(convert_const_pointer(resource1),
resource1->id, NULL, NULL, 1,
pe_weights_forward | pe_weights_init);
dump_node_scores(LOG_TRACE, NULL, resource1->id, r1_nodes);
r2_nodes = rsc_merge_weights(convert_const_pointer(resource2),
resource2->id, NULL, NULL, 1,
pe_weights_forward | pe_weights_init);
dump_node_scores(LOG_TRACE, NULL, resource2->id, r2_nodes);
/* Current location score */
reason = "current location";
r1_weight = -INFINITY;
r2_weight = -INFINITY;
if (resource1->running_on) {
r1_node = pe__current_node(resource1);
r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id);
if (r1_node != NULL) {
r1_weight = r1_node->weight;
}
}
if (resource2->running_on) {
r2_node = pe__current_node(resource2);
r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id);
if (r2_node != NULL) {
r2_weight = r2_node->weight;
}
}
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
reason = "score";
for (gIter = nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
r1_node = NULL;
r2_node = NULL;
r1_weight = -INFINITY;
if (r1_nodes) {
r1_node = g_hash_table_lookup(r1_nodes, node->details->id);
}
if (r1_node) {
r1_weight = r1_node->weight;
}
r2_weight = -INFINITY;
if (r2_nodes) {
r2_node = g_hash_table_lookup(r2_nodes, node->details->id);
}
if (r2_node) {
r2_weight = r2_node->weight;
}
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
}
done:
crm_trace("%s (%d) on %s %c %s (%d) on %s: %s",
resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a",
rc < 0 ? '>' : rc > 0 ? '<' : '=',
resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason);
if (r1_nodes) {
g_hash_table_destroy(r1_nodes);
}
if (r2_nodes) {
g_hash_table_destroy(r2_nodes);
}
return rc;
}
static void
allocate_resources(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
/* Force remote connection resources to be allocated first. This
* also forces any colocation dependencies to be allocated as well */
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (rsc->is_remote_node == FALSE) {
continue;
}
pe_rsc_trace(rsc, "Allocating: %s", rsc->id);
/* For remote node connection resources, always prefer the partial
* migration target during resource allocation, if the rsc is in the
* middle of a migration.
*/
rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set);
}
}
/* now do the rest of the resources */
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (rsc->is_remote_node == TRUE) {
continue;
}
pe_rsc_trace(rsc, "Allocating: %s", rsc->id);
rsc->cmds->allocate(rsc, NULL, data_set);
}
}
/* We always use pe_order_preserve with these convenience functions to exempt
* internally generated constraints from the prohibition of user constraints
* involving remote connection resources.
*
* The start ordering additionally uses pe_order_runnable_left so that the
* specified action is not runnable if the start is not runnable.
*/
static inline void
order_start_then_action(resource_t *lh_rsc, action_t *rh_action,
enum pe_ordering extra, pe_working_set_t *data_set)
{
if (lh_rsc && rh_action && data_set) {
custom_action_order(lh_rsc, start_key(lh_rsc), NULL,
rh_action->rsc, NULL, rh_action,
pe_order_preserve | pe_order_runnable_left | extra,
data_set);
}
}
static inline void
order_action_then_stop(action_t *lh_action, resource_t *rh_rsc,
enum pe_ordering extra, pe_working_set_t *data_set)
{
if (lh_action && rh_rsc && data_set) {
custom_action_order(lh_action->rsc, NULL, lh_action,
rh_rsc, stop_key(rh_rsc), NULL,
pe_order_preserve | extra, data_set);
}
}
// Clear fail counts for orphaned rsc on all online nodes
static void
cleanup_orphans(resource_t * rsc, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
if (node->details->online
&& pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
data_set)) {
pe_action_t *clear_op = NULL;
clear_op = pe__clear_failcount(rsc, node, "it is orphaned",
data_set);
/* We can't use order_action_then_stop() here because its
* pe_order_preserve breaks things
*/
custom_action_order(clear_op->rsc, NULL, clear_op,
rsc, stop_key(rsc), NULL,
pe_order_optional, data_set);
}
}
}
gboolean
stage5(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
if (safe_str_neq(data_set->placement_strategy, "default")) {
GListPtr nodes = g_list_copy(data_set->nodes);
nodes = sort_nodes_by_weight(nodes, NULL, data_set);
data_set->resources =
g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes);
g_list_free(nodes);
}
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original", node);
}
crm_trace("Allocating services");
/* Take (next) highest resource, assign it and create its actions */
allocate_resources(data_set);
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining", node);
}
// Process deferred action checks
pe__foreach_param_check(data_set, check_params);
pe__free_param_checks(data_set);
if (is_set(data_set->flags, pe_flag_startup_probes)) {
crm_trace("Calculating needed probes");
/* This code probably needs optimization
* ptest -x with 100 nodes, 100 clones and clone-max=100:
With probes:
ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions
ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services
ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes
ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions
ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done
ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
36s
ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
Without probes:
ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions
ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services
ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
*/
probe_resources(data_set);
}
crm_trace("Handle orphans");
if (is_set(data_set->flags, pe_flag_stop_rsc_orphans)) {
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
pe_resource_t *rsc = (pe_resource_t *) gIter->data;
/* There's no need to recurse into rsc->children because those
* should just be unallocated clone instances.
*/
if (is_set(rsc->flags, pe_rsc_orphan)) {
cleanup_orphans(rsc, data_set);
}
}
}
crm_trace("Creating actions");
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
rsc->cmds->create_actions(rsc, data_set);
}
crm_trace("Creating done");
return TRUE;
}
static gboolean
is_managed(const resource_t * rsc)
{
GListPtr gIter = rsc->children;
if (is_set(rsc->flags, pe_rsc_managed)) {
return TRUE;
}
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
if (is_managed(child_rsc)) {
return TRUE;
}
}
return FALSE;
}
static gboolean
any_managed_resources(pe_working_set_t * data_set)
{
GListPtr gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (is_managed(rsc)) {
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Create pseudo-op for guest node fence, and order relative to it
*
* \param[in] node Guest node to fence
* \param[in] data_set Working set of CIB state
*/
static void
fence_guest(pe_node_t *node, pe_working_set_t *data_set)
{
resource_t *container = node->details->remote_rsc->container;
pe_action_t *stop = NULL;
pe_action_t *stonith_op = NULL;
/* The fence action is just a label; we don't do anything differently for
* off vs. reboot. We specify it explicitly, rather than let it default to
* cluster's default action, because we are not _initiating_ fencing -- we
* are creating a pseudo-event to describe fencing that is already occurring
* by other means (container recovery).
*/
const char *fence_action = "off";
/* Check whether guest's container resource has any explicit stop or
* start (the stop may be implied by fencing of the guest's host).
*/
if (container) {
stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL);
if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) {
fence_action = "reboot";
}
}
/* Create a fence pseudo-event, so we have an event to order actions
* against, and the controller can always detect it.
*/
stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", data_set);
update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable,
__FUNCTION__, __LINE__);
/* We want to imply stops/demotes after the guest is stopped, not wait until
* it is restarted, so we always order pseudo-fencing after stop, not start
* (even though start might be closer to what is done for a real reboot).
*/
if(stop && is_set(stop->flags, pe_action_pseudo)) {
pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, data_set);
crm_info("Implying guest node %s is down (action %d) after %s fencing",
node->details->uname, stonith_op->id, stop->node->details->uname);
order_actions(parent_stonith_op, stonith_op,
pe_order_runnable_left|pe_order_implies_then);
} else if (stop) {
order_actions(stop, stonith_op,
pe_order_runnable_left|pe_order_implies_then);
crm_info("Implying guest node %s is down (action %d) "
"after container %s is stopped (action %d)",
node->details->uname, stonith_op->id,
container->id, stop->id);
} else {
/* If we're fencing the guest node but there's no stop for the guest
* resource, we must think the guest is already stopped. However, we may
* think so because its resource history was just cleaned. To avoid
* unnecessarily considering the guest node down if it's really up,
* order the pseudo-fencing after any stop of the connection resource,
* which will be ordered after any container (re-)probe.
*/
stop = find_first_action(node->details->remote_rsc->actions, NULL,
RSC_STOP, NULL);
if (stop) {
order_actions(stop, stonith_op, pe_order_optional);
crm_info("Implying guest node %s is down (action %d) "
"after connection is stopped (action %d)",
node->details->uname, stonith_op->id, stop->id);
} else {
/* Not sure why we're fencing, but everything must already be
* cleanly stopped.
*/
crm_info("Implying guest node %s is down (action %d) ",
node->details->uname, stonith_op->id);
}
}
/* Order/imply other actions relative to pseudo-fence as with real fence */
stonith_constraints(node, stonith_op, data_set);
}
/*
* Create dependencies for stonith and shutdown operations
*/
gboolean
stage6(pe_working_set_t * data_set)
{
action_t *dc_down = NULL;
action_t *stonith_op = NULL;
gboolean integrity_lost = FALSE;
gboolean need_stonith = TRUE;
GListPtr gIter;
GListPtr stonith_ops = NULL;
GList *shutdown_ops = NULL;
/* Remote ordering constraints need to happen prior to calculating fencing
* because it is one more place we will mark the node as dirty.
*
* A nice side effect of doing them early is that apply_*_ordering() can be
* simpler because pe_fence_node() has already done some of the work.
*/
crm_trace("Creating remote ordering constraints");
apply_remote_node_ordering(data_set);
crm_trace("Processing fencing and shutdown cases");
if (any_managed_resources(data_set) == FALSE) {
crm_notice("Delaying fencing operations until there are resources to manage");
need_stonith = FALSE;
}
/* Check each node for stonith/shutdown */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
/* Guest nodes are "fenced" by recovering their container resource,
* so handle them separately.
*/
if (pe__is_guest_node(node)) {
if (node->details->remote_requires_reset && need_stonith) {
fence_guest(node, data_set);
}
continue;
}
stonith_op = NULL;
if (node->details->unclean
&& need_stonith && pe_can_fence(data_set, node)) {
stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", data_set);
pe_warn("Scheduling Node %s for STONITH", node->details->uname);
stonith_constraints(node, stonith_op, data_set);
if (node->details->is_dc) {
// Remember if the DC is being fenced
dc_down = stonith_op;
} else {
if (is_not_set(data_set->flags, pe_flag_concurrent_fencing)
&& (stonith_ops != NULL)) {
/* Concurrent fencing is disabled, so order each non-DC
* fencing in a chain. If there is any DC fencing or
* shutdown, it will be ordered after the last action in the
* chain later.
*/
order_actions((pe_action_t *) stonith_ops->data,
stonith_op, pe_order_optional);
}
// Remember all non-DC fencing actions in a separate list
stonith_ops = g_list_prepend(stonith_ops, stonith_op);
}
} else if (node->details->online && node->details->shutdown &&
/* TODO define what a shutdown op means for a remote node.
* For now we do not send shutdown operations for remote nodes, but
* if we can come up with a good use for this in the future, we will. */
pe__is_guest_or_remote_node(node) == FALSE) {
action_t *down_op = sched_shutdown_op(node, data_set);
if (node->details->is_dc) {
// Remember if the DC is being shut down
dc_down = down_op;
} else {
// Remember non-DC shutdowns for later ordering
shutdown_ops = g_list_prepend(shutdown_ops, down_op);
}
}
if (node->details->unclean && stonith_op == NULL) {
integrity_lost = TRUE;
pe_warn("Node %s is unclean!", node->details->uname);
}
}
if (integrity_lost) {
if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");
} else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) {
crm_notice("Cannot fence unclean nodes until quorum is"
" attained (or no-quorum-policy is set to ignore)");
}
}
if (dc_down != NULL) {
/* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
* DC elections. However, we don't want to order non-DC shutdowns before
* a DC *fencing*, because even though we don't want a node that's
* shutting down to become DC, the DC fencing could be ordered before a
* clone stop that's also ordered before the shutdowns, thus leading to
* a graph loop.
*/
if (safe_str_eq(dc_down->task, CRM_OP_SHUTDOWN)) {
for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) {
action_t *node_stop = (action_t *) gIter->data;
crm_debug("Ordering shutdown on %s before %s on DC %s",
node_stop->node->details->uname,
dc_down->task, dc_down->node->details->uname);
order_actions(node_stop, dc_down, pe_order_optional);
}
}
// Order any non-DC fencing before any DC fencing or shutdown
if (is_set(data_set->flags, pe_flag_concurrent_fencing)) {
/* With concurrent fencing, order each non-DC fencing action
* separately before any DC fencing or shutdown.
*/
for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) {
order_actions((pe_action_t *) gIter->data, dc_down,
pe_order_optional);
}
} else if (stonith_ops) {
/* Without concurrent fencing, the non-DC fencing actions are
* already ordered relative to each other, so we just need to order
* the DC fencing after the last action in the chain (which is the
* first item in the list).
*/
order_actions((pe_action_t *) stonith_ops->data, dc_down,
pe_order_optional);
}
}
g_list_free(stonith_ops);
g_list_free(shutdown_ops);
return TRUE;
}
/*
* Determine the sets of independent actions and the correct order for the
* actions in each set.
*
* Mark dependencies of un-runnable actions un-runnable
*
*/
static GListPtr
find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key)
{
GListPtr list = NULL;
list = find_actions(actions, original_key, NULL);
if (list == NULL) {
/* we're potentially searching a child of the original resource */
char *key = NULL;
char *task = NULL;
guint interval_ms = 0;
if (parse_op_key(original_key, NULL, &task, &interval_ms)) {
key = generate_op_key(rsc->id, task, interval_ms);
list = find_actions(actions, key, NULL);
} else {
crm_err("search key: %s", original_key);
}
free(key);
free(task);
}
return list;
}
static void
rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc,
pe__ordering_t *order)
{
GListPtr gIter = NULL;
GListPtr rh_actions = NULL;
action_t *rh_action = NULL;
enum pe_ordering type;
CRM_CHECK(rsc != NULL, return);
CRM_CHECK(order != NULL, return);
type = order->type;
rh_action = order->rh_action;
crm_trace("Processing RH of ordering constraint %d", order->id);
if (rh_action != NULL) {
rh_actions = g_list_prepend(NULL, rh_action);
} else if (rsc != NULL) {
rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task);
}
if (rh_actions == NULL) {
pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..."
" ignoring", rsc->id, order->rh_action_task);
if (lh_action) {
pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid);
}
return;
}
if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) {
pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid,
order->rh_action_task);
clear_bit(type, pe_order_implies_then);
}
gIter = rh_actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *rh_action_iter = (action_t *) gIter->data;
if (lh_action) {
order_actions(lh_action, rh_action_iter, type);
} else if (type & pe_order_implies_then) {
update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type);
} else {
crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type);
}
}
g_list_free(rh_actions);
}
static void
rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order,
pe_working_set_t *data_set)
{
GListPtr gIter = NULL;
GListPtr lh_actions = NULL;
action_t *lh_action = order->lh_action;
resource_t *rh_rsc = order->rh_rsc;
crm_trace("Processing LH of ordering constraint %d", order->id);
CRM_ASSERT(lh_rsc != NULL);
if (lh_action != NULL) {
lh_actions = g_list_prepend(NULL, lh_action);
} else {
lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task);
}
if (lh_actions == NULL && lh_rsc != rh_rsc) {
char *key = NULL;
char *op_type = NULL;
guint interval_ms = 0;
parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms);
key = generate_op_key(lh_rsc->id, op_type, interval_ms);
if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) {
free(key);
pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
} else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(op_type, RSC_DEMOTE)) {
free(key);
pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
} else {
pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating",
lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set);
lh_actions = g_list_prepend(NULL, lh_action);
}
free(op_type);
}
gIter = lh_actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *lh_action_iter = (action_t *) gIter->data;
if (rh_rsc == NULL && order->rh_action) {
rh_rsc = order->rh_action->rsc;
}
if (rh_rsc) {
rsc_order_then(lh_action_iter, rh_rsc, order);
} else if (order->rh_action) {
order_actions(lh_action_iter, order->rh_action, order->type);
}
}
g_list_free(lh_actions);
}
extern void update_colo_start_chain(pe_action_t *action,
pe_working_set_t *data_set);
static int
is_recurring_action(action_t *action)
{
const char *interval_ms_s = g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS);
guint interval_ms = crm_parse_ms(interval_ms_s);
return (interval_ms > 0);
}
static void
apply_container_ordering(action_t *action, pe_working_set_t *data_set)
{
/* VMs are also classified as containers for these purposes... in
* that they both involve a 'thing' running on a real or remote
* cluster node.
*
* This allows us to be smarter about the type and extent of
* recovery actions required in various scenarios
*/
resource_t *remote_rsc = NULL;
resource_t *container = NULL;
enum action_tasks task = text2task(action->task);
CRM_ASSERT(action->rsc);
CRM_ASSERT(action->node);
CRM_ASSERT(pe__is_guest_or_remote_node(action->node));
remote_rsc = action->node->details->remote_rsc;
CRM_ASSERT(remote_rsc);
container = remote_rsc->container;
CRM_ASSERT(container);
if(is_set(container->flags, pe_rsc_failed)) {
pe_fence_node(data_set, action->node, "container failed");
}
crm_trace("Order %s action %s relative to %s%s for %s%s",
action->task, action->uuid,
is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
remote_rsc->id,
is_set(container->flags, pe_rsc_failed)? "failed " : "",
container->id);
if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
|| safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) {
/* Migration ops map to "no_action", but we need to apply the same
* ordering as for stop or demote (see get_router_node()).
*/
task = stop_rsc;
}
switch (task) {
case start_rsc:
case action_promote:
/* Force resource recovery if the container is recovered */
order_start_then_action(container, action, pe_order_implies_then,
data_set);
/* Wait for the connection resource to be up too */
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
break;
case stop_rsc:
case action_demote:
if (is_set(container->flags, pe_rsc_failed)) {
/* When the container representing a guest node fails, any stop
* or demote actions for resources running on the guest node
* are implied by the container stopping. This is similar to
* how fencing operations work for cluster nodes and remote
* nodes.
*/
} else {
/* Ensure the operation happens before the connection is brought
* down.
*
* If we really wanted to, we could order these after the
* connection start, IFF the container's current role was
* stopped (otherwise we re-introduce an ordering loop when the
* connection is restarting).
*/
order_action_then_stop(action, remote_rsc, pe_order_none,
data_set);
}
break;
default:
/* Wait for the connection resource to be up */
if (is_recurring_action(action)) {
/* In case we ever get the recovery logic wrong, force
* recurring monitors to be restarted, even if just
* the connection was re-established
*/
if(task != no_action) {
order_start_then_action(remote_rsc, action,
pe_order_implies_then, data_set);
}
} else {
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
}
break;
}
}
static enum remote_connection_state
get_remote_node_state(pe_node_t *node)
{
resource_t *remote_rsc = NULL;
node_t *cluster_node = NULL;
CRM_ASSERT(node);
remote_rsc = node->details->remote_rsc;
CRM_ASSERT(remote_rsc);
cluster_node = pe__current_node(remote_rsc);
/* If the cluster node the remote connection resource resides on
* is unclean or went offline, we can't process any operations
* on that remote node until after it starts elsewhere.
*/
if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) {
/* The connection resource is not going to run anywhere */
if (cluster_node && cluster_node->details->unclean) {
/* The remote connection is failed because its resource is on a
* failed node and can't be recovered elsewhere, so we must fence.
*/
return remote_state_failed;
}
if (is_not_set(remote_rsc->flags, pe_rsc_failed)) {
/* Connection resource is cleanly stopped */
return remote_state_stopped;
}
/* Connection resource is failed */
if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
&& remote_rsc->remote_reconnect_ms
&& node->details->remote_was_fenced
&& !pe__shutdown_requested(node)) {
/* We won't know whether the connection is recoverable until the
* reconnect interval expires and we reattempt connection.
*/
return remote_state_unknown;
}
/* The remote connection is in a failed state. If there are any
* resources known to be active on it (stop) or in an unknown state
* (probe), we must assume the worst and fence it.
*/
return remote_state_failed;
} else if (cluster_node == NULL) {
/* Connection is recoverable but not currently running anywhere, see if we can recover it first */
return remote_state_unknown;
} else if(cluster_node->details->unclean == TRUE
|| cluster_node->details->online == FALSE) {
/* Connection is running on a dead node, see if we can recover it first */
return remote_state_resting;
} else if (g_list_length(remote_rsc->running_on) > 1
&& remote_rsc->partial_migration_source
&& remote_rsc->partial_migration_target) {
/* We're in the middle of migrating a connection resource,
* wait until after the resource migrates before performing
* any actions.
*/
return remote_state_resting;
}
return remote_state_alive;
}
/*!
* \internal
* \brief Order actions on remote node relative to actions for the connection
*/
static void
apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
{
resource_t *remote_rsc = NULL;
enum action_tasks task = text2task(action->task);
enum remote_connection_state state = get_remote_node_state(action->node);
enum pe_ordering order_opts = pe_order_none;
if (action->rsc == NULL) {
return;
}
CRM_ASSERT(action->node);
CRM_ASSERT(pe__is_guest_or_remote_node(action->node));
remote_rsc = action->node->details->remote_rsc;
CRM_ASSERT(remote_rsc);
crm_trace("Order %s action %s relative to %s%s (state: %s)",
action->task, action->uuid,
is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
remote_rsc->id, state2text(state));
if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
|| safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) {
/* Migration ops map to "no_action", but we need to apply the same
* ordering as for stop or demote (see get_router_node()).
*/
task = stop_rsc;
}
switch (task) {
case start_rsc:
case action_promote:
order_opts = pe_order_none;
if (state == remote_state_failed) {
/* Force recovery, by making this action required */
order_opts |= pe_order_implies_then;
}
/* Ensure connection is up before running this action */
order_start_then_action(remote_rsc, action, order_opts, data_set);
break;
case stop_rsc:
if(state == remote_state_alive) {
order_action_then_stop(action, remote_rsc,
pe_order_implies_first, data_set);
} else if(state == remote_state_failed) {
/* The resource is active on the node, but since we don't have a
* valid connection, the only way to stop the resource is by
* fencing the node. There is no need to order the stop relative
* to the remote connection, since the stop will become implied
* by the fencing.
*/
pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable");
} else if(remote_rsc->next_role == RSC_ROLE_STOPPED) {
/* State must be remote_state_unknown or remote_state_stopped.
* Since the connection is not coming back up in this
* transition, stop this resource first.
*/
order_action_then_stop(action, remote_rsc,
pe_order_implies_first, data_set);
} else {
/* The connection is going to be started somewhere else, so
* stop this resource after that completes.
*/
order_start_then_action(remote_rsc, action, pe_order_none, data_set);
}
break;
case action_demote:
/* Only order this demote relative to the connection start if the
* connection isn't being torn down. Otherwise, the demote would be
* blocked because the connection start would not be allowed.
*/
if(state == remote_state_resting || state == remote_state_unknown) {
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
} /* Otherwise we can rely on the stop ordering */
break;
default:
/* Wait for the connection resource to be up */
if (is_recurring_action(action)) {
/* In case we ever get the recovery logic wrong, force
* recurring monitors to be restarted, even if just
* the connection was re-established
*/
order_start_then_action(remote_rsc, action,
pe_order_implies_then, data_set);
} else {
node_t *cluster_node = pe__current_node(remote_rsc);
if(task == monitor_rsc && state == remote_state_failed) {
/* We would only be here if we do not know the
* state of the resource on the remote node.
* Since we have no way to find out, it is
* necessary to fence the node.
*/
pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable");
}
if(cluster_node && state == remote_state_stopped) {
/* The connection is currently up, but is going
* down permanently.
*
* Make sure we check services are actually
* stopped _before_ we let the connection get
* closed
*/
order_action_then_stop(action, remote_rsc,
pe_order_runnable_left, data_set);
} else {
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
}
}
break;
}
}
static void
apply_remote_node_ordering(pe_working_set_t *data_set)
{
if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
return;
}
for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
resource_t *remote = NULL;
// We are only interested in resource actions
if (action->rsc == NULL) {
continue;
}
/* Special case: If we are clearing the failcount of an actual
* remote connection resource, then make sure this happens before
* any start of the resource in this transition.
*/
if (action->rsc->is_remote_node &&
safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) {
custom_action_order(action->rsc,
NULL,
action,
action->rsc,
generate_op_key(action->rsc->id, RSC_START, 0),
NULL,
pe_order_optional,
data_set);
continue;
}
// We are only interested in actions allocated to a node
if (action->node == NULL) {
continue;
}
if (!pe__is_guest_or_remote_node(action->node)) {
continue;
}
/* We are only interested in real actions.
*
* @TODO This is probably wrong; pseudo-actions might be converted to
* real actions and vice versa later in update_actions() at the end of
* stage7().
*/
if (is_set(action->flags, pe_action_pseudo)) {
continue;
}
remote = action->node->details->remote_rsc;
if (remote == NULL) {
// Orphaned
continue;
}
/* Another special case: if a resource is moving to a Pacemaker Remote
* node, order the stop on the original node after any start of the
* remote connection. This ensures that if the connection fails to
* start, we leave the resource running on the original node.
*/
if (safe_str_eq(action->task, RSC_START)) {
for (GList *item = action->rsc->actions; item != NULL;
item = item->next) {
pe_action_t *rsc_action = item->data;
if ((rsc_action->node->details != action->node->details)
&& safe_str_eq(rsc_action->task, RSC_STOP)) {
custom_action_order(remote, start_key(remote), NULL,
action->rsc, NULL, rsc_action,
pe_order_optional, data_set);
}
}
}
/* The action occurs across a remote connection, so create
* ordering constraints that guarantee the action occurs while the node
* is active (after start, before stop ... things like that).
*
* This is somewhat brittle in that we need to make sure the results of
* this ordering are compatible with the result of get_router_node().
* It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
* of this logic rather than action2xml().
*/
if (remote->container) {
crm_trace("Container ordering for %s", action->uuid);
apply_container_ordering(action, data_set);
} else {
crm_trace("Remote ordering for %s", action->uuid);
apply_remote_ordering(action, data_set);
}
}
}
static gboolean
order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action)
{
/* No need to probe the resource on the node that is being
* unfenced. Otherwise it might introduce transition loop
* since probe will be performed after the node is
* unfenced.
*/
if (safe_str_eq(rh_action->task, CRM_OP_FENCE)
&& probe->node && rh_action->node
&& probe->node->details == rh_action->node->details) {
const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action");
if (safe_str_eq(op, "on")) {
return TRUE;
}
}
// Shutdown waits for probe to complete only if it's on the same node
if ((safe_str_eq(rh_action->task, CRM_OP_SHUTDOWN))
&& probe->node && rh_action->node
&& probe->node->details != rh_action->node->details) {
return TRUE;
}
return FALSE;
}
static void
order_first_probes_imply_stops(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
pe__ordering_t *order = gIter->data;
enum pe_ordering order_type = pe_order_optional;
pe_resource_t *lh_rsc = order->lh_rsc;
pe_resource_t *rh_rsc = order->rh_rsc;
pe_action_t *lh_action = order->lh_action;
pe_action_t *rh_action = order->rh_action;
const char *lh_action_task = order->lh_action_task;
const char *rh_action_task = order->rh_action_task;
GListPtr probes = NULL;
GListPtr rh_actions = NULL;
GListPtr pIter = NULL;
if (lh_rsc == NULL) {
continue;
} else if (rh_rsc && lh_rsc == rh_rsc) {
continue;
}
if (lh_action == NULL && lh_action_task == NULL) {
continue;
}
if (rh_action == NULL && rh_action_task == NULL) {
continue;
}
/* Technically probe is expected to return "not running", which could be
* the alternative of stop action if the status of the resource is
* unknown yet.
*/
if (lh_action && safe_str_neq(lh_action->task, RSC_STOP)) {
continue;
} else if (lh_action == NULL
&& lh_action_task
&& crm_ends_with(lh_action_task, "_" RSC_STOP "_0") == FALSE) {
continue;
}
/* Do not probe the resource inside of a stopping container. Otherwise
* it might introduce transition loop since probe will be performed
* after the container starts again.
*/
if (rh_rsc && lh_rsc->container == rh_rsc) {
if (rh_action && safe_str_eq(rh_action->task, RSC_STOP)) {
continue;
} else if (rh_action == NULL && rh_action_task
&& crm_ends_with(rh_action_task,"_" RSC_STOP "_0")) {
continue;
}
}
if (order->type == pe_order_none) {
continue;
}
// Preserve the order options for future filtering
if (is_set(order->type, pe_order_apply_first_non_migratable)) {
set_bit(order_type, pe_order_apply_first_non_migratable);
}
if (is_set(order->type, pe_order_same_node)) {
set_bit(order_type, pe_order_same_node);
}
// Keep the order types for future filtering
if (order->type == pe_order_anti_colocation
|| order->type == pe_order_load) {
order_type = order->type;
}
probes = pe__resource_actions(lh_rsc, NULL, RSC_STATUS, FALSE);
if (probes == NULL) {
continue;
}
if (rh_action) {
rh_actions = g_list_prepend(rh_actions, rh_action);
} else if (rh_rsc && rh_action_task) {
rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL);
}
if (rh_actions == NULL) {
g_list_free(probes);
continue;
}
crm_trace("Processing for LH probe based on ordering constraint %s -> %s"
" (id=%d, type=%.6x)",
lh_action ? lh_action->uuid : lh_action_task,
rh_action ? rh_action->uuid : rh_action_task,
order->id, order->type);
for (pIter = probes; pIter != NULL; pIter = pIter->next) {
pe_action_t *probe = (pe_action_t *) pIter->data;
GListPtr rIter = NULL;
for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) {
pe_action_t *rh_action_iter = (pe_action_t *) rIter->data;
if (order_first_probe_unneeded(probe, rh_action_iter)) {
continue;
}
order_actions(probe, rh_action_iter, order_type);
}
}
g_list_free(rh_actions);
g_list_free(probes);
}
}
static void
order_first_probe_then_restart_repromote(pe_action_t * probe,
pe_action_t * after,
pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
bool interleave = FALSE;
pe_resource_t *compatible_rsc = NULL;
if (probe == NULL
|| probe->rsc == NULL
|| probe->rsc->variant != pe_native) {
return;
}
if (after == NULL
// Avoid running into any possible loop
|| is_set(after->flags, pe_action_tracking)) {
return;
}
if (safe_str_neq(probe->task, RSC_STATUS)) {
return;
}
pe_set_action_bit(after, pe_action_tracking);
crm_trace("Processing based on %s %s -> %s %s",
probe->uuid,
probe->node ? probe->node->details->uname: "",
after->uuid,
after->node ? after->node->details->uname : "");
if (after->rsc
/* Better not build a dependency directly with a clone/group.
* We are going to proceed through the ordering chain and build
* dependencies with its children.
*/
&& after->rsc->variant == pe_native
&& probe->rsc != after->rsc) {
GListPtr then_actions = NULL;
enum pe_ordering probe_order_type = pe_order_optional;
if (safe_str_eq(after->task, RSC_START)) {
then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP, FALSE);
} else if (safe_str_eq(after->task, RSC_PROMOTE)) {
then_actions = pe__resource_actions(after->rsc, NULL, RSC_DEMOTE, FALSE);
}
for (gIter = then_actions; gIter != NULL; gIter = gIter->next) {
pe_action_t *then = (pe_action_t *) gIter->data;
// Skip any pseudo action which for example is implied by fencing
if (is_set(then->flags, pe_action_pseudo)) {
continue;
}
order_actions(probe, then, probe_order_type);
}
g_list_free(then_actions);
}
if (after->rsc
&& after->rsc->variant > pe_group) {
const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
XML_RSC_ATTR_INTERLEAVE);
interleave = crm_is_true(interleave_s);
if (interleave) {
/* For an interleaved clone, we should build a dependency only
* with the relevant clone child.
*/
compatible_rsc = find_compatible_child(probe->rsc,
after->rsc,
RSC_ROLE_UNKNOWN,
FALSE, data_set);
}
}
for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) {
pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) gIter->data;
/* pe_order_implies_then is the reason why a required A.start
* implies/enforces B.start to be required too, which is the cause of
* B.restart/re-promote.
*
* Not sure about pe_order_implies_then_on_node though. It's now only
* used for unfencing case, which tends to introduce transition
* loops...
*/
if (is_not_set(after_wrapper->type, pe_order_implies_then)) {
/* The order type between a group/clone and its child such as
* B.start-> B_child.start is:
* pe_order_implies_first_printed | pe_order_runnable_left
*
* Proceed through the ordering chain and build dependencies with
* its children.
*/
if (after->rsc == NULL
|| after->rsc->variant < pe_group
|| probe->rsc->parent == after->rsc
|| after_wrapper->action->rsc == NULL
|| after_wrapper->action->rsc->variant > pe_group
|| after->rsc != after_wrapper->action->rsc->parent) {
continue;
}
/* Proceed to the children of a group or a non-interleaved clone.
* For an interleaved clone, proceed only to the relevant child.
*/
if (after->rsc->variant > pe_group
&& interleave == TRUE
&& (compatible_rsc == NULL
|| compatible_rsc != after_wrapper->action->rsc)) {
continue;
}
}
crm_trace("Proceeding through %s %s -> %s %s (type=0x%.6x)",
after->uuid,
after->node ? after->node->details->uname: "",
after_wrapper->action->uuid,
after_wrapper->action->node ? after_wrapper->action->node->details->uname : "",
after_wrapper->type);
order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
}
}
static void clear_actions_tracking_flag(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (is_set(action->flags, pe_action_tracking)) {
pe_clear_action_bit(action, pe_action_tracking);
}
}
}
static void
order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
GListPtr probes = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t * child = (pe_resource_t *) gIter->data;
order_first_rsc_probes(child, data_set);
}
if (rsc->variant != pe_native) {
return;
}
probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
for (gIter = probes; gIter != NULL; gIter= gIter->next) {
pe_action_t *probe = (pe_action_t *) gIter->data;
GListPtr aIter = NULL;
for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) {
pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) aIter->data;
order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
clear_actions_tracking_flag(data_set);
}
}
g_list_free(probes);
}
static void
order_first_probes(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
pe_resource_t *rsc = (pe_resource_t *) gIter->data;
order_first_rsc_probes(rsc, data_set);
}
order_first_probes_imply_stops(data_set);
}
static void
order_then_probes(pe_working_set_t * data_set)
{
#if 0
GListPtr gIter = NULL;
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
/* Given "A then B", we would prefer to wait for A to be
* started before probing B.
*
* If A was a filesystem on which the binaries and data for B
* lived, it would have been useful if the author of B's agent
* could assume that A is running before B.monitor will be
* called.
*
* However we can't _only_ probe once A is running, otherwise
* we'd not detect the state of B if A could not be started
* for some reason.
*
* In practice however, we cannot even do an opportunistic
* version of this because B may be moving:
*
* B.probe -> B.start
* B.probe -> B.stop
* B.stop -> B.start
* A.stop -> A.start
* A.start -> B.probe
*
* So far so good, but if we add the result of this code:
*
* B.stop -> A.stop
*
* Then we get a loop:
*
* B.probe -> B.stop -> A.stop -> A.start -> B.probe
*
* We could kill the 'B.probe -> B.stop' dependency, but that
* could mean stopping B "too" soon, because B.start must wait
* for the probes to complete.
*
* Another option is to allow it only if A is a non-unique
* clone with clone-max == node-max (since we'll never be
* moving it). However, we could still be stopping one
* instance at the same time as starting another.
* The complexity of checking for allowed conditions combined
* with the ever narrowing usecase suggests that this code
* should remain disabled until someone gets smarter.
*/
action_t *start = NULL;
GListPtr actions = NULL;
GListPtr probes = NULL;
actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE);
if (actions) {
start = actions->data;
g_list_free(actions);
}
if(start == NULL) {
crm_err("No start action for %s", rsc->id);
continue;
}
probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
for (actions = start->actions_before; actions != NULL; actions = actions->next) {
action_wrapper_t *before = (action_wrapper_t *) actions->data;
GListPtr pIter = NULL;
action_t *first = before->action;
resource_t *first_rsc = first->rsc;
if(first->required_runnable_before) {
GListPtr clone_actions = NULL;
for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) {
before = (action_wrapper_t *) clone_actions->data;
crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid);
CRM_ASSERT(before->action->rsc);
first_rsc = before->action->rsc;
break;
}
} else if(safe_str_neq(first->task, RSC_START)) {
crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
}
if(first_rsc == NULL) {
continue;
} else if(uber_parent(first_rsc) == uber_parent(start->rsc)) {
crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
continue;
} else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) {
crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
continue;
}
crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant);
for (pIter = probes; pIter != NULL; pIter = pIter->next) {
action_t *probe = (action_t *) pIter->data;
crm_err("Ordering %s before %s", first->uuid, probe->uuid);
order_actions(first, probe, pe_order_optional);
}
}
}
#endif
}
static void
order_probes(pe_working_set_t * data_set)
{
order_first_probes(data_set);
order_then_probes(data_set);
}
gboolean
stage7(pe_working_set_t * data_set)
{
GList *gIter = NULL;
crm_trace("Applying ordering constraints");
/* Don't ask me why, but apparently they need to be processed in
* the order they were created in... go figure
*
* Also g_list_append() has horrendous performance characteristics
* So we need to use g_list_prepend() and then reverse the list here
*/
data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);
for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
pe__ordering_t *order = gIter->data;
resource_t *rsc = order->lh_rsc;
crm_trace("Applying ordering constraint: %d", order->id);
if (rsc != NULL) {
crm_trace("rsc_action-to-*");
rsc_order_first(rsc, order, data_set);
continue;
}
rsc = order->rh_rsc;
if (rsc != NULL) {
crm_trace("action-to-rsc_action");
rsc_order_then(order->lh_action, rsc, order);
} else {
crm_trace("action-to-action");
order_actions(order->lh_action, order->rh_action, order->type);
}
}
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
update_colo_start_chain(action, data_set);
}
crm_trace("Ordering probes");
order_probes(data_set);
crm_trace("Updating %d actions", g_list_length(data_set->actions));
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
update_action(action, data_set);
}
// Check for invalid orderings
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
pe_action_wrapper_t *input = NULL;
for (GList *input_iter = action->actions_before;
input_iter != NULL; input_iter = input_iter->next) {
input = (pe_action_wrapper_t *) input_iter->data;
if (pcmk__ordering_is_invalid(action, input)) {
input->type = pe_order_none;
}
}
}
LogNodeActions(data_set, FALSE);
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
LogActions(rsc, data_set, FALSE);
}
return TRUE;
}
static int transition_id = -1;
/*!
* \internal
* \brief Log a message after calculating a transition
*
* \param[in] filename Where transition input is stored
*/
void
pcmk__log_transition_summary(const char *filename)
{
if (was_processing_error) {
crm_err("Calculated transition %d (with errors), saving inputs in %s",
transition_id, filename);
} else if (was_processing_warning) {
crm_warn("Calculated transition %d (with warnings), saving inputs in %s",
transition_id, filename);
} else {
crm_notice("Calculated transition %d, saving inputs in %s",
transition_id, filename);
}
if (crm_config_error) {
crm_notice("Configuration errors found during scheduler processing,"
" please run \"crm_verify -L\" to identify issues");
}
}
/*
* Create a dependency graph to send to the transitioner (via the controller)
*/
gboolean
stage8(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
const char *value = NULL;
transition_id++;
crm_trace("Creating transition graph %d.", transition_id);
data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
value = pe_pref(data_set->config_hash, "cluster-delay");
crm_xml_add(data_set->graph, "cluster-delay", value);
value = pe_pref(data_set->config_hash, "stonith-timeout");
crm_xml_add(data_set->graph, "stonith-timeout", value);
crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
if (is_set(data_set->flags, pe_flag_start_failure_fatal)) {
crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
} else {
crm_xml_add(data_set->graph, "failed-start-offset", "1");
}
value = pe_pref(data_set->config_hash, "batch-limit");
crm_xml_add(data_set->graph, "batch-limit", value);
crm_xml_add_int(data_set->graph, "transition_id", transition_id);
value = pe_pref(data_set->config_hash, "migration-limit");
if (crm_int_helper(value, NULL) > 0) {
crm_xml_add(data_set->graph, "migration-limit", value);
}
if (data_set->recheck_by > 0) {
char *recheck_epoch = NULL;
recheck_epoch = crm_strdup_printf("%llu",
(long long) data_set->recheck_by);
crm_xml_add(data_set->graph, "recheck-by", recheck_epoch);
free(recheck_epoch);
}
/* errors...
slist_iter(action, action_t, action_list, lpc,
if(action->optional == FALSE && action->runnable == FALSE) {
print_action("Ignoring", action, TRUE);
}
);
*/
+ /* The following code will de-duplicate action inputs, so nothing past this
+ * should rely on the action input type flags retaining their original
+ * values.
+ */
+
gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id);
rsc->cmds->expand(rsc, data_set);
}
crm_log_xml_trace(data_set->graph, "created resource-driven action list");
/* pseudo action to distribute list of nodes with maintenance state update */
add_maintenance_update(data_set);
/* catch any non-resource specific actions */
crm_trace("processing non-resource actions");
gIter = data_set->actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
if (action->rsc
&& action->node
&& action->node->details->shutdown
&& is_not_set(action->rsc->flags, pe_rsc_maintenance)
&& is_not_set(action->flags, pe_action_optional)
&& is_not_set(action->flags, pe_action_runnable)
&& crm_str_eq(action->task, RSC_STOP, TRUE)
) {
/* Eventually we should just ignore the 'fence' case
* But for now it's the best way to detect (in CTS) when
* CIB resource updates are being lost
*/
if (is_set(data_set->flags, pe_flag_have_quorum)
|| data_set->no_quorum_policy == no_quorum_ignore) {
crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)",
action->node->details->unclean ? "fence" : "shut down",
action->node->details->uname, action->rsc->id,
is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked",
is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "",
action->uuid);
}
}
graph_element_from_action(action, data_set);
}
crm_log_xml_trace(data_set->graph, "created generic action list");
crm_trace("Created transition graph %d.", transition_id);
return TRUE;
}
void
LogNodeActions(pe_working_set_t * data_set, gboolean terminal)
{
GListPtr gIter = NULL;
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
char *node_name = NULL;
char *task = NULL;
action_t *action = (action_t *) gIter->data;
if (action->rsc != NULL) {
continue;
} else if (is_set(action->flags, pe_action_optional)) {
continue;
}
if (pe__is_guest_node(action->node)) {
node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id);
} else if(action->node) {
node_name = crm_strdup_printf("%s", action->node->details->uname);
}
if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
task = strdup("Shutdown");
} else if (safe_str_eq(action->task, CRM_OP_FENCE)) {
const char *op = g_hash_table_lookup(action->meta, "stonith_action");
task = crm_strdup_printf("Fence (%s)", op);
}
if(task == NULL) {
/* Nothing to report */
} else if(terminal && action->reason) {
printf(" * %s %s '%s'\n", task, node_name, action->reason);
} else if(terminal) {
printf(" * %s %s\n", task, node_name);
} else if(action->reason) {
crm_notice(" * %s %s '%s'\n", task, node_name, action->reason);
} else {
crm_notice(" * %s %s\n", task, node_name);
}
free(node_name);
free(task);
}
}
diff --git a/lib/pacemaker/pcmk_sched_graph.c b/lib/pacemaker/pcmk_sched_graph.c
index 5a7580acc1..744d9a540f 100644
--- a/lib/pacemaker/pcmk_sched_graph.c
+++ b/lib/pacemaker/pcmk_sched_graph.c
@@ -1,1768 +1,1808 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <glib.h>
#include <pacemaker-internal.h>
void update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set);
gboolean rsc_update_action(action_t * first, action_t * then, enum pe_ordering type);
static enum pe_action_flags
get_action_flags(action_t * action, node_t * node)
{
enum pe_action_flags flags = action->flags;
if (action->rsc) {
flags = action->rsc->cmds->action_flags(action, NULL);
if (pe_rsc_is_clone(action->rsc) && node) {
/* We only care about activity on $node */
enum pe_action_flags clone_flags = action->rsc->cmds->action_flags(action, node);
/* Go to great lengths to ensure the correct value for pe_action_runnable...
*
* If we are a clone, then for _ordering_ constraints, it's only relevant
* if we are runnable _anywhere_.
*
* This only applies to _runnable_ though, and only for ordering constraints.
* If this function is ever used during colocation, then we'll need additional logic
*
* Not very satisfying, but it's logical and appears to work well.
*/
if (is_not_set(clone_flags, pe_action_runnable)
&& is_set(flags, pe_action_runnable)) {
pe_rsc_trace(action->rsc, "Fixing up runnable flag for %s", action->uuid);
set_bit(clone_flags, pe_action_runnable);
}
flags = clone_flags;
}
}
return flags;
}
static char *
convert_non_atomic_uuid(char *old_uuid, resource_t * rsc, gboolean allow_notify,
gboolean free_original)
{
guint interval_ms = 0;
char *uuid = NULL;
char *rid = NULL;
char *raw_task = NULL;
int task = no_action;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Processing %s", old_uuid);
if (old_uuid == NULL) {
return NULL;
} else if (strstr(old_uuid, "notify") != NULL) {
goto done; /* no conversion */
} else if (rsc->variant < pe_group) {
goto done; /* no conversion */
}
CRM_ASSERT(parse_op_key(old_uuid, &rid, &raw_task, &interval_ms));
if (interval_ms > 0) {
goto done; /* no conversion */
}
task = text2task(raw_task);
switch (task) {
case stop_rsc:
case start_rsc:
case action_notify:
case action_promote:
case action_demote:
break;
case stopped_rsc:
case started_rsc:
case action_notified:
case action_promoted:
case action_demoted:
task--;
break;
case monitor_rsc:
case shutdown_crm:
case stonith_node:
task = no_action;
break;
default:
crm_err("Unknown action: %s", raw_task);
task = no_action;
break;
}
if (task != no_action) {
if (is_set(rsc->flags, pe_rsc_notify) && allow_notify) {
uuid = generate_notify_key(rid, "confirmed-post", task2text(task + 1));
} else {
uuid = generate_op_key(rid, task2text(task + 1), 0);
}
pe_rsc_trace(rsc, "Converted %s -> %s", old_uuid, uuid);
}
done:
if (uuid == NULL) {
uuid = strdup(old_uuid);
}
if (free_original) {
free(old_uuid);
}
free(raw_task);
free(rid);
return uuid;
}
static action_t *
rsc_expand_action(action_t * action)
{
gboolean notify = FALSE;
action_t *result = action;
resource_t *rsc = action->rsc;
if (rsc == NULL) {
return action;
}
if ((rsc->parent == NULL)
|| (pe_rsc_is_clone(rsc) && (rsc->parent->variant == pe_container))) {
/* Only outermost resources have notification actions.
* The exception is those in bundles.
*/
notify = is_set(rsc->flags, pe_rsc_notify);
}
if (rsc->variant >= pe_group) {
/* Expand 'start' -> 'started' */
char *uuid = NULL;
uuid = convert_non_atomic_uuid(action->uuid, rsc, notify, FALSE);
if (uuid) {
pe_rsc_trace(rsc, "Converting %s to %s %d", action->uuid, uuid,
is_set(rsc->flags, pe_rsc_notify));
result = find_first_action(rsc->actions, uuid, NULL, NULL);
if (result == NULL) {
crm_err("Couldn't expand %s to %s in %s", action->uuid, uuid, rsc->id);
result = action;
}
free(uuid);
}
}
return result;
}
static enum pe_graph_flags
graph_update_action(action_t * first, action_t * then, node_t * node,
enum pe_action_flags first_flags, enum pe_action_flags then_flags,
action_wrapper_t *order, pe_working_set_t *data_set)
{
enum pe_graph_flags changed = pe_graph_none;
enum pe_ordering type = order->type;
gboolean processed = FALSE;
/* TODO: Do as many of these in parallel as possible */
if(is_set(type, pe_order_implies_then_on_node)) {
/* Normally we want the _whole_ 'then' clone to
* restart if 'first' is restarted, so then->node is
* needed.
*
* However for unfencing, we want to limit this to
* instances on the same node as 'first' (the
* unfencing operation), so first->node is supplied.
*
* Swap the node, from then on we can can treat it
* like any other 'pe_order_implies_then'
*/
clear_bit(type, pe_order_implies_then_on_node);
set_bit(type, pe_order_implies_then);
node = first->node;
}
clear_bit(first_flags, pe_action_pseudo);
if (type & pe_order_implies_then) {
processed = TRUE;
if (then->rsc) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags & pe_action_optional, pe_action_optional,
pe_order_implies_then, data_set);
} else if (is_set(first_flags, pe_action_optional) == FALSE) {
if (update_action_flags(then, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__)) {
changed |= pe_graph_updated_then;
}
}
if (changed) {
pe_rsc_trace(then->rsc, "implies right: %s then %s: changed", first->uuid, then->uuid);
} else {
crm_trace("implies right: %s then %s %p", first->uuid, then->uuid, then->rsc);
}
}
if ((type & pe_order_restart) && then->rsc) {
enum pe_action_flags restart = (pe_action_optional | pe_action_runnable);
processed = TRUE;
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, restart,
pe_order_restart, data_set);
if (changed) {
pe_rsc_trace(then->rsc, "restart: %s then %s: changed", first->uuid, then->uuid);
} else {
crm_trace("restart: %s then %s", first->uuid, then->uuid);
}
}
if (type & pe_order_implies_first) {
processed = TRUE;
if (first->rsc) {
changed |= first->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_optional, pe_order_implies_first,
data_set);
} else if (is_set(first_flags, pe_action_optional) == FALSE) {
pe_rsc_trace(first->rsc, "first unrunnable: %s (%d) then %s (%d)",
first->uuid, is_set(first_flags, pe_action_optional),
then->uuid, is_set(then_flags, pe_action_optional));
if (update_action_flags(first, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__)) {
changed |= pe_graph_updated_first;
}
}
if (changed) {
pe_rsc_trace(then->rsc, "implies left: %s then %s: changed", first->uuid, then->uuid);
} else {
crm_trace("implies left: %s (%d) then %s (%d)",
first->uuid, is_set(first_flags, pe_action_optional),
then->uuid, is_set(then_flags, pe_action_optional));
}
}
if (type & pe_order_implies_first_master) {
processed = TRUE;
if (then->rsc) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags & pe_action_optional, pe_action_optional,
pe_order_implies_first_master, data_set);
}
if (changed) {
pe_rsc_trace(then->rsc,
"implies left when right rsc is Master role: %s then %s: changed",
first->uuid, then->uuid);
} else {
crm_trace("implies left when right rsc is Master role: %s then %s", first->uuid,
then->uuid);
}
}
if (type & pe_order_one_or_more) {
processed = TRUE;
if (then->rsc) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_runnable, pe_order_one_or_more,
data_set);
} else if (is_set(first_flags, pe_action_runnable)) {
/* alright. a "first" action is considered runnable, incremente
* the 'runnable_before' counter */
then->runnable_before++;
/* if the runnable before count for then exceeds the required number
* of "before" runnable actions... mark then as runnable */
if (then->runnable_before >= then->required_runnable_before) {
if (update_action_flags(then, pe_action_runnable, __FUNCTION__, __LINE__)) {
changed |= pe_graph_updated_then;
}
}
}
if (changed) {
pe_rsc_trace(then->rsc, "runnable_one_or_more: %s then %s: changed", first->uuid,
then->uuid);
} else {
crm_trace("runnable_one_or_more: %s then %s", first->uuid, then->uuid);
}
}
if (then->rsc && is_set(type, pe_order_probe)) {
processed = TRUE;
if (is_not_set(first_flags, pe_action_runnable) && first->rsc->running_on != NULL) {
pe_rsc_trace(then->rsc, "Ignoring %s then %s - %s is about to be stopped",
first->uuid, then->uuid, first->rsc->id);
type = pe_order_none;
order->type = pe_order_none;
} else {
pe_rsc_trace(then->rsc, "Enforcing %s then %s", first->uuid, then->uuid);
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_runnable, pe_order_runnable_left,
data_set);
}
if (changed) {
pe_rsc_trace(then->rsc, "runnable: %s then %s: changed", first->uuid, then->uuid);
} else {
crm_trace("runnable: %s then %s", first->uuid, then->uuid);
}
}
if (type & pe_order_runnable_left) {
processed = TRUE;
if (then->rsc) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_runnable, pe_order_runnable_left,
data_set);
} else if (is_set(first_flags, pe_action_runnable) == FALSE) {
pe_rsc_trace(then->rsc, "then unrunnable: %s then %s", first->uuid, then->uuid);
if (update_action_flags(then, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__)) {
changed |= pe_graph_updated_then;
}
}
if (changed) {
pe_rsc_trace(then->rsc, "runnable: %s then %s: changed", first->uuid, then->uuid);
} else {
crm_trace("runnable: %s then %s", first->uuid, then->uuid);
}
}
if (type & pe_order_implies_first_migratable) {
processed = TRUE;
if (then->rsc) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_optional,
pe_order_implies_first_migratable, data_set);
}
if (changed) {
pe_rsc_trace(then->rsc, "optional: %s then %s: changed", first->uuid, then->uuid);
} else {
crm_trace("optional: %s then %s", first->uuid, then->uuid);
}
}
if (type & pe_order_pseudo_left) {
processed = TRUE;
if (then->rsc) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_optional, pe_order_pseudo_left,
data_set);
}
if (changed) {
pe_rsc_trace(then->rsc, "optional: %s then %s: changed", first->uuid, then->uuid);
} else {
crm_trace("optional: %s then %s", first->uuid, then->uuid);
}
}
if (type & pe_order_optional) {
processed = TRUE;
if (then->rsc) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_runnable, pe_order_optional, data_set);
}
if (changed) {
pe_rsc_trace(then->rsc, "optional: %s then %s: changed", first->uuid, then->uuid);
} else {
crm_trace("optional: %s then %s", first->uuid, then->uuid);
}
}
if (type & pe_order_asymmetrical) {
processed = TRUE;
if (then->rsc) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_runnable, pe_order_asymmetrical,
data_set);
}
if (changed) {
pe_rsc_trace(then->rsc, "asymmetrical: %s then %s: changed", first->uuid, then->uuid);
} else {
crm_trace("asymmetrical: %s then %s", first->uuid, then->uuid);
}
}
if ((first->flags & pe_action_runnable) && (type & pe_order_implies_then_printed)
&& (first_flags & pe_action_optional) == 0) {
processed = TRUE;
crm_trace("%s implies %s printed", first->uuid, then->uuid);
update_action_flags(then, pe_action_print_always, __FUNCTION__, __LINE__); /* don't care about changed */
}
if (is_set(type, pe_order_implies_first_printed) && is_set(then_flags, pe_action_optional) == FALSE) {
processed = TRUE;
crm_trace("%s implies %s printed", then->uuid, first->uuid);
update_action_flags(first, pe_action_print_always, __FUNCTION__, __LINE__); /* don't care about changed */
}
if ((type & pe_order_implies_then
|| type & pe_order_implies_first
|| type & pe_order_restart)
&& first->rsc
&& safe_str_eq(first->task, RSC_STOP)
&& is_not_set(first->rsc->flags, pe_rsc_managed)
&& is_set(first->rsc->flags, pe_rsc_block)
&& is_not_set(first->flags, pe_action_runnable)) {
if (update_action_flags(then, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__)) {
changed |= pe_graph_updated_then;
}
if (changed) {
pe_rsc_trace(then->rsc, "unmanaged left: %s then %s: changed", first->uuid, then->uuid);
} else {
crm_trace("unmanaged left: %s then %s", first->uuid, then->uuid);
}
}
if (processed == FALSE) {
crm_trace("Constraint 0x%.6x not applicable", type);
}
return changed;
}
static void
mark_start_blocked(pe_resource_t *rsc, pe_resource_t *reason,
pe_working_set_t *data_set)
{
GListPtr gIter = rsc->actions;
char *reason_text = crm_strdup_printf("colocation with %s", reason->id);
for (; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
if (safe_str_neq(action->task, RSC_START)) {
continue;
}
if (is_set(action->flags, pe_action_runnable)) {
pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, reason_text, pe_action_runnable, FALSE);
update_colo_start_chain(action, data_set);
update_action(action, data_set);
}
}
free(reason_text);
}
void
update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set)
{
GListPtr gIter = NULL;
resource_t *rsc = NULL;
if (is_not_set(action->flags, pe_action_runnable) && safe_str_eq(action->task, RSC_START)) {
rsc = uber_parent(action->rsc);
if (rsc->parent) {
/* For bundles, uber_parent() returns the clone/master, not the
* bundle, so the existence of rsc->parent implies this is a bundle.
* In this case, we need the bundle resource, so that we can check
* if all containers are stopped/stopping.
*/
rsc = rsc->parent;
}
}
if (rsc == NULL || rsc->rsc_cons_lhs == NULL) {
return;
}
/* if rsc has children, all the children need to have start set to
* unrunnable before we follow the colo chain for the parent. */
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t *)gIter->data;
action_t *start = find_first_action(child->actions, NULL, RSC_START, NULL);
if (start == NULL || is_set(start->flags, pe_action_runnable)) {
return;
}
}
for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
rsc_colocation_t *colocate_with = (rsc_colocation_t *)gIter->data;
if (colocate_with->score == INFINITY) {
mark_start_blocked(colocate_with->rsc_lh, action->rsc, data_set);
}
}
}
gboolean
update_action(pe_action_t *then, pe_working_set_t *data_set)
{
GListPtr lpc = NULL;
enum pe_graph_flags changed = pe_graph_none;
int last_flags = then->flags;
crm_trace("Processing %s (%s %s %s)",
then->uuid,
is_set(then->flags, pe_action_optional) ? "optional" : "required",
is_set(then->flags, pe_action_runnable) ? "runnable" : "unrunnable",
is_set(then->flags,
pe_action_pseudo) ? "pseudo" : then->node ? then->node->details->uname : "");
if (is_set(then->flags, pe_action_requires_any)) {
/* initialize current known runnable before actions to 0
* from here as graph_update_action is called for each of
* then's before actions, this number will increment as
* runnable 'first' actions are encountered */
then->runnable_before = 0;
/* for backwards compatibility with previous options that use
* the 'requires_any' flag, initialize required to 1 if it is
* not set. */
if (then->required_runnable_before == 0) {
then->required_runnable_before = 1;
}
pe_clear_action_bit(then, pe_action_runnable);
/* We are relying on the pe_order_one_or_more clause of
* graph_update_action(), called as part of the:
*
* 'if (first == other->action)'
*
* block below, to set this back if appropriate
*/
}
for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) {
action_wrapper_t *other = (action_wrapper_t *) lpc->data;
action_t *first = other->action;
node_t *then_node = then->node;
node_t *first_node = first->node;
enum pe_action_flags then_flags = 0;
enum pe_action_flags first_flags = 0;
if (first->rsc && first->rsc->variant == pe_group && safe_str_eq(first->task, RSC_START)) {
first_node = first->rsc->fns->location(first->rsc, NULL, FALSE);
if (first_node) {
crm_trace("First: Found node %s for %s", first_node->details->uname, first->uuid);
}
}
if (then->rsc && then->rsc->variant == pe_group && safe_str_eq(then->task, RSC_START)) {
then_node = then->rsc->fns->location(then->rsc, NULL, FALSE);
if (then_node) {
crm_trace("Then: Found node %s for %s", then_node->details->uname, then->uuid);
}
}
/* Disable constraint if it only applies when on same node, but isn't */
if (is_set(other->type, pe_order_same_node) && first_node && then_node
&& (first_node->details != then_node->details)) {
crm_trace("Disabled constraint %s on %s -> %s on %s",
other->action->uuid, first_node->details->uname,
then->uuid, then_node->details->uname);
other->type = pe_order_none;
continue;
}
clear_bit(changed, pe_graph_updated_first);
if (first->rsc && is_set(other->type, pe_order_then_cancels_first)
&& is_not_set(then->flags, pe_action_optional)) {
/* 'then' is required, so we must abandon 'first'
* (e.g. a required stop cancels any reload).
* Only used with reload actions as 'first'.
*/
set_bit(other->action->flags, pe_action_optional);
clear_bit(first->rsc->flags, pe_rsc_reload);
}
if (first->rsc && then->rsc && (first->rsc != then->rsc)
&& (is_parent(then->rsc, first->rsc) == FALSE)) {
first = rsc_expand_action(first);
}
if (first != other->action) {
crm_trace("Ordering %s after %s instead of %s", then->uuid, first->uuid,
other->action->uuid);
}
first_flags = get_action_flags(first, then_node);
then_flags = get_action_flags(then, first_node);
crm_trace("Checking %s (%s %s %s) against %s (%s %s %s) filter=0x%.6x type=0x%.6x",
then->uuid,
is_set(then_flags, pe_action_optional) ? "optional" : "required",
is_set(then_flags, pe_action_runnable) ? "runnable" : "unrunnable",
is_set(then_flags,
pe_action_pseudo) ? "pseudo" : then->node ? then->node->details->
uname : "", first->uuid, is_set(first_flags,
pe_action_optional) ? "optional" : "required",
is_set(first_flags, pe_action_runnable) ? "runnable" : "unrunnable",
is_set(first_flags,
pe_action_pseudo) ? "pseudo" : first->node ? first->node->details->
uname : "", first_flags, other->type);
if (first == other->action) {
/*
* 'first' was not expanded (e.g. from 'start' to 'running'), which could mean it:
* - has no associated resource,
* - was a primitive,
* - was pre-expanded (e.g. 'running' instead of 'start')
*
* The third argument here to graph_update_action() is a node which is used under two conditions:
* - Interleaving, in which case first->node and
* then->node are equal (and NULL)
* - If 'then' is a clone, to limit the scope of the
* constraint to instances on the supplied node
*
*/
node_t *node = then->node;
changed |= graph_update_action(first, then, node, first_flags,
then_flags, other, data_set);
/* 'first' was for a complex resource (clone, group, etc),
* create a new dependency if necessary
*/
} else if (order_actions(first, then, other->type)) {
/* This was the first time 'first' and 'then' were associated,
* start again to get the new actions_before list
*/
changed |= (pe_graph_updated_then | pe_graph_disable);
}
if (changed & pe_graph_disable) {
crm_trace("Disabled constraint %s -> %s in favor of %s -> %s",
other->action->uuid, then->uuid, first->uuid, then->uuid);
clear_bit(changed, pe_graph_disable);
other->type = pe_order_none;
}
if (changed & pe_graph_updated_first) {
GListPtr lpc2 = NULL;
crm_trace("Updated %s (first %s %s %s), processing dependents ",
first->uuid,
is_set(first->flags, pe_action_optional) ? "optional" : "required",
is_set(first->flags, pe_action_runnable) ? "runnable" : "unrunnable",
is_set(first->flags,
pe_action_pseudo) ? "pseudo" : first->node ? first->node->details->
uname : "");
for (lpc2 = first->actions_after; lpc2 != NULL; lpc2 = lpc2->next) {
action_wrapper_t *other = (action_wrapper_t *) lpc2->data;
update_action(other->action, data_set);
}
update_action(first, data_set);
}
}
if (is_set(then->flags, pe_action_requires_any)) {
if (last_flags != then->flags) {
changed |= pe_graph_updated_then;
} else {
clear_bit(changed, pe_graph_updated_then);
}
}
if (changed & pe_graph_updated_then) {
crm_trace("Updated %s (then %s %s %s), processing dependents ",
then->uuid,
is_set(then->flags, pe_action_optional) ? "optional" : "required",
is_set(then->flags, pe_action_runnable) ? "runnable" : "unrunnable",
is_set(then->flags,
pe_action_pseudo) ? "pseudo" : then->node ? then->node->details->
uname : "");
if (is_set(last_flags, pe_action_runnable) && is_not_set(then->flags, pe_action_runnable)) {
update_colo_start_chain(then, data_set);
}
update_action(then, data_set);
for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) {
action_wrapper_t *other = (action_wrapper_t *) lpc->data;
update_action(other->action, data_set);
}
}
return FALSE;
}
gboolean
shutdown_constraints(node_t * node, action_t * shutdown_op, pe_working_set_t * data_set)
{
/* add the stop to the before lists so it counts as a pre-req
* for the shutdown
*/
GListPtr lpc = NULL;
for (lpc = data_set->actions; lpc != NULL; lpc = lpc->next) {
action_t *action = (action_t *) lpc->data;
if (action->rsc == NULL || action->node == NULL) {
continue;
} else if (action->node->details != node->details) {
continue;
} else if (is_set(action->rsc->flags, pe_rsc_maintenance)) {
pe_rsc_trace(action->rsc, "Skipping %s: maintenance mode", action->uuid);
continue;
} else if (node->details->maintenance) {
pe_rsc_trace(action->rsc, "Skipping %s: node %s is in maintenance mode",
action->uuid, node->details->uname);
continue;
} else if (safe_str_neq(action->task, RSC_STOP)) {
continue;
} else if (is_not_set(action->rsc->flags, pe_rsc_managed)
&& is_not_set(action->rsc->flags, pe_rsc_block)) {
/*
* If another action depends on this one, we may still end up blocking
*/
pe_rsc_trace(action->rsc, "Skipping %s: unmanaged", action->uuid);
continue;
}
pe_rsc_trace(action->rsc, "Ordering %s before shutdown on %s", action->uuid,
node->details->uname);
pe_clear_action_bit(action, pe_action_optional);
custom_action_order(action->rsc, NULL, action,
NULL, strdup(CRM_OP_SHUTDOWN), shutdown_op,
pe_order_optional | pe_order_runnable_left, data_set);
}
return TRUE;
}
/*!
* \internal
* \brief Order all actions appropriately relative to a fencing operation
*
* Ensure start operations of affected resources are ordered after fencing,
* imply stop and demote operations of affected resources by marking them as
* pseudo-actions, etc.
*
* \param[in] node Node to be fenced
* \param[in] stonith_op Fencing operation
* \param[in,out] data_set Working set of cluster
*/
gboolean
stonith_constraints(node_t * node, action_t * stonith_op, pe_working_set_t * data_set)
{
GListPtr r = NULL;
CRM_CHECK(stonith_op != NULL, return FALSE);
for (r = data_set->resources; r != NULL; r = r->next) {
rsc_stonith_ordering((resource_t *) r->data, stonith_op, data_set);
}
return TRUE;
}
static node_t *
get_router_node(action_t *action)
{
node_t *began_on = NULL;
node_t *ended_on = NULL;
node_t *router_node = NULL;
bool partial_migration = FALSE;
const char *task = action->task;
if (safe_str_eq(task, CRM_OP_FENCE)
|| !pe__is_guest_or_remote_node(action->node)) {
return NULL;
}
CRM_ASSERT(action->node->details->remote_rsc != NULL);
began_on = pe__current_node(action->node->details->remote_rsc);
ended_on = action->node->details->remote_rsc->allocated_to;
if (action->node->details->remote_rsc
&& (action->node->details->remote_rsc->container == NULL)
&& action->node->details->remote_rsc->partial_migration_target) {
partial_migration = TRUE;
}
/* if there is only one location to choose from,
* this is easy. Check for those conditions first */
if (!began_on || !ended_on) {
/* remote rsc is either shutting down or starting up */
return began_on ? began_on : ended_on;
} else if (began_on->details == ended_on->details) {
/* remote rsc didn't move nodes. */
return began_on;
}
/* If we have get here, we know the remote resource
* began on one node and is moving to another node.
*
* This means some actions will get routed through the cluster
* node the connection rsc began on, and others are routed through
* the cluster node the connection rsc ends up on.
*
* 1. stop, demote, migrate actions of resources living in the remote
* node _MUST_ occur _BEFORE_ the connection can move (these actions
* are all required before the remote rsc stop action can occur.) In
* this case, we know these actions have to be routed through the initial
* cluster node the connection resource lived on before the move takes place.
* The exception is a partial migration of a (non-guest) remote
* connection resource; in that case, all actions (even these) will be
* ordered after the connection's pseudo-start on the migration target,
* so the target is the router node.
*
* 2. Everything else (start, promote, monitor, probe, refresh, clear failcount
* delete ....) must occur after the resource starts on the node it is
* moving to.
*/
if (safe_str_eq(task, "notify")) {
task = g_hash_table_lookup(action->meta, "notify_operation");
}
/* 1. before connection rsc moves. */
if ((safe_str_eq(task, "stop") ||
safe_str_eq(task, "demote") ||
safe_str_eq(task, "migrate_from") ||
safe_str_eq(task, "migrate_to")) && !partial_migration) {
router_node = began_on;
/* 2. after connection rsc moves. */
} else {
router_node = ended_on;
}
return router_node;
}
/*!
* \internal
* \brief Add an XML node tag for a specified ID
*
* \param[in] id Node UUID to add
* \param[in,out] xml Parent XML tag to add to
*/
static xmlNode*
add_node_to_xml_by_id(const char *id, xmlNode *xml)
{
xmlNode *node_xml;
node_xml = create_xml_node(xml, XML_CIB_TAG_NODE);
crm_xml_add(node_xml, XML_ATTR_UUID, id);
return node_xml;
}
/*!
* \internal
* \brief Add an XML node tag for a specified node
*
* \param[in] node Node to add
* \param[in,out] xml XML to add node to
*/
static void
add_node_to_xml(const node_t *node, void *xml)
{
add_node_to_xml_by_id(node->details->id, (xmlNode *) xml);
}
/*!
* \internal
* \brief Add XML with nodes that need an update of their maintenance state
*
* \param[in,out] xml Parent XML tag to add to
* \param[in] data_set Working set for cluster
*/
static int
add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set)
{
GListPtr gIter = NULL;
xmlNode *maintenance =
xml?create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE):NULL;
int count = 0;
for (gIter = data_set->nodes; gIter != NULL;
gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
struct pe_node_shared_s *details = node->details;
if (!pe__is_guest_or_remote_node(node)) {
continue; /* just remote nodes need to know atm */
}
if (details->maintenance != details->remote_maintenance) {
if (maintenance) {
crm_xml_add(
add_node_to_xml_by_id(node->details->id, maintenance),
XML_NODE_IS_MAINTENANCE, details->maintenance?"1":"0");
}
count++;
}
}
crm_trace("%s %d nodes to adjust maintenance-mode "
"to transition", maintenance?"Added":"Counted", count);
return count;
}
/*!
* \internal
* \brief Add pseudo action with nodes needing maintenance state update
*
* \param[in,out] data_set Working set for cluster
*/
void
add_maintenance_update(pe_working_set_t *data_set)
{
action_t *action = NULL;
if (add_maintenance_nodes(NULL, data_set)) {
crm_trace("adding maintenance state update pseudo action");
action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set);
set_bit(action->flags, pe_action_print_always);
}
}
/*!
* \internal
* \brief Add XML with nodes that an action is expected to bring down
*
* If a specified action is expected to bring any nodes down, add an XML block
* with their UUIDs. When a node is lost, this allows the controller to
* determine whether it was expected.
*
* \param[in,out] xml Parent XML tag to add to
* \param[in] action Action to check for downed nodes
* \param[in] data_set Working set for cluster
*/
static void
add_downed_nodes(xmlNode *xml, const action_t *action,
const pe_working_set_t *data_set)
{
CRM_CHECK(xml && action && action->node && data_set, return);
if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
/* Shutdown makes the action's node down */
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->node->details->id, downed);
} else if (safe_str_eq(action->task, CRM_OP_FENCE)) {
/* Fencing makes the action's node and any hosted guest nodes down */
const char *fence = g_hash_table_lookup(action->meta, "stonith_action");
if (safe_str_eq(fence, "off") || safe_str_eq(fence, "reboot")) {
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->node->details->id, downed);
pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed);
}
} else if (action->rsc && action->rsc->is_remote_node
&& safe_str_eq(action->task, CRMD_ACTION_STOP)) {
/* Stopping a remote connection resource makes connected node down,
* unless it's part of a migration
*/
GListPtr iter;
action_t *input;
gboolean migrating = FALSE;
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
input = ((action_wrapper_t *) iter->data)->action;
if (input->rsc && safe_str_eq(action->rsc->id, input->rsc->id)
&& safe_str_eq(input->task, CRMD_ACTION_MIGRATED)) {
migrating = TRUE;
break;
}
}
if (!migrating) {
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->rsc->id, downed);
}
}
}
static xmlNode *
action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
{
gboolean needs_node_info = TRUE;
gboolean needs_maintenance_info = FALSE;
xmlNode *action_xml = NULL;
xmlNode *args_xml = NULL;
#if ENABLE_VERSIONED_ATTRS
pe_rsc_action_details_t *rsc_details = NULL;
#endif
if (action == NULL) {
return NULL;
}
if (safe_str_eq(action->task, CRM_OP_FENCE)) {
/* All fences need node info; guest node fences are pseudo-events */
action_xml = create_xml_node(NULL,
is_set(action->flags, pe_action_pseudo)?
XML_GRAPH_TAG_PSEUDO_EVENT :
XML_GRAPH_TAG_CRM_EVENT);
} else if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
} else if (safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) {
action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
} else if (safe_str_eq(action->task, CRM_OP_LRM_REFRESH)) {
action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
/* } else if(safe_str_eq(action->task, RSC_PROBED)) { */
/* action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */
} else if (is_set(action->flags, pe_action_pseudo)) {
if (safe_str_eq(action->task, CRM_OP_MAINTENANCE_NODES)) {
needs_maintenance_info = TRUE;
}
action_xml = create_xml_node(NULL, XML_GRAPH_TAG_PSEUDO_EVENT);
needs_node_info = FALSE;
} else {
action_xml = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP);
#if ENABLE_VERSIONED_ATTRS
rsc_details = pe_rsc_action_details(action);
#endif
}
crm_xml_add_int(action_xml, XML_ATTR_ID, action->id);
crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task);
if (action->rsc != NULL && action->rsc->clone_name != NULL) {
char *clone_key = NULL;
const char *interval_ms_s = g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS);
guint interval_ms = crm_parse_ms(interval_ms_s);
if (safe_str_eq(action->task, RSC_NOTIFY)) {
const char *n_type = g_hash_table_lookup(action->meta, "notify_type");
const char *n_task = g_hash_table_lookup(action->meta, "notify_operation");
CRM_CHECK(n_type != NULL, crm_err("No notify type value found for %s", action->uuid));
CRM_CHECK(n_task != NULL,
crm_err("No notify operation value found for %s", action->uuid));
clone_key = generate_notify_key(action->rsc->clone_name, n_type, n_task);
} else if(action->cancel_task) {
clone_key = generate_op_key(action->rsc->clone_name,
action->cancel_task, interval_ms);
} else {
clone_key = generate_op_key(action->rsc->clone_name,
action->task, interval_ms);
}
CRM_CHECK(clone_key != NULL, crm_err("Could not generate a key for %s", action->uuid));
crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key);
crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid);
free(clone_key);
} else {
crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid);
}
if (needs_node_info && action->node != NULL) {
node_t *router_node = get_router_node(action);
crm_xml_add(action_xml, XML_LRM_ATTR_TARGET, action->node->details->uname);
crm_xml_add(action_xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id);
if (router_node) {
crm_xml_add(action_xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname);
}
g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET), strdup(action->node->details->uname));
g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID), strdup(action->node->details->id));
}
/* No details if this action is only being listed in the inputs section */
if (as_input) {
return action_xml;
}
/* List affected resource */
if (action->rsc) {
if (is_set(action->flags, pe_action_pseudo) == FALSE) {
int lpc = 0;
xmlNode *rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml));
const char *attr_list[] = {
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER,
XML_ATTR_TYPE
};
if (is_set(action->rsc->flags, pe_rsc_orphan) && action->rsc->clone_name) {
/* Do not use the 'instance free' name here as that
* might interfere with the instance we plan to keep.
* Ie. if there are more than two named /anonymous/
* instances on a given node, we need to make sure the
* command goes to the right one.
*
* Keep this block, even when everyone is using
* 'instance free' anonymous clone names - it means
* we'll do the right thing if anyone toggles the
* unique flag to 'off'
*/
crm_debug("Using orphan clone name %s instead of %s", action->rsc->id,
action->rsc->clone_name);
crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name);
crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
} else if (is_not_set(action->rsc->flags, pe_rsc_unique)) {
const char *xml_id = ID(action->rsc->xml);
crm_debug("Using anonymous clone name %s for %s (aka. %s)", xml_id, action->rsc->id,
action->rsc->clone_name);
/* ID is what we'd like client to use
* ID_LONG is what they might know it as instead
*
* ID_LONG is only strictly needed /here/ during the
* transition period until all nodes in the cluster
* are running the new software /and/ have rebooted
* once (meaning that they've only ever spoken to a DC
* supporting this feature).
*
* If anyone toggles the unique flag to 'on', the
* 'instance free' name will correspond to an orphan
* and fall into the clause above instead
*/
crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id);
if (action->rsc->clone_name && safe_str_neq(xml_id, action->rsc->clone_name)) {
crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name);
} else {
crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
}
} else {
CRM_ASSERT(action->rsc->clone_name == NULL);
crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id);
}
for (lpc = 0; lpc < DIMOF(attr_list); lpc++) {
crm_xml_add(rsc_xml, attr_list[lpc],
g_hash_table_lookup(action->rsc->meta, attr_list[lpc]));
}
}
}
/* List any attributes in effect */
args_xml = create_xml_node(NULL, XML_TAG_ATTRS);
crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
g_hash_table_foreach(action->extra, hash2field, args_xml);
if (action->rsc != NULL && action->node) {
GHashTable *p = crm_str_table_new();
get_rsc_attributes(p, action->rsc, action->node, data_set);
g_hash_table_foreach(p, hash2smartfield, args_xml);
g_hash_table_destroy(p);
#if ENABLE_VERSIONED_ATTRS
{
xmlNode *versioned_parameters = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS);
pe_get_versioned_attributes(versioned_parameters, action->rsc,
action->node, data_set);
if (xml_has_children(versioned_parameters)) {
add_node_copy(action_xml, versioned_parameters);
}
free_xml(versioned_parameters);
}
#endif
} else if(action->rsc && action->rsc->variant <= pe_native) {
g_hash_table_foreach(action->rsc->parameters, hash2smartfield, args_xml);
#if ENABLE_VERSIONED_ATTRS
if (xml_has_children(action->rsc->versioned_parameters)) {
add_node_copy(action_xml, action->rsc->versioned_parameters);
}
#endif
}
#if ENABLE_VERSIONED_ATTRS
if (rsc_details) {
if (xml_has_children(rsc_details->versioned_parameters)) {
add_node_copy(action_xml, rsc_details->versioned_parameters);
}
if (xml_has_children(rsc_details->versioned_meta)) {
add_node_copy(action_xml, rsc_details->versioned_meta);
}
}
#endif
g_hash_table_foreach(action->meta, hash2metafield, args_xml);
if (action->rsc != NULL) {
const char *value = g_hash_table_lookup(action->rsc->meta, "external-ip");
resource_t *parent = action->rsc;
while (parent != NULL) {
parent->cmds->append_meta(parent, args_xml);
parent = parent->parent;
}
if(value) {
hash2smartfield((gpointer)"pcmk_external_ip", (gpointer)value, (gpointer)args_xml);
}
if (pe__is_guest_node(action->node)) {
pe_node_t *host = NULL;
enum action_tasks task = text2task(action->task);
if(task == action_notify || task == action_notified) {
const char *n_task = g_hash_table_lookup(action->meta, "notify_operation");
task = text2task(n_task);
}
// Differentiate between up and down actions
switch (task) {
case stop_rsc:
case stopped_rsc:
case action_demote:
case action_demoted:
host = pe__current_node(action->node->details->remote_rsc->container);
break;
case start_rsc:
case started_rsc:
case monitor_rsc:
case action_promote:
case action_promoted:
host = action->node->details->remote_rsc->container->allocated_to;
break;
default:
break;
}
if(host) {
hash2metafield((gpointer)XML_RSC_ATTR_TARGET,
(gpointer)g_hash_table_lookup(action->rsc->meta, XML_RSC_ATTR_TARGET), (gpointer)args_xml);
hash2metafield((gpointer)PCMK_ENV_PHYSICAL_HOST, (gpointer)host->details->uname, (gpointer)args_xml);
}
}
} else if (safe_str_eq(action->task, CRM_OP_FENCE) && action->node) {
/* Pass the node's attributes as meta-attributes.
*
* @TODO: Determine whether it is still necessary to do this. It was
* added in 33d99707, probably for the libfence-based implementation in
* c9a90bd, which is no longer used.
*/
g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml);
}
sorted_xml(args_xml, action_xml, FALSE);
free_xml(args_xml);
/* List any nodes this action is expected to make down */
if (needs_node_info && (action->node != NULL)) {
add_downed_nodes(action_xml, action, data_set);
}
if (needs_maintenance_info) {
add_maintenance_nodes(action_xml, data_set);
}
crm_log_xml_trace(action_xml, "dumped action");
return action_xml;
}
static bool
should_dump_action(pe_action_t *action)
{
CRM_CHECK(action != NULL, return false);
if (is_set(action->flags, pe_action_dumped)) {
crm_trace("Action %s (%d) already dumped", action->uuid, action->id);
return false;
} else if (is_set(action->flags, pe_action_pseudo)
&& safe_str_eq(action->task, CRM_OP_PROBED)) {
GListPtr lpc = NULL;
/* This is a horrible but convenient hack
*
* It mimimizes the number of actions with unsatisfied inputs
* (i.e. not included in the graph)
*
* This in turn, means we can be more concise when printing
* aborted/incomplete graphs.
*
* It also makes it obvious which node is preventing
* probe_complete from running (presumably because it is only
* partially up)
*
* For these reasons we tolerate such perversions
*/
for (lpc = action->actions_after; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *wrapper = (pe_action_wrapper_t *) lpc->data;
if (is_not_set(wrapper->action->flags, pe_action_runnable)) {
/* Only interested in runnable operations */
} else if (safe_str_neq(wrapper->action->task, RSC_START)) {
/* Only interested in start operations */
} else if (is_set(wrapper->action->flags, pe_action_dumped)
|| should_dump_action(wrapper->action)) {
crm_trace("Action %s (%d) should be dumped: "
"dependency of %s (%d)",
action->uuid, action->id,
wrapper->action->uuid, wrapper->action->id);
return true;
}
}
}
if (is_not_set(action->flags, pe_action_runnable)) {
crm_trace("Ignoring action %s (%d): unrunnable",
action->uuid, action->id);
return false;
} else if (is_set(action->flags, pe_action_optional)
&& is_not_set(action->flags, pe_action_print_always)) {
crm_trace("Ignoring action %s (%d): optional",
action->uuid, action->id);
return false;
// Monitors should be dumped even for unmanaged resources
} else if (action->rsc && is_not_set(action->rsc->flags, pe_rsc_managed)
&& safe_str_neq(action->task, RSC_STATUS)) {
const char *interval_ms_s = g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS);
// Cancellation of recurring monitors should still be dumped
if ((interval_ms_s == NULL) || !strcmp(interval_ms_s, "0")) {
crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)",
action->uuid, action->id, action->rsc->id);
return false;
}
}
if (is_set(action->flags, pe_action_pseudo)
|| safe_str_eq(action->task, CRM_OP_FENCE)
|| safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
/* skip the next checks */
return true;
}
if (action->node == NULL) {
pe_err("Skipping action %s (%d) "
"because it was not allocated to a node (bug?)",
action->uuid, action->id);
log_action(LOG_DEBUG, "Unallocated action", action, false);
return false;
} else if (pe__is_guest_node(action->node)
&& !action->node->details->remote_requires_reset) {
crm_trace("Action %s (%d) should be dumped: "
"assuming will be runnable on guest node %s",
action->uuid, action->id, action->node->details->uname);
} else if (action->node->details->online == false) {
pe_err("Skipping action %s (%d) "
"because it was scheduled for offline node (bug?)",
action->uuid, action->id);
log_action(LOG_DEBUG, "Action for offline node", action, FALSE);
return false;
#if 0
/* but this would also affect resources that can be safely
* migrated before a fencing op
*/
} else if (action->node->details->unclean == false) {
pe_err("Skipping action %s (%d) "
"because it was scheduled for unclean node (bug?)",
action->uuid, action->id);
log_action(LOG_DEBUG, "Action for unclean node", action, false);
return false;
#endif
}
return true;
}
/* lowest to highest */
static gint
sort_action_id(gconstpointer a, gconstpointer b)
{
const action_wrapper_t *action_wrapper2 = (const action_wrapper_t *)a;
const action_wrapper_t *action_wrapper1 = (const action_wrapper_t *)b;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (action_wrapper1->action->id > action_wrapper2->action->id) {
return -1;
}
if (action_wrapper1->action->id < action_wrapper2->action->id) {
return 1;
}
return 0;
}
+/*!
+ * \internal
+ * \brief Check whether an action input should be in the transition graph
+ *
+ * \param[in] action Action to check
+ * \param[in,out] input Action input to check
+ *
+ * \return true if input should be in graph, false otherwise
+ * \note This function may not only check an input, but disable it under certian
+ * circumstances (load or anti-colocation orderings that are not needed).
+ */
static bool
-check_dump_input(int last_action, pe_action_t *action,
- pe_action_wrapper_t *input)
+check_dump_input(pe_action_t *action, pe_action_wrapper_t *input)
{
int type = input->type;
if (input->state == pe_link_dumped) {
return true;
-
- } else if (input->state == pe_link_dup) {
- return false;
}
type &= ~pe_order_implies_first_printed;
type &= ~pe_order_implies_then_printed;
type &= ~pe_order_optional;
- if (last_action == input->action->id) {
- crm_trace("Ignoring %s (%d) input %s (%d): "
- "duplicated",
- action->uuid, action->id,
- input->action->uuid, input->action->id);
- input->state = pe_link_dup;
- return false;
-
- } else if (input->type == pe_order_none) {
+ if (input->type == pe_order_none) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"ordering disabled",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (is_not_set(input->action->flags, pe_action_runnable)
&& (type == pe_order_none)
&& safe_str_neq(input->action->uuid, CRM_OP_PROBED)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional and input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (is_not_set(input->action->flags, pe_action_runnable)
&& is_set(input->type, pe_order_one_or_more)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"one-or-more and input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (is_set(action->flags, pe_action_pseudo)
&& is_set(input->type, pe_order_stonith_stop)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"stonith stop but action is pseudo",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (is_set(input->type, pe_order_implies_first_migratable)
&& is_not_set(input->action->flags, pe_action_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"implies input migratable but input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (is_set(input->type, pe_order_apply_first_non_migratable)
&& is_set(input->action->flags, pe_action_migrate_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"only if input unmigratable but input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if ((input->type == pe_order_optional)
&& is_set(input->action->flags, pe_action_migrate_runnable)
&& crm_ends_with(input->action->uuid, "_stop_0")) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional but stop in migration",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (input->type == pe_order_load) {
pe_node_t *input_node = input->action->node;
// load orderings are relevant only if actions are for same node
if (action->rsc && safe_str_eq(action->task, RSC_MIGRATE)) {
pe_node_t *allocated = action->rsc->allocated_to;
/* For load_stopped -> migrate_to orderings, we care about where it
* has been allocated to, not where it will be executed.
*/
if ((input_node == NULL) || (allocated == NULL)
|| (input_node->details != allocated->details)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"load ordering node mismatch %s vs %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
(allocated? allocated->details->uname : "<none>"),
(input_node? input_node->details->uname : "<none>"));
input->type = pe_order_none;
return false;
}
} else if ((input_node == NULL) || (action->node == NULL)
|| (input_node->details != action->node->details)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"load ordering node mismatch %s vs %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
(action->node? action->node->details->uname : "<none>"),
(input_node? input_node->details->uname : "<none>"));
input->type = pe_order_none;
return false;
} else if (is_set(input->action->flags, pe_action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"load ordering input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->type = pe_order_none;
return false;
}
} else if (input->type == pe_order_anti_colocation) {
if (input->action->node && action->node
&& (input->action->node->details != action->node->details)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"anti-colocation node mismatch %s vs %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
action->node->details->uname,
input->action->node->details->uname);
input->type = pe_order_none;
return false;
} else if (is_set(input->action->flags, pe_action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"anti-colocation input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->type = pe_order_none;
return false;
}
} else if (input->action->rsc
&& input->action->rsc != action->rsc
&& is_set(input->action->rsc->flags, pe_rsc_failed)
&& is_not_set(input->action->rsc->flags, pe_rsc_managed)
&& crm_ends_with(input->action->uuid, "_stop_0")
&& action->rsc && pe_rsc_is_clone(action->rsc)) {
crm_warn("Ignoring requirement that %s complete before %s:"
" unmanaged failed resources cannot prevent clone shutdown",
input->action->uuid, action->uuid);
return false;
} else if (is_set(input->action->flags, pe_action_optional)
&& is_not_set(input->action->flags, pe_action_print_always)
&& is_not_set(input->action->flags, pe_action_dumped)
&& !should_dump_action(input->action)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
}
crm_trace("%s (%d) input %s (%d) @ %s should be dumped: %s, %s, %s, 0x%.6x",
action->uuid, action->id,
input->action->uuid, input->action->id,
input->action->node? input->action->node->details->uname : "no node",
is_set(input->action->flags, pe_action_pseudo)? "pseudo" : "real",
is_set(input->action->flags, pe_action_runnable)? "runnable" : "unrunnable",
is_set(input->action->flags, pe_action_optional)? "optional" : "required",
input->type);
return true;
}
static bool
graph_has_loop(pe_action_t *init_action, pe_action_t *action,
pe_action_wrapper_t *input)
{
- GListPtr lpc = NULL;
bool has_loop = false;
if (is_set(input->action->flags, pe_action_tracking)) {
crm_trace("Breaking tracking loop: %s@%s -> %s@%s (0x%.6x)",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
action->uuid,
action->node? action->node->details->uname : "",
input->type);
return false;
}
// Don't need to check inputs that won't be used
- if (!check_dump_input(-1, action, input)) {
+ if (!check_dump_input(action, input)) {
return false;
}
if (input->action == init_action) {
crm_debug("Input loop found in %s@%s ->...-> %s@%s",
action->uuid,
action->node? action->node->details->uname : "",
init_action->uuid,
init_action->node? init_action->node->details->uname : "");
return true;
}
set_bit(input->action->flags, pe_action_tracking);
crm_trace("Checking inputs of action %s@%s input %s@%s (0x%.6x)"
"for graph loop with %s@%s ",
action->uuid,
action->node? action->node->details->uname : "",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
input->type,
init_action->uuid,
init_action->node? init_action->node->details->uname : "");
// Recursively check input itself for loops
- for (lpc = input->action->actions_before; lpc != NULL; lpc = lpc->next) {
+ for (GList *iter = input->action->actions_before;
+ iter != NULL; iter = iter->next) {
+
if (graph_has_loop(init_action, input->action,
- (pe_action_wrapper_t *) lpc->data)) {
+ (pe_action_wrapper_t *) iter->data)) {
// Recursive call already logged a debug message
has_loop = true;
goto done;
}
}
done:
pe_clear_action_bit(input->action, pe_action_tracking);
if (!has_loop) {
crm_trace("No input loop found in %s@%s -> %s@%s (0x%.6x)",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
action->uuid,
action->node? action->node->details->uname : "",
input->type);
}
return has_loop;
}
bool
pcmk__ordering_is_invalid(pe_action_t *action, pe_action_wrapper_t *input)
{
/* Prevent user-defined ordering constraints between resources
* running in a guest node and the resource that defines that node.
*/
if (is_not_set(input->type, pe_order_preserve)
&& action->rsc && action->rsc->fillers
&& input->action->rsc && input->action->node
&& input->action->node->details->remote_rsc
&& (input->action->node->details->remote_rsc->container == action->rsc)) {
crm_warn("Invalid ordering constraint between %s and %s",
input->action->rsc->id, action->rsc->id);
return true;
}
/* If there's an order like
* "rscB_stop node2"-> "load_stopped_node2" -> "rscA_migrate_to node1"
*
* then rscA is being migrated from node1 to node2, while rscB is being
* migrated from node2 to node1. If there would be a graph loop,
* break the order "load_stopped_node2" -> "rscA_migrate_to node1".
*/
if ((input->type == pe_order_load) && action->rsc
&& safe_str_eq(action->task, RSC_MIGRATE)
&& graph_has_loop(action, action, input)) {
return true;
}
return false;
}
-static bool
-should_dump_input(int last_action, pe_action_t *action,
- pe_action_wrapper_t *input)
+// Remove duplicate inputs (regardless of flags)
+static void
+deduplicate_inputs(pe_action_t *action)
{
- input->state = pe_link_not_dumped;
+ GList *item = NULL;
+ GList *next = NULL;
+ pe_action_wrapper_t *last_input = NULL;
+
+ action->actions_before = g_list_sort(action->actions_before,
+ sort_action_id);
+ for (item = action->actions_before; item != NULL; item = next) {
+ pe_action_wrapper_t *input = (pe_action_wrapper_t *) item->data;
+
+ next = item->next;
+ if (last_input && (input->action->id == last_input->action->id)) {
+ crm_trace("Input %s (%d) duplicate skipped for action %s (%d)",
+ input->action->uuid, input->action->id,
+ action->uuid, action->id);
- if (!check_dump_input(last_action, action, input)) {
- return false;
+ /* For the purposes of scheduling, the ordering flags no longer
+ * matter, but crm_simulate looks at certain ones when creating a
+ * dot graph. Combining the flags is sufficient for that purpose.
+ */
+ last_input->type |= input->type;
+ if (input->state == pe_link_dumped) {
+ last_input->state = pe_link_dumped;
+ }
+
+ free(item->data);
+ action->actions_before = g_list_delete_link(action->actions_before,
+ item);
+ } else {
+ last_input = input;
+ input->state = pe_link_not_dumped;
+ }
}
- return true;
}
+/*!
+ * \internal
+ * \brief Add an action to the transition graph XML if appropriate
+ *
+ * \param[in] action Action to possibly add
+ * \param[in] data_set Cluster working set
+ *
+ * \note This will de-duplicate the action inputs, meaning that the
+ * pe_action_wrapper_t:type flags can no longer be relied on to retain
+ * their original settings. That means this MUST be called after stage7()
+ * is complete, and nothing after this should rely on those type flags.
+ * (For example, some code looks for type equal to some flag rather than
+ * whether the flag is set, and some code looks for particular
+ * combinations of flags -- such code must be done before stage8().)
+ */
void
-graph_element_from_action(action_t * action, pe_working_set_t * data_set)
+graph_element_from_action(pe_action_t *action, pe_working_set_t *data_set)
{
- GListPtr lpc = NULL;
- int last_action = -1;
+ GList *lpc = NULL;
int synapse_priority = 0;
xmlNode *syn = NULL;
xmlNode *set = NULL;
xmlNode *in = NULL;
- xmlNode *input = NULL;
xmlNode *xml_action = NULL;
+ pe_action_wrapper_t *input = NULL;
+
+ /* If we haven't already, de-duplicate inputs -- even if we won't be dumping
+ * the action, so that crm_simulate dot graphs don't have duplicates.
+ */
+ if (is_not_set(action->flags, pe_action_dedup)) {
+ deduplicate_inputs(action);
+ set_bit(action->flags, pe_action_dedup);
+ }
if (should_dump_action(action) == FALSE) {
return;
}
set_bit(action->flags, pe_action_dumped);
syn = create_xml_node(data_set->graph, "synapse");
set = create_xml_node(syn, "action_set");
in = create_xml_node(syn, "inputs");
crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse);
data_set->num_synapse++;
if (action->rsc != NULL) {
synapse_priority = action->rsc->priority;
}
if (action->priority > synapse_priority) {
synapse_priority = action->priority;
}
if (synapse_priority > 0) {
crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority);
}
xml_action = action2xml(action, FALSE, data_set);
add_node_nocopy(set, crm_element_name(xml_action), xml_action);
- action->actions_before = g_list_sort(action->actions_before, sort_action_id);
-
for (lpc = action->actions_before; lpc != NULL; lpc = lpc->next) {
- action_wrapper_t *wrapper = (action_wrapper_t *) lpc->data;
+ input = (pe_action_wrapper_t *) lpc->data;
+ if (check_dump_input(action, input)) {
+ xmlNode *input_xml = create_xml_node(in, "trigger");
- if (should_dump_input(last_action, action, wrapper) == FALSE) {
- continue;
+ input->state = pe_link_dumped;
+ xml_action = action2xml(input->action, TRUE, data_set);
+ add_node_nocopy(input_xml, crm_element_name(xml_action), xml_action);
}
-
- wrapper->state = pe_link_dumped;
- CRM_CHECK(last_action < wrapper->action->id,;
- );
- last_action = wrapper->action->id;
- input = create_xml_node(in, "trigger");
-
- xml_action = action2xml(wrapper->action, TRUE, data_set);
- add_node_nocopy(input, crm_element_name(xml_action), xml_action);
}
}
diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c
index 243fa65450..acb5029d36 100644
--- a/tools/crm_simulate.c
+++ b/tools/crm_simulate.c
@@ -1,958 +1,956 @@
/*
* Copyright 2009-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <sys/types.h>
#include <dirent.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/common/util.h>
#include <crm/common/iso8601.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
cib_t *global_cib = NULL;
GListPtr op_fail = NULL;
bool action_numbers = FALSE;
gboolean quiet = FALSE;
gboolean print_pending = TRUE;
char *temp_shadow = NULL;
extern gboolean bringing_nodes_online;
#define quiet_log(fmt, args...) do { \
if(quiet == FALSE) { \
printf(fmt , ##args); \
} \
} while(0)
char *use_date = NULL;
static void
get_date(pe_working_set_t *data_set, bool print_original)
{
time_t original_date = 0;
crm_element_value_epoch(data_set->input, "execution-date", &original_date);
if (use_date) {
data_set->now = crm_time_new(use_date);
quiet_log(" + Setting effective cluster time: %s", use_date);
crm_time_log(LOG_NOTICE, "Pretending 'now' is", data_set->now,
crm_time_log_date | crm_time_log_timeofday);
} else if (original_date) {
data_set->now = crm_time_new(NULL);
crm_time_set_timet(data_set->now, &original_date);
if (print_original) {
char *when = crm_time_as_string(data_set->now,
crm_time_log_date|crm_time_log_timeofday);
printf("Using the original execution date of: %s\n", when);
free(when);
}
}
}
static void
print_cluster_status(pe_working_set_t * data_set, long options)
{
char *online_nodes = NULL;
char *online_remote_nodes = NULL;
char *online_guest_nodes = NULL;
char *offline_nodes = NULL;
char *offline_remote_nodes = NULL;
GListPtr gIter = NULL;
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *node_mode = NULL;
char *node_name = NULL;
if (pe__is_guest_node(node)) {
node_name = crm_strdup_printf("%s:%s", node->details->uname, node->details->remote_rsc->container->id);
} else {
node_name = crm_strdup_printf("%s", node->details->uname);
}
if (node->details->unclean) {
if (node->details->online && node->details->unclean) {
node_mode = "UNCLEAN (online)";
} else if (node->details->pending) {
node_mode = "UNCLEAN (pending)";
} else {
node_mode = "UNCLEAN (offline)";
}
} else if (node->details->pending) {
node_mode = "pending";
} else if (node->details->standby_onfail && node->details->online) {
node_mode = "standby (on-fail)";
} else if (node->details->standby) {
if (node->details->online) {
node_mode = "standby";
} else {
node_mode = "OFFLINE (standby)";
}
} else if (node->details->maintenance) {
if (node->details->online) {
node_mode = "maintenance";
} else {
node_mode = "OFFLINE (maintenance)";
}
} else if (node->details->online) {
if (pe__is_guest_node(node)) {
online_guest_nodes = add_list_element(online_guest_nodes, node_name);
} else if (pe__is_remote_node(node)) {
online_remote_nodes = add_list_element(online_remote_nodes, node_name);
} else {
online_nodes = add_list_element(online_nodes, node_name);
}
free(node_name);
continue;
} else {
if (pe__is_remote_node(node)) {
offline_remote_nodes = add_list_element(offline_remote_nodes, node_name);
} else if (pe__is_guest_node(node)) {
/* ignore offline container nodes */
} else {
offline_nodes = add_list_element(offline_nodes, node_name);
}
free(node_name);
continue;
}
if (pe__is_guest_node(node)) {
printf("GuestNode %s: %s\n", node_name, node_mode);
} else if (pe__is_remote_node(node)) {
printf("RemoteNode %s: %s\n", node_name, node_mode);
} else if (safe_str_eq(node->details->uname, node->details->id)) {
printf("Node %s: %s\n", node_name, node_mode);
} else {
printf("Node %s (%s): %s\n", node_name, node->details->id, node_mode);
}
free(node_name);
}
if (online_nodes) {
printf("Online: [%s ]\n", online_nodes);
free(online_nodes);
}
if (offline_nodes) {
printf("OFFLINE: [%s ]\n", offline_nodes);
free(offline_nodes);
}
if (online_remote_nodes) {
printf("RemoteOnline: [%s ]\n", online_remote_nodes);
free(online_remote_nodes);
}
if (offline_remote_nodes) {
printf("RemoteOFFLINE: [%s ]\n", offline_remote_nodes);
free(offline_remote_nodes);
}
if (online_guest_nodes) {
printf("GuestOnline: [%s ]\n", online_guest_nodes);
free(online_guest_nodes);
}
fprintf(stdout, "\n");
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (is_set(rsc->flags, pe_rsc_orphan)
&& rsc->role == RSC_ROLE_STOPPED) {
continue;
}
rsc->fns->print(rsc, NULL, pe_print_printf | options, stdout);
}
fprintf(stdout, "\n");
}
static char *
create_action_name(pe_action_t *action)
{
char *action_name = NULL;
const char *prefix = "";
const char *action_host = NULL;
const char *clone_name = NULL;
const char *task = action->task;
if (action->node) {
action_host = action->node->details->uname;
} else if (is_not_set(action->flags, pe_action_pseudo)) {
action_host = "<none>";
}
if (safe_str_eq(action->task, RSC_CANCEL)) {
prefix = "Cancel ";
task = action->cancel_task;
}
if (action->rsc && action->rsc->clone_name) {
clone_name = action->rsc->clone_name;
}
if (clone_name) {
char *key = NULL;
const char *interval_ms_s = NULL;
guint interval_ms = 0;
interval_ms_s = g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS);
interval_ms = crm_parse_ms(interval_ms_s);
if (safe_str_eq(action->task, RSC_NOTIFY)
|| safe_str_eq(action->task, RSC_NOTIFIED)) {
const char *n_type = g_hash_table_lookup(action->meta, "notify_key_type");
const char *n_task = g_hash_table_lookup(action->meta, "notify_key_operation");
CRM_ASSERT(n_type != NULL);
CRM_ASSERT(n_task != NULL);
key = generate_notify_key(clone_name, n_type, n_task);
} else {
key = generate_op_key(clone_name, task, interval_ms);
}
if (action_host) {
action_name = crm_strdup_printf("%s%s %s", prefix, key, action_host);
} else {
action_name = crm_strdup_printf("%s%s", prefix, key);
}
free(key);
} else if (safe_str_eq(action->task, CRM_OP_FENCE)) {
const char *op = g_hash_table_lookup(action->meta, "stonith_action");
action_name = crm_strdup_printf("%s%s '%s' %s", prefix, action->task, op, action_host);
} else if (action->rsc && action_host) {
action_name = crm_strdup_printf("%s%s %s", prefix, action->uuid, action_host);
} else if (action_host) {
action_name = crm_strdup_printf("%s%s %s", prefix, action->task, action_host);
} else {
action_name = crm_strdup_printf("%s", action->uuid);
}
if (action_numbers) { // i.e. verbose
char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id);
free(action_name);
action_name = with_id;
}
return action_name;
}
static void
create_dotfile(pe_working_set_t * data_set, const char *dot_file, gboolean all_actions)
{
GListPtr gIter = NULL;
FILE *dot_strm = fopen(dot_file, "w");
if (dot_strm == NULL) {
crm_perror(LOG_ERR, "Could not open %s for writing", dot_file);
return;
}
fprintf(dot_strm, " digraph \"g\" {\n");
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
const char *style = "dashed";
const char *font = "black";
const char *color = "black";
char *action_name = create_action_name(action);
crm_trace("Action %d: %s %s %p", action->id, action_name, action->uuid, action);
if (is_set(action->flags, pe_action_pseudo)) {
font = "orange";
}
if (is_set(action->flags, pe_action_dumped)) {
style = "bold";
color = "green";
} else if (action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) {
color = "red";
font = "purple";
if (all_actions == FALSE) {
goto do_not_write;
}
} else if (is_set(action->flags, pe_action_optional)) {
color = "blue";
if (all_actions == FALSE) {
goto do_not_write;
}
} else {
color = "red";
CRM_CHECK(is_set(action->flags, pe_action_runnable) == FALSE,;
);
}
set_bit(action->flags, pe_action_dumped);
crm_trace("\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]",
action_name, style, color, font);
fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n",
action_name, style, color, font);
do_not_write:
free(action_name);
}
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
GListPtr gIter2 = NULL;
for (gIter2 = action->actions_before; gIter2 != NULL; gIter2 = gIter2->next) {
action_wrapper_t *before = (action_wrapper_t *) gIter2->data;
char *before_name = NULL;
char *after_name = NULL;
const char *style = "dashed";
gboolean optional = TRUE;
if (before->state == pe_link_dumped) {
optional = FALSE;
style = "bold";
} else if (is_set(action->flags, pe_action_pseudo)
&& (before->type & pe_order_stonith_stop)) {
continue;
- } else if (before->state == pe_link_dup) {
- continue;
} else if (before->type == pe_order_none) {
continue;
} else if (is_set(before->action->flags, pe_action_dumped)
&& is_set(action->flags, pe_action_dumped)
&& before->type != pe_order_load) {
optional = FALSE;
}
if (all_actions || optional == FALSE) {
before_name = create_action_name(before->action);
after_name = create_action_name(action);
crm_trace("\"%s\" -> \"%s\" [ style = %s]",
before_name, after_name, style);
fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n",
before_name, after_name, style);
free(before_name);
free(after_name);
}
}
}
fprintf(dot_strm, "}\n");
fflush(dot_strm);
fclose(dot_strm);
}
static void
setup_input(const char *input, const char *output)
{
int rc = pcmk_ok;
cib_t *cib_conn = NULL;
xmlNode *cib_object = NULL;
char *local_output = NULL;
if (input == NULL) {
/* Use live CIB */
cib_conn = cib_new();
rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command);
if (rc == pcmk_ok) {
rc = cib_conn->cmds->query(cib_conn, NULL, &cib_object, cib_scope_local | cib_sync_call);
}
cib_conn->cmds->signoff(cib_conn);
cib_delete(cib_conn);
cib_conn = NULL;
if (rc != pcmk_ok) {
fprintf(stderr, "Live CIB query failed: %s (%d)\n", pcmk_strerror(rc), rc);
crm_exit(crm_errno2exit(rc));
} else if (cib_object == NULL) {
fprintf(stderr, "Live CIB query failed: empty result\n");
crm_exit(CRM_EX_NOINPUT);
}
} else if (safe_str_eq(input, "-")) {
cib_object = filename2xml(NULL);
} else {
cib_object = filename2xml(input);
}
if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) {
create_xml_node(cib_object, XML_CIB_TAG_STATUS);
}
if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) {
free_xml(cib_object);
crm_exit(CRM_EX_CONFIG);
}
if (validate_xml(cib_object, NULL, FALSE) != TRUE) {
free_xml(cib_object);
crm_exit(CRM_EX_CONFIG);
}
if (output == NULL) {
char *pid = crm_getpid_s();
local_output = get_shadow_file(pid);
temp_shadow = strdup(local_output);
output = local_output;
free(pid);
}
rc = write_xml_file(cib_object, output, FALSE);
free_xml(cib_object);
cib_object = NULL;
if (rc < 0) {
fprintf(stderr, "Could not create '%s': %s\n",
output, pcmk_strerror(rc));
crm_exit(CRM_EX_CANTCREAT);
}
setenv("CIB_file", output, 1);
free(local_output);
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"quiet", 0, 0, 'Q', "\tDisplay only essentialoutput"},
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"-spacer-", 0, 0, '-', "\nOperations:"},
{"run", 0, 0, 'R', "\tDetermine the cluster's response to the given configuration and status"},
{"simulate", 0, 0, 'S', "Simulate the transition's execution and display the resulting cluster status"},
{"in-place", 0, 0, 'X', "Simulate the transition's execution and store the result back to the input file"},
{"show-scores", 0, 0, 's', "Show allocation scores"},
{"show-utilization", 0, 0, 'U', "Show utilization information"},
{"profile", 1, 0, 'P', "Run all tests in the named directory to create profiling data"},
{"repeat", 1, 0, 'N', "With --profile, repeat each test N times and print timings"},
{"pending", 0, 0, 'j', "\tDisplay pending state if 'record-pending' is enabled", pcmk_option_hidden},
{"-spacer-", 0, 0, '-', "\nSynthetic Cluster Events:"},
{"node-up", 1, 0, 'u', "\tBring a node online"},
{"node-down", 1, 0, 'd', "\tTake a node offline"},
{"node-fail", 1, 0, 'f', "\tMark a node as failed"},
{"op-inject", 1, 0, 'i', "\tGenerate a failure for the cluster to react to in the simulation"},
{"-spacer-", 0, 0, '-', "\t\tValue is of the form ${resource}_${task}_${interval_in_ms}@${node}=${rc}."},
{"-spacer-", 0, 0, '-', "\t\tEg. memcached_monitor_20000@bart.example.com=7"},
{"-spacer-", 0, 0, '-', "\t\tFor more information on OCF return codes, refer to: https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/2.0/html/Pacemaker_Administration/s-ocf-return-codes.html"},
{"op-fail", 1, 0, 'F', "\tIf the specified task occurs during the simulation, have it fail with return code ${rc}"},
{"-spacer-", 0, 0, '-', "\t\tValue is of the form ${resource}_${task}_${interval_in_ms}@${node}=${rc}."},
{"-spacer-", 0, 0, '-', "\t\tEg. memcached_stop_0@bart.example.com=1\n"},
{"-spacer-", 0, 0, '-', "\t\tThe transition will normally stop at the failed action. Save the result with --save-output and re-run with --xml-file"},
{ "set-datetime", required_argument, NULL, 't',
"Set date/time (ISO 8601 format, see https://en.wikipedia.org/wiki/ISO_8601)"
},
{"quorum", 1, 0, 'q', "\tSpecify a value for quorum"},
{"watchdog", 1, 0, 'w', "\tAssume a watchdog device is active"},
{"ticket-grant", 1, 0, 'g', "Grant a ticket"},
{"ticket-revoke", 1, 0, 'r', "Revoke a ticket"},
{"ticket-standby", 1, 0, 'b', "Make a ticket standby"},
{"ticket-activate", 1, 0, 'e', "Activate a ticket"},
{"-spacer-", 0, 0, '-', "\nOutput Options:"},
{"save-input", 1, 0, 'I', "\tSave the input configuration to the named file"},
{"save-output", 1, 0, 'O', "Save the output configuration to the named file"},
{"save-graph", 1, 0, 'G', "\tSave the transition graph (XML format) to the named file"},
{"save-dotfile", 1, 0, 'D', "Save the transition graph (DOT format) to the named file"},
{"all-actions", 0, 0, 'a', "\tDisplay all possible actions in the DOT graph - even ones not part of the transition"},
{"-spacer-", 0, 0, '-', "\nData Source:"},
{"live-check", 0, 0, 'L', "\tConnect to the CIB mamager and use the current CIB contents as input"},
{"xml-file", 1, 0, 'x', "\tRetrieve XML from the named file"},
{"xml-pipe", 0, 0, 'p', "\tRetrieve XML from stdin"},
{"-spacer-", 0, 0, '-', "\nExamples:\n"},
{"-spacer-", 0, 0, '-', "Pretend a recurring monitor action found memcached stopped on node fred.example.com and, during recovery, that the memcached stop action failed", pcmk_option_paragraph},
{"-spacer-", 0, 0, '-', " crm_simulate -LS --op-inject memcached:0_monitor_20000@bart.example.com=7 --op-fail memcached:0_stop_0@fred.example.com=1 --save-output /tmp/memcached-test.xml", pcmk_option_example},
{"-spacer-", 0, 0, '-', "Now see what the reaction to the stop failure would be", pcmk_option_paragraph},
{"-spacer-", 0, 0, '-', " crm_simulate -S --xml-file /tmp/memcached-test.xml", pcmk_option_example},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
static void
profile_one(const char *xml_file, long long repeat, pe_working_set_t *data_set)
{
xmlNode *cib_object = NULL;
clock_t start = 0;
printf("* Testing %s ...", xml_file);
fflush(stdout);
cib_object = filename2xml(xml_file);
start = clock();
if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) {
create_xml_node(cib_object, XML_CIB_TAG_STATUS);
}
if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) {
free_xml(cib_object);
return;
}
if (validate_xml(cib_object, NULL, FALSE) != TRUE) {
free_xml(cib_object);
return;
}
for (int i = 0; i < repeat; ++i) {
xmlNode *input = (repeat == 1)? cib_object : copy_xml(cib_object);
data_set->input = input;
get_date(data_set, false);
pcmk__schedule_actions(data_set, input, NULL);
pe_reset_working_set(data_set);
}
printf(" %.2f secs\n", (clock() - start) / (float) CLOCKS_PER_SEC);
}
#ifndef FILENAME_MAX
# define FILENAME_MAX 512
#endif
static void
profile_all(const char *dir, long long repeat, pe_working_set_t *data_set)
{
struct dirent **namelist;
int file_num = scandir(dir, &namelist, 0, alphasort);
if (file_num > 0) {
struct stat prop;
char buffer[FILENAME_MAX];
while (file_num--) {
if ('.' == namelist[file_num]->d_name[0]) {
free(namelist[file_num]);
continue;
} else if (!crm_ends_with_ext(namelist[file_num]->d_name, ".xml")) {
free(namelist[file_num]);
continue;
}
snprintf(buffer, sizeof(buffer), "%s/%s", dir, namelist[file_num]->d_name);
if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) {
profile_one(buffer, repeat, data_set);
}
free(namelist[file_num]);
}
free(namelist);
}
}
static int
count_resources(pe_working_set_t * data_set, resource_t * rsc)
{
int count = 0;
GListPtr gIter = NULL;
if (rsc == NULL) {
gIter = data_set->resources;
} else if (rsc->children) {
gIter = rsc->children;
} else {
return is_not_set(rsc->flags, pe_rsc_orphan);
}
for (; gIter != NULL; gIter = gIter->next) {
count += count_resources(data_set, gIter->data);
}
return count;
}
int
main(int argc, char **argv)
{
int rc = pcmk_ok;
guint modified = 0;
gboolean store = FALSE;
gboolean process = FALSE;
gboolean simulate = FALSE;
gboolean all_actions = FALSE;
gboolean have_stdout = FALSE;
pe_working_set_t *data_set = NULL;
const char *xml_file = "-";
const char *quorum = NULL;
const char *watchdog = NULL;
const char *test_dir = NULL;
const char *dot_file = NULL;
const char *graph_file = NULL;
const char *input_file = NULL;
const char *output_file = NULL;
const char *repeat_s = NULL;
int flag = 0;
int index = 0;
int argerr = 0;
long long repeat = 1;
GListPtr node_up = NULL;
GListPtr node_down = NULL;
GListPtr node_fail = NULL;
GListPtr op_inject = NULL;
GListPtr ticket_grant = NULL;
GListPtr ticket_revoke = NULL;
GListPtr ticket_standby = NULL;
GListPtr ticket_activate = NULL;
xmlNode *input = NULL;
crm_log_cli_init("crm_simulate");
crm_set_options(NULL, "datasource operation [additional options]",
long_options, "Tool for simulating the cluster's response to events");
if (argc < 2) {
crm_help('?', CRM_EX_USAGE);
}
while (1) {
flag = crm_get_option(argc, argv, &index);
if (flag == -1)
break;
switch (flag) {
case 'V':
if (have_stdout == FALSE) {
/* Redirect stderr to stdout so we can grep the output */
have_stdout = TRUE;
close(STDERR_FILENO);
dup2(STDOUT_FILENO, STDERR_FILENO);
}
crm_bump_log_level(argc, argv);
action_numbers = TRUE;
break;
case '?':
case '$':
crm_help(flag, CRM_EX_OK);
break;
case 'p':
xml_file = "-";
break;
case 'Q':
quiet = TRUE;
break;
case 'L':
xml_file = NULL;
break;
case 'x':
xml_file = optarg;
break;
case 'u':
modified++;
bringing_nodes_online = TRUE;
node_up = g_list_append(node_up, optarg);
break;
case 'd':
modified++;
node_down = g_list_append(node_down, optarg);
break;
case 'f':
modified++;
node_fail = g_list_append(node_fail, optarg);
break;
case 't':
use_date = strdup(optarg);
break;
case 'i':
modified++;
op_inject = g_list_append(op_inject, optarg);
break;
case 'F':
process = TRUE;
simulate = TRUE;
op_fail = g_list_append(op_fail, optarg);
break;
case 'w':
modified++;
watchdog = optarg;
break;
case 'q':
modified++;
quorum = optarg;
break;
case 'g':
modified++;
ticket_grant = g_list_append(ticket_grant, optarg);
break;
case 'r':
modified++;
ticket_revoke = g_list_append(ticket_revoke, optarg);
break;
case 'b':
modified++;
ticket_standby = g_list_append(ticket_standby, optarg);
break;
case 'e':
modified++;
ticket_activate = g_list_append(ticket_activate, optarg);
break;
case 'a':
all_actions = TRUE;
break;
case 's':
process = TRUE;
show_scores = TRUE;
break;
case 'U':
process = TRUE;
show_utilization = TRUE;
break;
case 'j':
print_pending = TRUE;
break;
case 'S':
process = TRUE;
simulate = TRUE;
break;
case 'X':
store = TRUE;
process = TRUE;
simulate = TRUE;
break;
case 'R':
process = TRUE;
break;
case 'D':
process = TRUE;
dot_file = optarg;
break;
case 'G':
process = TRUE;
graph_file = optarg;
break;
case 'I':
input_file = optarg;
break;
case 'O':
output_file = optarg;
break;
case 'P':
test_dir = optarg;
break;
case 'N':
repeat_s = optarg;
break;
default:
++argerr;
break;
}
}
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', CRM_EX_USAGE);
}
data_set = pe_new_working_set();
if (data_set == NULL) {
crm_perror(LOG_ERR, "Could not allocate working set");
rc = -ENOMEM;
goto done;
}
if (test_dir != NULL) {
if (repeat_s != NULL) {
repeat = crm_int_helper(repeat_s, NULL);
if (errno || (repeat < 1)) {
fprintf(stderr, "--repeat must be positive integer, not '%s' -- using 1",
repeat_s);
repeat = 1;
}
}
profile_all(test_dir, repeat, data_set);
return CRM_EX_OK;
}
setup_input(xml_file, store ? xml_file : output_file);
global_cib = cib_new();
rc = global_cib->cmds->signon(global_cib, crm_system_name, cib_command);
if (rc != pcmk_ok) {
fprintf(stderr, "Could not connect to the CIB manager: %s\n",
pcmk_strerror(rc));
goto done;
}
rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call | cib_scope_local);
if (rc != pcmk_ok) {
fprintf(stderr, "Could not get local CIB: %s\n", pcmk_strerror(rc));
goto done;
}
data_set->input = input;
get_date(data_set, true);
if(xml_file) {
set_bit(data_set->flags, pe_flag_sanitized);
}
set_bit(data_set->flags, pe_flag_stdout);
cluster_status(data_set);
if (quiet == FALSE) {
int options = print_pending ? pe_print_pending : 0;
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
quiet_log("\n *** Resource management is DISABLED ***");
quiet_log("\n The cluster will not attempt to start, stop or recover services");
quiet_log("\n");
}
if (data_set->disabled_resources || data_set->blocked_resources) {
quiet_log("%d of %d resources DISABLED and %d BLOCKED from being started due to failures\n",
data_set->disabled_resources,
count_resources(data_set, NULL),
data_set->blocked_resources);
}
quiet_log("\nCurrent cluster status:\n");
print_cluster_status(data_set, options);
}
if (modified) {
quiet_log("Performing requested modifications\n");
modify_configuration(data_set, global_cib, quorum, watchdog, node_up, node_down, node_fail, op_inject,
ticket_grant, ticket_revoke, ticket_standby, ticket_activate);
rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call);
if (rc != pcmk_ok) {
fprintf(stderr, "Could not get modified CIB: %s\n", pcmk_strerror(rc));
goto done;
}
cleanup_calculations(data_set);
data_set->input = input;
get_date(data_set, true);
if(xml_file) {
set_bit(data_set->flags, pe_flag_sanitized);
}
set_bit(data_set->flags, pe_flag_stdout);
cluster_status(data_set);
}
if (input_file != NULL) {
rc = write_xml_file(input, input_file, FALSE);
if (rc < 0) {
fprintf(stderr, "Could not create '%s': %s\n",
input_file, pcmk_strerror(rc));
goto done;
}
}
if (process || simulate) {
crm_time_t *local_date = NULL;
if (show_scores && show_utilization) {
printf("Allocation scores and utilization information:\n");
} else if (show_scores) {
fprintf(stdout, "Allocation scores:\n");
} else if (show_utilization) {
printf("Utilization information:\n");
}
pcmk__schedule_actions(data_set, input, local_date);
input = NULL; /* Don't try and free it twice */
if (graph_file != NULL) {
write_xml_file(data_set->graph, graph_file, FALSE);
}
if (dot_file != NULL) {
create_dotfile(data_set, dot_file, all_actions);
}
if (quiet == FALSE) {
GListPtr gIter = NULL;
quiet_log("%sTransition Summary:\n", show_scores || show_utilization
|| modified ? "\n" : "");
fflush(stdout);
LogNodeActions(data_set, TRUE);
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
LogActions(rsc, data_set, TRUE);
}
}
}
rc = pcmk_ok;
if (simulate) {
if (run_simulation(data_set, global_cib, op_fail, quiet) != pcmk_ok) {
rc = pcmk_err_generic;
}
if(quiet == FALSE) {
get_date(data_set, true);
quiet_log("\nRevised cluster status:\n");
set_bit(data_set->flags, pe_flag_stdout);
cluster_status(data_set);
print_cluster_status(data_set, 0);
}
}
done:
pe_free_working_set(data_set);
global_cib->cmds->signoff(global_cib);
cib_delete(global_cib);
free(use_date);
fflush(stderr);
if (temp_shadow) {
unlink(temp_shadow);
free(temp_shadow);
}
crm_exit(crm_errno2exit(rc));
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 4:05 PM (1 d, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2002244
Default Alt Text
(222 KB)

Event Timeline