Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h
index 890c3a1ea0..716b55c20d 100644
--- a/include/crm/pengine/status.h
+++ b/include/crm/pengine/status.h
@@ -1,297 +1,299 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef PENGINE_STATUS__H
#define PENGINE_STATUS__H
#include <glib.h>
#include <crm/common/iso8601.h>
#include <crm/pengine/common.h>
typedef struct node_s node_t;
typedef struct action_s action_t;
typedef struct action_s pe_action_t;
typedef struct resource_s resource_t;
typedef enum no_quorum_policy_e {
no_quorum_freeze,
no_quorum_stop,
no_quorum_ignore,
no_quorum_suicide
} no_quorum_policy_t;
enum node_type {
node_ping,
node_member
};
enum pe_restart {
pe_restart_restart,
pe_restart_ignore
};
enum pe_find {
pe_find_renamed = 0x001,
pe_find_partial = 0x002,
pe_find_clone = 0x004,
pe_find_current = 0x008,
pe_find_inactive = 0x010,
};
#define pe_flag_have_quorum 0x00000001ULL
#define pe_flag_symmetric_cluster 0x00000002ULL
#define pe_flag_is_managed_default 0x00000004ULL
#define pe_flag_maintenance_mode 0x00000008ULL
#define pe_flag_stonith_enabled 0x00000010ULL
#define pe_flag_have_stonith_resource 0x00000020ULL
#define pe_flag_stop_rsc_orphans 0x00000100ULL
#define pe_flag_stop_action_orphans 0x00000200ULL
#define pe_flag_stop_everything 0x00000400ULL
#define pe_flag_start_failure_fatal 0x00001000ULL
#define pe_flag_remove_after_stop 0x00002000ULL
#define pe_flag_startup_probes 0x00010000ULL
#define pe_flag_have_status 0x00020000ULL
typedef struct pe_working_set_s
{
xmlNode *input;
ha_time_t *now;
/* options extracted from the input */
char *dc_uuid;
node_t *dc_node;
const char *stonith_action;
const char *placement_strategy;
unsigned long long flags;
int stonith_timeout;
int default_resource_stickiness;
no_quorum_policy_t no_quorum_policy;
GHashTable *config_hash;
GHashTable *domains;
GListPtr nodes;
GListPtr resources;
GListPtr placement_constraints;
GListPtr ordering_constraints;
GListPtr colocation_constraints;
GListPtr actions;
xmlNode *failed;
xmlNode *op_defaults;
xmlNode *rsc_defaults;
/* stats */
int num_synapse;
int max_valid_nodes;
int order_id;
int action_id;
/* final output */
xmlNode *graph;
} pe_working_set_t;
struct node_shared_s {
const char *id;
const char *uname;
gboolean online;
gboolean standby;
gboolean standby_onfail;
gboolean pending;
gboolean unclean;
gboolean shutdown;
gboolean expected_up;
gboolean is_dc;
int num_resources;
GListPtr running_rsc; /* resource_t* */
GListPtr allocated_rsc; /* resource_t* */
GHashTable *attrs; /* char* => char* */
enum node_type type;
GHashTable *utilization;
};
struct node_s {
int weight;
gboolean fixed;
int count;
struct node_shared_s *details;
};
#include <crm/pengine/complex.h>
#define pe_rsc_orphan 0x00000001ULL
#define pe_rsc_managed 0x00000002ULL
#define pe_rsc_notify 0x00000010ULL
#define pe_rsc_unique 0x00000020ULL
#define pe_rsc_can_migrate 0x00000040ULL
#define pe_rsc_provisional 0x00000100ULL
#define pe_rsc_allocating 0x00000200ULL
#define pe_rsc_merging 0x00000400ULL
#define pe_rsc_failed 0x00010000ULL
#define pe_rsc_shutdown 0x00020000ULL
#define pe_rsc_runnable 0x00040000ULL
#define pe_rsc_start_pending 0x00080000ULL
#define pe_rsc_starting 0x00100000ULL
#define pe_rsc_stopping 0x00200000ULL
+#define pe_rsc_failure_ignored 0x01000000ULL
+
enum pe_graph_flags
{
pe_graph_none = 0x00000,
pe_graph_updated_first = 0x00001,
pe_graph_updated_then = 0x00002,
pe_graph_disable = 0x00004,
};
enum pe_action_flags
{
pe_action_pseudo = 0x00001,
pe_action_runnable = 0x00002,
pe_action_optional = 0x00004,
pe_action_print_always = 0x00008,
pe_action_have_node_attrs = 0x00010,
pe_action_failure_is_fatal = 0x00020,
pe_action_implied_by_stonith = 0x00040,
pe_action_allow_reload_conversion = 0x00080,
pe_action_dumped = 0x00100,
pe_action_processed = 0x00200,
pe_action_clear = 0x00400,
pe_action_dangle = 0x00800,
};
struct resource_s {
char *id;
char *clone_name;
char *long_name;
xmlNode *xml;
xmlNode *ops_xml;
resource_t *parent;
void *variant_opaque;
enum pe_obj_types variant;
resource_object_functions_t *fns;
resource_alloc_functions_t *cmds;
enum rsc_recovery_type recovery_type;
enum pe_restart restart_type;
int priority;
int stickiness;
int sort_index;
int failure_timeout;
int effective_priority;
int migration_threshold;
unsigned long long flags;
GListPtr rsc_cons_lhs; /* rsc_colocation_t* */
GListPtr rsc_cons; /* rsc_colocation_t* */
GListPtr rsc_location; /* rsc_to_node_t* */
GListPtr actions; /* action_t* */
node_t *allocated_to;
GListPtr running_on; /* node_t* */
GHashTable *known_on; /* node_t* */
GHashTable *allowed_nodes; /* node_t* */
enum rsc_role_e role;
enum rsc_role_e next_role;
GHashTable *meta;
GHashTable *parameters;
GHashTable *utilization;
GListPtr children; /* resource_t* */
GListPtr dangling_migrations; /* node_t* */
};
struct action_s
{
int id;
int priority;
resource_t *rsc;
node_t *node;
xmlNode *op_entry;
char *task;
char *uuid;
enum pe_action_flags flags;
enum rsc_start_requirement needs;
enum action_fail_response on_fail;
enum rsc_role_e fail_role;
action_t *pre_notify;
action_t *pre_notified;
action_t *post_notify;
action_t *post_notified;
int seen_count;
GHashTable *meta;
GHashTable *extra;
GListPtr actions_before; /* action_warpper_t* */
GListPtr actions_after; /* action_warpper_t* */
};
typedef struct notify_data_s {
GHashTable *keys;
const char *action;
action_t *pre;
action_t *post;
action_t *pre_done;
action_t *post_done;
GListPtr active; /* notify_entry_t* */
GListPtr inactive; /* notify_entry_t* */
GListPtr start; /* notify_entry_t* */
GListPtr stop; /* notify_entry_t* */
GListPtr demote; /* notify_entry_t* */
GListPtr promote; /* notify_entry_t* */
GListPtr master; /* notify_entry_t* */
GListPtr slave; /* notify_entry_t* */
} notify_data_t;
gboolean cluster_status(pe_working_set_t *data_set);
extern void set_working_set_defaults(pe_working_set_t *data_set);
extern void cleanup_calculations(pe_working_set_t *data_set);
extern resource_t *pe_find_resource(GListPtr rsc_list, const char *id_rh);
extern node_t *pe_find_node(GListPtr node_list, const char *uname);
extern node_t *pe_find_node_id(GListPtr node_list, const char *id);
extern GListPtr find_operations(
const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set);
#endif
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
index 46024d0e1e..661f97ced0 100644
--- a/lib/pengine/native.c
+++ b/lib/pengine/native.c
@@ -1,497 +1,502 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <crm/pengine/status.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/complex.h>
#include <unpack.h>
#include <utils.h>
#include <crm/msg_xml.h>
#define VARIANT_NATIVE 1
#include "./variant.h"
void
native_add_running(resource_t *rsc, node_t *node, pe_working_set_t *data_set)
{
GListPtr gIter = rsc->running_on;
CRM_CHECK(node != NULL, return);
for(; gIter != NULL; gIter = gIter->next) {
node_t *a_node = (node_t*)gIter->data;
CRM_CHECK(a_node != NULL, return);
if(safe_str_eq(a_node->details->id, node->details->id)) {
return;
}
}
crm_debug_3("Adding %s to %s", rsc->id, node->details->uname);
rsc->running_on = g_list_append(rsc->running_on, node);
if(rsc->variant == pe_native) {
node->details->running_rsc = g_list_append(
node->details->running_rsc, rsc);
}
if(is_not_set(rsc->flags, pe_rsc_managed)) {
crm_info("resource %s isnt managed", rsc->id);
resource_location(rsc, node, INFINITY,
"not_managed_default", data_set);
return;
}
if(rsc->variant == pe_native && g_list_length(rsc->running_on) > 1) {
switch(rsc->recovery_type) {
case recovery_stop_only:
{
GHashTableIter gIter;
node_t *local_node = NULL;
/* make sure it doesnt come up again */
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
g_hash_table_iter_init (&gIter, rsc->allowed_nodes);
while (g_hash_table_iter_next (&gIter, NULL, (void**)&local_node)) {
local_node->weight = -INFINITY;
}
}
break;
case recovery_stop_start:
break;
case recovery_block:
clear_bit(rsc->flags, pe_rsc_managed);
break;
}
crm_debug("%s is active on %d nodes including %s: %s",
rsc->id, g_list_length(rsc->running_on), node->details->uname, recovery2text(rsc->recovery_type));
} else {
crm_trace("Resource %s is active on: %s", rsc->id, node->details->uname);
}
if(rsc->parent != NULL) {
native_add_running(rsc->parent, node, data_set);
}
}
extern void force_non_unique_clone(resource_t *rsc, const char *rid, pe_working_set_t *data_set);
gboolean native_unpack(resource_t *rsc, pe_working_set_t *data_set)
{
native_variant_data_t *native_data = NULL;
crm_debug_3("Processing resource %s...", rsc->id);
crm_malloc0(native_data, sizeof(native_variant_data_t));
if(is_set(rsc->flags, pe_rsc_unique) && rsc->parent) {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if(safe_str_eq(class, "lsb")) {
resource_t *top = uber_parent(rsc);
force_non_unique_clone(top, rsc->id, data_set);
}
}
rsc->variant_opaque = native_data;
return TRUE;
}
resource_t *
native_find_rsc(
resource_t *rsc, const char *id, node_t *on_node, int flags)
{
gboolean match = FALSE;
resource_t *result = NULL;
GListPtr gIter = rsc->children;
if(is_not_set(flags, pe_find_clone) && id == NULL) {
return NULL;
}
if(flags & pe_find_partial) {
if(strstr(rsc->id, id) == rsc->id) {
match = TRUE;
} else if(rsc->long_name && strstr(rsc->long_name, id) == rsc->long_name) {
match = TRUE;
} else if(is_set(flags, pe_find_renamed) && rsc->clone_name && strstr(rsc->clone_name, id) == rsc->clone_name) {
match = TRUE;
}
} else if(flags & pe_find_clone) {
if(rsc->children != NULL) {
match = FALSE;
} else if(id == NULL) {
match = TRUE;
} else if(strstr(rsc->id, id)) {
match = TRUE;
} else if(is_set(flags, pe_find_renamed) && rsc->clone_name && strstr(rsc->clone_name, id) == rsc->clone_name) {
match = TRUE;
}
} else {
if(strcmp(rsc->id, id) == 0){
match = TRUE;
} else if(rsc->long_name && strcmp(rsc->long_name, id) == 0) {
match = TRUE;
} else if(is_set(flags, pe_find_renamed) && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
match = TRUE;
}
}
if(match && on_node) {
if(is_set(flags, pe_find_current) && rsc->running_on) {
GListPtr gIter = rsc->running_on;
for(; gIter != NULL; gIter = gIter->next) {
node_t *loc = (node_t*)gIter->data;
if(loc->details == on_node->details) {
return rsc;
}
}
} else if(is_set(flags, pe_find_inactive) && rsc->running_on == NULL) {
return rsc;
} else if(is_not_set(flags, pe_find_current) && rsc->allocated_to && rsc->allocated_to->details == on_node->details) {
return rsc;
}
} else if(match) {
return rsc;
}
for(; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t*)gIter->data;
result = rsc->fns->find_rsc(child, id, on_node, flags);
if(result) {
return result;
}
}
return NULL;
}
char *
native_parameter(
resource_t *rsc, node_t *node, gboolean create, const char *name,
pe_working_set_t *data_set)
{
char *value_copy = NULL;
const char *value = NULL;
GHashTable *hash = rsc->parameters;
GHashTable *local_hash = NULL;
CRM_CHECK(rsc != NULL, return NULL);
CRM_CHECK(name != NULL && strlen(name) != 0, return NULL);
crm_debug_2("Looking up %s in %s", name, rsc->id);
if(create || g_hash_table_size(rsc->parameters) == 0) {
if(node != NULL) {
crm_debug_2("Creating hash with node %s",
node->details->uname);
} else {
crm_debug_2("Creating default hash");
}
local_hash = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
get_rsc_attributes(local_hash, rsc, node, data_set);
hash = local_hash;
}
value = g_hash_table_lookup(hash, name);
if(value == NULL) {
/* try meta attributes instead */
value = g_hash_table_lookup(rsc->meta, name);
}
if(value != NULL) {
value_copy = crm_strdup(value);
}
if(local_hash != NULL) {
g_hash_table_destroy(local_hash);
}
return value_copy;
}
gboolean native_active(resource_t *rsc, gboolean all)
{
GListPtr gIter = rsc->running_on;
for(; gIter != NULL; gIter = gIter->next) {
node_t *a_node = (node_t*)gIter->data;
if(a_node->details->online == FALSE) {
crm_debug("Resource %s: node %s is offline",
rsc->id, a_node->details->uname);
} else if(a_node->details->unclean) {
crm_debug("Resource %s: node %s is unclean",
rsc->id, a_node->details->uname);
} else {
crm_debug("Resource %s active on %s",
rsc->id, a_node->details->uname);
return TRUE;
}
}
return FALSE;
}
struct print_data_s
{
long options;
void *print_data;
};
static void native_print_attr(gpointer key, gpointer value, gpointer user_data)
{
long options = ((struct print_data_s*)user_data)->options;
void *print_data = ((struct print_data_s*)user_data)->print_data;
status_print("Option: %s = %s\n", (char*)key, (char*)value);
}
void
native_print(
resource_t *rsc, const char *pre_text, long options, void *print_data)
{
node_t *node = NULL;
const char *prov = NULL;
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if(pre_text == NULL && (options & pe_print_printf)) { pre_text = " "; }
if(safe_str_eq(class, "ocf")) {
prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
}
if(rsc->running_on != NULL) {
node = rsc->running_on->data;
}
if(options & pe_print_html) {
if(is_not_set(rsc->flags, pe_rsc_managed)) {
status_print("<font color=\"yellow\">");
} else if(is_set(rsc->flags, pe_rsc_failed)) {
status_print("<font color=\"red\">");
} else if(rsc->variant == pe_native
&& g_list_length(rsc->running_on) == 0) {
status_print("<font color=\"red\">");
} else if(g_list_length(rsc->running_on) > 1) {
status_print("<font color=\"orange\">");
+ } else if(is_set(rsc->flags, pe_rsc_failure_ignored)) {
+ status_print("<font color=\"yellow\">");
+
} else {
status_print("<font color=\"green\">");
}
}
if((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
const char *desc = NULL;
desc = crm_element_value(rsc->xml, XML_ATTR_DESC);
- status_print("%s%s\t(%s%s%s:%s%s) %s %s%s%s%s",
+ status_print("%s%s\t(%s%s%s:%s%s) %s %s%s%s%s%s",
pre_text?pre_text:"", rsc->id,
class, prov?"::":"", prov?prov:"",
crm_element_value(rsc->xml, XML_ATTR_TYPE),
is_set(rsc->flags, pe_rsc_orphan)?" ORPHANED":"",
(rsc->variant!=pe_native)?"":role2text(rsc->role),
is_set(rsc->flags, pe_rsc_managed)?"":" (unmanaged)",
is_set(rsc->flags, pe_rsc_failed)?" FAILED":"",
+ is_set(rsc->flags, pe_rsc_failure_ignored)?" (failure ignored)":"",
desc?": ":"", desc?desc:"");
} else {
- status_print("%s%s\t(%s%s%s:%s):\t%s%s %s%s%s",
+ status_print("%s%s\t(%s%s%s:%s):\t%s%s %s%s%s%s",
pre_text?pre_text:"", rsc->id,
class, prov?"::":"", prov?prov:"",
crm_element_value(rsc->xml, XML_ATTR_TYPE),
is_set(rsc->flags, pe_rsc_orphan)?" ORPHANED ":"",
(rsc->variant!=pe_native)?"":role2text(rsc->role),
(rsc->variant!=pe_native)?"":node!=NULL?node->details->uname:"",
is_set(rsc->flags, pe_rsc_managed)?"":" (unmanaged)",
- is_set(rsc->flags, pe_rsc_failed)?" FAILED":"");
+ is_set(rsc->flags, pe_rsc_failed)?" FAILED":"",
+ is_set(rsc->flags, pe_rsc_failure_ignored)?" (failure ignored)":"");
#if CURSES_ENABLED
if(options & pe_print_ncurses) {
move(-1, 0);
}
#endif
}
if(options & pe_print_html) {
status_print(" </font> ");
}
if((options & pe_print_rsconly)) {
} else if(g_list_length(rsc->running_on) > 1) {
GListPtr gIter = rsc->running_on;
int counter = 0;
if(options & pe_print_html) {
status_print("<ul>\n");
} else if((options & pe_print_printf)
|| (options & pe_print_ncurses)) {
status_print("[");
}
for(; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t*)gIter->data;
counter++;
if(options & pe_print_html) {
status_print("<li>\n%s",
node->details->uname);
} else if((options & pe_print_printf)
|| (options & pe_print_ncurses)) {
status_print("\t%s", node->details->uname);
} else if((options & pe_print_log)) {
status_print("\t%d : %s", counter, node->details->uname);
} else {
status_print("%s", node->details->uname);
}
if(options & pe_print_html) {
status_print("</li>\n");
}
}
if(options & pe_print_html) {
status_print("</ul>\n");
} else if((options & pe_print_printf)
|| (options & pe_print_ncurses)) {
status_print(" ]");
}
}
if(options & pe_print_html) {
status_print("<br/>\n");
} else if(options & pe_print_suppres_nl) {
/* nothing */
} else if((options & pe_print_printf) || (options & pe_print_ncurses)) {
status_print("\n");
}
if(options & pe_print_details) {
struct print_data_s pdata;
pdata.options = options;
pdata.print_data = print_data;
g_hash_table_foreach(rsc->parameters, native_print_attr, &pdata);
}
if(options & pe_print_dev) {
GHashTableIter iter;
node_t *node = NULL;
status_print("%s\t(%s%svariant=%s, priority=%f)", pre_text,
is_set(rsc->flags, pe_rsc_provisional)?"provisional, ":"",
is_set(rsc->flags, pe_rsc_runnable)?"":"non-startable, ",
crm_element_name(rsc->xml),
(double)rsc->priority);
status_print("%s\tAllowed Nodes", pre_text);
g_hash_table_iter_init (&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next (&iter, NULL, (void**)&node)) {
status_print("%s\t * %s %d",
pre_text,
node->details->uname,
node->weight);
}
}
if(options & pe_print_max_details) {
GHashTableIter iter;
node_t *node = NULL;
status_print("%s\t=== Allowed Nodes\n", pre_text);
g_hash_table_iter_init (&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next (&iter, NULL, (void**)&node)) {
print_node("\t", node, FALSE);
}
}
}
void native_free(resource_t *rsc)
{
crm_debug_4("Freeing resource action list (not the data)");
common_free(rsc);
}
enum rsc_role_e
native_resource_state(const resource_t *rsc, gboolean current)
{
enum rsc_role_e role = rsc->next_role;
if(current) {
role = rsc->role;
}
crm_debug_4("%s state: %s", rsc->id, role2text(role));
return role;
}
node_t *native_location(resource_t *rsc, GListPtr *list, gboolean current)
{
node_t *one = NULL;
GListPtr result = NULL;
if(rsc->children) {
GListPtr gIter = rsc->children;
for(; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t*)gIter->data;
child->fns->location(child, &result, current);
}
} else if(current && rsc->running_on) {
result = g_list_copy(rsc->running_on);
} else if(current == FALSE && rsc->allocated_to) {
result = g_list_append(NULL, rsc->allocated_to);
}
if(result && g_list_length(result) == 1) {
one = g_list_nth_data(result, 0);
}
if(list) {
GListPtr gIter = result;
for(; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t*)gIter->data;
if(*list == NULL || pe_find_node_id(*list, node->details->id) == NULL) {
*list = g_list_append(*list, node);
}
}
}
g_list_free(result);
return one;
}
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 2552366222..30d54f697e 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,2126 +1,2132 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <lrm/lrm_api.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/msg.h>
#include <crm/common/util.h>
#include <crm/pengine/status.h>
#include <crm/pengine/rules.h>
#include <utils.h>
#include <unpack.h>
CRM_TRACE_INIT_DATA(pe_status);
#define set_config_flag(data_set, option, flag) do { \
const char *tmp = pe_pref(data_set->config_hash, option); \
if(tmp) { \
if(crm_is_true(tmp)) { \
set_bit_inplace(data_set->flags, flag); \
} else { \
clear_bit_inplace(data_set->flags, flag); \
} \
} \
} while(0)
gboolean unpack_rsc_op(
resource_t *rsc, node_t *node, xmlNode *xml_op, GListPtr next,
enum action_fail_response *failed, pe_working_set_t *data_set);
static void pe_fence_node(pe_working_set_t *data_set, node_t *node, const char *reason)
{
CRM_CHECK(node, return);
if(node->details->unclean == FALSE) {
if(is_set(data_set->flags, pe_flag_stonith_enabled)) {
crm_warn("Node %s will be fenced %s", node->details->uname, reason);
} else {
crm_warn("Node %s is unclean %s", node->details->uname, reason);
}
}
node->details->unclean = TRUE;
}
gboolean
unpack_config(xmlNode *config, pe_working_set_t *data_set)
{
const char *value = NULL;
GHashTable *config_hash = g_hash_table_new_full(
g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str);
data_set->config_hash = config_hash;
unpack_instance_attributes(
data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, data_set->now);
verify_pe_options(data_set->config_hash);
set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
crm_info("Startup probes: %s",
is_set(data_set->flags, pe_flag_startup_probes)?"enabled":"disabled (dangerous)");
value = pe_pref(data_set->config_hash, "stonith-timeout");
data_set->stonith_timeout = crm_get_msec(value);
crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
crm_debug("STONITH of failed nodes is %s",
is_set(data_set->flags, pe_flag_stonith_enabled)?"enabled":"disabled");
data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
crm_debug_2("STONITH will %s nodes", data_set->stonith_action);
set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
crm_debug("Stop all active resources: %s",
is_set(data_set->flags, pe_flag_stop_everything)?"true":"false");
set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
if(is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_debug("Cluster is symmetric"
" - resources can run anywhere by default");
}
value = pe_pref(data_set->config_hash, "default-resource-stickiness");
data_set->default_resource_stickiness = char2score(value);
crm_debug("Default stickiness: %d",
data_set->default_resource_stickiness);
value = pe_pref(data_set->config_hash, "no-quorum-policy");
if(safe_str_eq(value, "ignore")) {
data_set->no_quorum_policy = no_quorum_ignore;
} else if(safe_str_eq(value, "freeze")) {
data_set->no_quorum_policy = no_quorum_freeze;
} else if(safe_str_eq(value, "suicide")) {
gboolean do_panic = FALSE;
crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic);
if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE){
crm_config_err("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false");
}
if(do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) {
data_set->no_quorum_policy = no_quorum_suicide;
} else if(is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) {
crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
data_set->no_quorum_policy = no_quorum_stop;
}
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
crm_debug("On loss of CCM Quorum: Freeze resources");
break;
case no_quorum_stop:
crm_debug("On loss of CCM Quorum: Stop ALL resources");
break;
case no_quorum_suicide:
crm_notice("On loss of CCM Quorum: Fence all remaining nodes");
break;
case no_quorum_ignore:
crm_notice("On loss of CCM Quorum: Ignore");
break;
}
set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
crm_debug_2("Orphan resources are %s",
is_set(data_set->flags, pe_flag_stop_rsc_orphans)?"stopped":"ignored");
set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
crm_debug_2("Orphan resource actions are %s",
is_set(data_set->flags, pe_flag_stop_action_orphans)?"stopped":"ignored");
set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
crm_debug_2("Stopped resources are removed from the status section: %s",
is_set(data_set->flags, pe_flag_remove_after_stop)?"true":"false");
set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
crm_debug_2("Maintenance mode: %s",
is_set(data_set->flags, pe_flag_maintenance_mode)?"true":"false");
if(is_set(data_set->flags, pe_flag_maintenance_mode)) {
clear_bit(data_set->flags, pe_flag_is_managed_default);
} else {
set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default);
}
crm_debug_2("By default resources are %smanaged",
is_set(data_set->flags, pe_flag_is_managed_default)?"":"not ");
set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
crm_debug_2("Start failures are %s",
is_set(data_set->flags, pe_flag_start_failure_fatal)?"always fatal":"handled by failcount");
node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
crm_info("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
pe_pref(data_set->config_hash, "node-health-red"),
pe_pref(data_set->config_hash, "node-health-yellow"),
pe_pref(data_set->config_hash, "node-health-green"));
data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
crm_debug_2("Placement strategy: %s", data_set->placement_strategy);
return TRUE;
}
gboolean
unpack_nodes(xmlNode * xml_nodes, pe_working_set_t *data_set)
{
xmlNode *xml_obj = NULL;
node_t *new_node = NULL;
const char *id = NULL;
const char *uname = NULL;
const char *type = NULL;
const char *score = NULL;
gboolean unseen_are_unclean = TRUE;
const char *blind_faith = pe_pref(
data_set->config_hash, "startup-fencing");
if(crm_is_true(blind_faith) == FALSE) {
unseen_are_unclean = FALSE;
crm_warn("Blind faith: not fencing unseen nodes");
}
for(xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) {
if(crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
new_node = NULL;
id = crm_element_value(xml_obj, XML_ATTR_ID);
uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
type = crm_element_value(xml_obj, XML_ATTR_TYPE);
score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
crm_debug_3("Processing node %s/%s", uname, id);
if(id == NULL) {
crm_config_err("Must specify id tag in <node>");
continue;
}
if(type == NULL) {
crm_config_err("Must specify type tag in <node>");
continue;
}
if(pe_find_node(data_set->nodes, uname) != NULL) {
crm_config_warn("Detected multiple node entries with uname=%s"
" - this is rarely intended", uname);
}
crm_malloc0(new_node, sizeof(node_t));
if(new_node == NULL) {
return FALSE;
}
new_node->weight = char2score(score);
new_node->fixed = FALSE;
crm_malloc0(new_node->details,
sizeof(struct node_shared_s));
if(new_node->details == NULL) {
crm_free(new_node);
return FALSE;
}
crm_debug_3("Creaing node for entry %s/%s", uname, id);
new_node->details->id = id;
new_node->details->uname = uname;
new_node->details->type = node_ping;
new_node->details->online = FALSE;
new_node->details->shutdown = FALSE;
new_node->details->running_rsc = NULL;
new_node->details->attrs = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
new_node->details->utilization = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
/* if(data_set->have_quorum == FALSE */
/* && data_set->no_quorum_policy == no_quorum_stop) { */
/* /\* start shutting resources down *\/ */
/* new_node->weight = -INFINITY; */
/* } */
if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) {
/* blind faith... */
new_node->details->unclean = FALSE;
} else {
/* all nodes are unclean until we've seen their
* status entry
*/
new_node->details->unclean = TRUE;
}
if(type == NULL
|| safe_str_eq(type, "member")
|| safe_str_eq(type, NORMALNODE)) {
new_node->details->type = node_member;
}
add_node_attrs(xml_obj, new_node, FALSE, data_set);
unpack_instance_attributes(
data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL,
new_node->details->utilization, NULL, FALSE, data_set->now);
data_set->nodes = g_list_append(data_set->nodes, new_node);
crm_debug_3("Done with node %s",
crm_element_value(xml_obj, XML_ATTR_UNAME));
}
}
return TRUE;
}
static void g_hash_destroy_node_list(gpointer data)
{
GListPtr domain = data;
slist_basic_destroy(domain);
}
gboolean
unpack_domains(xmlNode *xml_domains, pe_working_set_t *data_set)
{
const char *id = NULL;
GListPtr domain = NULL;
xmlNode *xml_node = NULL;
xmlNode *xml_domain = NULL;
crm_info("Unpacking domains");
data_set->domains = g_hash_table_new_full(
g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_node_list);
for(xml_domain = __xml_first_child(xml_domains); xml_domain != NULL; xml_domain = __xml_next(xml_domain)) {
if(crm_str_eq((const char *)xml_domain->name, XML_CIB_TAG_DOMAIN, TRUE)) {
domain = NULL;
id = crm_element_value(xml_domain, XML_ATTR_ID);
for(xml_node = __xml_first_child(xml_domain); xml_node != NULL; xml_node = __xml_next(xml_node)) {
if(crm_str_eq((const char *)xml_node->name, XML_CIB_TAG_NODE, TRUE)) {
node_t *copy = NULL;
node_t *node = NULL;
const char *uname = crm_element_value(xml_node, "name");
const char *score = crm_element_value(xml_node, XML_RULE_ATTR_SCORE);
if(uname == NULL) {
crm_config_err("Invalid domain %s: Must specify id tag in <node>", id);
continue;
}
node = pe_find_node(data_set->nodes, uname);
if(node == NULL) {
node = pe_find_node_id(data_set->nodes, uname);
}
if(node == NULL) {
crm_config_warn("Invalid domain %s: Node %s does not exist", id, uname);
continue;
}
copy = node_copy(node);
copy->weight = char2score(score);
crm_debug("Adding %s to domain %s with score %s", node->details->uname, id, score);
domain = g_list_prepend(domain, copy);
}
}
if(domain) {
crm_debug("Created domain %s with %d members", id, g_list_length(domain));
g_hash_table_replace(data_set->domains, crm_strdup(id), domain);
}
}
}
return TRUE;
}
gboolean
unpack_resources(xmlNode * xml_resources, pe_working_set_t *data_set)
{
xmlNode *xml_obj = NULL;
for(xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) {
resource_t *new_rsc = NULL;
crm_debug_3("Beginning unpack... <%s id=%s... >",
crm_element_name(xml_obj), ID(xml_obj));
if(common_unpack(xml_obj, &new_rsc, NULL, data_set)) {
data_set->resources = g_list_append(
data_set->resources, new_rsc);
print_resource(LOG_DEBUG_3, "Added", new_rsc, FALSE);
} else {
crm_config_err("Failed unpacking %s %s",
crm_element_name(xml_obj),
crm_element_value(xml_obj, XML_ATTR_ID));
if(new_rsc != NULL && new_rsc->fns != NULL) {
new_rsc->fns->free(new_rsc);
}
}
}
data_set->resources = g_list_sort(
data_set->resources, sort_rsc_priority);
if(is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
}
return TRUE;
}
/* remove nodes that are down, stopping */
/* create +ve rsc_to_node constraints between resources and the nodes they are running on */
/* anything else? */
gboolean
unpack_status(xmlNode * status, pe_working_set_t *data_set)
{
const char *id = NULL;
const char *uname = NULL;
xmlNode * lrm_rsc = NULL;
xmlNode * attrs = NULL;
xmlNode * node_state = NULL;
node_t *this_node = NULL;
crm_debug_3("Beginning unpack");
for(node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) {
if(crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
id = crm_element_value(node_state, XML_ATTR_ID);
uname = crm_element_value(node_state, XML_ATTR_UNAME);
attrs = find_xml_node(node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
crm_debug_3("Processing node id=%s, uname=%s", id, uname);
this_node = pe_find_node_id(data_set->nodes, id);
if(uname == NULL) {
/* error */
continue;
} else if(this_node == NULL) {
crm_config_warn("Node %s in status section no longer exists",
uname);
continue;
}
/* Mark the node as provisionally clean
* - at least we have seen it in the current cluster's lifetime
*/
this_node->details->unclean = FALSE;
add_node_attrs(attrs, this_node, TRUE, data_set);
if(crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) {
crm_info("Node %s is in standby-mode",
this_node->details->uname);
this_node->details->standby = TRUE;
}
crm_debug_3("determining node state");
determine_online_status(node_state, this_node, data_set);
if(this_node->details->online
&& data_set->no_quorum_policy == no_quorum_suicide) {
/* Everything else should flow from this automatically
* At least until the PE becomes able to migrate off healthy resources
*/
pe_fence_node(data_set, this_node, "because the cluster does not have quorum");
}
}
}
/* Now that we know all node states, we can safely handle migration ops
* But, for now, only process healthy nodes
* - this is necessary for the logic in bug lf#2508 to function correctly
*/
for(node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) {
if(crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
continue;
}
id = crm_element_value(node_state, XML_ATTR_ID);
this_node = pe_find_node_id(data_set->nodes, id);
if(this_node == NULL) {
crm_info("Node %s is unknown", id);
continue;
} else if(this_node->details->online) {
crm_trace("Processing lrm resource entries on healthy node: %s", this_node->details->uname);
lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
unpack_lrm_resources(this_node, lrm_rsc, data_set);
}
}
/* Now handle failed nodes - but only if stonith is enabled
*
* By definition, offline nodes run no resources so there is nothing to do.
* Only when stonith is enabled do we need to know what is on the node to
* ensure rsc start events happen after the stonith
*/
for(node_state = __xml_first_child(status);
node_state != NULL && is_set(data_set->flags, pe_flag_stonith_enabled);
node_state = __xml_next(node_state)) {
if(crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
continue;
}
id = crm_element_value(node_state, XML_ATTR_ID);
this_node = pe_find_node_id(data_set->nodes, id);
if(this_node == NULL || this_node->details->online) {
continue;
} else {
crm_trace("Processing lrm resource entries on unhealthy node: %s", this_node->details->uname);
lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
unpack_lrm_resources(this_node, lrm_rsc, data_set);
}
}
return TRUE;
}
static gboolean
determine_online_status_no_fencing(pe_working_set_t *data_set, xmlNode * node_state, node_t *this_node)
{
gboolean online = FALSE;
const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE);
const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE);
const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM);
const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE);
const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE);
if(ha_state == NULL) {
ha_state = DEADSTATUS;
}
if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)){
crm_debug_2("Node is down: ha_state=%s, ccm_state=%s",
crm_str(ha_state), crm_str(ccm_state));
} else if(safe_str_eq(crm_state, ONLINESTATUS)) {
if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) {
online = TRUE;
} else {
crm_debug("Node is not ready to run resources: %s", join_state);
}
} else if(this_node->details->expected_up == FALSE) {
crm_debug_2("CRMd is down: ha_state=%s, ccm_state=%s",
crm_str(ha_state), crm_str(ccm_state));
crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s",
crm_str(crm_state), crm_str(join_state),
crm_str(exp_state));
} else {
/* mark it unclean */
pe_fence_node(data_set, this_node, "because it is partially and/or un-expectedly down");
crm_info("\tha_state=%s, ccm_state=%s,"
" crm_state=%s, join_state=%s, expected=%s",
crm_str(ha_state), crm_str(ccm_state),
crm_str(crm_state), crm_str(join_state),
crm_str(exp_state));
}
return online;
}
static gboolean
determine_online_status_fencing(pe_working_set_t *data_set, xmlNode * node_state, node_t *this_node)
{
gboolean online = FALSE;
gboolean do_terminate = FALSE;
const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE);
const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE);
const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM);
const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE);
const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE);
const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate");
if(ha_state == NULL) {
ha_state = DEADSTATUS;
}
if(crm_is_true(terminate)) {
do_terminate = TRUE;
} else if(terminate != NULL && strlen(terminate) > 0) {
/* could be a time() value */
char t = terminate[0];
if(t != '0' && isdigit(t)) {
do_terminate = TRUE;
}
}
if(crm_is_true(ccm_state)
&& safe_str_eq(ha_state, ACTIVESTATUS)
&& safe_str_eq(crm_state, ONLINESTATUS)) {
if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) {
online = TRUE;
if(do_terminate) {
pe_fence_node(data_set, this_node, "because termination was requested");
}
} else if(join_state == exp_state /* == NULL */) {
crm_info("Node %s is coming up", this_node->details->uname);
crm_debug("\tha_state=%s, ccm_state=%s,"
" crm_state=%s, join_state=%s, expected=%s",
crm_str(ha_state), crm_str(ccm_state),
crm_str(crm_state), crm_str(join_state),
crm_str(exp_state));
} else if(safe_str_eq(join_state, CRMD_JOINSTATE_PENDING)) {
crm_info("Node %s is not ready to run resources",
this_node->details->uname);
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
online = TRUE;
} else if(safe_str_eq(join_state, CRMD_JOINSTATE_NACK)) {
crm_warn("Node %s is not part of the cluster",
this_node->details->uname);
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
online = TRUE;
} else if(safe_str_eq(join_state, exp_state)) {
crm_info("Node %s is still coming up: %s",
this_node->details->uname, join_state);
crm_info("\tha_state=%s, ccm_state=%s, crm_state=%s",
crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state));
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
online = TRUE;
} else {
crm_warn("Node %s (%s) is un-expectedly down",
this_node->details->uname, this_node->details->id);
crm_info("\tha_state=%s, ccm_state=%s,"
" crm_state=%s, join_state=%s, expected=%s",
crm_str(ha_state), crm_str(ccm_state),
crm_str(crm_state), crm_str(join_state),
crm_str(exp_state));
pe_fence_node(data_set, this_node, "because it is un-expectedly down");
}
} else if(crm_is_true(ccm_state) == FALSE
&& safe_str_eq(ha_state, DEADSTATUS)
&& safe_str_eq(crm_state, OFFLINESTATUS)
&& this_node->details->expected_up == FALSE) {
crm_debug("Node %s is down: join_state=%s, expected=%s",
this_node->details->uname,
crm_str(join_state), crm_str(exp_state));
#if 0
/* While a nice optimization, it causes the cluster to block until the node
* comes back online. Which is a serious problem if the cluster software
* is not configured to start at boot or stonith is configured to merely
* stop the node instead of restart it.
* Easily triggered by setting terminate=true for the DC
*/
} else if(do_terminate) {
crm_info("Node %s is %s after forced termination",
this_node->details->uname, crm_is_true(ccm_state)?"coming up":"going down");
crm_debug("\tha_state=%s, ccm_state=%s,"
" crm_state=%s, join_state=%s, expected=%s",
crm_str(ha_state), crm_str(ccm_state),
crm_str(crm_state), crm_str(join_state),
crm_str(exp_state));
if(crm_is_true(ccm_state) == FALSE) {
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
online = TRUE;
}
#endif
} else if(this_node->details->expected_up) {
/* mark it unclean */
pe_fence_node(data_set, this_node, "because it is un-expectedly down");
crm_info("\tha_state=%s, ccm_state=%s,"
" crm_state=%s, join_state=%s, expected=%s",
crm_str(ha_state), crm_str(ccm_state),
crm_str(crm_state), crm_str(join_state),
crm_str(exp_state));
} else {
crm_info("Node %s is down", this_node->details->uname);
crm_debug("\tha_state=%s, ccm_state=%s,"
" crm_state=%s, join_state=%s, expected=%s",
crm_str(ha_state), crm_str(ccm_state),
crm_str(crm_state), crm_str(join_state),
crm_str(exp_state));
}
return online;
}
gboolean
determine_online_status(
xmlNode * node_state, node_t *this_node, pe_working_set_t *data_set)
{
gboolean online = FALSE;
const char *shutdown = NULL;
const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE);
if(this_node == NULL) {
crm_config_err("No node to check");
return online;
}
this_node->details->shutdown = FALSE;
this_node->details->expected_up = FALSE;
shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN);
if(shutdown != NULL && safe_str_neq("0", shutdown)) {
this_node->details->shutdown = TRUE;
} else if(safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
this_node->details->expected_up = TRUE;
}
if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
online = determine_online_status_no_fencing(
data_set, node_state, this_node);
} else {
online = determine_online_status_fencing(
data_set, node_state, this_node);
}
if(online) {
this_node->details->online = TRUE;
} else {
/* remove node from contention */
this_node->fixed = TRUE;
this_node->weight = -INFINITY;
}
if(online && this_node->details->shutdown) {
/* dont run resources here */
this_node->fixed = TRUE;
this_node->weight = -INFINITY;
}
if(this_node->details->unclean) {
pe_proc_warn("Node %s is unclean", this_node->details->uname);
} else if(this_node->details->online) {
crm_info("Node %s is %s", this_node->details->uname,
this_node->details->shutdown?"shutting down":
this_node->details->pending?"pending":
this_node->details->standby?"standby":"online");
} else {
crm_debug_2("Node %s is offline", this_node->details->uname);
}
return online;
}
#define set_char(x) last_rsc_id[lpc] = x; complete = TRUE;
char *
clone_zero(const char *last_rsc_id)
{
int lpc = 0;
char *zero = NULL;
CRM_CHECK(last_rsc_id != NULL, return NULL);
if(last_rsc_id != NULL) {
lpc = strlen(last_rsc_id);
}
while(--lpc > 0) {
switch(last_rsc_id[lpc]) {
case 0:
return NULL;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
case ':':
crm_malloc0(zero, lpc + 3);
memcpy(zero, last_rsc_id, lpc);
zero[lpc] = ':';
zero[lpc+1] = '0';
zero[lpc+2] = 0;
return zero;
}
}
return NULL;
}
char *
increment_clone(char *last_rsc_id)
{
int lpc = 0;
int len = 0;
char *tmp = NULL;
gboolean complete = FALSE;
CRM_CHECK(last_rsc_id != NULL, return NULL);
if(last_rsc_id != NULL) {
len = strlen(last_rsc_id);
}
lpc = len-1;
while(complete == FALSE && lpc > 0) {
switch (last_rsc_id[lpc]) {
case 0:
lpc--;
break;
case '0':
set_char('1');
break;
case '1':
set_char('2');
break;
case '2':
set_char('3');
break;
case '3':
set_char('4');
break;
case '4':
set_char('5');
break;
case '5':
set_char('6');
break;
case '6':
set_char('7');
break;
case '7':
set_char('8');
break;
case '8':
set_char('9');
break;
case '9':
last_rsc_id[lpc] = '0';
lpc--;
break;
case ':':
tmp = last_rsc_id;
crm_malloc0(last_rsc_id, len + 2);
memcpy(last_rsc_id, tmp, len);
last_rsc_id[++lpc] = '1';
last_rsc_id[len] = '0';
last_rsc_id[len+1] = 0;
complete = TRUE;
crm_free(tmp);
break;
default:
crm_err("Unexpected char: %c (%d)",
last_rsc_id[lpc], lpc);
break;
}
}
return last_rsc_id;
}
static int
get_clone(char *last_rsc_id)
{
int clone = 0;
int lpc = 0;
int len = 0;
CRM_CHECK(last_rsc_id != NULL, return -1);
if(last_rsc_id != NULL) {
len = strlen(last_rsc_id);
}
lpc = len-1;
while(lpc > 0) {
switch (last_rsc_id[lpc]) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
clone += (int)(last_rsc_id[lpc] - '0') * (len - lpc);
lpc--;
break;
case ':':
return clone;
break;
default:
crm_err("Unexpected char: %d (%c)",
lpc, last_rsc_id[lpc]);
return clone;
break;
}
}
return -1;
}
static resource_t *
create_fake_resource(const char *rsc_id, xmlNode *rsc_entry, pe_working_set_t *data_set)
{
resource_t *rsc = NULL;
xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
copy_in_properties(xml_rsc, rsc_entry);
crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
crm_log_xml_debug(xml_rsc, "Orphan resource");
if(!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
return NULL;
}
set_bit(rsc->flags, pe_rsc_orphan);
data_set->resources = g_list_append(data_set->resources, rsc);
return rsc;
}
extern resource_t *create_child_clone(resource_t *rsc, int sub_id, pe_working_set_t *data_set);
static resource_t *find_clone(pe_working_set_t *data_set, node_t *node, resource_t *parent, const char *rsc_id)
{
int len = 0;
resource_t *rsc = NULL;
char *base = clone_zero(rsc_id);
char *alt_rsc_id = crm_strdup(rsc_id);
CRM_ASSERT(parent != NULL);
CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master);
if(base) {
len = strlen(base);
}
if(len > 0) {
base[len-1] = 0;
}
crm_debug_3("Looking for %s on %s in %s %d",
rsc_id, node->details->uname, parent->id, is_set(parent->flags, pe_rsc_unique));
if(is_set(parent->flags, pe_rsc_unique)) {
crm_debug_3("Looking for %s", rsc_id);
rsc = parent->fns->find_rsc(parent, rsc_id, NULL, pe_find_current);
} else {
crm_trace("Looking for %s on %s", base, node->details->uname);
rsc = parent->fns->find_rsc(parent, base, node, pe_find_partial|pe_find_current);
if(rsc != NULL && rsc->running_on) {
GListPtr gIter = parent->children;
rsc = NULL;
crm_debug_3("Looking for an existing orphan for %s: %s on %s", parent->id, rsc_id, node->details->uname);
/* There is already an instance of this _anonymous_ clone active on "node".
*
* If there is a partially active orphan (only applies to clone groups) on
* the same node, use that.
* Otherwise create a new (orphaned) instance at "orphan_check:".
*/
for(; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t*)gIter->data;
node_t *loc = child->fns->location(child, NULL, TRUE);
if(loc && loc->details == node->details) {
resource_t *tmp = child->fns->find_rsc(child, base, NULL, pe_find_partial|pe_find_current);
if(tmp && tmp->running_on == NULL) {
rsc = tmp;
break;
}
}
}
goto orphan_check;
} else if(((resource_t*)parent->children->data)->variant == pe_group) {
/* If we're grouped, we need to look for a peer thats active on $node
* and use their clone instance number
*/
resource_t *peer = parent->fns->find_rsc(parent, NULL, node, pe_find_clone|pe_find_current);
if(peer && peer->running_on) {
char buffer[256];
int clone_num = get_clone(peer->id);
snprintf(buffer, 256, "%s%d", base, clone_num);
rsc = parent->fns->find_rsc(parent, buffer, node, pe_find_current|pe_find_inactive);
if(rsc) {
crm_trace("Found someone active: %s on %s, becoming %s", peer->id, ((node_t*)peer->running_on->data)->details->uname, buffer);
}
}
}
while(rsc == NULL) {
rsc = parent->fns->find_rsc(parent, alt_rsc_id, NULL, pe_find_current);
if(rsc == NULL) {
crm_trace("Unknown resource: %s", alt_rsc_id);
break;
}
if(rsc->running_on == NULL) {
crm_trace("Resource %s: just right", alt_rsc_id);
break;
}
crm_trace("Resource %s: already active", alt_rsc_id);
alt_rsc_id = increment_clone(alt_rsc_id);
rsc = NULL;
}
}
orphan_check:
if(rsc == NULL) {
/* Create an extra orphan */
resource_t *top = create_child_clone(parent, -1, data_set);
crm_debug("Created orphan for %s: %s on %s", parent->id, rsc_id, node->details->uname);
rsc = top->fns->find_rsc(top, base, NULL, pe_find_current|pe_find_partial);
CRM_ASSERT(rsc != NULL);
}
crm_free(rsc->clone_name); rsc->clone_name = NULL;
if(safe_str_neq(rsc_id, rsc->id)) {
crm_info("Internally renamed %s on %s to %s%s",
rsc_id, node->details->uname, rsc->id,
is_set(rsc->flags, pe_rsc_orphan)?" (ORPHAN)":"");
rsc->clone_name = crm_strdup(rsc_id);
}
crm_free(alt_rsc_id);
crm_free(base);
return rsc;
}
static resource_t *
unpack_find_resource(
pe_working_set_t *data_set, node_t *node, const char *rsc_id, xmlNode *rsc_entry)
{
resource_t *rsc = NULL;
resource_t *clone_parent = NULL;
char *alt_rsc_id = crm_strdup(rsc_id);
crm_debug_2("looking for %s", rsc_id);
rsc = pe_find_resource(data_set->resources, alt_rsc_id);
/* no match */
if(rsc == NULL) {
/* Even when clone-max=0, we still create a single :0 orphan to match against */
char *tmp = clone_zero(alt_rsc_id);
resource_t *clone0 = pe_find_resource(data_set->resources, tmp);
clone_parent = uber_parent(clone0);
crm_free(tmp);
crm_debug_2("%s not found: %s", alt_rsc_id, clone_parent?clone_parent->id:"orphan");
} else {
clone_parent = uber_parent(rsc);
}
if(clone_parent && clone_parent->variant > pe_group) {
rsc = find_clone(data_set, node, clone_parent, rsc_id);
CRM_ASSERT(rsc != NULL);
}
crm_free(alt_rsc_id);
return rsc;
}
static resource_t *
process_orphan_resource(xmlNode *rsc_entry, node_t *node, pe_working_set_t *data_set)
{
resource_t *rsc = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
if(is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
clear_bit(rsc->flags, pe_rsc_managed);
} else {
print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE);
CRM_CHECK(rsc != NULL, return NULL);
resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
}
return rsc;
}
static void
process_rsc_state(resource_t *rsc, node_t *node,
enum action_fail_response on_fail,
xmlNode *migrate_op,
pe_working_set_t *data_set)
{
crm_debug_2("Resource %s is %s on %s: on_fail=%s",
rsc->id, role2text(rsc->role),
node->details->uname, fail2text(on_fail));
/* process current state */
if(rsc->role != RSC_ROLE_UNKNOWN) {
resource_t *iter = rsc;
while(iter) {
if(g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
node_t *n = node_copy(node);
g_hash_table_insert(iter->known_on, (gpointer)n->details->id, n);
}
if(is_set(iter->flags, pe_rsc_unique)) {
break;
}
iter = iter->parent;
}
}
if(node->details->unclean) {
/* No extra processing needed
* Also allows resources to be started again after a node is shot
*/
on_fail = action_fail_ignore;
}
switch(on_fail) {
case action_fail_ignore:
/* nothing to do */
break;
case action_fail_fence:
/* treat it as if it is still running
* but also mark the node as unclean
*/
pe_fence_node(data_set, node, "to recover from resource failure(s)");
break;
case action_fail_standby:
node->details->standby = TRUE;
node->details->standby_onfail = TRUE;
break;
case action_fail_block:
/* is_managed == FALSE will prevent any
* actions being sent for the resource
*/
clear_bit(rsc->flags, pe_rsc_managed);
break;
case action_fail_migrate:
/* make sure it comes up somewhere else
* or not at all
*/
resource_location(rsc, node, -INFINITY,
"__action_migration_auto__",data_set);
break;
case action_fail_stop:
rsc->next_role = RSC_ROLE_STOPPED;
break;
case action_fail_recover:
if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
set_bit(rsc->flags, pe_rsc_failed);
stop_action(rsc, node, FALSE);
}
break;
}
if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
if(is_set(rsc->flags, pe_rsc_orphan)) {
if(is_set(rsc->flags, pe_rsc_managed)) {
crm_config_warn("Detected active orphan %s running on %s",
rsc->id, node->details->uname);
} else {
crm_config_warn("Cluster configured not to stop active orphans."
" %s must be stopped manually on %s",
rsc->id, node->details->uname);
}
}
native_add_running(rsc, node, data_set);
if(on_fail != action_fail_ignore) {
set_bit(rsc->flags, pe_rsc_failed);
}
} else if(rsc->clone_name) {
crm_debug_2("Resetting clone_name %s for %s (stopped)",
rsc->clone_name, rsc->id);
crm_free(rsc->clone_name);
rsc->clone_name = NULL;
} else {
char *key = stop_key(rsc);
GListPtr possible_matches = find_actions(rsc->actions, key, node);
GListPtr gIter = possible_matches;
for(; gIter != NULL; gIter = gIter->next) {
action_t *stop = (action_t*)gIter->data;
stop->flags |= pe_action_optional;
}
crm_free(key);
}
}
/* create active recurring operations as optional */
static void
process_recurring(node_t *node, resource_t *rsc,
int start_index, int stop_index,
GListPtr sorted_op_list, pe_working_set_t *data_set)
{
int counter = -1;
const char *task = NULL;
const char *status = NULL;
GListPtr gIter = sorted_op_list;
crm_debug_3("%s: Start index %d, stop index = %d",
rsc->id, start_index, stop_index);
for(; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode*)gIter->data;
int interval = 0;
char *key = NULL;
const char *id = ID(rsc_op);
const char *interval_s = NULL;
counter++;
if(node->details->online == FALSE) {
crm_debug_4("Skipping %s/%s: node is offline",
rsc->id, node->details->uname);
break;
} else if(start_index < stop_index) {
crm_debug_4("Skipping %s/%s: not active",
rsc->id, node->details->uname);
break;
} else if(counter <= start_index) {
crm_debug_4("Skipping %s/%s: old",
id, node->details->uname);
continue;
}
interval_s = crm_element_value(rsc_op,XML_LRM_ATTR_INTERVAL);
interval = crm_parse_int(interval_s, "0");
if(interval == 0) {
crm_debug_4("Skipping %s/%s: non-recurring",
id, node->details->uname);
continue;
}
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if(safe_str_eq(status, "-1")) {
crm_debug_4("Skipping %s/%s: status",
id, node->details->uname);
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
/* create the action */
key = generate_op_key(rsc->id, task, interval);
crm_debug_3("Creating %s/%s", key, node->details->uname);
custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
}
}
void
calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
{
int counter = -1;
const char *task = NULL;
const char *status = NULL;
GListPtr gIter = sorted_op_list;
*stop_index = -1;
*start_index = -1;
for(; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode*)gIter->data;
counter++;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if(safe_str_eq(task, CRMD_ACTION_STOP)
&& safe_str_eq(status, "0")) {
*stop_index = counter;
} else if(safe_str_eq(task, CRMD_ACTION_START)) {
*start_index = counter;
} else if(*start_index <= *stop_index
&& safe_str_eq(task, CRMD_ACTION_STATUS)) {
const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
if(safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
*start_index = counter;
}
}
}
}
static void
unpack_lrm_rsc_state(
node_t *node, xmlNode * rsc_entry, pe_working_set_t *data_set)
{
GListPtr gIter = NULL;
int stop_index = -1;
int start_index = -1;
enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
const char *task = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
resource_t *rsc = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
xmlNode *migrate_op = NULL;
xmlNode *rsc_op = NULL;
enum action_fail_response on_fail = FALSE;
enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
crm_debug_3("[%s] Processing %s on %s",
crm_element_name(rsc_entry), rsc_id, node->details->uname);
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for(rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
if(crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
op_list = g_list_prepend(op_list, rsc_op);
}
}
if(op_list == NULL) {
/* if there are no operations, there is nothing to do */
return;
}
/* find the resource */
rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
if(rsc == NULL) {
rsc = process_orphan_resource(rsc_entry, node, data_set);
}
CRM_ASSERT(rsc != NULL);
/* process operations */
saved_role = rsc->role;
on_fail = action_fail_ignore;
rsc->role = RSC_ROLE_UNKNOWN;
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
for(gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode*)gIter->data;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
migrate_op = rsc_op;
}
unpack_rsc_op(rsc, node, rsc_op, gIter->next, &on_fail, data_set);
}
/* create active recurring operations as optional */
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
process_recurring(node, rsc, start_index, stop_index,
sorted_op_list, data_set);
/* no need to free the contents */
g_list_free(sorted_op_list);
process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
if(get_target_role(rsc, &req_role)) {
if(rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
crm_debug("%s: Overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role),
role2text(req_role));
rsc->next_role = req_role;
} else if(req_role > rsc->next_role) {
crm_info("%s: Not overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role),
role2text(req_role));
}
}
if(saved_role > rsc->role) {
rsc->role = saved_role;
}
}
gboolean
unpack_lrm_resources(node_t *node, xmlNode * lrm_rsc_list, pe_working_set_t *data_set)
{
xmlNode *rsc_entry = NULL;
CRM_CHECK(node != NULL, return FALSE);
crm_debug_3("Unpacking resources on %s", node->details->uname);
for(rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) {
if(crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
unpack_lrm_rsc_state(node, rsc_entry, data_set);
}
}
return TRUE;
}
static void set_active(resource_t *rsc)
{
resource_t *top = uber_parent(rsc);
if(top && top->variant == pe_master) {
rsc->role = RSC_ROLE_SLAVE;
} else {
rsc->role = RSC_ROLE_STARTED;
}
}
static void set_node_score(gpointer key, gpointer value, gpointer user_data)
{
node_t *node = value;
int *score = user_data;
node->weight = *score;
}
#define STATUS_PATH_MAX 1024
static xmlNode *find_lrm_op(const char *resource, const char *op, const char *node, const char *source, pe_working_set_t *data_set)
{
int offset = 0;
char xpath[STATUS_PATH_MAX];
offset += snprintf(xpath+offset, STATUS_PATH_MAX-offset, "//node_state[@id='%s']", node);
offset += snprintf(xpath+offset, STATUS_PATH_MAX-offset, "//"XML_LRM_TAG_RESOURCE"[@id='%s']", resource);
/* Need to check against transition_magic too? */
if(source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
offset += snprintf(xpath+offset, STATUS_PATH_MAX-offset, "/"XML_LRM_TAG_RSC_OP"[@operation='%s' and @migrate_target='%s']", op, source);
} else if(source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
offset += snprintf(xpath+offset, STATUS_PATH_MAX-offset, "/"XML_LRM_TAG_RSC_OP"[@operation='%s' and @migrate_source='%s']", op, source);
} else {
offset += snprintf(xpath+offset, STATUS_PATH_MAX-offset, "/"XML_LRM_TAG_RSC_OP"[@operation='%s']", op);
}
return get_xpath_object(xpath, data_set->input, LOG_DEBUG);
}
gboolean
unpack_rsc_op(resource_t *rsc, node_t *node, xmlNode *xml_op, GListPtr next,
enum action_fail_response *on_fail, pe_working_set_t *data_set)
{
int task_id = 0;
const char *id = NULL;
const char *key = NULL;
const char *task = NULL;
const char *magic = NULL;
const char *actual_rc = NULL;
/* const char *target_rc = NULL; */
const char *task_status = NULL;
const char *interval_s = NULL;
const char *op_version = NULL;
int interval = 0;
int task_status_i = -2;
int actual_rc_i = 0;
int target_rc = -1;
int last_failure = 0;
action_t *action = NULL;
node_t *effective_node = NULL;
resource_t *failed = NULL;
gboolean expired = FALSE;
gboolean is_probe = FALSE;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(node != NULL, return FALSE);
CRM_CHECK(xml_op != NULL, return FALSE);
id = ID(xml_op);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS);
op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
CRM_CHECK(id != NULL, return FALSE);
CRM_CHECK(task != NULL, return FALSE);
CRM_CHECK(task_status != NULL, return FALSE);
task_status_i = crm_parse_int(task_status, NULL);
CRM_CHECK(task_status_i <= LRM_OP_ERROR, return FALSE);
CRM_CHECK(task_status_i >= LRM_OP_PENDING, return FALSE);
if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
/* safe to ignore these */
return TRUE;
}
if(rsc->failure_timeout > 0) {
int last_run = 0;
if(crm_element_value_int(xml_op, "last-rc-change", &last_run) == 0) {
time_t now = get_timet_now(data_set);
if(now > (last_run + rsc->failure_timeout)) {
expired = TRUE;
}
}
}
crm_debug_2("Unpacking task %s/%s (call_id=%d, status=%s) on %s (role=%s)",
id, task, task_id, task_status, node->details->uname,
role2text(rsc->role));
interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
interval = crm_parse_int(interval_s, "0");
if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
is_probe = TRUE;
}
if(node->details->unclean) {
crm_debug_2("Node %s (where %s is running) is unclean."
" Further action depends on the value of the stop's on-fail attribue",
node->details->uname, rsc->id);
}
actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC);
CRM_CHECK(actual_rc != NULL, return FALSE);
actual_rc_i = crm_parse_int(actual_rc, NULL);
if(key) {
int dummy = 0;
char *dummy_string = NULL;
decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc);
crm_free(dummy_string);
}
if(task_status_i == LRM_OP_DONE && target_rc >= 0) {
if(target_rc == actual_rc_i) {
task_status_i = LRM_OP_DONE;
} else {
task_status_i = LRM_OP_ERROR;
crm_debug("%s on %s returned %d (%s) instead of the expected value: %d (%s)",
id, node->details->uname,
actual_rc_i, execra_code2string(actual_rc_i),
target_rc, execra_code2string(target_rc));
}
} else if(task_status_i == LRM_OP_ERROR) {
/* let us decide that */
task_status_i = LRM_OP_DONE;
}
if(task_status_i == LRM_OP_NOTSUPPORTED) {
actual_rc_i = EXECRA_UNIMPLEMENT_FEATURE;
}
if(task_status_i != actual_rc_i
&& rsc->failure_timeout > 0
&& get_failcount(node, rsc, &last_failure, data_set) == 0) {
if(last_failure > 0) {
action_t *clear_op = NULL;
clear_op = custom_action(
rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'),
CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname);
}
}
if(expired
&& actual_rc_i != EXECRA_NOT_RUNNING
&& actual_rc_i != EXECRA_RUNNING_MASTER
&& actual_rc_i != EXECRA_OK) {
crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s",
id, actual_rc_i, magic, node->details->uname);
goto done;
}
/* we could clean this up significantly except for old LRMs and CRMs that
* didnt include target_rc and liked to remap status
*/
switch(actual_rc_i) {
case EXECRA_NOT_RUNNING:
if(is_probe || target_rc == actual_rc_i) {
task_status_i = LRM_OP_DONE;
rsc->role = RSC_ROLE_STOPPED;
/* clear any previous failure actions */
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
} else if(safe_str_neq(task, CRMD_ACTION_STOP)) {
task_status_i = LRM_OP_ERROR;
}
break;
case EXECRA_RUNNING_MASTER:
if(is_probe) {
task_status_i = LRM_OP_DONE;
crm_notice("Operation %s found resource %s active in master mode on %s",
id, rsc->id, node->details->uname);
} else if(target_rc == actual_rc_i) {
/* nothing to do */
} else if(target_rc >= 0) {
task_status_i = LRM_OP_ERROR;
/* legacy code for pre-0.6.5 operations */
} else if(safe_str_neq(task, CRMD_ACTION_STATUS)
|| rsc->role != RSC_ROLE_MASTER) {
task_status_i = LRM_OP_ERROR;
if(rsc->role != RSC_ROLE_MASTER) {
crm_err("%s reported %s in master mode on %s",
id, rsc->id,
node->details->uname);
}
}
rsc->role = RSC_ROLE_MASTER;
break;
case EXECRA_FAILED_MASTER:
rsc->role = RSC_ROLE_MASTER;
task_status_i = LRM_OP_ERROR;
break;
case EXECRA_UNIMPLEMENT_FEATURE:
if(interval > 0) {
task_status_i = LRM_OP_NOTSUPPORTED;
break;
}
/* else: fall through */
case EXECRA_INSUFFICIENT_PRIV:
case EXECRA_NOT_INSTALLED:
case EXECRA_INVALID_PARAM:
effective_node = node;
/* fall through */
case EXECRA_NOT_CONFIGURED:
failed = rsc;
if(is_not_set(rsc->flags, pe_rsc_unique)) {
failed = uber_parent(failed);
}
do_crm_log(actual_rc_i==EXECRA_NOT_INSTALLED?LOG_NOTICE:LOG_ERR,
"Hard error - %s failed with rc=%d: Preventing %s from re-starting %s %s",
id, actual_rc_i, failed->id,
effective_node?"on":"anywhere",
effective_node?effective_node->details->uname:"in the cluster");
resource_location(failed, effective_node, -INFINITY, "hard-error", data_set);
if(is_probe) {
/* treat these like stops */
task = CRMD_ACTION_STOP;
task_status_i = LRM_OP_DONE;
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
if(actual_rc_i != EXECRA_NOT_INSTALLED
|| is_set(data_set->flags, pe_flag_symmetric_cluster)) {
if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
add_node_copy(data_set->failed, xml_op);
}
}
}
break;
case EXECRA_OK:
if(is_probe && target_rc == 7) {
task_status_i = LRM_OP_DONE;
crm_notice("Operation %s found resource %s active on %s",
id, rsc->id, node->details->uname);
/* legacy code for pre-0.6.5 operations */
} else if(target_rc < 0
&& interval > 0
&& rsc->role == RSC_ROLE_MASTER) {
/* catch status ops that return 0 instead of 8 while they
* are supposed to be in master mode
*/
task_status_i = LRM_OP_ERROR;
}
break;
default:
if(task_status_i == LRM_OP_DONE) {
crm_info("Remapping %s (rc=%d) on %s to an ERROR",
id, actual_rc_i, node->details->uname);
task_status_i = LRM_OP_ERROR;
}
}
if(task_status_i == LRM_OP_ERROR
|| task_status_i == LRM_OP_TIMEOUT
|| task_status_i == LRM_OP_NOTSUPPORTED) {
action = custom_action(rsc, crm_strdup(id), task, NULL, TRUE, FALSE, data_set);
if(expired) {
crm_notice("Ignoring expired failure (calculated) %s (rc=%d, magic=%s) on %s",
id, actual_rc_i, magic, node->details->uname);
goto done;
} else if(action->on_fail == action_fail_ignore) {
crm_warn("Remapping %s (rc=%d) on %s to DONE: ignore",
id, actual_rc_i, node->details->uname);
task_status_i = LRM_OP_DONE;
+ set_bit(rsc->flags, pe_rsc_failure_ignored);
+
+ crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
+ if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
+ add_node_copy(data_set->failed, xml_op);
+ }
}
}
switch(task_status_i) {
case LRM_OP_PENDING:
if(safe_str_eq(task, CRMD_ACTION_START)) {
set_bit(rsc->flags, pe_rsc_start_pending);
set_active(rsc);
} else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
}
/*
* Intentionally ignoring pending migrate ops here;
* haven't decided if we need to do anything special
* with them yet...
*/
break;
case LRM_OP_DONE:
crm_debug_3("%s/%s completed on %s",
rsc->id, task, node->details->uname);
if(actual_rc_i == EXECRA_NOT_RUNNING) {
/* nothing to do */
} else if(safe_str_eq(task, CRMD_ACTION_STOP)) {
rsc->role = RSC_ROLE_STOPPED;
/* clear any previous failure actions */
switch(*on_fail) {
case action_fail_block:
case action_fail_stop:
case action_fail_fence:
case action_fail_migrate:
case action_fail_standby:
crm_debug_2("%s.%s is not cleared by a completed stop",
rsc->id, fail2text(*on_fail));
break;
case action_fail_ignore:
case action_fail_recover:
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
}
} else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
} else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
rsc->role = RSC_ROLE_SLAVE;
} else if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
rsc->role = RSC_ROLE_STARTED;
} else if(safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
/*
* The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src)
*
* So if a migrate_to is followed by a stop, then we dont need to care what
* happended on the target node
*
* Without the stop, we need to look for a successful migrate_from.
* This would also imply we're no longer running on the source
*
* Without the stop, and without a migrate_from op we make sure the resource
* gets stopped on both source and target (assuming the target is up)
*
*/
int stop_id = 0;
xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set);
if(stop_op) {
crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
}
if(stop_op == NULL || stop_id < task_id) {
int from_rc = 0, from_status = 0;
const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
node_t *target = pe_find_node_id(data_set->nodes, migrate_target);
xmlNode *migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set);
rsc->role = RSC_ROLE_STARTED; /* can be master? */
if(migrate_from) {
crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
crm_trace("%s op on %s exited with status=%d, rc=%d",
ID(migrate_from), migrate_target, from_status, from_rc);
}
if(migrate_from && from_rc == EXECRA_OK && from_status == LRM_OP_DONE) {
crm_trace("Detected dangling migration op: %s on %s", ID(xml_op), migrate_source);
/* all good
* just need to arrange for the stop action to get sent
* but _without_ affecting the target somehow
*/
rsc->role = RSC_ROLE_STOPPED;
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
} else if(migrate_from) { /* Failed */
crm_trace("Marking active on %s %p %d", migrate_target, target, target->details->online);
if(target && target->details->online) {
native_add_running(rsc, target, data_set);
}
} else { /* Pending or complete but erased */
node_t *target = pe_find_node_id(data_set->nodes, migrate_target);
crm_trace("Marking active on %s %p %d", migrate_target, target, target->details->online);
if(target && target->details->online) {
/* TODO: One day, figure out how to complete the migration
* For now, consider it active in both locations so it gets stopped everywhere
*/
native_add_running(rsc, target, data_set);
} else {
/* Consider it failed here - forces a restart, prevents migration */
set_bit_inplace(rsc->flags, pe_rsc_failed);
}
}
}
} else if(rsc->role < RSC_ROLE_STARTED) {
/* start, migrate_to and migrate_from will land here */
crm_debug_3("%s active on %s",
rsc->id, node->details->uname);
set_active(rsc);
}
break;
case LRM_OP_ERROR:
case LRM_OP_TIMEOUT:
case LRM_OP_NOTSUPPORTED:
crm_warn("Processing failed op %s on %s: %s (%d)",
id, node->details->uname,
execra_code2string(actual_rc_i), actual_rc_i);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
add_node_copy(data_set->failed, xml_op);
}
if(*on_fail < action->on_fail) {
*on_fail = action->on_fail;
}
if(safe_str_eq(task, CRMD_ACTION_STOP)) {
resource_location(
rsc, node, -INFINITY, "__stop_fail__", data_set);
} else if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
int stop_id = 0;
int migrate_id = 0;
const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set);
xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target, data_set);
if(stop_op) {
crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
}
if(migrate_op) {
crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
}
/* Get our state right */
rsc->role = RSC_ROLE_STARTED; /* can be master? */
if(stop_op == NULL || stop_id < migrate_id) {
node_t *source = pe_find_node_id(data_set->nodes, migrate_source);
if(source && source->details->online) {
native_add_running(rsc, source, data_set);
}
}
} else if(safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
int stop_id = 0;
int migrate_id = 0;
const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set);
xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set);
if(stop_op) {
crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
}
if(migrate_op) {
crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
}
/* Get our state right */
rsc->role = RSC_ROLE_STARTED; /* can be master? */
if(stop_op == NULL || stop_id < migrate_id) {
node_t *target = pe_find_node_id(data_set->nodes, migrate_target);
crm_trace("Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op, migrate_id);
if(target && target->details->online) {
native_add_running(rsc, target, data_set);
}
} else if(migrate_op == NULL) {
/* Make sure it gets cleaned up, the stop may pre-date the migrate_from */
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
}
} else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
} else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
/*
* staying in role=master ends up putting the PE/TE into a loop
* setting role=slave is not dangerous because no master will be
* promoted until the failed resource has been fully stopped
*/
crm_warn("Forcing %s to stop after a failed demote action", rsc->id);
rsc->next_role = RSC_ROLE_STOPPED;
rsc->role = RSC_ROLE_SLAVE;
} else if(compare_version("2.0", op_version) > 0
&& safe_str_eq(task, CRMD_ACTION_START)) {
crm_warn("Compatibility handling for failed op %s on %s",
id, node->details->uname);
resource_location(
rsc, node, -INFINITY, "__legacy_start__", data_set);
}
if(rsc->role < RSC_ROLE_STARTED) {
set_active(rsc);
}
crm_debug_2("Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
rsc->id, role2text(rsc->role),
node->details->unclean?"true":"false",
fail2text(action->on_fail),
role2text(action->fail_role));
if(action->fail_role != RSC_ROLE_STARTED
&& rsc->next_role < action->fail_role) {
rsc->next_role = action->fail_role;
}
if(action->fail_role == RSC_ROLE_STOPPED) {
int score = -INFINITY;
crm_err("Making sure %s doesn't come up again", rsc->id);
/* make sure it doesnt come up again */
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
g_hash_table_foreach(rsc->allowed_nodes, set_node_score, &score);
}
pe_free_action(action);
action = NULL;
break;
case LRM_OP_CANCELLED:
/* do nothing?? */
pe_err("Dont know what to do for cancelled ops yet");
break;
}
done:
crm_debug_3("Resource %s after %s: role=%s",
rsc->id, task, role2text(rsc->role));
pe_free_action(action);
return TRUE;
}
gboolean
add_node_attrs(xmlNode *xml_obj, node_t *node, gboolean overwrite, pe_working_set_t *data_set)
{
g_hash_table_insert(node->details->attrs,
crm_strdup("#"XML_ATTR_UNAME),
crm_strdup(node->details->uname));
g_hash_table_insert(node->details->attrs,
crm_strdup("#"XML_ATTR_ID),
crm_strdup(node->details->id));
if(safe_str_eq(node->details->id, data_set->dc_uuid)) {
data_set->dc_node = node;
node->details->is_dc = TRUE;
g_hash_table_insert(node->details->attrs,
crm_strdup("#"XML_ATTR_DC),
crm_strdup(XML_BOOLEAN_TRUE));
} else {
g_hash_table_insert(node->details->attrs,
crm_strdup("#"XML_ATTR_DC),
crm_strdup(XML_BOOLEAN_FALSE));
}
unpack_instance_attributes(
data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
node->details->attrs, NULL, overwrite, data_set->now);
return TRUE;
}
static GListPtr
extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry, gboolean active_filter)
{
int counter = -1;
int stop_index = -1;
int start_index = -1;
xmlNode *rsc_op = NULL;
GListPtr gIter = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for(rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
if(crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
crm_xml_add(rsc_op, "resource", rsc);
crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
op_list = g_list_prepend(op_list, rsc_op);
}
}
if(op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
/* create active recurring operations as optional */
if(active_filter == FALSE) {
return sorted_op_list;
}
op_list = NULL;
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for(gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode*)gIter->data;
counter++;
if(start_index < stop_index) {
crm_debug_4("Skipping %s: not active", ID(rsc_entry));
break;
} else if(counter < start_index) {
crm_debug_4("Skipping %s: old", ID(rsc_op));
continue;
}
op_list = g_list_append(op_list, rsc_op);
}
g_list_free(sorted_op_list);
return op_list;
}
GListPtr find_operations(
const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set)
{
GListPtr output = NULL;
GListPtr intermediate = NULL;
xmlNode *tmp = NULL;
xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
const char *uname = NULL;
node_t *this_node = NULL;
xmlNode *node_state = NULL;
for(node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) {
if(crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
uname = crm_element_value(node_state, XML_ATTR_UNAME);
if(node != NULL && safe_str_neq(uname, node)) {
continue;
}
this_node = pe_find_node(data_set->nodes, uname);
CRM_CHECK(this_node != NULL, continue);
determine_online_status(node_state, this_node, data_set);
if(this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
/* offline nodes run no resources...
* unless stonith is enabled in which case we need to
* make sure rsc start events happen after the stonith
*/
xmlNode *lrm_rsc = NULL;
tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
for(lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next(lrm_rsc)) {
if(crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
if(rsc != NULL && safe_str_neq(rsc_id, rsc)) {
continue;
}
intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
output = g_list_concat(output, intermediate);
}
}
}
}
}
return output;
}

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jan 25, 12:28 PM (13 h, 10 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1322566
Default Alt Text
(89 KB)

Event Timeline