Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F2020072
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
115 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index a6b8d0aa52..6bb0010c2b 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,3160 +1,3156 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
#include <unpack.h>
CRM_TRACE_INIT_DATA(pe_status);
#define set_config_flag(data_set, option, flag) do { \
const char *tmp = pe_pref(data_set->config_hash, option); \
if(tmp) { \
if(crm_is_true(tmp)) { \
set_bit(data_set->flags, flag); \
} else { \
clear_bit(data_set->flags, flag); \
} \
} \
} while(0)
gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
enum action_fail_response *failed, pe_working_set_t * data_set);
static gboolean determine_remote_online_status(node_t * this_node);
static void
pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
{
CRM_CHECK(node, return);
/* fence remote nodes living in a container by marking the container as failed. */
if (is_container_remote_node(node)) {
resource_t *rsc = node->details->remote_rsc->container;
if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
crm_warn("Remote node %s will be fenced by recovering container resource %s",
node->details->uname, rsc->id, reason);
set_bit(rsc->flags, pe_rsc_failed);
}
} else if (node->details->unclean == FALSE) {
if(pe_can_fence(data_set, node)) {
crm_warn("Node %s will be fenced %s", node->details->uname, reason);
} else {
crm_warn("Node %s is unclean %s", node->details->uname, reason);
}
node->details->unclean = TRUE;
}
}
gboolean
unpack_config(xmlNode * config, pe_working_set_t * data_set)
{
const char *value = NULL;
GHashTable *config_hash =
g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
xmlXPathObjectPtr xpathObj = NULL;
if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
xpathObj = xpath_search(data_set->input, "//nvpair[@name='provides' and @value='unfencing']");
if(xpathObj && numXpathResults(xpathObj) > 0) {
set_bit(data_set->flags, pe_flag_enable_unfencing);
}
freeXpathObject(xpathObj);
}
if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
xpathObj = xpath_search(data_set->input, "//nvpair[@name='requires' and @value='unfencing']");
if(xpathObj && numXpathResults(xpathObj) > 0) {
set_bit(data_set->flags, pe_flag_enable_unfencing);
}
freeXpathObject(xpathObj);
}
#ifdef REDHAT_COMPAT_6
if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
xpathObj = xpath_search(data_set->input, "//primitive[@type='fence_scsi']");
if(xpathObj && numXpathResults(xpathObj) > 0) {
set_bit(data_set->flags, pe_flag_enable_unfencing);
}
freeXpathObject(xpathObj);
}
#endif
data_set->config_hash = config_hash;
unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, data_set->now);
verify_pe_options(data_set->config_hash);
set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
crm_info("Startup probes: disabled (dangerous)");
}
value = pe_pref(data_set->config_hash, "stonith-timeout");
data_set->stonith_timeout = crm_get_msec(value);
crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
crm_debug("STONITH of failed nodes is %s",
is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
crm_trace("STONITH will %s nodes", data_set->stonith_action);
set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
crm_debug("Stop all active resources: %s",
is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
}
value = pe_pref(data_set->config_hash, "default-resource-stickiness");
data_set->default_resource_stickiness = char2score(value);
crm_debug("Default stickiness: %d", data_set->default_resource_stickiness);
value = pe_pref(data_set->config_hash, "no-quorum-policy");
if (safe_str_eq(value, "ignore")) {
data_set->no_quorum_policy = no_quorum_ignore;
} else if (safe_str_eq(value, "freeze")) {
data_set->no_quorum_policy = no_quorum_freeze;
} else if (safe_str_eq(value, "suicide")) {
gboolean do_panic = FALSE;
crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic);
if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
crm_config_err
("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false");
}
if (do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) {
data_set->no_quorum_policy = no_quorum_suicide;
} else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) {
crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
data_set->no_quorum_policy = no_quorum_stop;
}
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
crm_debug("On loss of CCM Quorum: Freeze resources");
break;
case no_quorum_stop:
crm_debug("On loss of CCM Quorum: Stop ALL resources");
break;
case no_quorum_suicide:
crm_notice("On loss of CCM Quorum: Fence all remaining nodes");
break;
case no_quorum_ignore:
crm_notice("On loss of CCM Quorum: Ignore");
break;
}
set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
crm_trace("Orphan resources are %s",
is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
crm_trace("Orphan resource actions are %s",
is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
crm_trace("Stopped resources are removed from the status section: %s",
is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
crm_trace("Maintenance mode: %s",
is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
clear_bit(data_set->flags, pe_flag_is_managed_default);
} else {
set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default);
}
crm_trace("By default resources are %smanaged",
is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not ");
set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
crm_trace("Start failures are %s",
is_set(data_set->flags,
pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
pe_pref(data_set->config_hash, "node-health-red"),
pe_pref(data_set->config_hash, "node-health-yellow"),
pe_pref(data_set->config_hash, "node-health-green"));
data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
crm_trace("Placement strategy: %s", data_set->placement_strategy);
return TRUE;
}
static void
destroy_digest_cache(gpointer ptr)
{
op_digest_cache_t *data = ptr;
free_xml(data->params_all);
free_xml(data->params_restart);
free(data->digest_all_calc);
free(data->digest_restart_calc);
free(data);
}
static node_t *
create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set)
{
node_t *new_node = NULL;
if (pe_find_node(data_set->nodes, uname) != NULL) {
crm_config_warn("Detected multiple node entries with uname=%s"
" - this is rarely intended", uname);
}
new_node = calloc(1, sizeof(node_t));
if (new_node == NULL) {
return NULL;
}
new_node->weight = char2score(score);
new_node->fixed = FALSE;
new_node->details = calloc(1, sizeof(struct node_shared_s));
if (new_node->details == NULL) {
free(new_node);
return NULL;
}
crm_trace("Creating node for entry %s/%s", uname, id);
new_node->details->id = id;
new_node->details->uname = uname;
new_node->details->online = FALSE;
new_node->details->shutdown = FALSE;
new_node->details->running_rsc = NULL;
new_node->details->type = node_ping;
if (safe_str_eq(type, "remote")) {
new_node->details->type = node_remote;
set_bit(data_set->flags, pe_flag_have_remote_nodes);
} else if (type == NULL || safe_str_eq(type, "member")
|| safe_str_eq(type, NORMALNODE)) {
new_node->details->type = node_member;
}
new_node->details->attrs = g_hash_table_new_full(crm_str_hash, g_str_equal,
g_hash_destroy_str,
g_hash_destroy_str);
new_node->details->utilization =
g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
g_hash_destroy_str);
new_node->details->digest_cache =
g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
destroy_digest_cache);
data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname);
return new_node;
}
static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_check)
{
xmlNode *xml_rsc = NULL;
xmlNode *xml_tmp = NULL;
xmlNode *attr_set = NULL;
xmlNode *attr = NULL;
const char *container_id = ID(xml_obj);
const char *remote_name = NULL;
const char *remote_server = NULL;
const char *remote_port = NULL;
const char *connect_timeout = "60s";
const char *remote_allow_migrate=NULL;
char *tmp_id = NULL;
for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next(attr_set)) {
if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
continue;
}
for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next(attr)) {
const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
if (safe_str_eq(name, "remote-node")) {
remote_name = value;
} else if (safe_str_eq(name, "remote-addr")) {
remote_server = value;
} else if (safe_str_eq(name, "remote-port")) {
remote_port = value;
} else if (safe_str_eq(name, "remote-connect-timeout")) {
connect_timeout = value;
} else if (safe_str_eq(name, "remote-allow-migrate")) {
remote_allow_migrate=value;
}
}
}
if (remote_name == NULL) {
return NULL;
}
if (*rsc_name_check == NULL) {
*rsc_name_check = g_hash_table_new(crm_str_hash, g_str_equal);
for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) {
const char *id = ID(xml_rsc);
/* avoiding heap allocation here because we know the duration of this hashtable allows us to */
g_hash_table_insert(*rsc_name_check, (char *) id, (char *) id);
}
}
if (g_hash_table_lookup(*rsc_name_check, remote_name)) {
crm_err("Naming conflict with remote-node=%s. remote-nodes can not have the same name as a resource.",
remote_name);
return NULL;
}
xml_rsc = create_xml_node(parent, XML_CIB_TAG_RESOURCE);
crm_xml_add(xml_rsc, XML_ATTR_ID, remote_name);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, "ocf");
crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, "pacemaker");
crm_xml_add(xml_rsc, XML_ATTR_TYPE, "remote");
xml_tmp = create_xml_node(xml_rsc, XML_TAG_META_SETS);
tmp_id = crm_concat(remote_name, XML_TAG_META_SETS, '_');
crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id);
free(tmp_id);
attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR);
tmp_id = crm_concat(remote_name, "meta-attributes-container", '_');
crm_xml_add(attr, XML_ATTR_ID, tmp_id);
crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_CONTAINER);
crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, container_id);
free(tmp_id);
attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR);
tmp_id = crm_concat(remote_name, "meta-attributes-internal", '_');
crm_xml_add(attr, XML_ATTR_ID, tmp_id);
crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_INTERNAL_RSC);
crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, "true");
free(tmp_id);
if (remote_allow_migrate) {
attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR);
tmp_id = crm_concat(remote_name, "meta-attributes-container", '_');
crm_xml_add(attr, XML_ATTR_ID, tmp_id);
crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_ALLOW_MIGRATE);
crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_allow_migrate);
free(tmp_id);
}
xml_tmp = create_xml_node(xml_rsc, "operations");
attr = create_xml_node(xml_tmp, XML_ATTR_OP);
tmp_id = crm_concat(remote_name, "monitor-interval-30s", '_');
crm_xml_add(attr, XML_ATTR_ID, tmp_id);
crm_xml_add(attr, XML_ATTR_TIMEOUT, "30s");
crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "30s");
crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "monitor");
free(tmp_id);
if (connect_timeout) {
attr = create_xml_node(xml_tmp, XML_ATTR_OP);
tmp_id = crm_concat(remote_name, "start-interval-0", '_');
crm_xml_add(attr, XML_ATTR_ID, tmp_id);
crm_xml_add(attr, XML_ATTR_TIMEOUT, connect_timeout);
crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "0");
crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "start");
free(tmp_id);
}
if (remote_port || remote_server) {
xml_tmp = create_xml_node(xml_rsc, XML_TAG_ATTR_SETS);
tmp_id = crm_concat(remote_name, XML_TAG_ATTR_SETS, '_');
crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id);
free(tmp_id);
if (remote_server) {
attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR);
tmp_id = crm_concat(remote_name, "instance-attributes-addr", '_');
crm_xml_add(attr, XML_ATTR_ID, tmp_id);
crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "addr");
crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_server);
free(tmp_id);
}
if (remote_port) {
attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR);
tmp_id = crm_concat(remote_name, "instance-attributes-port", '_');
crm_xml_add(attr, XML_ATTR_ID, tmp_id);
crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "port");
crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_port);
free(tmp_id);
}
}
return remote_name;
}
static void
handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
{
static const char *blind_faith = NULL;
static gboolean unseen_are_unclean = TRUE;
- static gboolean init_startup_fence_params = FALSE;
if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
/* ignore fencing remote-nodes that don't have a conneciton resource associated
* with them. This happens when remote-node entries get left in the nodes section
* after the connection resource is removed */
return;
}
- if (init_startup_fence_params == FALSE) {
- blind_faith = pe_pref(data_set->config_hash, "startup-fencing");
- init_startup_fence_params = TRUE;
+ blind_faith = pe_pref(data_set->config_hash, "startup-fencing");
- if (crm_is_true(blind_faith) == FALSE) {
- unseen_are_unclean = FALSE;
- crm_warn("Blind faith: not fencing unseen nodes");
- }
+ if (crm_is_true(blind_faith) == FALSE) {
+ unseen_are_unclean = FALSE;
+ crm_warn("Blind faith: not fencing unseen nodes");
}
if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE
|| unseen_are_unclean == FALSE) {
/* blind faith... */
new_node->details->unclean = FALSE;
} else {
/* all nodes are unclean until we've seen their
* status entry
*/
new_node->details->unclean = TRUE;
}
/* We need to be able to determine if a node's status section
* exists or not separate from whether the node is unclean. */
new_node->details->unseen = TRUE;
}
gboolean
unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
node_t *new_node = NULL;
const char *id = NULL;
const char *uname = NULL;
const char *type = NULL;
const char *score = NULL;
for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) {
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
new_node = NULL;
id = crm_element_value(xml_obj, XML_ATTR_ID);
uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
type = crm_element_value(xml_obj, XML_ATTR_TYPE);
score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
crm_trace("Processing node %s/%s", uname, id);
if (id == NULL) {
crm_config_err("Must specify id tag in <node>");
continue;
}
new_node = create_node(id, uname, type, score, data_set);
if (new_node == NULL) {
return FALSE;
}
/* if(data_set->have_quorum == FALSE */
/* && data_set->no_quorum_policy == no_quorum_stop) { */
/* /\* start shutting resources down *\/ */
/* new_node->weight = -INFINITY; */
/* } */
handle_startup_fencing(data_set, new_node);
add_node_attrs(xml_obj, new_node, FALSE, data_set);
unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL,
new_node->details->utilization, NULL, FALSE, data_set->now);
crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
}
}
if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
crm_info("Creating a fake local node");
create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set);
}
return TRUE;
}
static void
setup_container(resource_t * rsc, pe_working_set_t * data_set)
{
const char *container_id = NULL;
if (rsc->children) {
GListPtr gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
setup_container(child_rsc, data_set);
}
return;
}
container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
if (container_id && safe_str_neq(container_id, rsc->id)) {
resource_t *container = pe_find_resource(data_set->resources, container_id);
if (container) {
rsc->container = container;
container->fillers = g_list_append(container->fillers, rsc);
pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
} else {
pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
}
}
}
gboolean
unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
GHashTable *rsc_name_check = NULL;
/* generate remote nodes from resource config before unpacking resources */
for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) {
const char *new_node_id = NULL;
/* remote rsc can be defined as primitive, or exist within the metadata of another rsc */
if (xml_contains_remote_node(xml_obj)) {
new_node_id = ID(xml_obj);
/* This check is here to make sure we don't iterate over
* an expanded node that has already been added to the node list. */
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) != NULL) {
continue;
}
} else {
/* expands a metadata defined remote resource into the xml config
* as an actual rsc primitive to be unpacked later. */
new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, &rsc_name_check);
}
if (new_node_id) {
crm_trace("detected remote node %s", new_node_id);
/* only create the remote node entry if the node didn't already exist */
if (pe_find_node(data_set->nodes, new_node_id) == NULL) {
create_node(new_node_id, new_node_id, "remote", NULL, data_set);
}
}
}
if (rsc_name_check) {
g_hash_table_destroy(rsc_name_check);
}
return TRUE;
}
/* Call this after all the nodes and resources have been
* unpacked, but before the status section is read.
*
* A remote node's online status is reflected by the state
* of the remote node's connection resource. We need to link
* the remote node to this connection resource so we can have
* easy access to the connection resource during the PE calculations.
*/
static void
link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
{
node_t *remote_node = NULL;
if (new_rsc->is_remote_node == FALSE) {
return;
}
if (is_set(data_set->flags, pe_flag_quick_location)) {
/* remote_nodes and remote_resources are not linked in quick location calculations */
return;
}
print_resource(LOG_DEBUG_3, "Linking remote-node connection resource, ", new_rsc, FALSE);
remote_node = pe_find_node(data_set->nodes, new_rsc->id);
CRM_CHECK(remote_node != NULL, return;);
remote_node->details->remote_rsc = new_rsc;
/* If this is a baremetal remote-node (no container resource
* associated with it) then we need to handle startup fencing the same way
* as cluster nodes. */
if (new_rsc->container == NULL) {
handle_startup_fencing(data_set, remote_node);
return;
}
}
static void
destroy_tag(gpointer data)
{
tag_t *tag = data;
if (tag) {
free(tag->id);
g_list_free_full(tag->refs, free);
free(tag);
}
}
gboolean
unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
GListPtr gIter = NULL;
data_set->template_rsc_sets =
g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
destroy_tag);
for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) {
resource_t *new_rsc = NULL;
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
const char *template_id = ID(xml_obj);
if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets,
template_id, NULL, NULL) == FALSE) {
/* Record the template's ID for the knowledge of its existence anyway. */
g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
}
continue;
}
crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj));
if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) {
data_set->resources = g_list_append(data_set->resources, new_rsc);
if (xml_contains_remote_node(xml_obj)) {
new_rsc->is_remote_node = TRUE;
}
print_resource(LOG_DEBUG_3, "Added ", new_rsc, FALSE);
} else {
crm_config_err("Failed unpacking %s %s",
crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID));
if (new_rsc != NULL && new_rsc->fns != NULL) {
new_rsc->fns->free(new_rsc);
}
}
}
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
setup_container(rsc, data_set);
link_rsc2remotenode(data_set, rsc);
}
data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
if (is_not_set(data_set->flags, pe_flag_quick_location)
&& is_set(data_set->flags, pe_flag_stonith_enabled)
&& is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
}
return TRUE;
}
gboolean
unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
{
xmlNode *xml_tag = NULL;
data_set->tags =
g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag);
for (xml_tag = __xml_first_child(xml_tags); xml_tag != NULL; xml_tag = __xml_next(xml_tag)) {
xmlNode *xml_obj_ref = NULL;
const char *tag_id = ID(xml_tag);
if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
continue;
}
if (tag_id == NULL) {
crm_config_err("Failed unpacking %s: %s should be specified",
crm_element_name(xml_tag), XML_ATTR_ID);
continue;
}
for (xml_obj_ref = __xml_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next(xml_obj_ref)) {
const char *obj_ref = ID(xml_obj_ref);
if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
continue;
}
if (obj_ref == NULL) {
crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID);
continue;
}
if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
return FALSE;
}
}
}
return TRUE;
}
/* The ticket state section:
* "/cib/status/tickets/ticket_state" */
static gboolean
unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
{
const char *ticket_id = NULL;
const char *granted = NULL;
const char *last_granted = NULL;
const char *standby = NULL;
xmlAttrPtr xIter = NULL;
ticket_t *ticket = NULL;
ticket_id = ID(xml_ticket);
if (ticket_id == NULL || strlen(ticket_id) == 0) {
return FALSE;
}
crm_trace("Processing ticket state for %s", ticket_id);
ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
if (ticket == NULL) {
ticket = ticket_new(ticket_id, data_set);
if (ticket == NULL) {
return FALSE;
}
}
for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = crm_element_value(xml_ticket, prop_name);
if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
continue;
}
g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
}
granted = g_hash_table_lookup(ticket->state, "granted");
if (granted && crm_is_true(granted)) {
ticket->granted = TRUE;
crm_info("We have ticket '%s'", ticket->id);
} else {
ticket->granted = FALSE;
crm_info("We do not have ticket '%s'", ticket->id);
}
last_granted = g_hash_table_lookup(ticket->state, "last-granted");
if (last_granted) {
ticket->last_granted = crm_parse_int(last_granted, 0);
}
standby = g_hash_table_lookup(ticket->state, "standby");
if (standby && crm_is_true(standby)) {
ticket->standby = TRUE;
if (ticket->granted) {
crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
}
} else {
ticket->standby = FALSE;
}
crm_trace("Done with ticket state for %s", ticket_id);
return TRUE;
}
static gboolean
unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) {
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) {
continue;
}
unpack_ticket_state(xml_obj, data_set);
}
return TRUE;
}
/* Compatibility with the deprecated ticket state section:
* "/cib/status/tickets/instance_attributes" */
static void
get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data)
{
const char *long_key = key;
char *state_key = NULL;
const char *granted_prefix = "granted-ticket-";
const char *last_granted_prefix = "last-granted-";
static int granted_prefix_strlen = 0;
static int last_granted_prefix_strlen = 0;
const char *ticket_id = NULL;
const char *is_granted = NULL;
const char *last_granted = NULL;
const char *sep = NULL;
ticket_t *ticket = NULL;
pe_working_set_t *data_set = user_data;
if (granted_prefix_strlen == 0) {
granted_prefix_strlen = strlen(granted_prefix);
}
if (last_granted_prefix_strlen == 0) {
last_granted_prefix_strlen = strlen(last_granted_prefix);
}
if (strstr(long_key, granted_prefix) == long_key) {
ticket_id = long_key + granted_prefix_strlen;
if (strlen(ticket_id)) {
state_key = strdup("granted");
is_granted = value;
}
} else if (strstr(long_key, last_granted_prefix) == long_key) {
ticket_id = long_key + last_granted_prefix_strlen;
if (strlen(ticket_id)) {
state_key = strdup("last-granted");
last_granted = value;
}
} else if ((sep = strrchr(long_key, '-'))) {
ticket_id = sep + 1;
state_key = strndup(long_key, strlen(long_key) - strlen(sep));
}
if (ticket_id == NULL || strlen(ticket_id) == 0) {
free(state_key);
return;
}
if (state_key == NULL || strlen(state_key) == 0) {
free(state_key);
return;
}
ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
if (ticket == NULL) {
ticket = ticket_new(ticket_id, data_set);
if (ticket == NULL) {
free(state_key);
return;
}
}
g_hash_table_replace(ticket->state, state_key, strdup(value));
if (is_granted) {
if (crm_is_true(is_granted)) {
ticket->granted = TRUE;
crm_info("We have ticket '%s'", ticket->id);
} else {
ticket->granted = FALSE;
crm_info("We do not have ticket '%s'", ticket->id);
}
} else if (last_granted) {
ticket->last_granted = crm_parse_int(last_granted, 0);
}
}
/* remove nodes that are down, stopping */
/* create +ve rsc_to_node constraints between resources and the nodes they are running on */
/* anything else? */
gboolean
unpack_status(xmlNode * status, pe_working_set_t * data_set)
{
const char *id = NULL;
const char *uname = NULL;
xmlNode *state = NULL;
xmlNode *lrm_rsc = NULL;
node_t *this_node = NULL;
crm_trace("Beginning unpack");
if (data_set->tickets == NULL) {
data_set->tickets =
g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket);
}
for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
xmlNode *xml_tickets = state;
GHashTable *state_hash = NULL;
/* Compatibility with the deprecated ticket state section:
* Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */
state_hash =
g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
g_hash_destroy_str);
unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL,
state_hash, NULL, TRUE, data_set->now);
g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set);
if (state_hash) {
g_hash_table_destroy(state_hash);
}
/* Unpack the new "/cib/status/tickets/ticket_state"s */
unpack_tickets_state(xml_tickets, data_set);
}
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
xmlNode *attrs = NULL;
id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (uname == NULL) {
/* error */
continue;
} else if (this_node == NULL) {
crm_config_warn("Node %s in status section no longer exists", uname);
continue;
} else if (is_remote_node(this_node)) {
/* online state for remote nodes is determined by the rsc state
* after all the unpacking is done. */
continue;
}
crm_trace("Processing node id=%s, uname=%s", id, uname);
/* Mark the node as provisionally clean
* - at least we have seen it in the current cluster's lifetime
*/
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, this_node, TRUE, data_set);
if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) {
crm_info("Node %s is in standby-mode", this_node->details->uname);
this_node->details->standby = TRUE;
}
if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) {
crm_info("Node %s is in maintenance-mode", this_node->details->uname);
this_node->details->maintenance = TRUE;
}
crm_trace("determining node state");
determine_online_status(state, this_node, data_set);
if (this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) {
/* Everything else should flow from this automatically
* At least until the PE becomes able to migrate off healthy resources
*/
pe_fence_node(data_set, this_node, "because the cluster does not have quorum");
}
}
}
/* Now that we know all node states, we can safely handle migration ops */
for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
continue;
}
id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (this_node == NULL) {
crm_info("Node %s is unknown", id);
continue;
} else if (is_remote_node(this_node)) {
/* online status of remote node can not be determined until all other
* resource status is unpacked. */
continue;
} else if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
crm_trace("Processing lrm resource entries on healthy node: %s",
this_node->details->uname);
lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
unpack_lrm_resources(this_node, lrm_rsc, data_set);
}
}
/* now that the rest of the cluster's status is determined
* calculate remote-nodes */
unpack_remote_status(status, data_set);
return TRUE;
}
gboolean
unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
{
const char *id = NULL;
const char *uname = NULL;
GListPtr gIter = NULL;
xmlNode *state = NULL;
xmlNode *lrm_rsc = NULL;
node_t *this_node = NULL;
if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
crm_trace("no remote nodes to unpack");
return TRUE;
}
/* get online status */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
this_node = gIter->data;
if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
continue;
}
determine_remote_online_status(this_node);
}
/* process attributes */
for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
xmlNode *attrs = NULL;
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
continue;
}
id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
this_node = pe_find_node_any(data_set->nodes, id, uname);
if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
continue;
}
crm_trace("Processing remote node id=%s, uname=%s", id, uname);
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, this_node, TRUE, data_set);
if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) {
crm_info("Node %s is in standby-mode", this_node->details->uname);
this_node->details->standby = TRUE;
}
}
/* process node rsc status */
for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
continue;
}
id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
this_node = pe_find_node_any(data_set->nodes, id, uname);
if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
continue;
}
crm_trace("Processing lrm resource entries on healthy remote node: %s",
this_node->details->uname);
lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
unpack_lrm_resources(this_node, lrm_rsc, data_set);
}
return TRUE;
}
static gboolean
determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
node_t * this_node)
{
gboolean online = FALSE;
const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
if (!crm_is_true(in_cluster)) {
crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
} else if (safe_str_eq(is_peer, ONLINESTATUS)) {
if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
online = TRUE;
} else {
crm_debug("Node is not ready to run resources: %s", join);
}
} else if (this_node->details->expected_up == FALSE) {
crm_trace("CRMd is down: in_cluster=%s", crm_str(in_cluster));
crm_trace("\tis_peer=%s, join=%s, expected=%s",
crm_str(is_peer), crm_str(join), crm_str(exp_state));
} else {
/* mark it unclean */
this_node->details->unclean = TRUE;
crm_warn("Node %s is partially & un-expectedly down", this_node->details->uname);
crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
}
return online;
}
static gboolean
determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
node_t * this_node)
{
gboolean online = FALSE;
gboolean do_terminate = FALSE;
const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate");
/*
- XML_NODE_IN_CLUSTER ::= true|false
- XML_NODE_IS_PEER ::= true|false|online|offline
- XML_NODE_JOIN_STATE ::= member|down|pending|banned
- XML_NODE_EXPECTED ::= member|down
*/
if (crm_is_true(terminate)) {
do_terminate = TRUE;
} else if (terminate != NULL && strlen(terminate) > 0) {
/* could be a time() value */
char t = terminate[0];
if (t != '0' && isdigit(t)) {
do_terminate = TRUE;
}
}
crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
crm_str(join), crm_str(exp_state), do_terminate);
online = crm_is_true(in_cluster);
if (safe_str_eq(is_peer, ONLINESTATUS)) {
is_peer = XML_BOOLEAN_YES;
}
if (exp_state == NULL) {
exp_state = CRMD_JOINSTATE_DOWN;
}
if (this_node->details->shutdown) {
crm_debug("%s is shutting down", this_node->details->uname);
online = crm_is_true(is_peer); /* Slightly different criteria since we cant shut down a dead peer */
} else if (in_cluster == NULL) {
pe_fence_node(data_set, this_node, "because the peer has not been seen by the cluster");
} else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
pe_fence_node(data_set, this_node, "because it failed the pacemaker membership criteria");
} else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {
if (crm_is_true(in_cluster) || crm_is_true(is_peer)) {
crm_info("- Node %s is not ready to run resources", this_node->details->uname);
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
crm_trace("%s is down or still coming up", this_node->details->uname);
}
} else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
&& crm_is_true(in_cluster) == FALSE && crm_is_true(is_peer) == FALSE) {
crm_info("Node %s was just shot", this_node->details->uname);
online = FALSE;
} else if (crm_is_true(in_cluster) == FALSE) {
pe_fence_node(data_set, this_node, "because the node is no longer part of the cluster");
} else if (crm_is_true(is_peer) == FALSE) {
pe_fence_node(data_set, this_node, "because our peer process is no longer available");
/* Everything is running at this point, now check join state */
} else if (do_terminate) {
pe_fence_node(data_set, this_node, "because termination was requested");
} else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
crm_info("Node %s is active", this_node->details->uname);
} else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
|| safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
crm_info("Node %s is not ready to run resources", this_node->details->uname);
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
pe_fence_node(data_set, this_node, "because the peer was in an unknown state");
crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
}
return online;
}
static gboolean
determine_remote_online_status(node_t * this_node)
{
resource_t *rsc = this_node->details->remote_rsc;
resource_t *container = NULL;
if (rsc == NULL) {
this_node->details->online = FALSE;
goto remote_online_done;
}
container = rsc->container;
CRM_ASSERT(rsc != NULL);
/* If the resource is currently started, mark it online. */
if (rsc->role == RSC_ROLE_STARTED) {
crm_trace("Remote node %s is set to ONLINE. role == started", this_node->details->id);
this_node->details->online = TRUE;
}
/* consider this node shutting down if transitioning start->stop */
if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
crm_trace("Remote node %s shutdown. transition from start to stop role", this_node->details->id);
this_node->details->shutdown = TRUE;
}
/* Now check all the failure conditions. */
if (is_set(rsc->flags, pe_rsc_failed) ||
(rsc->role == RSC_ROLE_STOPPED) ||
(container && is_set(container->flags, pe_rsc_failed)) ||
(container && container->role == RSC_ROLE_STOPPED)) {
crm_trace("Remote node %s is set to OFFLINE. node is stopped or rsc failed.", this_node->details->id);
this_node->details->online = FALSE;
}
remote_online_done:
crm_trace("Remote node %s online=%s",
this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
return this_node->details->online;
}
gboolean
determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
{
gboolean online = FALSE;
const char *shutdown = NULL;
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
if (this_node == NULL) {
crm_config_err("No node to check");
return online;
}
this_node->details->shutdown = FALSE;
this_node->details->expected_up = FALSE;
shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN);
if (shutdown != NULL && safe_str_neq("0", shutdown)) {
this_node->details->shutdown = TRUE;
} else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
this_node->details->expected_up = TRUE;
}
if (this_node->details->type == node_ping) {
this_node->details->unclean = FALSE;
online = FALSE; /* As far as resource management is concerned,
* the node is safely offline.
* Anyone caught abusing this logic will be shot
*/
} else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
online = determine_online_status_no_fencing(data_set, node_state, this_node);
} else {
online = determine_online_status_fencing(data_set, node_state, this_node);
}
if (online) {
this_node->details->online = TRUE;
} else {
/* remove node from contention */
this_node->fixed = TRUE;
this_node->weight = -INFINITY;
}
if (online && this_node->details->shutdown) {
/* dont run resources here */
this_node->fixed = TRUE;
this_node->weight = -INFINITY;
}
if (this_node->details->type == node_ping) {
crm_info("Node %s is not a pacemaker node", this_node->details->uname);
} else if (this_node->details->unclean) {
pe_proc_warn("Node %s is unclean", this_node->details->uname);
} else if (this_node->details->online) {
crm_info("Node %s is %s", this_node->details->uname,
this_node->details->shutdown ? "shutting down" :
this_node->details->pending ? "pending" :
this_node->details->standby ? "standby" :
this_node->details->maintenance ? "maintenance" : "online");
} else {
crm_trace("Node %s is offline", this_node->details->uname);
}
return online;
}
char *
clone_strip(const char *last_rsc_id)
{
int lpc = 0;
char *zero = NULL;
CRM_CHECK(last_rsc_id != NULL, return NULL);
lpc = strlen(last_rsc_id);
while (--lpc > 0) {
switch (last_rsc_id[lpc]) {
case 0:
crm_err("Empty string: %s", last_rsc_id);
return NULL;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
case ':':
zero = calloc(1, lpc + 1);
memcpy(zero, last_rsc_id, lpc);
zero[lpc] = 0;
return zero;
default:
goto done;
}
}
done:
zero = strdup(last_rsc_id);
return zero;
}
char *
clone_zero(const char *last_rsc_id)
{
int lpc = 0;
char *zero = NULL;
CRM_CHECK(last_rsc_id != NULL, return NULL);
if (last_rsc_id != NULL) {
lpc = strlen(last_rsc_id);
}
while (--lpc > 0) {
switch (last_rsc_id[lpc]) {
case 0:
return NULL;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
case ':':
zero = calloc(1, lpc + 3);
memcpy(zero, last_rsc_id, lpc);
zero[lpc] = ':';
zero[lpc + 1] = '0';
zero[lpc + 2] = 0;
return zero;
default:
goto done;
}
}
done:
lpc = strlen(last_rsc_id);
zero = calloc(1, lpc + 3);
memcpy(zero, last_rsc_id, lpc);
zero[lpc] = ':';
zero[lpc + 1] = '0';
zero[lpc + 2] = 0;
crm_trace("%s -> %s", last_rsc_id, zero);
return zero;
}
static resource_t *
create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
resource_t *rsc = NULL;
xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
copy_in_properties(xml_rsc, rsc_entry);
crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
crm_log_xml_debug(xml_rsc, "Orphan resource");
if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
return NULL;
}
if (xml_contains_remote_node(xml_rsc)) {
node_t *node;
crm_debug("Detected orphaned remote node %s", rsc_id);
rsc->is_remote_node = TRUE;
node = pe_find_node(data_set->nodes, rsc_id);
if (node == NULL) {
node = create_node(rsc_id, rsc_id, "remote", NULL, data_set);
}
link_rsc2remotenode(data_set, rsc);
if (node) {
crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
node->details->shutdown = TRUE;
}
}
if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
/* This orphaned rsc needs to be mapped to a container. */
crm_trace("Detected orphaned container filler %s", rsc_id);
set_bit(rsc->flags, pe_rsc_orphan_container_filler);
}
set_bit(rsc->flags, pe_rsc_orphan);
data_set->resources = g_list_append(data_set->resources, rsc);
return rsc;
}
extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set);
static resource_t *
find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
const char *rsc_id)
{
GListPtr rIter = NULL;
resource_t *rsc = NULL;
gboolean skip_inactive = FALSE;
CRM_ASSERT(parent != NULL);
CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master);
CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
/* Find an instance active (or partially active for grouped clones) on the specified node */
pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
GListPtr nIter = NULL;
GListPtr locations = NULL;
resource_t *child = rIter->data;
child->fns->location(child, &locations, TRUE);
if (locations == NULL) {
pe_rsc_trace(child, "Resource %s, skip inactive", child->id);
continue;
}
for (nIter = locations; nIter && rsc == NULL; nIter = nIter->next) {
node_t *childnode = nIter->data;
if (childnode->details == node->details) {
/* ->find_rsc() because we might be a cloned group */
rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
if(rsc) {
pe_rsc_trace(rsc, "Resource %s, active", rsc->id);
}
}
/* Keep this block, it means we'll do the right thing if
* anyone toggles the unique flag to 'off'
*/
if (rsc && rsc->running_on) {
crm_notice("/Anonymous/ clone %s is already running on %s",
parent->id, node->details->uname);
skip_inactive = TRUE;
rsc = NULL;
}
}
g_list_free(locations);
}
/* Find an inactive instance */
if (skip_inactive == FALSE) {
pe_rsc_trace(parent, "Looking for %s anywhere", rsc_id);
for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
GListPtr locations = NULL;
resource_t *child = rIter->data;
if (is_set(child->flags, pe_rsc_block)) {
pe_rsc_trace(child, "Skip: blocked in stopped state");
continue;
}
child->fns->location(child, &locations, TRUE);
if (locations == NULL) {
/* ->find_rsc() because we might be a cloned group */
rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
pe_rsc_trace(parent, "Resource %s, empty slot", rsc->id);
}
g_list_free(locations);
}
}
if (rsc == NULL) {
/* Create an extra orphan */
resource_t *top = create_child_clone(parent, -1, data_set);
/* ->find_rsc() because we might be a cloned group */
rsc = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
CRM_ASSERT(rsc != NULL);
pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id,
node->details->uname);
}
if (safe_str_neq(rsc_id, rsc->id)) {
pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
rsc_id, node->details->uname, rsc->id,
is_set(rsc->flags, pe_rsc_orphan) ? " (ORPHAN)" : "");
}
return rsc;
}
static resource_t *
unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
xmlNode * rsc_entry)
{
resource_t *rsc = NULL;
resource_t *parent = NULL;
crm_trace("looking for %s", rsc_id);
rsc = pe_find_resource(data_set->resources, rsc_id);
/* no match */
if (rsc == NULL) {
/* Even when clone-max=0, we still create a single :0 orphan to match against */
char *tmp = clone_zero(rsc_id);
resource_t *clone0 = pe_find_resource(data_set->resources, tmp);
if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) {
rsc = clone0;
} else {
crm_trace("%s is not known as %s either", rsc_id, tmp);
}
parent = uber_parent(clone0);
free(tmp);
crm_trace("%s not found: %s", rsc_id, parent ? parent->id : "orphan");
} else if (rsc->variant > pe_native) {
crm_trace("%s is no longer a primitve resource, the lrm_resource entry is obsolete",
rsc_id);
return NULL;
} else {
parent = uber_parent(rsc);
}
if (parent && parent->variant > pe_group) {
if (is_not_set(parent->flags, pe_rsc_unique)) {
char *base = clone_strip(rsc_id);
rsc = find_anonymous_clone(data_set, node, parent, base);
CRM_ASSERT(rsc != NULL);
free(base);
}
if (rsc && safe_str_neq(rsc_id, rsc->id)) {
free(rsc->clone_name);
rsc->clone_name = strdup(rsc_id);
}
}
return rsc;
}
static resource_t *
process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
{
resource_t *rsc = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
clear_bit(rsc->flags, pe_rsc_managed);
} else {
GListPtr gIter = NULL;
print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE);
CRM_CHECK(rsc != NULL, return NULL);
resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
if (node->details->online && get_failcount(node, rsc, NULL, data_set)) {
action_t *clear_op = NULL;
action_t *ready = NULL;
if (is_remote_node(node)) {
char *pseudo_op_name = crm_concat(CRM_OP_PROBED, node->details->id, '_');
ready = get_pseudo_op(pseudo_op_name, data_set);
free(pseudo_op_name);
} else {
ready = get_pseudo_op(CRM_OP_PROBED, data_set);
}
clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'),
CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
pe_rsc_info(rsc, "Clearing failcount (%d) for orphaned resource %s on %s (%s)",
get_failcount(node, rsc, NULL, data_set), rsc->id, node->details->uname,
clear_op->uuid);
order_actions(clear_op, ready, pe_order_optional);
}
}
}
return rsc;
}
static void
process_rsc_state(resource_t * rsc, node_t * node,
enum action_fail_response on_fail,
xmlNode * migrate_op, pe_working_set_t * data_set)
{
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
/* process current state */
if (rsc->role != RSC_ROLE_UNKNOWN) {
resource_t *iter = rsc;
while (iter) {
if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
node_t *n = node_copy(node);
pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
n->details->uname);
g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
}
if (is_set(iter->flags, pe_rsc_unique)) {
break;
}
iter = iter->parent;
}
}
if (rsc->role > RSC_ROLE_STOPPED
&& node->details->online == FALSE && is_set(rsc->flags, pe_rsc_managed)) {
gboolean should_fence = FALSE;
/* if this is a remote_node living in a container, fence the container
* by recovering it. Mark the resource as unmanaged. Once the container
* and remote connenction are re-established, the status section will
* get reset in the crmd freeing up this resource to run again once we
* are sure we know the resources state. */
if (is_container_remote_node(node)) {
set_bit(rsc->flags, pe_rsc_failed);
should_fence = TRUE;
} else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
should_fence = TRUE;
}
if (should_fence) {
char *reason = g_strdup_printf("because %s is thought to be active there", rsc->id);
pe_fence_node(data_set, node, reason);
g_free(reason);
}
}
if (node->details->unclean) {
/* No extra processing needed
* Also allows resources to be started again after a node is shot
*/
on_fail = action_fail_ignore;
}
switch (on_fail) {
case action_fail_ignore:
/* nothing to do */
break;
case action_fail_fence:
/* treat it as if it is still running
* but also mark the node as unclean
*/
pe_fence_node(data_set, node, "because of resource failure(s)");
break;
case action_fail_standby:
node->details->standby = TRUE;
node->details->standby_onfail = TRUE;
break;
case action_fail_block:
/* is_managed == FALSE will prevent any
* actions being sent for the resource
*/
clear_bit(rsc->flags, pe_rsc_managed);
set_bit(rsc->flags, pe_rsc_block);
break;
case action_fail_migrate:
/* make sure it comes up somewhere else
* or not at all
*/
resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
break;
case action_fail_stop:
rsc->next_role = RSC_ROLE_STOPPED;
break;
case action_fail_recover:
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
set_bit(rsc->flags, pe_rsc_failed);
stop_action(rsc, node, FALSE);
}
break;
case action_fail_restart_container:
set_bit(rsc->flags, pe_rsc_failed);
if (rsc->container) {
stop_action(rsc->container, node, FALSE);
} else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
stop_action(rsc, node, FALSE);
}
break;
}
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
if (is_set(rsc->flags, pe_rsc_orphan)) {
if (is_set(rsc->flags, pe_rsc_managed)) {
crm_config_warn("Detected active orphan %s running on %s",
rsc->id, node->details->uname);
} else {
crm_config_warn("Cluster configured not to stop active orphans."
" %s must be stopped manually on %s",
rsc->id, node->details->uname);
}
}
native_add_running(rsc, node, data_set);
if (on_fail != action_fail_ignore) {
set_bit(rsc->flags, pe_rsc_failed);
}
} else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
/* Only do this for older status sections that included instance numbers
* Otherwise stopped instances will appear as orphans
*/
pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
free(rsc->clone_name);
rsc->clone_name = NULL;
} else {
char *key = stop_key(rsc);
GListPtr possible_matches = find_actions(rsc->actions, key, node);
GListPtr gIter = possible_matches;
for (; gIter != NULL; gIter = gIter->next) {
action_t *stop = (action_t *) gIter->data;
stop->flags |= pe_action_optional;
}
g_list_free(possible_matches);
free(key);
}
}
/* create active recurring operations as optional */
static void
process_recurring(node_t * node, resource_t * rsc,
int start_index, int stop_index,
GListPtr sorted_op_list, pe_working_set_t * data_set)
{
int counter = -1;
const char *task = NULL;
const char *status = NULL;
GListPtr gIter = sorted_op_list;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
int interval = 0;
char *key = NULL;
const char *id = ID(rsc_op);
const char *interval_s = NULL;
counter++;
if (node->details->online == FALSE) {
pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
break;
/* Need to check if there's a monitor for role="Stopped" */
} else if (start_index < stop_index && counter <= stop_index) {
pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname);
continue;
} else if (counter < start_index) {
pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter);
continue;
}
interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL);
interval = crm_parse_int(interval_s, "0");
if (interval == 0) {
pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname);
continue;
}
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (safe_str_eq(status, "-1")) {
pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname);
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
/* create the action */
key = generate_op_key(rsc->id, task, interval);
pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname);
custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
}
}
void
calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
{
int counter = -1;
int implied_monitor_start = -1;
int implied_master_start = -1;
const char *task = NULL;
const char *status = NULL;
GListPtr gIter = sorted_op_list;
*stop_index = -1;
*start_index = -1;
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (safe_str_eq(task, CRMD_ACTION_STOP)
&& safe_str_eq(status, "0")) {
*stop_index = counter;
} else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
*start_index = counter;
} else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
implied_monitor_start = counter;
}
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
implied_master_start = counter;
}
}
if (*start_index == -1) {
if (implied_master_start != -1) {
*start_index = implied_master_start;
} else if (implied_monitor_start != -1) {
*start_index = implied_monitor_start;
}
}
}
static resource_t *
unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
int stop_index = -1;
int start_index = -1;
enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
const char *task = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
resource_t *rsc = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
xmlNode *migrate_op = NULL;
xmlNode *rsc_op = NULL;
enum action_fail_response on_fail = FALSE;
enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
crm_trace("[%s] Processing %s on %s",
crm_element_name(rsc_entry), rsc_id, node->details->uname);
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
op_list = g_list_prepend(op_list, rsc_op);
}
}
if (op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
/* find the resource */
rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
if (rsc == NULL) {
rsc = process_orphan_resource(rsc_entry, node, data_set);
}
CRM_ASSERT(rsc != NULL);
/* process operations */
saved_role = rsc->role;
on_fail = action_fail_ignore;
rsc->role = RSC_ROLE_UNKNOWN;
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
migrate_op = rsc_op;
}
unpack_rsc_op(rsc, node, rsc_op, &on_fail, data_set);
}
/* create active recurring operations as optional */
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
/* no need to free the contents */
g_list_free(sorted_op_list);
process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
if (get_target_role(rsc, &req_role)) {
if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role), role2text(req_role));
rsc->next_role = req_role;
} else if (req_role > rsc->next_role) {
pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role), role2text(req_role));
}
}
if (saved_role > rsc->role) {
rsc->role = saved_role;
}
return rsc;
}
static void
handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
{
xmlNode *rsc_entry = NULL;
for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
rsc_entry = __xml_next(rsc_entry)) {
resource_t *rsc;
resource_t *container;
const char *rsc_id;
const char *container_id;
if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) {
continue;
}
container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
if (container_id == NULL || rsc_id == NULL) {
continue;
}
container = pe_find_resource(data_set->resources, container_id);
if (container == NULL) {
continue;
}
rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL ||
is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE ||
rsc->container != NULL) {
continue;
}
pe_rsc_trace(rsc, "Mapped orphaned rsc %s's container to %s", rsc->id, container_id);
rsc->container = container;
container->fillers = g_list_append(container->fillers, rsc);
}
}
gboolean
unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
{
xmlNode *rsc_entry = NULL;
gboolean found_orphaned_container_filler = FALSE;
GListPtr unexpected_containers = NULL;
GListPtr gIter = NULL;
resource_t *remote = NULL;
CRM_CHECK(node != NULL, return FALSE);
crm_trace("Unpacking resources on %s", node->details->uname);
for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
rsc_entry = __xml_next(rsc_entry)) {
if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
resource_t *rsc;
rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
if (!rsc) {
continue;
}
if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
found_orphaned_container_filler = TRUE;
}
if (is_set(rsc->flags, pe_rsc_unexpectedly_running)) {
remote = rsc_contains_remote_node(data_set, rsc);
if (remote) {
unexpected_containers = g_list_append(unexpected_containers, remote);
}
}
}
}
/* If a container resource is unexpectedly up... and the remote-node
* connection resource for that container is not up, the entire container
* must be recovered. */
for (gIter = unexpected_containers; gIter != NULL; gIter = gIter->next) {
remote = (resource_t *) gIter->data;
if (remote->role != RSC_ROLE_STARTED) {
crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.");
set_bit(remote->container->flags, pe_rsc_failed);
}
}
/* now that all the resource state has been unpacked for this node
* we have to go back and map any orphaned container fillers to their
* container resource */
if (found_orphaned_container_filler) {
handle_orphaned_container_fillers(lrm_rsc_list, data_set);
}
g_list_free(unexpected_containers);
return TRUE;
}
static void
set_active(resource_t * rsc)
{
resource_t *top = uber_parent(rsc);
if (top && top->variant == pe_master) {
rsc->role = RSC_ROLE_SLAVE;
} else {
rsc->role = RSC_ROLE_STARTED;
}
}
static void
set_node_score(gpointer key, gpointer value, gpointer user_data)
{
node_t *node = value;
int *score = user_data;
node->weight = *score;
}
#define STATUS_PATH_MAX 1024
static xmlNode *
find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
pe_working_set_t * data_set)
{
int offset = 0;
char xpath[STATUS_PATH_MAX];
offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node);
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
resource);
/* Need to check against transition_magic too? */
if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset,
"/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
source);
} else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset,
"/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
source);
} else {
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset,
"/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
}
CRM_LOG_ASSERT(offset > 0);
return get_xpath_object(xpath, data_set->input, LOG_DEBUG);
}
static void
unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
{
/*
* The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src)
*
* So if a migrate_to is followed by a stop, then we dont need to care what
* happended on the target node
*
* Without the stop, we need to look for a successful migrate_from.
* This would also imply we're no longer running on the source
*
* Without the stop, and without a migrate_from op we make sure the resource
* gets stopped on both source and target (assuming the target is up)
*
*/
int stop_id = 0;
int task_id = 0;
xmlNode *stop_op =
find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set);
if (stop_op) {
crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
}
crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
if (stop_op == NULL || stop_id < task_id) {
int from_rc = 0, from_status = 0;
const char *migrate_source =
crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *migrate_target =
crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
node_t *target = pe_find_node(data_set->nodes, migrate_target);
node_t *source = pe_find_node(data_set->nodes, migrate_source);
xmlNode *migrate_from =
find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
data_set);
rsc->role = RSC_ROLE_STARTED; /* can be master? */
if (migrate_from) {
crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
ID(migrate_from), migrate_target, from_status, from_rc);
}
if (migrate_from && from_rc == PCMK_OCF_OK
&& from_status == PCMK_LRM_OP_DONE) {
pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
migrate_source);
/* all good
* just need to arrange for the stop action to get sent
* but _without_ affecting the target somehow
*/
rsc->role = RSC_ROLE_STOPPED;
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
} else if (migrate_from) { /* Failed */
if (target && target->details->online) {
pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
target->details->online);
native_add_running(rsc, target, data_set);
}
} else { /* Pending or complete but erased */
if (target && target->details->online) {
pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
target->details->online);
native_add_running(rsc, target, data_set);
if (source && source->details->online) {
/* If we make it here we have a partial migration. The migrate_to
* has completed but the migrate_from on the target has not. Hold on
* to the target and source on the resource. Later on if we detect that
* the resource is still going to run on that target, we may continue
* the migration */
rsc->partial_migration_target = target;
rsc->partial_migration_source = source;
}
} else {
/* Consider it failed here - forces a restart, prevents migration */
set_bit(rsc->flags, pe_rsc_failed);
clear_bit(rsc->flags, pe_rsc_allow_migrate);
}
}
}
}
static void
unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
{
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
CRM_ASSERT(rsc);
if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
int stop_id = 0;
int migrate_id = 0;
const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
xmlNode *stop_op =
find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set);
xmlNode *migrate_op =
find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target,
data_set);
if (stop_op) {
crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
}
if (migrate_op) {
crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
}
/* Get our state right */
rsc->role = RSC_ROLE_STARTED; /* can be master? */
if (stop_op == NULL || stop_id < migrate_id) {
node_t *source = pe_find_node(data_set->nodes, migrate_source);
if (source && source->details->online) {
native_add_running(rsc, source, data_set);
}
}
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
int stop_id = 0;
int migrate_id = 0;
const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
xmlNode *stop_op =
find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set);
xmlNode *migrate_op =
find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
data_set);
if (stop_op) {
crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
}
if (migrate_op) {
crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
}
/* Get our state right */
rsc->role = RSC_ROLE_STARTED; /* can be master? */
if (stop_op == NULL || stop_id < migrate_id) {
node_t *target = pe_find_node(data_set->nodes, migrate_target);
pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op,
migrate_id);
if (target && target->details->online) {
native_add_running(rsc, target, data_set);
}
} else if (migrate_op == NULL) {
/* Make sure it gets cleaned up, the stop may pre-date the migrate_from */
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
}
}
}
static const char *get_op_key(xmlNode *xml_op)
{
const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
if(key == NULL) {
key = ID(xml_op);
}
return key;
}
static void
unpack_rsc_op_failure(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, enum action_fail_response *on_fail, pe_working_set_t * data_set)
{
int interval = 0;
bool is_probe = FALSE;
action_t *action = NULL;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
CRM_ASSERT(rsc);
crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
is_probe = TRUE;
pe_rsc_trace(rsc, "is a probe: %s", key);
}
if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_warn("Processing failed op %s for %s on %s: %s (%d)",
task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
rc);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
add_node_copy(data_set->failed, xml_op);
}
} else {
crm_trace("Processing failed op %s for %s on %s: %s (%d)",
task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
rc);
}
action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) ||
(action->on_fail == action_fail_restart_container
&& *on_fail <= action_fail_recover) || (*on_fail == action_fail_restart_container
&& action->on_fail >=
action_fail_migrate)) {
pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
fail2text(action->on_fail), action->uuid, key);
*on_fail = action->on_fail;
}
if (safe_str_eq(task, CRMD_ACTION_STOP)) {
resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
unpack_rsc_migration_failure(rsc, node, xml_op, data_set);
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
} else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
/*
* staying in role=master ends up putting the PE/TE into a loop
* setting role=slave is not dangerous because no master will be
* promoted until the failed resource has been fully stopped
*/
rsc->next_role = RSC_ROLE_STOPPED;
if (action->on_fail == action_fail_block) {
rsc->role = RSC_ROLE_MASTER;
} else {
crm_warn("Forcing %s to stop after a failed demote action", rsc->id);
rsc->role = RSC_ROLE_SLAVE;
}
} else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) {
crm_warn("Compatibility handling for failed op %s on %s", key, node->details->uname);
resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set);
}
if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
/* leave stopped */
pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
rsc->role = RSC_ROLE_STOPPED;
} else if (rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(rsc, "Setting %s active", rsc->id);
set_active(rsc);
}
pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
rsc->id, role2text(rsc->role),
node->details->unclean ? "true" : "false",
fail2text(action->on_fail), role2text(action->fail_role));
if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
rsc->next_role = action->fail_role;
}
if (action->fail_role == RSC_ROLE_STOPPED) {
int score = -INFINITY;
resource_t *fail_rsc = rsc;
if (fail_rsc->parent) {
resource_t *parent = uber_parent(fail_rsc);
if ((parent->variant == pe_clone || parent->variant == pe_master)
&& is_not_set(parent->flags, pe_rsc_unique)) {
/* for clone and master resources, if a child fails on an operation
* with on-fail = stop, all the resources fail. Do this by preventing
* the parent from coming up again. */
fail_rsc = parent;
}
}
crm_warn("Making sure %s doesn't come up again", fail_rsc->id);
/* make sure it doesnt come up again */
g_hash_table_destroy(fail_rsc->allowed_nodes);
fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
}
pe_free_action(action);
}
static int
determine_op_status(
resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
int interval = 0;
int result = PCMK_LRM_OP_DONE;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
bool is_probe = FALSE;
CRM_ASSERT(rsc);
crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
is_probe = TRUE;
}
if (target_rc >= 0 && target_rc != rc) {
result = PCMK_LRM_OP_ERROR;
pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
key, node->details->uname,
services_ocf_exitcode_str(rc), rc,
services_ocf_exitcode_str(target_rc), target_rc);
}
/* we could clean this up significantly except for old LRMs and CRMs that
* didnt include target_rc and liked to remap status
*/
switch (rc) {
case PCMK_OCF_OK:
if (is_probe && target_rc == 7) {
result = PCMK_LRM_OP_DONE;
set_bit(rsc->flags, pe_rsc_unexpectedly_running);
pe_rsc_info(rsc, "Operation %s found resource %s active on %s",
task, rsc->id, node->details->uname);
/* legacy code for pre-0.6.5 operations */
} else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) {
/* catch status ops that return 0 instead of 8 while they
* are supposed to be in master mode
*/
result = PCMK_LRM_OP_ERROR;
}
break;
case PCMK_OCF_NOT_RUNNING:
if (is_probe || target_rc == rc) {
result = PCMK_LRM_OP_DONE;
rsc->role = RSC_ROLE_STOPPED;
/* clear any previous failure actions */
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
} else if (safe_str_neq(task, CRMD_ACTION_STOP)) {
result = PCMK_LRM_OP_ERROR;
}
break;
case PCMK_OCF_RUNNING_MASTER:
if (is_probe) {
result = PCMK_LRM_OP_DONE;
pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s",
task, rsc->id, node->details->uname);
} else if (target_rc == rc) {
/* nothing to do */
} else if (target_rc >= 0) {
result = PCMK_LRM_OP_ERROR;
/* legacy code for pre-0.6.5 operations */
} else if (safe_str_neq(task, CRMD_ACTION_STATUS)
|| rsc->role != RSC_ROLE_MASTER) {
result = PCMK_LRM_OP_ERROR;
if (rsc->role != RSC_ROLE_MASTER) {
crm_err("%s reported %s in master mode on %s",
key, rsc->id, node->details->uname);
}
}
rsc->role = RSC_ROLE_MASTER;
break;
case PCMK_OCF_FAILED_MASTER:
rsc->role = RSC_ROLE_MASTER;
result = PCMK_LRM_OP_ERROR;
break;
case PCMK_OCF_NOT_CONFIGURED:
result = PCMK_LRM_OP_ERROR_FATAL;
break;
case PCMK_OCF_NOT_INSTALLED:
case PCMK_OCF_INVALID_PARAM:
case PCMK_OCF_INSUFFICIENT_PRIV:
case PCMK_OCF_UNIMPLEMENT_FEATURE:
if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && interval > 0) {
result = PCMK_LRM_OP_NOTSUPPORTED;
break;
} else if(pe_can_fence(data_set, node) == FALSE
&& safe_str_eq(task, CRMD_ACTION_STOP)) {
/* If a stop fails and we can't fence, there's nothing else we can do */
pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
rsc->id, task, services_ocf_exitcode_str(rc), rc);
clear_bit(rsc->flags, pe_rsc_managed);
set_bit(rsc->flags, pe_rsc_block);
}
result = PCMK_LRM_OP_ERROR_HARD;
break;
default:
if (result == PCMK_LRM_OP_DONE) {
crm_info("Treating %s (rc=%d) on %s as an ERROR",
key, rc, node->details->uname);
result = PCMK_LRM_OP_ERROR;
}
}
return result;
}
static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set)
{
bool expired = FALSE;
time_t last_failure = 0;
int clear_failcount = 0;
int interval = 0;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
if (rsc->failure_timeout > 0) {
int last_run = 0;
if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
time_t now = get_effective_time(data_set);
if (now > (last_run + rsc->failure_timeout)) {
expired = TRUE;
}
}
}
if (expired) {
if (rsc->failure_timeout > 0) {
int fc = get_failcount_full(node, rsc, &last_failure, FALSE, xml_op, data_set);
if(fc) {
if (get_failcount_full(node, rsc, &last_failure, TRUE, xml_op, data_set) == 0) {
clear_failcount = 1;
crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname);
} else {
expired = FALSE;
}
}
}
} else if (strstr(ID(xml_op), "last_failure") &&
((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) {
op_digest_cache_t *digest_data = NULL;
digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
if (digest_data->rc == RSC_DIGEST_UNKNOWN) {
crm_trace("rsc op %s on node %s does not have a op digest to compare against", rsc->id,
key, node->details->id);
} else if (digest_data->rc != RSC_DIGEST_MATCH) {
clear_failcount = 1;
crm_info
("Clearing failcount for %s on %s, %s failed and now resource parameters have changed.",
task, rsc->id, node->details->uname);
}
}
if (clear_failcount) {
action_t *clear_op = NULL;
clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'),
CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
}
crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
if(expired && interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
switch(rc) {
case PCMK_OCF_OK:
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_MASTER:
/* Don't expire probes that return these values */
expired = FALSE;
break;
}
}
return expired;
}
int get_target_rc(xmlNode *xml_op)
{
int dummy = 0;
int target_rc = 0;
char *dummy_string = NULL;
const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
if (key == NULL) {
return -1;
}
decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc);
free(dummy_string);
return target_rc;
}
static enum action_fail_response
get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
{
int result = action_fail_recover;
action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
result = action->on_fail;
pe_free_action(action);
return result;
}
static void
update_resource_state(resource_t *rsc, node_t * node, xmlNode * xml_op, const char *task, int rc,
enum action_fail_response *on_fail, pe_working_set_t * data_set)
{
gboolean clear_past_failure = FALSE;
CRM_ASSERT(rsc);
if (rc == PCMK_OCF_NOT_RUNNING) {
clear_past_failure = TRUE;
} else if (rc == PCMK_OCF_NOT_INSTALLED) {
rsc->role = RSC_ROLE_STOPPED;
} else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
clear_past_failure = TRUE;
if (rsc->role < RSC_ROLE_STARTED) {
set_active(rsc);
}
} else if (safe_str_eq(task, CRMD_ACTION_START)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
rsc->role = RSC_ROLE_STOPPED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
/* Demote from Master does not clear an error */
rsc->role = RSC_ROLE_SLAVE;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
unpack_rsc_migration(rsc, node, xml_op, data_set);
} else if (rsc->role < RSC_ROLE_STARTED) {
/* migrate_to and migrate_from will land here */
pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
set_active(rsc);
}
/* clear any previous failure actions */
if (clear_past_failure) {
switch (*on_fail) {
case action_fail_stop:
case action_fail_fence:
case action_fail_migrate:
case action_fail_standby:
pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
rsc->id, fail2text(*on_fail));
break;
case action_fail_block:
case action_fail_ignore:
case action_fail_recover:
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
break;
case action_fail_restart_container:
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
}
}
}
gboolean
unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op,
enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
int task_id = 0;
const char *key = NULL;
const char *task = NULL;
const char *task_key = NULL;
int rc = 0;
int status = PCMK_LRM_OP_PENDING-1;
int target_rc = get_target_rc(xml_op);
int interval = 0;
gboolean expired = FALSE;
resource_t *parent = rsc;
enum action_fail_response failure_strategy = action_fail_recover;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(node != NULL, return FALSE);
CRM_CHECK(xml_op != NULL, return FALSE);
task_key = get_op_key(xml_op);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
CRM_CHECK(task != NULL, return FALSE);
CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE);
CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
/* safe to ignore these */
return TRUE;
}
if (is_not_set(rsc->flags, pe_rsc_unique)) {
parent = uber_parent(rsc);
}
pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
if (node->details->unclean) {
pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
" Further action depends on the value of the stop's on-fail attribue",
node->details->uname, rsc->id);
}
if (status == PCMK_LRM_OP_ERROR) {
/* Older versions set this if rc != 0 but its up to us to decide */
status = PCMK_LRM_OP_DONE;
}
if(status != PCMK_LRM_OP_NOT_INSTALLED) {
expired = check_operation_expiry(rsc, node, rc, xml_op, data_set);
}
if (expired && target_rc != rc) {
const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
key, node->details->uname,
services_ocf_exitcode_str(rc), rc,
services_ocf_exitcode_str(target_rc), target_rc);
if(interval == 0) {
crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s",
task_key, rc, magic, node->details->uname);
goto done;
} else if(node->details->online && node->details->unclean == FALSE) {
crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s",
task_key, rc, magic, node->details->uname);
/* This is SO horrible, but we don't have access to CancelXmlOp() yet */
crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
goto done;
}
}
if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
}
pe_rsc_trace(rsc, "Handling status: %d", status);
switch (status) {
case PCMK_LRM_OP_CANCELLED:
/* do nothing?? */
pe_err("Dont know what to do for cancelled ops yet");
break;
case PCMK_LRM_OP_PENDING:
if (safe_str_eq(task, CRMD_ACTION_START)) {
set_bit(rsc->flags, pe_rsc_start_pending);
set_active(rsc);
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
/* If a pending migrate_to action is out on a unclean node,
* we have to force the stop action on the target. */
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
node_t *target = pe_find_node(data_set->nodes, migrate_target);
if (target) {
stop_action(rsc, target, FALSE);
}
}
if (rsc->pending_task == NULL) {
if (safe_str_eq(task, CRMD_ACTION_STATUS) && interval == 0) {
/* Comment this out until someone requests it */
/* Comment this out until cl#5184 is fixed */
/*rsc->pending_task = strdup("probe");*/
} else {
rsc->pending_task = strdup(task);
}
}
break;
case PCMK_LRM_OP_DONE:
pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname);
update_resource_state(rsc, node, xml_op, task, rc, on_fail, data_set);
break;
case PCMK_LRM_OP_NOT_INSTALLED:
failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
if (failure_strategy == action_fail_ignore) {
crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: "
"Resource agent doesn't exist",
task_key, status, rc, node->details->uname);
/* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
*on_fail = action_fail_migrate;
}
resource_location(parent, node, -INFINITY, "hard-error", data_set);
unpack_rsc_op_failure(rsc, node, rc, xml_op, on_fail, data_set);
break;
case PCMK_LRM_OP_ERROR:
case PCMK_LRM_OP_ERROR_HARD:
case PCMK_LRM_OP_ERROR_FATAL:
case PCMK_LRM_OP_TIMEOUT:
case PCMK_LRM_OP_NOTSUPPORTED:
failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
if ((failure_strategy == action_fail_ignore)
|| (failure_strategy == action_fail_restart_container
&& safe_str_eq(task, CRMD_ACTION_STOP))) {
crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded",
task_key, rc, node->details->uname);
update_resource_state(rsc, node, xml_op, task, target_rc, on_fail, data_set);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
set_bit(rsc->flags, pe_rsc_failure_ignored);
if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
add_node_copy(data_set->failed, xml_op);
}
if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) {
*on_fail = failure_strategy;
}
} else {
unpack_rsc_op_failure(rsc, node, rc, xml_op, on_fail, data_set);
if(status == PCMK_LRM_OP_ERROR_HARD) {
do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
"Preventing %s from re-starting on %s: operation %s failed '%s' (%d)",
parent->id, node->details->uname,
task, services_ocf_exitcode_str(rc), rc);
resource_location(parent, node, -INFINITY, "hard-error", data_set);
} else if(status == PCMK_LRM_OP_ERROR_FATAL) {
crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)",
parent->id, task, services_ocf_exitcode_str(rc), rc);
resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
}
}
break;
}
done:
pe_rsc_trace(rsc, "Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role));
return TRUE;
}
gboolean
add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set)
{
const char *cluster = NULL;
g_hash_table_insert(node->details->attrs,
strdup("#uname"), strdup(node->details->uname));
g_hash_table_insert(node->details->attrs,
strdup("#kind"), strdup(node->details->remote_rsc?"container":"cluster"));
g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_ID), strdup(node->details->id));
if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
data_set->dc_node = node;
node->details->is_dc = TRUE;
g_hash_table_insert(node->details->attrs,
strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_TRUE));
} else {
g_hash_table_insert(node->details->attrs,
strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_FALSE));
}
cluster = g_hash_table_lookup(data_set->config_hash, "cluster");
if (cluster) {
g_hash_table_insert(node->details->attrs, strdup("#cluster"), strdup(cluster));
}
unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
node->details->attrs, NULL, overwrite, data_set->now);
return TRUE;
}
static GListPtr
extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
{
int counter = -1;
int stop_index = -1;
int start_index = -1;
xmlNode *rsc_op = NULL;
GListPtr gIter = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
crm_xml_add(rsc_op, "resource", rsc);
crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
op_list = g_list_prepend(op_list, rsc_op);
}
}
if (op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
/* create active recurring operations as optional */
if (active_filter == FALSE) {
return sorted_op_list;
}
op_list = NULL;
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
if (start_index < stop_index) {
crm_trace("Skipping %s: not active", ID(rsc_entry));
break;
} else if (counter < start_index) {
crm_trace("Skipping %s: old", ID(rsc_op));
continue;
}
op_list = g_list_append(op_list, rsc_op);
}
g_list_free(sorted_op_list);
return op_list;
}
GListPtr
find_operations(const char *rsc, const char *node, gboolean active_filter,
pe_working_set_t * data_set)
{
GListPtr output = NULL;
GListPtr intermediate = NULL;
xmlNode *tmp = NULL;
xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
node_t *this_node = NULL;
xmlNode *node_state = NULL;
for (node_state = __xml_first_child(status); node_state != NULL;
node_state = __xml_next(node_state)) {
if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
if (node != NULL && safe_str_neq(uname, node)) {
continue;
}
this_node = pe_find_node(data_set->nodes, uname);
if (is_remote_node(this_node)) {
determine_remote_online_status(this_node);
} else {
determine_online_status(node_state, this_node, data_set);
}
if(this_node == NULL) {
CRM_LOG_ASSERT(this_node != NULL);
} else if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
/* offline nodes run no resources...
* unless stonith is enabled in which case we need to
* make sure rsc start events happen after the stonith
*/
xmlNode *lrm_rsc = NULL;
tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL;
lrm_rsc = __xml_next(lrm_rsc)) {
if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
continue;
}
intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
output = g_list_concat(output, intermediate);
}
}
}
}
}
return output;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Dec 23, 12:25 PM (1 d, 8 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1128336
Default Alt Text
(115 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment