diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index df9a133f44..8caf522ce9 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,3490 +1,3492 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit(data_set->flags, flag); \ } else { \ clear_bit(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response *failed, pe_working_set_t * data_set); static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node); static gboolean is_dangling_container_remote_node(node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (is_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) { return TRUE; } return FALSE; } void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (is_container_remote_node(node)) { resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { if (!is_set(rsc->flags, pe_rsc_managed)) { crm_notice("Not fencing guest node %s because the container is " "unmanaged, otherwise we would do so recovering %s " "%s", node->details->uname, rsc->id, reason); } else { crm_warn("Guest node %s will be fenced (by recovering %s) %s", node->details->uname, rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; set_bit(rsc->flags, pe_rsc_failed); } } } else if (is_dangling_container_remote_node(node)) { crm_info("Cleaning up dangling connection resource for guest node %s %s" " (fencing is already done, guest resource no longer exists)", node->details->uname, reason); set_bit(node->details->remote_rsc->flags, pe_rsc_failed); } else if (is_baremetal_remote_node(node)) { resource_t *rsc = node->details->remote_rsc; if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) { crm_notice("Not fencing node %s because connection is unmanaged, " "otherwise would %s", node->details->uname, reason); - } else { + } else if(node->details->remote_requires_reset == FALSE) { + node->details->remote_requires_reset = TRUE; if (pe_can_fence(data_set, node)) { crm_warn("Node %s will be fenced %s", node->details->uname, reason); } else { crm_warn("Node %s is unclean %s", node->details->uname, reason); } - node->details->remote_requires_reset = TRUE; } node->details->unclean = TRUE; + } else if (node->details->unclean == FALSE) { if (pe_can_fence(data_set, node)) { crm_warn("Node %s will be fenced %s", node->details->uname, reason); } else { crm_warn("Node %s is unclean %s", node->details->uname, reason); } node->details->unclean = TRUE; + } else { - crm_trace("Huh? %s %s", node->details->uname, reason); + crm_trace("Node %s would also be fenced '%s'", node->details->uname, reason); } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); xmlXPathObjectPtr xpathObj = NULL; if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//nvpair[@name='provides' and @value='unfencing']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//nvpair[@name='requires' and @value='unfencing']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } #ifdef REDHAT_COMPAT_6 if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//primitive[@type='fence_scsi']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } #endif data_set->config_hash = config_hash; unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if(is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_notice("Watchdog will be used via SBD if fencing is required"); set_bit(data_set->flags, pe_flag_have_stonith_resource); } value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); crm_debug("Concurrent fencing is %s", is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled"); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if (safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if (safe_str_eq(value, "suicide")) { gboolean do_panic = FALSE; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err ("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false"); } if (do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) { data_set->no_quorum_policy = no_quorum_suicide; } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) { crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of CCM Quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); if (is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit(data_set->flags, pe_flag_is_managed_default); } else { set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default); } crm_trace("By default resources are %smanaged", is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not "); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount"); node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); return TRUE; } static void destroy_digest_cache(gpointer ptr) { op_digest_cache_t *data = ptr; free_xml(data->params_all); free_xml(data->params_secure); free_xml(data->params_restart); free(data->digest_all_calc); free(data->digest_restart_calc); free(data->digest_secure_calc); free(data); } node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } new_node = calloc(1, sizeof(node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->type = node_ping; if (safe_str_eq(type, "remote")) { new_node->details->type = node_remote; set_bit(data_set->flags, pe_flag_have_remote_nodes); } else if (type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } new_node->details->attrs = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (is_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup("#kind"), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup("#kind"), strdup("cluster")); } new_node->details->utilization = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_digest_cache); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname); return new_node; } bool remote_id_conflict(const char *remote_name, pe_working_set_t *data) { bool match = FALSE; #if 1 pe_find_resource(data->resources, remote_name); #else if (data->name_check == NULL) { data->name_check = g_hash_table_new(crm_str_hash, g_str_equal); for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) { const char *id = ID(xml_rsc); /* avoiding heap allocation here because we know the duration of this hashtable allows us to */ g_hash_table_insert(data->name_check, (char *) id, (char *) id); } } if (g_hash_table_lookup(data->name_check, remote_name)) { match = TRUE; } #endif if (match) { crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name); return NULL; } return match; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data) { xmlNode *xml_rsc = NULL; xmlNode *xml_tmp = NULL; xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *container_managed = NULL; for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) { if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) { continue; } for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next_element(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) { remote_name = value; } else if (safe_str_eq(name, "remote-addr")) { remote_server = value; } else if (safe_str_eq(name, "remote-port")) { remote_port = value; } else if (safe_str_eq(name, "remote-connect-timeout")) { connect_timeout = value; } else if (safe_str_eq(name, "remote-allow-migrate")) { remote_allow_migrate=value; } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) { container_managed = value; } } } if (remote_name == NULL) { return NULL; } if (remote_id_conflict(remote_name, data)) { return NULL; } xml_rsc = create_xml_node(parent, XML_CIB_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, remote_name); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, "pacemaker"); crm_xml_add(xml_rsc, XML_ATTR_TYPE, "remote"); xml_tmp = create_xml_node(xml_rsc, XML_TAG_META_SETS); crm_xml_set_id(xml_tmp, "%s_%s", remote_name, XML_TAG_META_SETS); attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); crm_xml_set_id(attr, "%s_%s", remote_name, "meta-attributes-container"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_CONTAINER); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, container_id); attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); crm_xml_set_id(attr, "%s_%s", remote_name, "meta-attributes-internal"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_INTERNAL_RSC); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, "true"); if (remote_allow_migrate) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); crm_xml_set_id(attr, "%s_%s", remote_name, "meta-attributes-migrate"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_ALLOW_MIGRATE); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_allow_migrate); } if (container_managed) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); crm_xml_set_id(attr, "%s_%s", remote_name, "meta-attributes-managed"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_MANAGED); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, container_managed); } xml_tmp = create_xml_node(xml_rsc, "operations"); attr = create_xml_node(xml_tmp, XML_ATTR_OP); crm_xml_set_id(attr, "%s_%s", remote_name, "monitor-interval-30s"); crm_xml_add(attr, XML_ATTR_TIMEOUT, "30s"); crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "30s"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "monitor"); if (connect_timeout) { attr = create_xml_node(xml_tmp, XML_ATTR_OP); crm_xml_set_id(attr, "%s_%s", remote_name, "start-interval-0"); crm_xml_add(attr, XML_ATTR_TIMEOUT, connect_timeout); crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "0"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "start"); } if (remote_port || remote_server) { xml_tmp = create_xml_node(xml_rsc, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_tmp, "%s_%s", remote_name, XML_TAG_ATTR_SETS); if (remote_server) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); crm_xml_set_id(attr, "%s_%s", remote_name, "instance-attributes-addr"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "addr"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_server); } if (remote_port) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); crm_xml_set_id(attr, "%s_%s", remote_name, "instance-attributes-port"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "port"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_port); } } return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node) { static const char *blind_faith = NULL; static gboolean unseen_are_unclean = TRUE; static gboolean need_warning = TRUE; if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } blind_faith = pe_pref(data_set->config_hash, "startup-fencing"); if (crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; if (need_warning) { crm_warn("Blind faith: not fencing unseen nodes"); /* Warn once per run, not per node and transition */ need_warning = FALSE; } } if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { crm_config_err("Must specify id tag in "); continue; } new_node = pe_create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL, new_node->details->utilization, NULL, FALSE, data_set->now); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); pe_create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void setup_container(resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; setup_container(child_rsc, data_set); } return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && safe_str_neq(container_id, rsc->id)) { resource_t *container = pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; /* generate remote nodes from resource config before unpacking resources */ for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { const char *new_node_id = NULL; /* first check if this is a bare metal remote node. Bare metal remote nodes * are defined as a resource primitive only. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found baremetal remote node %s in container resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Now check for guest remote nodes. * guest remote nodes are defined within a resource primitive. * Example1: a vm resource might be configured as a remote node. * Example2: a vm resource might be configured within a group to be a remote node. * Note: right now we only support guest remote nodes in as a standalone primitive * or a primitive within a group. No cloned primitives can be a guest remote node * right now */ if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) { /* expands a metadata defined remote resource into the xml config * as an actual rsc primitive to be unpacked later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest remote node %s in container resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } else if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) { xmlNode *xml_obj2 = NULL; /* search through a group to see if any of the primitive contain a remote node. */ for (xml_obj2 = __xml_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest remote node %s in container resource %s which is in group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the PE calculations. */ static void link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc) { node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } print_resource(LOG_DEBUG_3, "Linking remote-node connection resource, ", new_rsc, FALSE); remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return;); remote_node->details->remote_rsc = new_rsc; /* If this is a baremetal remote-node (no container resource * associated with it) then we need to handle startup fencing the same way * as cluster nodes. */ if (new_rsc->container == NULL) { handle_startup_fencing(data_set, remote_node); } else { /* At this point we know if the remote node is a container or baremetal * remote node, update the #kind attribute if a container is involved */ g_hash_table_replace(remote_node->details->attrs, strdup("#kind"), strdup("container")); } } static void destroy_tag(gpointer data) { tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GListPtr gIter = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag); for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { resource_t *new_rsc = NULL; if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append(data_set->resources, new_rsc); print_resource(LOG_DEBUG_3, "Added ", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_set(data_set->flags, pe_flag_quick_location)) { /* Ignore */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) { xmlNode *xml_tag = NULL; data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag); for (xml_tag = __xml_first_child(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) { continue; } if (tag_id == NULL) { crm_config_err("Failed unpacking %s: %s should be specified", crm_element_name(xml_tag), XML_ATTR_ID); continue; } for (xml_obj_ref = __xml_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) { continue; } if (obj_ref == NULL) { crm_config_err("Failed unpacking %s for tag %s: %s should be specified", crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID); continue; } if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (ticket_id == NULL || strlen(ticket_id) == 0) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } /* Compatibility with the deprecated ticket state section: * "/cib/status/tickets/instance_attributes" */ static void get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data) { const char *long_key = key; char *state_key = NULL; const char *granted_prefix = "granted-ticket-"; const char *last_granted_prefix = "last-granted-"; static int granted_prefix_strlen = 0; static int last_granted_prefix_strlen = 0; const char *ticket_id = NULL; const char *is_granted = NULL; const char *last_granted = NULL; const char *sep = NULL; ticket_t *ticket = NULL; pe_working_set_t *data_set = user_data; if (granted_prefix_strlen == 0) { granted_prefix_strlen = strlen(granted_prefix); } if (last_granted_prefix_strlen == 0) { last_granted_prefix_strlen = strlen(last_granted_prefix); } if (strstr(long_key, granted_prefix) == long_key) { ticket_id = long_key + granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("granted"); is_granted = value; } } else if (strstr(long_key, last_granted_prefix) == long_key) { ticket_id = long_key + last_granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("last-granted"); last_granted = value; } } else if ((sep = strrchr(long_key, '-'))) { ticket_id = sep + 1; state_key = strndup(long_key, strlen(long_key) - strlen(sep)); } if (ticket_id == NULL || strlen(ticket_id) == 0) { free(state_key); return; } if (state_key == NULL || strlen(state_key) == 0) { free(state_key); return; } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { free(state_key); return; } } g_hash_table_replace(ticket->state, state_key, strdup(value)); if (is_granted) { if (crm_is_true(is_granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } } else if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } } static void unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set) { const char *resource_discovery_enabled = NULL; xmlNode *attrs = NULL; resource_t *rsc = NULL; const char *shutdown = NULL; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { return; } if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { return; } crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname); this_node->details->remote_maintenance = crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0"); rsc = this_node->details->remote_rsc; if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { crm_info("Node %s is shutting down", this_node->details->uname); this_node->details->shutdown = TRUE; if (rsc) { rsc->next_role = RSC_ROLE_STOPPED; } } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance")) || (rsc && !is_set(rsc->flags, pe_rsc_managed))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } else { /* if we're here, this is either a baremetal node and fencing is enabled, * or this is a container node which we don't care if fencing is enabled * or not on. container nodes are 'fenced' by recovering the container resource * regardless of whether fencing is enabled. */ crm_info("Node %s has resource discovery disabled", this_node->details->uname); this_node->details->rsc_discovery_enabled = FALSE; } } } static bool unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) { bool changed = false; xmlNode *lrm_rsc = NULL; for (xmlNode *state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { const char *id = NULL; const char *uname = NULL; node_t *this_node = NULL; bool process = FALSE; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (this_node->details->unpacked) { crm_info("Node %s is already processed", id); continue; } else if (is_remote_node(this_node) == FALSE && is_set(data_set->flags, pe_flag_stonith_enabled)) { // A redundant test, but preserves the order for regression tests process = TRUE; } else if (is_remote_node(this_node)) { resource_t *rsc = this_node->details->remote_rsc; if (fence || (rsc && rsc->role == RSC_ROLE_STARTED)) { determine_remote_online_status(data_set, this_node); unpack_handle_remote_attrs(this_node, state, data_set); process = TRUE; } } else if (this_node->details->online) { process = TRUE; } else if (fence) { process = TRUE; } if(process) { crm_trace("Processing lrm resource entries on %shealthy%s node: %s", fence?"un":"", is_remote_node(this_node)?" remote":"", this_node->details->uname); changed = TRUE; this_node->details->unpacked = TRUE; lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } return changed; } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *state = NULL; node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) { xmlNode *xml_tickets = state; GHashTable *state_hash = NULL; /* Compatibility with the deprecated ticket state section: * Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */ state_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL, state_hash, NULL, TRUE, data_set->now); g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set); if (state_hash) { g_hash_table_destroy(state_hash); } /* Unpack the new "/cib/status/tickets/ticket_state"s */ unpack_tickets_state(xml_tickets, data_set); } if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) { xmlNode *attrs = NULL; const char *resource_discovery_enabled = NULL; id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } else if (is_remote_node(this_node)) { /* online state for remote nodes is determined by the * rsc state after all the unpacking is done. we do however * need to mark whether or not the node has been fenced as this plays * a role during unpacking cluster node resource state */ this_node->details->remote_was_fenced = crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0"); continue; } crm_trace("Processing node id=%s, uname=%s", id, uname); /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } crm_trace("determining node state"); determine_online_status(state, this_node, data_set); if (this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ pe_fence_node(data_set, this_node, "because the cluster does not have quorum"); } } } while(unpack_node_loop(status, FALSE, data_set)) { crm_trace("Start another loop"); } // Now catch any nodes we didnt see unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set); for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *this_node = gIter->data; if (this_node == NULL) { continue; } else if(is_remote_node(this_node) == FALSE) { continue; } else if(this_node->details->unpacked) { continue; } determine_remote_online_status(data_set, this_node); } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster)); } else if (safe_str_eq(is_peer, ONLINESTATUS)) { if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("CRMd is down: in_cluster=%s", crm_str(in_cluster)); crm_trace("\tis_peer=%s, join=%s, expected=%s", crm_str(is_peer), crm_str(join), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "because node is unexpectedly down"); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= true|false|online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate); online = crm_is_true(in_cluster); if (safe_str_eq(is_peer, ONLINESTATUS)) { is_peer = XML_BOOLEAN_YES; } if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", this_node->details->uname); /* Slightly different criteria since we can't shut down a dead peer */ online = crm_is_true(is_peer); } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "because the peer has not been seen by the cluster"); } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) { pe_fence_node(data_set, this_node, "because it failed the pacemaker membership criteria"); } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) { if (crm_is_true(in_cluster) || crm_is_true(is_peer)) { crm_info("- Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", this_node->details->uname); } } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN) && crm_is_true(in_cluster) == FALSE && crm_is_true(is_peer) == FALSE) { crm_info("Node %s was just shot", this_node->details->uname); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { pe_fence_node(data_set, this_node, "because the node is no longer part of the cluster"); } else if (crm_is_true(is_peer) == FALSE) { pe_fence_node(data_set, this_node, "because our peer process is no longer available"); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "because termination was requested"); } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is active", this_node->details->uname); } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING) || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "because the peer was in an unknown state"); crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown); } return online; } static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) { resource_t *rsc = this_node->details->remote_rsc; resource_t *container = NULL; pe_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will * be NULL. Consider it an offline remote node in that case. */ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; if (container && (g_list_length(rsc->running_on) == 1)) { host = rsc->running_on->data; } /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && is_set(container->flags, pe_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if(is_set(rsc->flags, pe_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if (rsc->role == RSC_ROLE_STOPPED || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); return this_node->details->online; } gboolean determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *shutdown = NULL; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("Node %s is not a pacemaker node", this_node->details->uname); } else if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } return online; } char * clone_strip(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); lpc = strlen(last_rsc_id); while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: crm_err("Empty string: %s", last_rsc_id); return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 1); memcpy(zero, last_rsc_id, lpc); zero[lpc] = 0; return zero; default: goto done; } } done: zero = strdup(last_rsc_id); return zero; } char * clone_zero(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { lpc = strlen(last_rsc_id); } while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; return zero; default: goto done; } } done: lpc = strlen(last_rsc_id); zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; crm_trace("%s -> %s", last_rsc_id, zero); return zero; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pe_find_node(data_set->nodes, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set); } link_rsc2remotenode(data_set, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); set_bit(rsc->flags, pe_rsc_orphan_container_filler); } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); static resource_t * find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; resource_t *rsc = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); /* Find an instance active (or partially active for grouped clones) on the specified node */ pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr nIter = NULL; GListPtr locations = NULL; resource_t *child = rIter->data; child->fns->location(child, &locations, TRUE); if (locations == NULL) { pe_rsc_trace(child, "Resource %s, skip inactive", child->id); continue; } for (nIter = locations; nIter && rsc == NULL; nIter = nIter->next) { node_t *childnode = nIter->data; if (childnode->details == node->details) { /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if(rsc) { pe_rsc_trace(rsc, "Resource %s, active", rsc->id); } } /* Keep this block, it means we'll do the right thing if * anyone toggles the unique flag to 'off' */ if (rsc && rsc->running_on) { crm_notice("/Anonymous/ clone %s is already running on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; } } g_list_free(locations); } /* Find an inactive instance */ if (skip_inactive == FALSE) { pe_rsc_trace(parent, "Looking for %s anywhere", rsc_id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr locations = NULL; resource_t *child = rIter->data; if (is_set(child->flags, pe_rsc_block)) { pe_rsc_trace(child, "Skip: blocked in stopped state"); continue; } child->fns->location(child, &locations, TRUE); if (locations == NULL) { /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); pe_rsc_trace(parent, "Resource %s, empty slot", rsc->id); } g_list_free(locations); } } if (rsc == NULL) { /* Create an extra orphan */ resource_t *top = create_child_clone(parent, -1, data_set); /* ->find_rsc() because we might be a cloned group */ rsc = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); CRM_ASSERT(rsc != NULL); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); } if (safe_str_neq(rsc_id, rsc->id)) { pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, is_set(rsc->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); } return rsc; } static resource_t * unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id, xmlNode * rsc_entry) { resource_t *rsc = NULL; resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); /* no match */ if (rsc == NULL) { /* Even when clone-max=0, we still create a single :0 orphan to match against */ char *tmp = clone_zero(rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, tmp); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; } else { crm_trace("%s is not known as %s either", rsc_id, tmp); } parent = uber_parent(clone0); free(tmp); crm_trace("%s not found: %s", rsc_id, parent ? parent->id : "orphan"); } else if (rsc->variant > pe_native) { crm_trace("%s is no longer a primitive resource, the lrm_resource entry is obsolete", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if(parent && parent->parent) { rsc = find_container_child(rsc_id, rsc, node); } else if (pe_rsc_is_clone(parent)) { if (is_not_set(parent->flags, pe_rsc_unique)) { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); CRM_ASSERT(rsc != NULL); free(base); } if (rsc && safe_str_neq(rsc_id, rsc->id)) { free(rsc->clone_name); rsc->clone_name = strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t * rsc, node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { node_t *tmpnode = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { node_t *n = node_copy(node); pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name, n->details->uname); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && node->details->maintenance == FALSE && is_set(rsc->flags, pe_rsc_managed)) { char *reason = NULL; gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by stonithd). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (is_container_remote_node(node)) { set_bit(rsc->flags, pe_rsc_failed); should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { if (is_baremetal_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { /* setting unseen = true means that fencing of the remote node will * only occur if the connection resource is not going to start somewhere. * This allows connection resources on a failed cluster-node to move to * another node without requiring the baremetal remote nodes to be fenced * as well. */ node->details->unseen = TRUE; reason = crm_strdup_printf("because %s is active there. Fencing will be revoked if remote-node connection can be re-established on another cluster-node.", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("because %s is thought to be active there", rsc->id); } pe_fence_node(data_set, node, reason); } free(reason); } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ pe_fence_node(data_set, node, "because of resource failure(s)"); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: set_bit(rsc->flags, pe_rsc_failed); if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; case action_fail_reset_remote: set_bit(rsc->flags, pe_rsc_failed); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); } if (tmpnode && is_baremetal_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* connection resource to baremetal resource failed in a way that * should result in fencing the remote-node. */ pe_fence_node(data_set, tmpnode, "because of connection failure(s)"); } } /* require the stop action regardless if fencing is occuring or not. */ if (rsc->role > RSC_ROLE_STOPPED) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_interval) { rsc->next_role = RSC_ROLE_STOPPED; } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. */ if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { crm_config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { crm_config_warn("Cluster configured not to stop active orphans." " %s must be stopped manually on %s", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); if (on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; stop->flags |= pe_action_optional; } g_list_free(possible_matches); free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t * node, resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0) { pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(status, "-1")) { pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_master_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = counter; } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { implied_monitor_start = counter; } } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) { implied_master_start = counter; } } if (*start_index == -1) { if (implied_master_start != -1) { *start_index = implied_master_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } static resource_t * unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { resource_t *rsc; resource_t *container; const char *rsc_id; const char *container_id; if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped orphaned rsc %s's container to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } gboolean unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; gboolean found_orphaned_container_filler = FALSE; GListPtr unexpected_containers = NULL; GListPtr gIter = NULL; resource_t *remote = NULL; CRM_CHECK(node != NULL, return FALSE); crm_trace("Unpacking resources on %s", node->details->uname); for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { resource_t *rsc; rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set); if (!rsc) { continue; } if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = TRUE; } if (is_set(rsc->flags, pe_rsc_unexpectedly_running)) { remote = rsc_contains_remote_node(data_set, rsc); if (remote) { unexpected_containers = g_list_append(unexpected_containers, remote); } } } } /* If a container resource is unexpectedly up... and the remote-node * connection resource for that container is not up, the entire container * must be recovered. */ for (gIter = unexpected_containers; gIter != NULL; gIter = gIter->next) { remote = (resource_t *) gIter->data; if (remote->role != RSC_ROLE_STARTED) { crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.", remote->container->id); set_bit(remote->container->flags, pe_rsc_failed); } } /* now that all the resource state has been unpacked for this node * we have to go back and map any orphaned container fillers to their * container resource */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(lrm_rsc_list, data_set); } g_list_free(unexpected_containers); return TRUE; } static void set_active(resource_t * rsc) { resource_t *top = uber_parent(rsc); if (top && top->variant == pe_master) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, pe_working_set_t * data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? */ if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } CRM_LOG_ASSERT(offset > 0); return get_xpath_object(xpath, data_set->input, LOG_DEBUG); } static void unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { /* * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src) * * So if a migrate_to is followed by a stop, then we don't need to care what * happened on the target node * * Without the stop, we need to look for a successful migrate_from. * This would also imply we're no longer running on the source * * Without the stop, and without a migrate_from op we make sure the resource * gets stopped on both source and target (assuming the target is up) * */ int stop_id = 0; int task_id = 0; xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); if (stop_op == NULL || stop_id < task_id) { int from_rc = 0, from_status = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); node_t *source = pe_find_node(data_set->nodes, migrate_source); xmlNode *migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ID(migrate_from), migrate_target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_OCF_OK && from_status == PCMK_LRM_OP_DONE) { pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), migrate_source); /* all good * just need to arrange for the stop action to get sent * but _without_ affecting the target somehow */ rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from) { /* Failed */ if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); } } else { /* Pending or complete but erased */ if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); if (source && source->details->online) { /* If we make it here we have a partial migration. The migrate_to * has completed but the migrate_from on the target has not. Hold on * to the target and source on the resource. Later on if we detect that * the resource is still going to run on that target, we may continue * the migration */ rsc->partial_migration_target = target; rsc->partial_migration_source = source; } } else { /* Consider it failed here - forces a restart, prevents migration */ set_bit(rsc->flags, pe_rsc_failed); clear_bit(rsc->flags, pe_rsc_allow_migrate); } } } } static void unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); CRM_ASSERT(rsc); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *source = pe_find_node(data_set->nodes, migrate_source); if (source && source->details->online) { native_add_running(rsc, source, data_set); } } } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *target = pe_find_node(data_set->nodes, migrate_target); pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op, migrate_id); if (target && target->details->online) { native_add_running(rsc, target, data_set); } } else if (migrate_op == NULL) { /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } } static void record_failed_op(xmlNode *op, node_t* node, pe_working_set_t * data_set) { xmlNode *xIter = NULL; const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY); if (node->details->online == FALSE) { return; } for (xIter = data_set->failed->children; xIter; xIter = xIter->next) { const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY); const char *uname = crm_element_value(xIter, XML_ATTR_UNAME); if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) { crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname); return; } } crm_trace("Adding entry %s on %s", op_key, node->details->uname); crm_xml_add(op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, op); } static const char *get_op_key(xmlNode *xml_op) { const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if(key == NULL) { key = ID(xml_op); } return key; } static void unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int interval = 0; bool is_probe = FALSE; action_t *action = NULL; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); CRM_ASSERT(rsc); *last_failure = xml_op; crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; pe_rsc_trace(rsc, "is a probe: %s", key); } if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_warn("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); record_failed_op(xml_op, node, data_set); } else { crm_trace("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); } action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) || (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) || (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) || (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) { pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), fail2text(action->on_fail), action->uuid, key); *on_fail = action->on_fail; } if (safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { unpack_rsc_migration_failure(rsc, node, xml_op, data_set); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; rsc->next_role = RSC_ROLE_STOPPED; } else if(rc == PCMK_OCF_NOT_RUNNING) { rsc->role = RSC_ROLE_STOPPED; } else { crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->role = RSC_ROLE_SLAVE; rsc->next_role = RSC_ROLE_STOPPED; } } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatibility handling for failed op %s on %s", key, node->details->uname); resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set); } if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { /* leave stopped */ pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); rsc->role = RSC_ROLE_STOPPED; } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "Setting %s active", rsc->id); set_active(rsc); } pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; resource_t *fail_rsc = rsc; if (fail_rsc->parent) { resource_t *parent = uber_parent(fail_rsc); if (pe_rsc_is_clone(parent) && is_not_set(parent->flags, pe_rsc_unique)) { /* for clone and master resources, if a child fails on an operation * with on-fail = stop, all the resources fail. Do this by preventing * the parent from coming up again. */ fail_rsc = parent; } } crm_warn("Making sure %s doesn't come up again", fail_rsc->id); /* make sure it doesn't come up again */ g_hash_table_destroy(fail_rsc->allowed_nodes); fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); } static int determine_op_status( resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int interval = 0; int result = PCMK_LRM_OP_DONE; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); bool is_probe = FALSE; CRM_ASSERT(rsc); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if (target_rc >= 0 && target_rc != rc) { result = PCMK_LRM_OP_ERROR; pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); } /* we could clean this up significantly except for old LRMs and CRMs that * didn't include target_rc and liked to remap status */ switch (rc) { case PCMK_OCF_OK: if (is_probe && target_rc == 7) { result = PCMK_LRM_OP_DONE; set_bit(rsc->flags, pe_rsc_unexpectedly_running); pe_rsc_info(rsc, "Operation %s found resource %s active on %s", task, rsc->id, node->details->uname); /* legacy code for pre-0.6.5 operations */ } else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) { result = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if (safe_str_neq(task, CRMD_ACTION_STOP)) { result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_RUNNING_MASTER: if (is_probe) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s", task, rsc->id, node->details->uname); } else if (target_rc == rc) { /* nothing to do */ } else if (target_rc >= 0) { result = PCMK_LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else if (safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { result = PCMK_LRM_OP_ERROR; if (rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", key, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case PCMK_OCF_DEGRADED_MASTER: case PCMK_OCF_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; result = PCMK_LRM_OP_ERROR; break; case PCMK_OCF_NOT_CONFIGURED: result = PCMK_LRM_OP_ERROR_FATAL; break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: case PCMK_OCF_UNIMPLEMENT_FEATURE: if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && interval > 0) { result = PCMK_LRM_OP_NOTSUPPORTED; break; } else if (pe_can_fence(data_set, node) == FALSE && safe_str_eq(task, CRMD_ACTION_STOP)) { /* If a stop fails and we can't fence, there's nothing else we can do */ pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)", rsc->id, task, services_ocf_exitcode_str(rc), rc); clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); } result = PCMK_LRM_OP_ERROR_HARD; break; default: if (result == PCMK_LRM_OP_DONE) { crm_info("Treating %s (rc=%d) on %s as an ERROR", key, rc, node->details->uname); result = PCMK_LRM_OP_ERROR; } } return result; } static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set) { bool expired = FALSE; time_t last_failure = 0; int interval = 0; int failure_timeout = rsc->failure_timeout; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *clear_reason = NULL; /* clearing recurring monitor operation failures automatically * needs to be carefully considered */ if (safe_str_eq(crm_element_value(xml_op, XML_LRM_ATTR_TASK), "monitor") && safe_str_neq(crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL), "0")) { /* TODO, in the future we should consider not clearing recurring monitor * op failures unless the last action for a resource was a "stop" action. * otherwise it is possible that clearing the monitor failure will result * in the resource being in an undeterministic state. * * For now we handle this potential undeterministic condition for remote * node connection resources by not clearing a recurring monitor op failure * until after the node has been fenced. */ if (is_set(data_set->flags, pe_flag_stonith_enabled) && (rsc->remote_reconnect_interval)) { node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); if (remote_node && remote_node->details->remote_was_fenced == 0) { if (strstr(ID(xml_op), "last_failure")) { crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id); } /* disabling failure timeout for this operation because we believe * fencing of the remote node should occur first. */ failure_timeout = 0; } } } if (failure_timeout > 0) { int last_run = 0; if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) { time_t now = get_effective_time(data_set); if (now > (last_run + failure_timeout)) { expired = TRUE; } } } if (expired) { if (failure_timeout > 0) { int fc = get_failcount_full(node, rsc, &last_failure, FALSE, xml_op, data_set); if(fc) { if (get_failcount_full(node, rsc, &last_failure, TRUE, xml_op, data_set) == 0) { clear_reason = "it expired"; } else { expired = FALSE; } } else if (rsc->remote_reconnect_interval && strstr(ID(xml_op), "last_failure")) { /* always clear last failure when reconnect interval is set */ clear_reason = "reconnect interval is set"; } } } else if (strstr(ID(xml_op), "last_failure") && ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) { op_digest_cache_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); if (digest_data->rc == RSC_DIGEST_UNKNOWN) { crm_trace("rsc op %s/%s on node %s does not have a op digest to compare against", rsc->id, key, node->details->id); } else if (digest_data->rc != RSC_DIGEST_MATCH) { clear_reason = "resource parameters have changed"; } } if (clear_reason != NULL) { char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); action_t *clear_op = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s", rsc->id, node->details->uname, clear_reason, clear_op->uuid); } crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(expired && interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { switch(rc) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: /* Don't expire probes that return these values */ expired = FALSE; break; } } return expired; } int get_target_rc(xmlNode *xml_op) { int dummy = 0; int target_rc = 0; char *dummy_string = NULL; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); free(dummy_string); return target_rc; } static enum action_fail_response get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) { int result = action_fail_recover; action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); result = action->on_fail; pe_free_action(action); return result; } static void update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc, xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { gboolean clear_past_failure = FALSE; CRM_ASSERT(rsc); CRM_ASSERT(xml_op); if (rc == PCMK_OCF_NOT_RUNNING) { clear_past_failure = TRUE; } else if (rc == PCMK_OCF_NOT_INSTALLED) { rsc->role = RSC_ROLE_STOPPED; } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { if (last_failure) { const char *op_key = get_op_key(xml_op); const char *last_failure_key = get_op_key(last_failure); if (safe_str_eq(op_key, last_failure_key)) { clear_past_failure = TRUE; } } if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } } else if (safe_str_eq(task, CRMD_ACTION_START)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* Demote from Master does not clear an error */ rsc->role = RSC_ROLE_SLAVE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { unpack_rsc_migration(rsc, node, xml_op, data_set); } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_block: case action_fail_ignore: case action_fail_recover: case action_fail_restart_container: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; break; case action_fail_reset_remote: if (rsc->remote_reconnect_interval == 0) { /* when reconnect delay is not in use, the connection is allowed * to start again after the remote node is fenced and completely * stopped. Otherwise, with reconnect delay we wait for the failure * to be cleared entirely before reconnected can be attempted. */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; } } } gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int task_id = 0; const char *key = NULL; const char *task = NULL; const char *task_key = NULL; int rc = 0; int status = PCMK_LRM_OP_PENDING-1; int target_rc = get_target_rc(xml_op); int interval = 0; gboolean expired = FALSE; resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); task_key = get_op_key(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE); CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE); if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if (is_not_set(rsc->flags, pe_rsc_unique)) { parent = uber_parent(rsc); } pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role)); if (node->details->unclean) { pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribute", node->details->uname, rsc->id); } if (status == PCMK_LRM_OP_ERROR) { /* Older versions set this if rc != 0 but it's up to us to decide */ status = PCMK_LRM_OP_DONE; } if(status != PCMK_LRM_OP_NOT_INSTALLED) { expired = check_operation_expiry(rsc, node, rc, xml_op, data_set); } /* Degraded results are informational only, re-map them to their error-free equivalents */ if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) { rc = PCMK_OCF_OK; /* Add them to the failed list to highlight them for the user */ if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK); record_failed_op(xml_op, node, data_set); } } else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) { rc = PCMK_OCF_RUNNING_MASTER; /* Add them to the failed list to highlight them for the user */ if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED_MASTER, PCMK_OCF_RUNNING_MASTER); record_failed_op(xml_op, node, data_set); } } if (expired && target_rc != rc) { const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); if(interval == 0) { crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); goto done; } else if(node->details->online && node->details->unclean == FALSE) { crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); /* This is SO horrible, but we don't have access to CancelXmlOp() yet */ crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); goto done; } } if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) { status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set); } pe_rsc_trace(rsc, "Handling status: %d", status); switch (status) { case PCMK_LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Don't know what to do for cancelled ops yet"); break; case PCMK_LRM_OP_PENDING: if (safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { /* If a pending migrate_to action is out on a unclean node, * we have to force the stop action on the target. */ const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); if (target) { stop_action(rsc, target, FALSE); } } if (rsc->pending_task == NULL) { if (safe_str_eq(task, CRMD_ACTION_STATUS) && interval == 0) { /* Pending probes are not printed, even if pending * operations are requested. If someone ever requests that * behavior, uncomment this and the corresponding part of * native.c:native_pending_task(). */ /*rsc->pending_task = strdup("probe");*/ } else { rsc->pending_task = strdup(task); } } break; case PCMK_LRM_OP_DONE: pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname); update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_INSTALLED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if (failure_strategy == action_fail_ignore) { crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: " "Resource agent doesn't exist", task_key, status, rc, node->details->uname); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } resource_location(parent, node, -INFINITY, "hard-error", data_set); unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); break; case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_ERROR_HARD: case PCMK_LRM_OP_ERROR_FATAL: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container && safe_str_eq(task, CRMD_ACTION_STOP))) { crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded", task_key, rc, node->details->uname); update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); set_bit(rsc->flags, pe_rsc_failure_ignored); record_failed_op(xml_op, node, data_set); if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); if(status == PCMK_LRM_OP_ERROR_HARD) { do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)", parent->id, node->details->uname, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, node, -INFINITY, "hard-error", data_set); } else if(status == PCMK_LRM_OP_ERROR_FATAL) { crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)", parent->id, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); } } break; } done: pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role)); return TRUE; } gboolean add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set) { const char *cluster_name = NULL; g_hash_table_insert(node->details->attrs, strdup("#uname"), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_ID), strdup(node->details->id)); if (safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup("#cluster-name"), strdup(cluster_name)); } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); if (g_hash_table_lookup(node->details->attrs, "#site-name") == NULL) { const char *site_name = g_hash_table_lookup(node->details->attrs, "site-name"); if (site_name) { /* Prefix '#' to the key */ g_hash_table_insert(node->details->attrs, strdup("#site-name"), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup("#site-name"), strdup(cluster_name)); } } return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (is_remote_node(this_node)) { determine_remote_online_status(data_set, this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next_element(lrm_rsc)) { if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 3d4e60d353..7acd90c2d4 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,1966 +1,1967 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include pe_working_set_t *pe_dataset = NULL; extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set); static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled); /*! * \internal * \brief Check whether we can fence a particular node * * \param[in] data_set Working set for cluster * \param[in] node Name of node to check * * \return TRUE if node can be fenced, FALSE otherwise * * \note This function should only be called for cluster nodes and baremetal * remote nodes; guest nodes are fenced by stopping their container * resource, so fence execution requirements do not apply to them. */ bool pe_can_fence(pe_working_set_t * data_set, node_t *node) { if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) { return FALSE; /* Turned off */ } else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) { return FALSE; /* No devices */ } else if (is_set(data_set->flags, pe_flag_have_quorum)) { return TRUE; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return TRUE; } else if(node == NULL) { return FALSE; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname); return TRUE; } crm_trace("Cannot fence %s", node->details->uname); return FALSE; } node_t * node_copy(const node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); new_node = calloc(1, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->rsc_discover_mode = this_node->rsc_discover_mode; new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores) { GHashTable *result = hash; node_t *other_node = NULL; GListPtr gIter = list; GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = merge_weights(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { node_t *new_node = node_copy(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } GHashTable * node_hash_from_list(GListPtr list) { GListPtr gIter = list; GHashTable *result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *n = node_copy(node); g_hash_table_insert(result, (gpointer) n->details->id, n); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; GListPtr gIter = list1; for (; gIter != NULL; gIter = gIter->next) { node_t *new_node = NULL; node_t *this_node = (node_t *) gIter->data; if (filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if (reset) { new_node->weight = 0; } if (new_node != NULL) { result = g_list_prepend(result, new_node); } } return result; } gint sort_node_uname(gconstpointer a, gconstpointer b) { const node_t *node_a = a; const node_t *node_b = b; return strcmp(node_a->details->uname, node_b->details->uname); } void dump_node_scores_worker(int level, const char *file, const char *function, int line, resource_t * rsc, const char *comment, GHashTable * nodes) { GHashTable *hash = nodes; GHashTableIter iter; node_t *node = NULL; if (rsc) { hash = rsc->allowed_nodes; } if (rsc && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't show the allocation scores for orphans */ return; } if (level == 0) { char score[128]; int len = sizeof(score); /* For now we want this in sorted order to keep the regression tests happy */ GListPtr gIter = NULL; GListPtr list = g_hash_table_get_values(hash); list = g_list_sort(list, sort_node_uname); gIter = list; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; /* This function is called a whole lot, use stack allocated score */ score2char_stack(node->weight, score, len); if (rsc) { printf("%s: %s allocation score on %s: %s\n", comment, rsc->id, node->details->uname, score); } else { printf("%s: %s = %s\n", comment, node->details->uname, score); } } g_list_free(list); } else if (hash) { char score[128]; int len = sizeof(score); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { /* This function is called a whole lot, use stack allocated score */ score2char_stack(node->weight, score, len); if (rsc) { do_crm_log_alias(LOG_TRACE, file, function, line, "%s: %s allocation score on %s: %s", comment, rsc->id, node->details->uname, score); } else { do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment, node->details->uname, score); } } } if (rsc && rsc->children) { GListPtr gIter = NULL; gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; dump_node_scores_worker(level, file, function, line, child, comment, nodes); } } } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; int len = 0; char *new_text = NULL; len = strlen(*dump_text) + strlen(" ") + strlen(key) + strlen("=") + strlen(value) + 1; new_text = calloc(1, len); sprintf(new_text, "%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } void dump_node_capacity(int level, const char *comment, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(node->details->uname) + strlen(" capacity:") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(rsc->id) + strlen(" utilization on ") + strlen(node->details->uname) + strlen(":") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s utilization on %s:", comment, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->sort_index > resource2->sort_index) { return -1; } if (resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node, gboolean optional, gboolean save_action, pe_working_set_t * data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, free(key); return NULL); if (save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } else if(save_action) { #if 0 action = g_hash_table_lookup(data_set->singletons, key); #else /* More expensive but takes 'node' into account */ possible_matches = find_actions(data_set->actions, key, on_node); #endif } if(data_set->singletons == NULL) { data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); } if (possible_matches != NULL) { if (g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); pe_rsc_trace(rsc, "Found existing action (%d) %s for %s on %s", action->id, task, rsc ? rsc->id : "", on_node ? on_node->details->uname : ""); g_list_free(possible_matches); } if (action == NULL) { if (save_action) { pe_rsc_trace(rsc, "Creating%s action %d: %s for %s on %s %d", optional ? "" : " mandatory", data_set->action_id, key, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", optional); } action = calloc(1, sizeof(action_t)); if (save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = strdup(task); if (on_node) { action->node = node_copy(on_node); } action->uuid = strdup(key); pe_set_action_bit(action, pe_action_runnable); if (optional) { pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); } else { pe_clear_action_bit(action, pe_action_optional); pe_rsc_trace(rsc, "Unset optional on %s", action->uuid); } /* Implied by calloc()... action->actions_before = NULL; action->actions_after = NULL; action->pseudo = FALSE; action->dumped = FALSE; action->processed = FALSE; action->seen_count = 0; */ action->extra = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); action->meta = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); if (save_action) { data_set->actions = g_list_prepend(data_set->actions, action); if(rsc == NULL) { g_hash_table_insert(data_set->singletons, action->uuid, action); } } if (rsc != NULL) { action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); unpack_operation(action, action->op_entry, rsc->container, data_set); if (save_action) { rsc->actions = g_list_prepend(rsc->actions, action); } } if (save_action) { pe_rsc_trace(rsc, "Action %d created", action->id); } } if (optional == FALSE) { pe_rsc_trace(rsc, "Unset optional on %s", action->uuid); pe_clear_action_bit(action, pe_action_optional); } if (rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_TRACE; if (save_action) { warn_level = LOG_WARNING; } if (is_set(action->flags, pe_action_have_node_attrs) == FALSE && action->node != NULL && action->op_entry != NULL) { pe_set_action_bit(action, pe_action_have_node_attrs); unpack_instance_attributes(data_set->input, action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if (is_set(action->flags, pe_action_pseudo)) { /* leave untouched */ } else if (action->node == NULL) { pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid); pe_clear_action_bit(action, pe_action_runnable); } else if (is_not_set(rsc->flags, pe_rsc_managed) && g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL) == NULL) { crm_debug("Action %s (unmanaged)", action->uuid); pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); /* action->runnable = FALSE; */ } else if (action->node->details->online == FALSE && (!is_container_remote_node(action->node) || action->node->details->remote_requires_reset)) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if (is_set(action->rsc->flags, pe_rsc_managed) - && save_action && a_task == stop_rsc) { - pe_fence_node(data_set, action->node, "because node is unclean"); + && save_action && a_task == stop_rsc + && action->node->details->unclean == FALSE) { + pe_fence_node(data_set, action->node, "because of unrunnable resource actions"); } } else if (action->node->details->pending) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", action->uuid, action->node->details->uname); } else if (action->needs == rsc_req_nothing) { pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid); pe_set_action_bit(action, pe_action_runnable); #if 0 /* * No point checking this * - if we don't have quorum we can't stonith anyway */ } else if (action->needs == rsc_req_stonith) { crm_trace("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_stop) { pe_clear_action_bit(action, pe_action_runnable); crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role)); if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) { pe_clear_action_bit(action, pe_action_runnable); pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { pe_rsc_trace(rsc, "Action %s is runnable", action->uuid); pe_set_action_bit(action, pe_action_runnable); } if (save_action) { switch (a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if (is_set(action->flags, pe_action_runnable)) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } free(key); return action; } static const char * unpack_operation_on_fail(action_t * action) { const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) { crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id); return NULL; } else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) { /* demote on_fail defaults to master monitor value if present */ xmlNode *operation = NULL; const char *name = NULL; const char *role = NULL; const char *on_fail = NULL; const char *interval = NULL; const char *enabled = NULL; CRM_CHECK(action->rsc != NULL, return NULL); for (operation = __xml_first_child(action->rsc->ops_xml); operation && !value; operation = __xml_next_element(operation)) { if (!crm_str_eq((const char *)operation->name, "op", TRUE)) { continue; } name = crm_element_value(operation, "name"); role = crm_element_value(operation, "role"); on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL); enabled = crm_element_value(operation, "enabled"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!on_fail) { continue; } else if (enabled && !crm_is_true(enabled)) { continue; } else if (safe_str_neq(name, "monitor") || safe_str_neq(role, "Master")) { continue; } else if (crm_get_interval(interval) <= 0) { continue; } value = on_fail; } } return value; } static xmlNode * find_min_interval_mon(resource_t * rsc, gboolean include_disabled) { int number = 0; int min_interval = -1; const char *name = NULL; const char *value = NULL; const char *interval = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } if (safe_str_neq(name, RSC_STATUS)) { continue; } number = crm_get_interval(interval); if (number < 0) { continue; } if (min_interval < 0 || number < min_interval) { min_interval = number; op = operation; } } } return op; } void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set) { int value_i = 0; unsigned long long interval = 0; unsigned long long start_delay = 0; char *value_ms = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); if (xml_obj) { xmlAttrPtr xIter = NULL; for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); g_hash_table_remove(action->meta, "id"); field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { interval = crm_get_interval(value); if (interval > 0) { value_ms = crm_itoa(interval); g_hash_table_replace(action->meta, strdup(field), value_ms); } else { g_hash_table_remove(action->meta, field); } } /* Begin compatibility code ("requires" set on start action not resource) */ value = g_hash_table_lookup(action->meta, "requires"); if (safe_str_neq(action->task, RSC_START) && safe_str_neq(action->task, RSC_PROMOTE)) { action->needs = rsc_req_nothing; value = "nothing (not start/promote)"; } else if (safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if (safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if (safe_str_eq(value, "unfencing")) { action->needs = rsc_req_stonith; set_bit(action->rsc->flags, pe_rsc_needs_unfencing); if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_notice("%s requires unfencing but fencing is disabled", action->rsc->id); } } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_notice("%s requires fencing but fencing is disabled", action->rsc->id); } /* End compatibility code */ } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) { action->needs = rsc_req_stonith; value = "fencing (resource)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) { action->needs = rsc_req_quorum; value = "quorum (resource)"; } else { action->needs = rsc_req_nothing; value = "nothing (resource)"; } pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->task, value); value = unpack_operation_on_fail(action); if (value == NULL) { } else if (safe_str_eq(value, "block")) { action->on_fail = action_fail_block; g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block")); } else if (safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (safe_str_eq(value, "standby")) { action->on_fail = action_fail_standby; value = "node standby"; } else if (safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if (safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else if (safe_str_eq(value, "restart-container")) { if (container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate)"; } else { value = NULL; } } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate) (default)"; /* for baremetal remote nodes, ensure that any failure that results in * dropping an active connection to a remote node results in fencing of * the remote node. * * There are only two action failures that don't result in fencing. * 1. probes - probe failures are expected. * 2. start - a start failure indicates that an active connection does not already * exist. The user can set op on-fail=fence if they really want to fence start * failures. */ } else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) && (is_rsc_baremetal_remote_node(action->rsc, data_set) && !(safe_str_eq(action->task, CRMD_ACTION_STATUS) && interval == 0) && (safe_str_neq(action->task, CRMD_ACTION_START)))) { if (!is_set(action->rsc->flags, pe_rsc_managed)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop unmanaged baremetal remote node (enforcing default)"; } else { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { value = "fence baremetal remote node (default)"; } else { value = "recover baremetal remote node connection (default)"; } if (action->rsc->remote_reconnect_interval) { action->fail_role = RSC_ROLE_STOPPED; } action->on_fail = action_fail_reset_remote; } } else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task, role2text(action->fail_role)); field = XML_OP_ATTR_START_DELAY; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } else if (interval > 0 && g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN)) { crm_time_t *origin = NULL; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); origin = crm_time_new(value); if (origin == NULL) { crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s", ID(xml_obj), value); } else { crm_time_t *delay = NULL; int rc = crm_time_compare(origin, data_set->now); long long delay_s = 0; int interval_s = (interval / 1000); crm_trace("Origin: %s, interval: %d", value, interval_s); /* If 'origin' is in the future, find the most recent "multiple" that occurred in the past */ while(rc > 0) { crm_time_add_seconds(origin, -interval_s); rc = crm_time_compare(origin, data_set->now); } /* Now find the first "multiple" that occurs after 'now' */ while (rc < 0) { crm_time_add_seconds(origin, interval_s); rc = crm_time_compare(origin, data_set->now); } delay = crm_time_calculate_duration(origin, data_set->now); crm_time_log(LOG_TRACE, "origin", origin, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "now", data_set->now, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "delay", delay, crm_time_log_duration); delay_s = crm_time_get_seconds(delay); CRM_CHECK(delay_s >= 0, delay_s = 0); start_delay = delay_s * 1000; crm_info("Calculated a start delay of %llds for %s", delay_s, ID(xml_obj)); g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay)); crm_time_free(origin); crm_time_free(delay); } } field = XML_ATTR_TIMEOUT; value = g_hash_table_lookup(action->meta, field); if (value == NULL && xml_obj == NULL && safe_str_eq(action->task, RSC_STATUS) && interval == 0) { xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); if (min_interval_mon) { value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); pe_rsc_trace(action->rsc, "\t%s uses the timeout value '%s' from the minimum interval monitor", action->uuid, value); } } if (value == NULL) { value = pe_pref(data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } static xmlNode * find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled) { unsigned long long number = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } number = crm_get_interval(interval); match_key = generate_op_key(rsc->id, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); if (rsc->clone_name) { match_key = generate_op_key(rsc->clone_name, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); } if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) { local_key = generate_op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = generate_op_key(rsc->id, "notify", 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(resource_t * rsc, const char *key) { return find_rsc_op_entry_helper(rsc, key, FALSE); } void print_node(const char *pre_text, node_t * node, gboolean details) { if (node == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } CRM_ASSERT(node->details); crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details) { char *pe_mutable = strdup("\t\t"); GListPtr gIter = node->details->running_rsc; crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); } } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_trace("%s%s %s ==> %s", user_data == NULL ? "" : (char *)user_data, user_data == NULL ? "" : ": ", (char *)key, (char *)value); } void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details) { long options = pe_print_log | pe_print_pending; if (rsc == NULL) { do_crm_log(log_level - 1, "%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* action_wrapper_t* */ g_list_free_full(action->actions_after, free); /* action_wrapper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } free(action->cancel_task); free(action->task); free(action->uuid); free(action->node); free(action); } GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node) { const char *value = NULL; GListPtr result = NULL; GListPtr gIter = input; CRM_CHECK(input != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if (value == NULL) { /* skip */ } else if (safe_str_eq(value, "0")) { /* skip */ } else if (safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if (not_on_node == NULL) { crm_trace("(null) Found: %s", action->uuid); result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ } else if (action->node->details != not_on_node->details) { crm_trace("Found: %s", action->uuid); result = g_list_prepend(result, action); } } return result; } enum action_tasks get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic) { enum action_tasks task = text2task(name); if (rsc == NULL) { return task; } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); return task - 1; break; default: break; } } return task; } action_t * find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node) { GListPtr gIter = NULL; CRM_CHECK(uuid || task, return NULL); for (gIter = input; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (uuid != NULL && safe_str_neq(uuid, action->uuid)) { continue; } else if (task != NULL && safe_str_neq(task, action->task)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == action->node->details) { return action; } } return NULL; } GListPtr find_actions(GListPtr input, const char *key, const node_t *on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (safe_str_neq(key, action->uuid)) { crm_trace("%s does not match action %s", key, action->uuid); continue; } else if (on_node == NULL) { crm_trace("Action %s matches (ignoring node)", key); result = g_list_prepend(result, action); } else if (action->node == NULL) { crm_trace("Action %s matches (unallocated, assigning to %s)", key, on_node->details->uname); action->node = node_copy(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { crm_trace("Action %s on %s matches", key, on_node->details->uname); result = g_list_prepend(result, action); } else { crm_trace("Action %s on node %s does not match requested node %s", key, action->node->details->uname, on_node->details->uname); } } return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("Matching %s against %s", key, action->uuid); if (safe_str_neq(key, action->uuid)) { crm_trace("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if (on_node == NULL || action->node == NULL) { crm_trace("on_node=%p, action->node=%p", on_node, action->node); continue; } else if (safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_prepend(result, action); } crm_trace("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); } return result; } static void resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag) { node_t *match = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = node_copy(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t * rsc, node_t * node, int score, const char *tag, pe_working_set_t * data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GListPtr gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node_iter = (node_t *) gIter->data; resource_node_score(rsc, node_iter, score, tag); } } else { GHashTableIter iter; node_t *node_iter = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) { resource_node_score(rsc, node_iter, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) gint sort_op_by_callid(gconstpointer a, gconstpointer b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID); if (safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why its happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_const_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id); crm_element_value_const_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesn't matter since * stops are never pending */ sort_return(0, "pending"); } else if (a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (b_call_id >= 0 && a_call_id == b_call_id) { /* * The op and last_failed_op are the same * Order on last-rc-change */ int last_a = -1; int last_b = -1; crm_element_value_const_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); crm_element_value_const_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); crm_trace("rc-change: %d vs %d", last_a, last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */ int a_id = -1; int b_id = -1; int dummy = -1; const char *a_magic = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_magic = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); if(!decode_transition_magic(a_magic, &a_uuid, &a_id, &dummy, &dummy, &dummy, &dummy)) { sort_return(0, "bad magic a"); } if(!decode_transition_magic(b_magic, &b_uuid, &b_id, &dummy, &dummy, &dummy, &dummy)) { sort_return(0, "bad magic b"); } /* try to determine the relative age of the operation... * some pending operations (ie. a start) may have been superseded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesn't match then one better * be a pending operation. * pending operations don't survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } time_t get_effective_time(pe_working_set_t * data_set) { if(data_set) { if (data_set->now == NULL) { crm_trace("Recording a new 'now'"); data_set->now = crm_time_new(NULL); } return crm_time_get_seconds_since_epoch(data_set->now); } crm_trace("Defaulting to 'now'"); return time(NULL); } gboolean get_target_role(resource_t * rsc, enum rsc_role_e * role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (value == NULL || safe_str_eq("started", value) || safe_str_eq("default", value)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (uber_parent(rsc)->variant == pe_master) { if (local_role > RSC_ROLE_SLAVE) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { crm_config_err("%s is not part of a master/slave resource, a %s of '%s' makes no sense", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order) { GListPtr gIter = NULL; action_wrapper_t *wrapper = NULL; GListPtr list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); /* Ensure we never create a dependency on ourselves... its happened */ CRM_ASSERT(lh_action != rh_action); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { action_wrapper_t *after = (action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = NULL; /* order |= pe_order_implies_then; */ /* order ^= pe_order_implies_then; */ wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } action_t * get_pseudo_op(const char *name, pe_working_set_t * data_set) { action_t *op = NULL; if(data_set->singletons) { op = g_hash_table_lookup(data_set->singletons, name); } if (op == NULL) { op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set); set_bit(op->flags, pe_action_pseudo); set_bit(op->flags, pe_action_runnable); } return op; } void destroy_ticket(gpointer data) { ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { ticket_t *ticket = NULL; if (ticket_id == NULL || strlen(ticket_id) == 0) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } static void filter_parameters(xmlNode * param_set, const char *param_string, bool need_present) { int len = 0; char *name = NULL; char *match = NULL; if (param_set == NULL) { return; } if (param_set) { xmlAttrPtr xIter = param_set->properties; while (xIter) { const char *prop_name = (const char *)xIter->name; xIter = xIter->next; name = NULL; len = strlen(prop_name) + 3; name = malloc(len); if(name) { sprintf(name, " %s ", prop_name); name[len - 1] = 0; match = strstr(param_string, name); } if (need_present && match == NULL) { crm_trace("%s not found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } else if (need_present == FALSE && match) { crm_trace("%s found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } free(name); } } } bool fix_remote_addr(resource_t * rsc) { const char *name; const char *value; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; const char *value_list[] = { "remote", "ocf", "pacemaker" }; if(rsc == NULL) { return FALSE; } name = "addr"; value = g_hash_table_lookup(rsc->parameters, name); if (safe_str_eq(value, "#uname") == FALSE) { return FALSE; } for (int lpc = 0; lpc < DIMOF(attr_list); lpc++) { name = attr_list[lpc]; value = crm_element_value(rsc->xml, attr_list[lpc]); if (safe_str_eq(value, value_list[lpc]) == FALSE) { return FALSE; } } return TRUE; } op_digest_cache_t * rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node, pe_working_set_t * data_set) { op_digest_cache_t *data = NULL; GHashTable *local_rsc_params = NULL; #ifdef ENABLE_VERSIONED_ATTRS xmlNode *local_versioned_params = NULL; #endif action_t *action = NULL; char *key = NULL; int interval = 0; const char *op_id = ID(xml_op); const char *interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_all; const char *digest_restart; const char *secure_list; const char *restart_list; const char *op_version; CRM_ASSERT(node != NULL); data = g_hash_table_lookup(node->details->digest_cache, op_id); if (data) { return data; } data = calloc(1, sizeof(op_digest_cache_t)); CRM_ASSERT(data != NULL); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); /* key is freed in custom_action */ interval = crm_parse_int(interval_s, "0"); key = generate_op_key(rsc->id, task, interval); action = custom_action(rsc, key, task, node, TRUE, FALSE, data_set); key = NULL; local_rsc_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(local_rsc_params, rsc, node, data_set); #ifdef ENABLE_VERSIONED_ATTRS local_versioned_params = create_xml_node(NULL, XML_TAG_VER_ATTRS); pe_get_versioned_attributes(local_versioned_params, rsc, node, data_set); #endif data->params_all = create_xml_node(NULL, XML_TAG_PARAMS); if(fix_remote_addr(rsc) && node) { // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside crm_xml_add(data->params_all, "addr", node->details->uname); crm_trace("Fixing addr for %s on %s", rsc->id, node->details->uname); } g_hash_table_foreach(local_rsc_params, hash2field, data->params_all); g_hash_table_foreach(action->extra, hash2field, data->params_all); g_hash_table_foreach(rsc->parameters, hash2field, data->params_all); g_hash_table_foreach(action->meta, hash2metafield, data->params_all); filter_action_parameters(data->params_all, op_version); #ifdef ENABLE_VERSIONED_ATTRS crm_summarize_versioned_params(data->params_all, rsc->versioned_parameters); crm_summarize_versioned_params(data->params_all, local_versioned_params); #endif data->digest_all_calc = calculate_operation_digest(data->params_all, op_version); if (secure_list && is_set(data_set->flags, pe_flag_sanitized)) { data->params_secure = copy_xml(data->params_all); if (secure_list) { filter_parameters(data->params_secure, secure_list, FALSE); } data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version); } if (digest_restart) { data->params_restart = copy_xml(data->params_all); if (restart_list) { filter_parameters(data->params_restart, restart_list, TRUE); } data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version); } data->rc = RSC_DIGEST_MATCH; if (digest_restart && strcmp(data->digest_restart_calc, digest_restart) != 0) { data->rc = RSC_DIGEST_RESTART; } else if (digest_all == NULL) { /* it is unknown what the previous op digest was */ data->rc = RSC_DIGEST_UNKNOWN; } else if (strcmp(digest_all, data->digest_all_calc) != 0) { data->rc = RSC_DIGEST_ALL; } g_hash_table_insert(node->details->digest_cache, strdup(op_id), data); g_hash_table_destroy(local_rsc_params); #ifdef ENABLE_VERSIONED_ATTRS free_xml(local_versioned_params); #endif pe_free_action(action); return data; } const char *rsc_printable_id(resource_t *rsc) { if (is_not_set(rsc->flags, pe_rsc_unique)) { return ID(rsc->xml); } return rsc->id; } void clear_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; clear_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; clear_bit_recursive(child_rsc, flag); } } void set_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; set_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; set_bit_recursive(child_rsc, flag); } } action_t * pe_fence_op(node_t * node, const char *op, bool optional, pe_working_set_t * data_set) { char *key = NULL; action_t *stonith_op = NULL; if(op == NULL) { op = data_set->stonith_action; } key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op); if(data_set->singletons) { stonith_op = g_hash_table_lookup(data_set->singletons, key); } if(stonith_op == NULL) { stonith_op = custom_action(NULL, key, CRM_OP_FENCE, node, optional, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", op); } else { free(key); } if(optional == FALSE) { crm_trace("%s is no longer optional", stonith_op->uuid); pe_clear_action_bit(stonith_op, pe_action_optional); } return stonith_op; } void trigger_unfencing( resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set) { if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { /* No resources require it */ return; } else if (rsc != NULL && is_not_set(rsc->flags, pe_rsc_fence_device)) { /* Wasn't a stonith device */ return; } else if(node && node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { action_t *unfence = pe_fence_op(node, "on", FALSE, data_set); crm_notice("Unfencing %s: %s", node->details->uname, reason); if(dependency) { order_actions(unfence, dependency, pe_order_optional); } } else if(rsc) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { trigger_unfencing(rsc, node, reason, dependency, data_set); } } } } gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref) { tag_t *tag = NULL; GListPtr gIter = NULL; gboolean is_existing = FALSE; CRM_CHECK(tags && tag_name && obj_ref, return FALSE); tag = g_hash_table_lookup(tags, tag_name); if (tag == NULL) { tag = calloc(1, sizeof(tag_t)); if (tag == NULL) { return FALSE; } tag->id = strdup(tag_name); tag->refs = NULL; g_hash_table_insert(tags, strdup(tag_name), tag); } for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *existing_ref = (const char *) gIter->data; if (crm_str_eq(existing_ref, obj_ref, TRUE)){ is_existing = TRUE; break; } } if (is_existing == FALSE) { tag->refs = g_list_append(tag->refs, strdup(obj_ref)); crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref); } return TRUE; }