diff --git a/crm/pengine/master.c b/crm/pengine/master.c index d40e0892fa..3f904c0a09 100644 --- a/crm/pengine/master.c +++ b/crm/pengine/master.c @@ -1,578 +1,579 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #define VARIANT_CLONE 1 #include #define NO_MASTER_PREFS 0 extern gint sort_clone_instance(gconstpointer a, gconstpointer b); extern void clone_create_notifications( resource_t *rsc, action_t *action, action_t *action_complete, pe_working_set_t *data_set); static void child_promoting_constraints( clone_variant_data_t *clone_data, enum pe_ordering type, resource_t *rsc, resource_t *child, resource_t *last, pe_working_set_t *data_set) { /* if(clone_data->ordered */ /* || clone_data->self->restart_type == pe_restart_restart) { */ /* type = pe_order_implies_left; */ /* } */ if(child == NULL) { if(clone_data->ordered && last != NULL) { crm_debug_4("Ordered version (last node)"); /* last child promote before promoted started */ custom_action_order( last, promote_key(last), NULL, rsc, promoted_key(rsc), NULL, type, data_set); } } else if(clone_data->ordered) { crm_debug_4("Ordered version"); if(last == NULL) { /* global promote before first child promote */ last = rsc; } /* else: child/child relative promote */ order_start_start(last, child, type); custom_action_order( last, promote_key(last), NULL, child, promote_key(child), NULL, type, data_set); } else { crm_debug_4("Un-ordered version"); /* child promote before global promoted */ custom_action_order( child, promote_key(child), NULL, rsc, promoted_key(rsc), NULL, type, data_set); /* global promote before child promote */ custom_action_order( rsc, promote_key(rsc), NULL, child, promote_key(child), NULL, type, data_set); } } static void child_demoting_constraints( clone_variant_data_t *clone_data, enum pe_ordering type, resource_t *rsc, resource_t *child, resource_t *last, pe_working_set_t *data_set) { /* if(clone_data->ordered */ /* || clone_data->self->restart_type == pe_restart_restart) { */ /* type = pe_order_implies_left; */ /* } */ if(child == NULL) { if(clone_data->ordered && last != NULL) { crm_debug_4("Ordered version (last node)"); /* global demote before first child demote */ custom_action_order( rsc, demote_key(rsc), NULL, last, demote_key(last), NULL, pe_order_implies_left, data_set); } } else if(clone_data->ordered && last != NULL) { crm_debug_4("Ordered version"); /* child/child relative demote */ custom_action_order(child, demote_key(child), NULL, last, demote_key(last), NULL, type, data_set); } else if(clone_data->ordered) { crm_debug_4("Ordered version (1st node)"); /* first child stop before global stopped */ custom_action_order( child, demote_key(child), NULL, rsc, demoted_key(rsc), NULL, type, data_set); } else { crm_debug_4("Un-ordered version"); /* child demote before global demoted */ 
custom_action_order( child, demote_key(child), NULL, rsc, demoted_key(rsc), NULL, type, data_set); /* global demote before child demote */ custom_action_order( rsc, demote_key(rsc), NULL, child, demote_key(child), NULL, type, data_set); } } static void master_update_pseudo_status( resource_t *child, gboolean *demoting, gboolean *promoting) { CRM_ASSERT(demoting != NULL); CRM_ASSERT(promoting != NULL); slist_iter( action, action_t, child->actions, lpc, if(*promoting && *demoting) { return; } else if(action->optional) { continue; } else if(safe_str_eq(CRMD_ACTION_DEMOTE, action->task)) { *demoting = TRUE; } else if(safe_str_eq(CRMD_ACTION_PROMOTE, action->task)) { *promoting = TRUE; } ); } #define apply_master_location(list) \ slist_iter( \ cons, rsc_to_node_t, list, lpc2, \ cons_node = NULL; \ if(cons->role_filter == RSC_ROLE_MASTER) { \ crm_debug("Applying %s to %s", \ cons->id, child_rsc->id); \ cons_node = pe_find_node_id( \ cons->node_list_rh, chosen->details->id); \ } \ if(cons_node != NULL) { \ int new_priority = merge_weights( \ child_rsc->priority, cons_node->weight); \ crm_debug("\t%s: %d->%d", child_rsc->id, \ child_rsc->priority, new_priority); \ child_rsc->priority = new_priority; \ } \ ); #define apply_master_colocation(list) \ slist_iter( \ cons, rsc_colocation_t, list, lpc2, \ cons_node = cons->rsc_lh->allocated_to; \ if(cons->role_lh == RSC_ROLE_MASTER \ && cons_node != NULL \ && chosen->details == cons_node->details) { \ int new_priority = merge_weights( \ child_rsc->priority, cons->score); \ crm_debug("Applying %s to %s", \ cons->id, child_rsc->id); \ crm_debug("\t%s: %d->%d", child_rsc->id, \ child_rsc->priority, new_priority); \ child_rsc->priority = new_priority; \ } \ ); static node_t * can_be_master(resource_t *rsc) { node_t *node = NULL; node_t *local_node = NULL; clone_variant_data_t *clone_data = NULL; node = rsc->allocated_to; if(rsc->priority < 0) { crm_debug_2("%s cannot be master: preference", rsc->id); return NULL; } else if(node == NULL) { crm_debug_2("%s cannot be master: not allocated", rsc->id); return NULL; } else if(can_run_resources(node) == FALSE) { crm_debug_2("Node can't run any resources: %s", node->details->uname); return NULL; } get_clone_variant_data(clone_data, rsc->parent); local_node = pe_find_node_id( rsc->parent->allowed_nodes, node->details->id); if(local_node == NULL) { crm_err("%s cannot run on %s: node not allowed", rsc->id, node->details->uname); return NULL; } else if(local_node->count < clone_data->master_node_max) { return local_node; } else { crm_debug_2("%s cannot be master on %s: node full", rsc->id, node->details->uname); } return NULL; } static gint sort_master_instance(gconstpointer a, gconstpointer b) { int rc; const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); rc = sort_rsc_priority(a, b); if( rc != 0 ) { return rc; } if(resource1->role > resource2->role) { return -1; } else if(resource1->role < resource2->role) { return 1; } return sort_clone_instance(a, b); } node_t * master_color(resource_t *rsc, pe_working_set_t *data_set) { int len = 0; int promoted = 0; node_t *chosen = NULL; node_t *cons_node = NULL; char *attr_name = NULL; const char *attr_value = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); clone_color(rsc, data_set); /* count now tracks the number of masters allocated */ slist_iter(node, node_t, rsc->allowed_nodes, lpc, node->count = 0; ); /* * assign priority
*/ slist_iter( child_rsc, resource_t, clone_data->child_list, lpc, crm_debug_2("Assigning priority for %s", child_rsc->id); chosen = child_rsc->allocated_to; if(chosen == NULL) { continue; } else if(child_rsc->role == RSC_ROLE_STARTED) { child_rsc->role = RSC_ROLE_SLAVE; } switch(child_rsc->next_role) { case RSC_ROLE_STARTED: if(NO_MASTER_PREFS) { child_rsc->priority = clone_data->clone_max - lpc; break; } child_rsc->priority = -1; CRM_CHECK(chosen != NULL, break); len = 8 + strlen(child_rsc->id); crm_malloc0(attr_name, len); sprintf(attr_name, "master-%s", child_rsc->id); crm_debug_2("looking for %s on %s", attr_name, chosen->details->uname); attr_value = g_hash_table_lookup( chosen->details->attrs, attr_name); if(attr_value == NULL) { crm_free(attr_name); len = 8 + strlen(child_rsc->long_name); crm_malloc0(attr_name, len); sprintf(attr_name, "master-%s", child_rsc->long_name); crm_debug_2("looking for %s on %s", attr_name, chosen->details->uname); attr_value = g_hash_table_lookup( chosen->details->attrs, attr_name); } if(attr_value != NULL) { crm_debug("%s=%s for %s", attr_name, crm_str(attr_value), chosen->details->uname); child_rsc->priority = char2score( attr_value); } crm_free(attr_name); break; case RSC_ROLE_SLAVE: case RSC_ROLE_STOPPED: child_rsc->priority = -INFINITY; break; case RSC_ROLE_MASTER: /* the only reason we should be here is if * we're re-creating actions after a stonith */ promoted++; break; default: CRM_CHECK(FALSE/* unhandled */, crm_err("Unknown resource role: %d for %s", child_rsc->next_role, child_rsc->id)); } apply_master_location(child_rsc->rsc_location); apply_master_location(rsc->rsc_location); apply_master_colocation(rsc->rsc_cons); apply_master_colocation(child_rsc->rsc_cons); ); /* sort based on the new "promote" priority */ clone_data->child_list = g_list_sort( clone_data->child_list, sort_master_instance); /* mark the first N as masters */ slist_iter( child_rsc, resource_t, clone_data->child_list, lpc, chosen = NULL; crm_debug_2("Processing %s", child_rsc->id); if(promoted < clone_data->master_max) { chosen = can_be_master(child_rsc); } if(chosen == NULL) { if(child_rsc->next_role == RSC_ROLE_STARTED) { child_rsc->next_role = RSC_ROLE_SLAVE; } continue; } chosen->count++; crm_info("Promoting %s", child_rsc->id); child_rsc->next_role = RSC_ROLE_MASTER; promoted++; add_hash_param(child_rsc->parameters, crm_meta_name("role"), role2text(child_rsc->next_role)); ); crm_info("Promoted %d instances of a possible %d to master", promoted, clone_data->master_max); return NULL; } void master_create_actions(resource_t *rsc, pe_working_set_t *data_set) { action_t *action = NULL; action_t *action_complete = NULL; gboolean any_promoting = FALSE; gboolean any_demoting = FALSE; resource_t *last_promote_rsc = NULL; resource_t *last_demote_rsc = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_debug("Creating actions for %s", rsc->id); /* create actions as normal */ clone_create_actions(rsc, data_set); slist_iter( child_rsc, resource_t, clone_data->child_list, lpc, gboolean child_promoting = FALSE; gboolean child_demoting = FALSE; crm_debug_2("Creating actions for %s", child_rsc->id); child_rsc->cmds->create_actions(child_rsc, data_set); master_update_pseudo_status( child_rsc, &child_demoting, &child_promoting); any_demoting = any_demoting || child_demoting; any_promoting = any_promoting || child_promoting; ); /* promote */ action = promote_action(rsc, NULL, !any_promoting); action_complete = custom_action( rsc, promoted_key(rsc), 
CRMD_ACTION_PROMOTED, NULL, !any_promoting, TRUE, data_set); action->pseudo = TRUE; action->runnable = TRUE; action_complete->pseudo = TRUE; action_complete->runnable = TRUE; action_complete->priority = INFINITY; child_promoting_constraints(clone_data, pe_order_optional, rsc, NULL, last_promote_rsc, data_set); clone_create_notifications(rsc, action, action_complete, data_set); /* demote */ action = demote_action(rsc, NULL, !any_demoting); action_complete = custom_action( rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, NULL, !any_demoting, TRUE, data_set); action_complete->priority = INFINITY; action->pseudo = TRUE; action->runnable = TRUE; action_complete->pseudo = TRUE; action_complete->runnable = TRUE; child_demoting_constraints(clone_data, pe_order_optional, rsc, NULL, last_demote_rsc, data_set); clone_create_notifications(rsc, action, action_complete, data_set); } void master_internal_constraints(resource_t *rsc, pe_working_set_t *data_set) { resource_t *last_rsc = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); clone_internal_constraints(rsc, data_set); /* global demoted before start */ custom_action_order( rsc, demoted_key(rsc), NULL, rsc, start_key(rsc), NULL, pe_order_optional, data_set); /* global started before promote */ custom_action_order( rsc, started_key(rsc), NULL, rsc, promote_key(rsc), NULL, pe_order_optional, data_set); /* global demoted before stop */ custom_action_order( rsc, demoted_key(rsc), NULL, rsc, stop_key(rsc), NULL, pe_order_optional, data_set); /* global demote before demoted */ custom_action_order( rsc, demote_key(rsc), NULL, rsc, demoted_key(rsc), NULL, pe_order_optional, data_set); /* global demoted before promote */ custom_action_order( rsc, demoted_key(rsc), NULL, rsc, promote_key(rsc), NULL, pe_order_optional, data_set); slist_iter( child_rsc, resource_t, clone_data->child_list, lpc, /* child demote before promote */ custom_action_order( child_rsc, demote_key(child_rsc), NULL, child_rsc, promote_key(child_rsc), NULL, pe_order_optional, data_set); child_promoting_constraints(clone_data, pe_order_optional, rsc, child_rsc, last_rsc, data_set); child_demoting_constraints(clone_data, pe_order_optional, rsc, child_rsc, last_rsc, data_set); last_rsc = child_rsc; ); } void master_rsc_colocation_rh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc_rh); + CRM_CHECK(rsc_rh != NULL, return); if(rsc_rh->provisional) { return; } else if(constraint->role_rh == RSC_ROLE_UNKNOWN) { crm_debug_3("Handling %s as a clone colocation", constraint->id); clone_rsc_colocation_rh(rsc_lh, rsc_rh, constraint); return; } CRM_CHECK(rsc_lh != NULL, return); CRM_CHECK(rsc_lh->variant == pe_native, return); crm_info("Processing constraint %s: %d", constraint->id, constraint->score); if(constraint->score < INFINITY) { slist_iter( child_rsc, resource_t, clone_data->child_list, lpc, child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint); ); } else { GListPtr lhs = NULL, rhs = NULL; lhs = rsc_lh->allowed_nodes; slist_iter( child_rsc, resource_t, clone_data->child_list, lpc, crm_info("Processing: %s", child_rsc->id); if(child_rsc->allocated_to != NULL && child_rsc->next_role == constraint->role_rh) { crm_info("Applying: %s %s", child_rsc->id, role2text(child_rsc->next_role)); rhs = g_list_append(rhs, child_rsc->allocated_to); } ); rsc_lh->allowed_nodes = node_list_and(lhs, rhs, FALSE); pe_free_shallow_adv(rhs, FALSE); pe_free_shallow(lhs); } 
return; } diff --git a/cts/CIB.py.in b/cts/CIB.py.in index cb70a5d14f..30e7a2c411 100644 --- a/cts/CIB.py.in +++ b/cts/CIB.py.in @@ -1,246 +1,247 @@ #!@PYTHON@ '''CTS: Cluster Testing System: CIB generator ''' __copyright__=''' Author: Jia Ming Pan Copyright (C) 2006 International Business Machines ''' from UserDict import UserDict import sys, time, types, syslog, os, struct, string, signal, traceback from CTS import ClusterManager from CM_hb import HeartbeatCM class CIB: cib_option_template = ''' ''' ipaddr_template = ''' ''' hb_ipaddr_template = ''' ''' lsb_resource = ''' ''' dummy_resource_template = ''' ''' clustermon_resource_template = ''' ''' clustermon_location_constraint = ''' ''' master_slave_resource = ''' ''' resource_group_template = '''%s %s %s''' per_node_constraint_template = ''' ''' stonith_resource_template = """ - - + - + - + - + """ cib_template =''' %s %s %s ''' def NextIP(self): fields = string.split(self.CM.Env["IPBase"], '.') fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') self.CM.Env["IPBase"]=ip return ip def __init__(self, CM): self.CM = CM #make up crm config cib_options = self.cib_option_template % CM.Env["DoFencing"] #create resources and their constraints resources = "" constraints = "" if self.CM.Env["DoBSC"] == 1: cib_options = cib_options + ''' ''' if self.CM.Env["CIBResource"] != 1: # generate cib self.cts_cib = self.cib_template % (cib_options, resources, constraints) return if self.CM.cluster_monitor == 1: resources += self.clustermon_resource_template constraints += self.clustermon_location_constraint ip1=self.NextIP() ip2=self.NextIP() ip3=self.NextIP() ip1_rsc = self.ipaddr_template % (ip1, ip1, ip1, ip1, ip1) ip2_rsc = self.hb_ipaddr_template % (ip2, ip2, ip2, ip2, ip2) ip3_rsc = self.ipaddr_template % (ip3, ip3, ip3, ip3, ip3) resources += self.resource_group_template % (ip1_rsc, ip2_rsc, ip3_rsc) # lsb resource resources += self.lsb_resource # Migrator resources += self.dummy_resource_template % \ ("migrator", "migrator", "migrator", "migrator") # per node resource fields = string.split(self.CM.Env["IPBase"], '.') for node in self.CM.Env["nodes"]: ip = self.NextIP() per_node_resources = self.ipaddr_template % \ ("rsc_"+node, "rsc_"+node, "rsc_"+node, "rsc_"+node, ip) per_node_constraint = self.per_node_constraint_template % \ ("rsc_"+node, "rsc_"+node, "rsc_"+node, "rsc_"+node, node) resources += per_node_resources constraints += per_node_constraint # fencing resource nodelist = "" len = 0 for node in self.CM.Env["nodes"]: nodelist += node + " " len = len + 1 - stonith_resource = self.stonith_resource_template % (len, nodelist) + + stonith_resource = self.stonith_resource_template % \ + (self.CM.Env["reset"].stonithtype, self.CM.Env["reset"].configName, self.CM.Env["reset"].configValue) resources += stonith_resource #master slave resource resources += self.master_slave_resource % (2*len, 2, len, 1) # generate cib self.cts_cib = self.cib_template % (cib_options, resources, constraints) def cib(self): return self.cts_cib diff --git a/cts/CTSlab.py.in b/cts/CTSlab.py.in index f7238f0806..1a73f24a08 100755 --- a/cts/CTSlab.py.in +++ b/cts/CTSlab.py.in @@ -1,808 +1,823 @@ #!@PYTHON@ '''CTS: Cluster Testing System: Lab environment module ''' __copyright__=''' Copyright (C) 2001,2005 Alan Robertson Licensed under the GNU GPL.
''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from UserDict import UserDict import sys, time, types, string, syslog, random, os, string, signal, traceback from CTS import ClusterManager from CM_hb import HeartbeatCM from CTStests import BSC_AddResource from socket import gethostbyname_ex tests = None cm = None old_handler = None def sig_handler(signum, frame) : if cm != None: cm.log("Interrupted by signal %d"%signum) if signum == 10 and tests != None : tests.summarize() if signum == 15 : sys.exit(1) class ResetMechanism: def reset(self, node): raise ValueError("Abstract class member (reset)") class Stonith(ResetMechanism): - def __init__(self, sttype="external/ssh", parm=None + def __init__(self, sttype="external/ssh", pName=None, pValue=None , path="@sbindir@/stonith"): self.pathname=path - self.configstring=parm + self.configName=pName + self.configValue=pValue self.stonithtype=sttype def reset(self, node): - if self.configstring == None : + if self.configValue == None : config=node else: - config=self.configstring + config=self.configValue cmdstring = "%s -t '%s' -p '%s' '%s' 2>/dev/null" % (self.pathname , self.stonithtype, config, node) return (os.system(cmdstring) == 0) class Stonithd(ResetMechanism): def __init__(self, nodes, sttype = 'external/ssh'): self.sttype = sttype self.nodes = nodes self.query_cmd_pat = '@libdir@/heartbeat/stonithdtest/apitest 0 %s 20000 0' self.reset_cmd_pat = '@libdir@/heartbeat/stonithdtest/apitest 1 %s 20000 0' self.poweron_cmd_pat = '@libdir@/heartbeat/stonithdtest/apitest 2 %s 20000 0' self.poweroff_cmd_pat= '@libdir@/heartbeat/stonithdtest/apitest 3 %s 20000 0' self.lrmd_add_pat = '@libdir@/heartbeat/lrmadmin -A %s stonith ' + sttype + ' NULL hostlist=%s' self.lrmd_start_pat = '@libdir@/heartbeat/lrmadmin -E %s start 0 0 EVERYTIME' self.lrmd_stop_pat = '@libdir@/heartbeat/lrmadmin -E %s stop 0 0 EVERYTIME' self.lrmd_del_pat = '@libdir@/heartbeat/lrmadmin -D %s' self.rsc_id = 'my_stonithd_id' self.command = "@SSH@ -l root -n -x" self.command_noblock = "@SSH@ -f -l root -n -x" self.stonithd_started_nodes = [] self.fail_reason = '' def _remote_exec(self, node, cmnd): return (os.system("%s %s %s > /dev/null" % (self.command, node, cmnd)) == 0) def _remote_readlines(self, node, cmnd): f = os.popen("%s %s %s" % (self.command, node, cmnd)) return f.readlines() def _stonithd_started(self, node): return node in self.stonithd_started_nodes def _start_stonithd(self, node, hosts): hostlist = string.join(hosts, ',') lrmd_add_cmd = self.lrmd_add_pat % (self.rsc_id, hostlist) ret = self._remote_exec(node, lrmd_add_cmd) if not ret:return ret lrmd_start_cmd = self.lrmd_start_pat % self.rsc_id ret = self._remote_exec(node, lrmd_start_cmd) if not ret:return ret self.stonithd_started_nodes.append(node) return 1 def _stop_stonithd(self, node): lrmd_stop_cmd = self.lrmd_stop_pat % self.rsc_id ret = self._remote_exec(node, 
lrmd_stop_cmd) if not ret:return ret lrmd_del_cmd = self.lrmd_del_pat % self.rsc_id ret = self._remote_exec(node, lrmd_del_cmd) if not ret:return ret self.stonithd_started_nodes.remove(node) return 1 def _do_stonith(self, init_node, target_node, action): stonithd_started = self._stonithd_started(init_node) if not stonithd_started: ret = self._start_stonithd(init_node, [target_node]) if not ret: self.fail_reason = "failed to start stonithd on node %s" % init_node return ret command = "" if action == "RESET": command = self.reset_cmd_pat % target_node elif action == "POWEROFF": command = self.poweroff_cmd_pat % target_node elif action == "POWERON": command = self.poweron_cmd_pat % target_node else: self.fail_reason = "unknown operation type %s" % action return 0 lines = self._remote_readlines(init_node, command) result = "".join(lines) if not stonithd_started: self._stop_stonithd(init_node) index = result.find("result=0") if index == -1: self.fail_reason = "unexpected stonithd status: %s" % result return 0 return 1 # Should we randomly choose a node as init_node here if init_node not specified? def reset(self, init_node, target_node): return self._do_stonith(init_node, target_node, "RESET") def poweron(self, init_node, target_node): return self._do_stonith(init_node, target_node, "POWERON") def poweroff(self, init_node, target_node): return self._do_stonith(init_node, target_node, "POWEROFF") class Logger: TimeFormat = "%b %d %H:%M:%S\t" def __call__(self, lines): raise ValueError("Abstract class member (__call__)") def write(self, line): return self(line.rstrip()) def writelines(self, lines): for s in lines: self.write(s) return 1 def flush(self): return 1 def isatty(self): return None class SysLog(Logger): # http://docs.python.org/lib/module-syslog.html defaultsource="CTS" defaultfacility= syslog.LOG_LOCAL7 map = { "kernel": syslog.LOG_KERN, "user": syslog.LOG_USER, "mail": syslog.LOG_MAIL, "daemon": syslog.LOG_DAEMON, "auth": syslog.LOG_AUTH, "lpr": syslog.LOG_LPR, "news": syslog.LOG_NEWS, "uucp": syslog.LOG_UUCP, "cron": syslog.LOG_CRON, "local0": syslog.LOG_LOCAL0, "local1": syslog.LOG_LOCAL1, "local2": syslog.LOG_LOCAL2, "local3": syslog.LOG_LOCAL3, "local4": syslog.LOG_LOCAL4, "local5": syslog.LOG_LOCAL5, "local6": syslog.LOG_LOCAL6, "local7": syslog.LOG_LOCAL7, } def __init__(self, labinfo): if labinfo.has_key("syslogsource"): self.source=labinfo["syslogsource"] else: self.source=SysLog.defaultsource if labinfo.has_key("SyslogFacility"): self.facility=labinfo["SyslogFacility"] if SysLog.map.has_key(self.facility): self.facility=SysLog.map[self.facility] else: self.facility=SysLog.defaultfacility syslog.openlog(self.source, 0, self.facility) def setfacility(self, facility): self.facility = facility if SysLog.map.has_key(self.facility): self.facility=SysLog.map[self.facility] syslog.closelog() syslog.openlog(self.source, 0, self.facility) def __call__(self, lines): if isinstance(lines, types.StringType): syslog.syslog(lines) else: for line in lines: syslog.syslog(line) def name(self): return "Syslog" class StdErrLog(Logger): def __init__(self, labinfo): pass def __call__(self, lines): t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): sys.__stderr__.writelines([t, lines, "\n"]) else: for line in lines: sys.__stderr__.writelines([t, line, "\n"]) sys.__stderr__.flush() def name(self): return "StdErrLog" class FileLog(Logger): def __init__(self, labinfo, filename=None): if filename == None: filename=labinfo["LogFileName"]
self.logfile=filename import os self.hostname = os.uname()[1]+" " self.source = "CTS: " def __call__(self, lines): fd = open(self.logfile, "a") t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): fd.writelines([t, self.hostname, self.source, lines, "\n"]) else: for line in lines: fd.writelines([t, self.hostname, self.source, line, "\n"]) fd.close() def name(self): return "FileLog" class CtsLab(UserDict): '''This class defines the Lab Environment for the Cluster Test System. It defines those things which are expected to change from test environment to test environment for the same cluster manager. It is where you define the set of nodes that are in your test lab, what kind of reset mechanism you use, etc. This class is derived from a UserDict because we hold many different parameters of different kinds, and this provides a uniform and extensible interface useful for any kind of communication between the user/administrator/tester and CTS. At this point in time, it is the intent of this class to model static configuration and/or environmental data about the environment which doesn't change as the tests proceed. Well-known names (keys) are an important concept in this class. The HasMinimalKeys member function knows the minimal set of well-known names for the class. The following names are standard (well-known) at this time: nodes An array of the nodes in the cluster reset A ResetMechanism object logger An array of objects that log strings... CMclass The type of ClusterManager we are running (This is a class object, not a class instance) RandSeed Random seed. It is a triple of bytes. (optional) HAdir Base directory for HA installation The CTS code ignores names it doesn't know about/need. The individual tests have access to this information, and it is perfectly acceptable to provide hints, tweaks, fine-tuning directions or other information to the tests through this mechanism. ''' def __init__(self, nodes): self.data = {} self["nodes"] = nodes self.MinimalKeys=["nodes", "reset", "logger", "CMclass", "HAdir"] def HasMinimalKeys(self): 'Return TRUE if our object has the minimal set of keys/values in it' result = 1 for key in self.MinimalKeys: if not self.has_key(key): result = None return result def SupplyDefaults(self): if not self.has_key("logger"): self["logger"] = (SysLog(self), StdErrLog(self)) if not self.has_key("reset"): self["reset"] = Stonith() if not self.has_key("CMclass"): self["CMclass"] = HeartbeatCM if not self.has_key("LogFileName"): self["LogFileName"] = "/var/log/ha-log" if not self.has_key("logfacility"): LogFacility = "local7" # # Now set up our random number generator... # self.RandomGen = random.Random() # Get a random seed for the random number generator. if self.has_key("RandSeed"): randseed = self["RandSeed"] self.log("Random seed is: " + str(randseed)) self.RandomGen.seed(str(randseed)) else: randseed = int(time.time()) self.log("Random seed is: " + str(randseed)) self.RandomGen.seed(str(randseed)) def log(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: logfcn(string.strip(args)) def debug(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: if logfcn.name() != "StdErrLog": logfcn("debug: %s" % string.strip(args)) def __setitem__(self, key, value): '''Since this function gets called whenever we modify the dictionary (object), we can (and do) validate those keys that we know how to validate.
For the most part, we know how to validate the "MinimalKeys" elements. ''' # # List of nodes in the system # if key == "nodes": self.Nodes = {} for node in value: # I don't think I need the IP address, etc. but this validates # the node name against /etc/hosts and/or DNS, so it's a # GoodThing(tm). try: self.Nodes[node] = gethostbyname_ex(node) except: print node+" not found in DNS... aborting" raise # # Reset Mechanism # elif key == "reset": if not issubclass(value.__class__, ResetMechanism): raise ValueError("'reset' Value must be a subclass" " of ResetMechanism") # # List of Logging Mechanism(s) # elif key == "logger": if len(value) < 1: raise ValueError("Must have at least one logging mechanism") for logger in value: if not callable(logger): raise ValueError("'logger' elements must be callable") self._logfunctions = value # # Cluster Manager Class # elif key == "CMclass": if not issubclass(value, ClusterManager): raise ValueError("'CMclass' must be a subclass of" " ClusterManager") # # Initial Random seed... # #elif key == "RandSeed": # if len(value) != 3: # raise ValueError("'Randseed' must be a 3-element list/tuple") # for elem in value: # if not isinstance(elem, types.IntType): # raise ValueError("'Randseed' list must all be ints") self.data[key] = value def IsValidNode(self, node): 'Return TRUE if the given node is valid' return self.Nodes.has_key(node) def __CheckNode(self, node): "Raise a ValueError if the given node isn't valid" if not self.IsValidNode(node): raise ValueError("Invalid node [%s] in CheckNode" % node) def RandomNode(self): '''Choose a random node from the cluster''' return self.RandomGen.choice(self["nodes"]) def ResetNode(self, node): "Reset a node, (normally) using a hardware mechanism" self.__CheckNode(node) return self["reset"].reset(node) def ResetNode2(self, init_node, target_node, reasons): self.__CheckNode(target_node) stonithd = Stonithd(self["nodes"]) ret = stonithd.reset(init_node, target_node) if not ret: reasons.append(stonithd.fail_reason) return ret def usage(arg): print "Illegal argument " + arg print "usage: " + sys.argv[0] \ + " --directory config-directory" \ + " -D config-directory" \ + " --logfile system-logfile-name" \ + " --trunc (truncate logfile before starting)" \ + " -L system-logfile-name" \ + " --limit-nodes maxnumnodes" \ + " --xmit-loss lost-rate(0.0-1.0)" \ + " --recv-loss lost-rate(0.0-1.0)" \ + " --suppressmonitoring" \ + " --syslog-facility syslog-facility" \ + " --facility syslog-facility" \ + " --choose testcase-name" \ + " --test-ip-base ip" \ + " --oprofile \"whitespace separated list of nodes to oprofile\"" \ + " (-2 |"\ + " -v2 |"\ + " --crm |"\ + " --classic)"\ + " (--populate-resources | -r)" \ + " --resource-can-stop" \ + " --stonith (1 | 0 | yes | no)" \ + + " --stonith-type type" \ + + " --stonith-args name=value" \ + " --standby (1 | 0 | yes | no)" \ + " --fencing (1 | 0 | yes | no)" \ + " --suppress_cib_writes (1 | 0 | yes | no)" \ + " -lstests" \ + " --seed" \ + " [number-of-iterations]" sys.exit(1) # # A little test code... 
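# A note on the two new stonith switches handled below: --stonith-type
# selects the stonith plugin and --stonith-args takes exactly one
# name=value pair (the default is "hostlist=dynamic"); a "hostlist"
# parameter is rewritten to the space-separated cluster node list once
# the nodes are known. A minimal sketch of that convention, where the
# helper name parse_stonith_args is illustrative only and not part of
# the patch:

def parse_stonith_args(arg, node_list):
    # One name=value pair, split on the first '='.
    name, value = arg.split('=', 1)
    if name == "hostlist":
        # Mirrors the fix-up done after LimitNodes processing below.
        value = " ".join(node_list)
    return [name, value]

# e.g. parse_stonith_args("hostlist=dynamic", ["node1", "node2"])
# returns ["hostlist", "node1 node2"].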
# if __name__ == '__main__': from CTSaudits import AuditList from CTStests import TestList,RandomTests from CTS import Scenario, InitClusterManager, PingFest, PacketLoss, BasicSanityCheck import CM_hb HAdir = "@sysconfdir@/ha.d" LogFile = "/var/log/ha-log-local7" DoStonith = 1 DoStandby = 1 DoFencing = 1 NumIter = 500 SuppressMonitoring = None Version = 1 CIBfilename = None CIBResource = 0 ClobberCIB = 0 LimitNodes = 0 TestCase = None LogFacility = None TruncateLog = 0 ResCanStop = 0 XmitLoss = "0.0" RecvLoss = "0.0" IPBase = "127.0.0.10" SuppressCib = 1 DoBSC = 0 ListTests = 0 HaveSeed = 0 oprofile = None + StonithType = "ssh" + StonithParams = None + StonithParams = "hostlist=dynamic".split('=') # # The values of the rest of the parameters are now properly derived from # the configuration files. # # Stonith is configurable because it's slow, I have a few machines which # don't reboot very reliably, and it can do mild damage to your machine if # you're using a real power switch. # # Standby is configurable because the test is very heartbeat specific # and I haven't written the code to set it properly yet. Patches are # being accepted... # Set the signal handler signal.signal(15, sig_handler) signal.signal(10, sig_handler) # Process arguments... skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-D" or args[i] == "--directory": skipthis=1 HAdir = args[i+1] elif args[i] == "-l" or args[i] == "--limit-nodes": skipthis=1 LimitNodes = int(args[i+1]) elif args[i] == "-r" or args[i] == "--populate-resources": CIBResource = 1 elif args[i] == "-L" or args[i] == "--logfile": skipthis=1 LogFile = args[i+1] elif args[i] == "--test-ip-base": skipthis=1 IPBase = args[i+1] elif args[i] == "--oprofile": skipthis=1 oprofile = args[i+1].split(' ') elif args[i] == "--trunc": TruncateLog=1 elif args[i] == "-v2": Version=2 elif args[i] == "-lstests": ListTests=1 elif args[i] == "--stonith": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoStonith=1 elif args[i+1] == "0" or args[i+1] == "no": DoStonith=0 else: usage(args[i+1]) + elif args[i] == "--stonith-type": + StonithType = args[i+1] + skipthis=1 + elif args[i] == "--stonith-args": + StonithParams = args[i+1].split('=') + skipthis=1 elif args[i] == "--suppress-cib-writes": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": SuppressCib=1 elif args[i+1] == "0" or args[i+1] == "no": SuppressCib=0 else: usage(args[i+1]) elif args[i] == "--bsc": DoBSC=1 elif args[i] == "--standby": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoStandby=1 elif args[i+1] == "0" or args[i+1] == "no": DoStandby=0 else: usage(args[i+1]) elif args[i] == "--fencing": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoFencing=1 elif args[i+1] == "0" or args[i+1] == "no": DoFencing=0 else: usage(args[i+1]) elif args[i] == "--suppressmonitoring": SuppressMonitoring = 1 elif args[i] == "--resource-can-stop": ResCanStop = 1 elif args[i] == "-2" or args[i] == "--crm": Version = 2 elif args[i] == "-1" or args[i] == "--classic": Version = 1 elif args[i] == "--clobber-cib" or args[i] == "-c": ClobberCIB = 1 elif args[i] == "--cib-filename": skipthis=1 CIBfilename = args[i+1] elif args[i] == "--xmit-loss": try: float(args[i+1]) except ValueError: print ("--xmit-loss parameter should be float") usage(args[i+1]) skipthis=1 XmitLoss = args[i+1] elif args[i] == "--recv-loss": try: float(args[i+1]) except ValueError: print ("--recv-loss parameter should be float") usage(args[i+1]) skipthis=1 RecvLoss = args[i+1]
elif args[i] == "--choose": skipthis=1 TestCase = args[i+1] elif args[i] == "--syslog-facility" or args[i] == "--facility": skipthis=1 LogFacility = args[i+1] elif args[i] == "--seed": skipthis=1 Seed=args[i+1] HaveSeed = 1 else: NumIter=int(args[i]) if not oprofile: oprofile = [] # # This reading of HBconfig here is ugly, and I suppose ought to # be done by the Cluster manager. This would probably mean moving the # list of cluster nodes into the ClusterManager class. A good thought # for our Copious Spare Time in the future... # config = CM_hb.HBConfig(HAdir) node_list = config.Parameters["node"] if DoBSC: NumIter = 2 Version = 2 while len(node_list) > 1: node_list.pop(len(node_list)-1) if LogFacility == None: if config.Parameters.has_key("logfacility"): LogFacility = config.Parameters["logfacility"][0] else: LogFacility = "local7" if LimitNodes > 0: if len(node_list) > LimitNodes: print("Limiting the number of nodes configured=%d (max=%d)" %(len(node_list), LimitNodes)) while len(node_list) > LimitNodes: node_list.pop(len(node_list)-1) + if StonithParams[0] == "hostlist": + StonithParams[1] = string.join(node_list, " ") + # alt_list = [] # for node in node_list: # alt_list.append(string.lower(node)) # node_list = alt_list Environment = CtsLab(node_list) Environment["HAdir"] = HAdir Environment["ClobberCIB"] = ClobberCIB Environment["CIBfilename"] = CIBfilename Environment["CIBResource"] = CIBResource Environment["LogFileName"] = LogFile Environment["DoStonith"] = DoStonith Environment["SyslogFacility"] = LogFacility Environment["DoStandby"] = DoStandby Environment["DoFencing"] = DoFencing Environment["ResCanStop"] = ResCanStop Environment["SuppressMonitoring"] = SuppressMonitoring Environment["XmitLoss"] = XmitLoss Environment["RecvLoss"] = RecvLoss Environment["IPBase"] = IPBase Environment["SuppressCib"] = SuppressCib Environment["DoBSC"] = 0 Environment["use_logd"] = 0 Environment["logfacility"] = LogFacility Environment["oprofile"] = oprofile if config.Parameters.has_key("use_logd"): Environment["use_logd"] = 1 if Version == 2: from CM_LinuxHAv2 import LinuxHAv2 Environment['CMclass']=LinuxHAv2 if HaveSeed: Environment["RandSeed"] = Seed - Environment["reset"] = Stonith(sttype="external/ssh", parm=string.join(node_list, " ")) + Environment["reset"] = Stonith(sttype=StonithType, pName=StonithParams[0], pValue=StonithParams[1]) if DoBSC: Environment["DoBSC"] = 1 Environment["ClobberCIB"] = 1 Environment["CIBResource"] = 0 Environment["logger"] = (FileLog(Environment), StdErrLog(Environment)) scenario = Scenario([ BasicSanityCheck(Environment) ]) else: scenario = Scenario( [ InitClusterManager(Environment), PacketLoss(Environment)]) Environment.SupplyDefaults() # Your basic start up the world type of test scenario... 
#scenario = Scenario( #[ InitClusterManager(Environment) #, PingFest(Environment)]) # Create the Cluster Manager object cm = Environment['CMclass'](Environment) if TruncateLog: cm.log("Truncating %s" % LogFile) lf = open(LogFile, "w"); if lf != None: lf.truncate(0) lf.close() cm.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ") cm.log("HA configuration directory: " + Environment["HAdir"]) cm.log("System log files: " + Environment["LogFileName"]) cm.log("Enable Stonith: %d" % Environment["DoStonith"]) cm.log("Enable Fencing: %d" % Environment["DoFencing"]) cm.log("Enable Standby: %d" % Environment["DoStandby"]) cm.log("Enable Resources: %d" % Environment["CIBResource"]) if Environment.has_key("SuppressMonitoring") \ and Environment["SuppressMonitoring"]: cm.log("Resource Monitoring is disabled") cm.log("Cluster nodes: ") for node in config.Parameters["node"]: (rc, lines) = cm.rsh.remote_py(node, "os", "system", "@sbindir@/crm_uuid") if not lines: cm.log(" * %s: __undefined_uuid__" % node) else: out=lines[0] out = out[:-1] cm.log(" * %s: %s" % (node, out)) Audits = AuditList(cm) Tests = [] if Environment["DoBSC"]: test = BSC_AddResource(cm) Tests.append(test) elif TestCase != None: for test in TestList(cm): if test.name == TestCase: Tests.append(test) if Tests == []: usage("--choose: No applicable/valid tests chosen") else: Tests = TestList(cm) if ListTests == 1 : cm.log("Total %d tests"%len(Tests)) for test in Tests : cm.log(str(test.name)); sys.exit(0) tests = RandomTests(scenario, cm, Tests, Audits) Environment.RandomTests = tests try : overall, detailed = tests.run(NumIter) except : cm.Env.log("Exception by %s" % sys.exc_info()[0]) for logmethod in Environment["logger"]: traceback.print_exc(50, logmethod) tests.summarize() if tests.Stats["failure"] > 0: sys.exit(tests.Stats["failure"]) elif tests.Stats["success"] != NumIter: cm.Env.log("No failure count but success != requested iterations") sys.exit(1) diff --git a/lib/crm/pengine/unpack.c b/lib/crm/pengine/unpack.c index 3b345fee69..32e6b5056b 100644 --- a/lib/crm/pengine/unpack.c +++ b/lib/crm/pengine/unpack.c @@ -1,1285 +1,1290 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include /* for ONLINESTATUS */ #include #include #include #include gboolean unpack_config(crm_data_t *config, pe_working_set_t *data_set) { const char *name = NULL; const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); data_set->config_hash = config_hash; unpack_instance_attributes( config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, data_set->now); #if CRM_DEPRECATED_SINCE_2_0_1 xml_child_iter_filter( config, a_child, XML_CIB_TAG_NVPAIR, name = crm_element_value(a_child, XML_NVPAIR_ATTR_NAME); value = crm_element_value(a_child, XML_NVPAIR_ATTR_VALUE); if(g_hash_table_lookup(config_hash, name) == NULL) { g_hash_table_insert( config_hash,crm_strdup(name),crm_strdup(value)); } crm_config_err("Creating directly" "beneath has been deprecated since" " 2.0.1", ID(a_child), name); ); #else xml_child_iter_filter( config, a_child, XML_CIB_TAG_NVPAIR, name = crm_element_value(a_child, XML_NVPAIR_ATTR_NAME); crm_config_err("Creating directly" "beneath has been deprecated since" " 2.0.1 and is now disabled", ID(a_child), name); ); #endif verify_pe_options(data_set->config_hash); value = pe_pref(data_set->config_hash, "default-action-timeout"); data_set->transition_idle_timeout = crm_strdup(value); crm_debug("Default action timeout: %s", data_set->transition_idle_timeout); value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "default-resource-failure-stickiness"); data_set->default_resource_fail_stickiness = char2score(value); crm_debug("Default failure stickiness: %d", data_set->default_resource_fail_stickiness); value = pe_pref(data_set->config_hash, "stonith-enabled"); cl_str_to_boolean(value, &data_set->stonith_enabled); crm_debug("STONITH of failed nodes is %s", data_set->stonith_enabled?"enabled":"disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_debug_2("STONITH will %s nodes", data_set->stonith_action); value = pe_pref(data_set->config_hash, "symmetric-cluster"); cl_str_to_boolean(value, &data_set->symmetric_cluster); if(data_set->symmetric_cluster) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "no-quorum-policy"); if(safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if(safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } value = pe_pref(data_set->config_hash, "stop-orphan-resources"); cl_str_to_boolean(value, &data_set->stop_rsc_orphans); crm_debug_2("Orphan resources are %s", data_set->stop_rsc_orphans?"stopped":"ignored"); value = pe_pref(data_set->config_hash, "stop-orphan-actions"); cl_str_to_boolean(value,
&data_set->stop_action_orphans); crm_debug_2("Orphan resource actions are %s", data_set->stop_action_orphans?"stopped":"ignored"); value = pe_pref(data_set->config_hash, "remove-after-stop"); cl_str_to_boolean(value, &data_set->remove_after_stop); crm_debug_2("Stopped resources are removed from the status section: %s", data_set->remove_after_stop?"true":"false"); value = pe_pref(data_set->config_hash, "is-managed-default"); cl_str_to_boolean(value, &data_set->is_managed_default); crm_debug_2("By default resources are %smanaged", data_set->is_managed_default?"":"not "); return TRUE; } gboolean unpack_nodes(crm_data_t * xml_nodes, pe_working_set_t *data_set) { node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; gboolean unseen_are_unclean = TRUE; const char *blind_faith = pe_pref( data_set->config_hash, "startup-fencing"); if(crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } xml_child_iter_filter( xml_nodes, xml_obj, XML_CIB_TAG_NODE, new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); crm_debug_3("Processing node %s/%s", uname, id); if(id == NULL) { crm_config_err("Must specify id tag in "); continue; } if(type == NULL) { crm_config_err("Must specify type tag in "); continue; } crm_malloc0(new_node, sizeof(node_t)); if(new_node == NULL) { return FALSE; } new_node->weight = 0; new_node->fixed = FALSE; crm_malloc0(new_node->details, sizeof(struct node_shared_s)); if(new_node->details == NULL) { crm_free(new_node); return FALSE; } crm_debug_3("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->type = node_ping; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->attrs = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ if(data_set->stonith_enabled == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } if(type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } add_node_attrs(xml_obj, new_node, data_set); if(crm_is_true(g_hash_table_lookup( new_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", new_node->details->uname); new_node->weight = -INFINITY; new_node->details->standby = TRUE; } data_set->nodes = g_list_append(data_set->nodes, new_node); crm_debug_3("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); ); return TRUE; } gboolean unpack_resources(crm_data_t * xml_resources, pe_working_set_t *data_set) { xml_child_iter( xml_resources, xml_obj, resource_t *new_rsc = NULL; crm_debug_3("Beginning unpack...
%s", xml_obj?crm_element_name(xml_obj):""); if(common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append( data_set->resources, new_rsc); print_resource(LOG_DEBUG_3, "Added", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if(new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } ); data_set->resources = g_list_sort( data_set->resources, sort_rsc_priority); return TRUE; } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(crm_data_t * status, pe_working_set_t *data_set) { const char *id = NULL; const char *uname = NULL; crm_data_t * lrm_rsc = NULL; crm_data_t * attrs = NULL; node_t *this_node = NULL; crm_debug_3("Begining unpack"); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, id = crm_element_value(node_state, XML_ATTR_ID); uname = crm_element_value(node_state, XML_ATTR_UNAME); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); crm_debug_3("Processing node %s", uname); this_node = pe_find_node_id(data_set->nodes, id); if(uname == NULL) { /* error */ continue; } else if(this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; crm_debug_3("Adding runtime node attrs"); add_node_attrs(attrs, this_node, data_set); crm_debug_3("determining node state"); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || data_set->stonith_enabled) { /* offline nodes run no resources... 
* unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ crm_debug_3("Processing lrm resource entries"); unpack_lrm_resources(this_node, lrm_rsc, data_set); } ); return TRUE; } static gboolean determine_online_status_no_fencing(crm_data_t * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)){ crm_debug_2("Node is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); } else if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)) { } else if(safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join_state); } } else if(this_node->details->expected_up == FALSE) { crm_debug_2("CRMd is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s is partially & un-expectedly down", this_node->details->uname); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(crm_data_t * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(crm_is_true(ccm_state) && safe_str_eq(ha_state, ACTIVESTATUS) && safe_str_eq(crm_state, ONLINESTATUS)) { online = TRUE; if(safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; } } else if(crm_is_true(ccm_state) == FALSE && safe_str_eq(ha_state, DEADSTATUS) && safe_str_eq(crm_state, OFFLINESTATUS) && this_node->details->expected_up == FALSE) { crm_debug("Node %s is down: join_state=%s, expected=%s", this_node->details->uname, crm_str(join_state), crm_str(exp_state)); } else if(this_node->details->expected_up) { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { crm_info("Node %s is coming up", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state),
crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } gboolean determine_online_status( crm_data_t * node_state, node_t *this_node, pe_working_set_t *data_set) { int shutdown = 0; gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(this_node == NULL) { crm_config_err("No node to check"); return online; } ha_msg_value_int(node_state, XML_CIB_ATTR_SHUTDOWN, &shutdown); this_node->details->expected_up = FALSE; if(safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } this_node->details->shutdown = FALSE; if(shutdown != 0) { this_node->details->shutdown = TRUE; this_node->details->expected_up = FALSE; } if(data_set->stonith_enabled == FALSE) { online = determine_online_status_no_fencing( node_state, this_node); } else { online = determine_online_status_fencing( node_state, this_node); } if(online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if(this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown?"shutting down": this_node->details->standby?"standby":"online"); } else { crm_debug_2("Node %s is offline", this_node->details->uname); } return online; } #define set_char(x) last_rsc_id[len] = x; complete = TRUE; static void increment_clone(char *last_rsc_id) { gboolean complete = FALSE; int len = 0; CRM_CHECK(last_rsc_id != NULL, return); if(last_rsc_id != NULL) { len = strlen(last_rsc_id); } len--; while(complete == FALSE && len > 0) { switch (last_rsc_id[len]) { case 0: len--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[len] = '0'; len--; break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[len], len); break; } } } static resource_t * create_fake_resource(const char *rsc_id, crm_data_t *rsc_entry, pe_working_set_t *data_set) { resource_t *rsc = NULL; crm_data_t *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); crm_log_xml_info(rsc_entry, "Orphan resource"); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); common_unpack(xml_rsc, &rsc, NULL, data_set); rsc->orphan = TRUE; data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } static resource_t * unpack_find_resource( pe_working_set_t *data_set, node_t *node, const char *rsc_id, crm_data_t *rsc_entry) { resource_t *rsc = NULL; gboolean is_duped_clone = FALSE; char *alt_rsc_id = crm_strdup(rsc_id); while(rsc == NULL) { crm_debug_3("looking for: %s", alt_rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); /* no match */ if(rsc == NULL) { crm_debug_2("%s not found: %d", alt_rsc_id, is_duped_clone); if(is_duped_clone) { /* create one */ rsc = create_fake_resource(alt_rsc_id, rsc_entry, data_set); crm_info("Making sure orphan %s/%s is stopped on %s", rsc_id, rsc->id, node->details->uname); resource_location(rsc, NULL, -INFINITY, "__orphan_clone_dont_run__", data_set); } break; /* not 
running anywhere else */ } else if(rsc->running_on == NULL) { crm_debug_3("not active yet"); break; /* always unique */ } else if(rsc->globally_unique) { crm_debug_3("unique"); break; /* running somewhere already but we dont care * find another clone instead */ } else { crm_debug_2("find another one"); rsc = NULL; is_duped_clone = TRUE; increment_clone(alt_rsc_id); } } crm_free(alt_rsc_id); if(rsc != NULL) { crm_free(rsc->clone_name); rsc->clone_name = NULL; if(is_duped_clone) { crm_info("Internally renamed %s on %s to %s", rsc_id, node->details->uname, rsc->id); rsc->clone_name = crm_strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(crm_data_t *rsc_entry, node_t *node, pe_working_set_t *data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_log_xml_info(rsc_entry, "Orphan resource"); crm_config_warn("Nothing known about resource %s running on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if(data_set->stop_rsc_orphans == FALSE) { rsc->is_managed = FALSE; } else { crm_info("Making sure orphan %s is stopped", rsc_id); print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t *rsc, node_t *node, enum action_fail_response on_fail, crm_data_t *migrate_op, pe_working_set_t *data_set) { int fail_count = 0; char *fail_attr = NULL; const char *value = NULL; GHashTable *meta_hash = NULL; if(on_fail == action_migrate_failure) { node_t *from = NULL; const char *uuid = NULL; uuid = crm_element_value(migrate_op, CRMD_ACTION_MIGRATED); from = pe_find_node_id(data_set->nodes, uuid); process_rsc_state(rsc, from, action_fail_recover,NULL,data_set); on_fail = action_fail_recover; } crm_debug_2("Resource %s is %s on %s", rsc->id, role2text(rsc->role), node->details->uname); meta_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_meta_attributes(meta_hash, rsc, node, data_set); /* update resource preferences that relate to the current node */ value = g_hash_table_lookup(meta_hash, "resource_stickiness"); if(value != NULL && safe_str_neq("default", value)) { rsc->stickiness = char2score(value); } else { rsc->stickiness = data_set->default_resource_stickiness; } value = g_hash_table_lookup(meta_hash, XML_RSC_ATTR_FAIL_STICKINESS); if(value != NULL && safe_str_neq("default", value)) { rsc->fail_stickiness = char2score(value); } else { rsc->fail_stickiness = data_set->default_resource_fail_stickiness; } /* process failure stickiness */ fail_attr = crm_concat("fail-count", rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, fail_attr); if(value != NULL) { crm_debug("%s: %s", fail_attr, value); fail_count = crm_parse_int(value, "0"); } crm_free(fail_attr); if(fail_count > 0 && rsc->fail_stickiness != 0) { resource_location(rsc, node, fail_count * rsc->fail_stickiness, "fail_stickiness", data_set); crm_debug("Setting failure stickiness for %s on %s: %d", rsc->id, node->details->uname, fail_count * rsc->fail_stickiness); } /* process current state */ if(rsc->role != RSC_ROLE_UNKNOWN) { rsc->known_on = g_list_append(rsc->known_on, node); } if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if(on_fail != action_fail_ignore) { rsc->failed = TRUE; crm_debug_2("Force stop"); } native_add_running(rsc, node, data_set); if(rsc->is_managed && rsc->stickiness != 
            resource_location(rsc, node, rsc->stickiness,
                              "stickiness", data_set);
            crm_debug_2("Resource %s: preferring current location"
                        " (node=%s, weight=%d)", rsc->id,
                        node->details->uname, rsc->stickiness);
        }

        if(on_fail == action_fail_ignore) {
            /* nothing to do */

        } else if(node->details->unclean) {
            stop_action(rsc, node, FALSE);

        } else if(on_fail == action_fail_fence) {
            /* treat it as if it is still running
             * but also mark the node as unclean */
            node->details->unclean = TRUE;
            stop_action(rsc, node, FALSE);

        } else if(on_fail == action_fail_block) {
            /* is_managed == FALSE will prevent any
             * actions being sent for the resource */
            rsc->is_managed = FALSE;

        } else if(on_fail == action_fail_migrate) {
            stop_action(rsc, node, FALSE);

            /* make sure it comes up somewhere else
             * or not at all */
            resource_location(rsc, node, -INFINITY,
                              "__action_migration_auto__", data_set);

        } else {
            stop_action(rsc, node, FALSE);
        }

    } else if(rsc->clone_name) {
        crm_debug_2("Resetting clone_name %s for %s (stopped)",
                    rsc->clone_name, rsc->id);
        crm_free(rsc->clone_name);
        rsc->clone_name = NULL;

    } else {
        char *key = stop_key(rsc);
        GListPtr possible_matches = find_actions(rsc->actions, key, node);
        slist_iter(stop, action_t, possible_matches, lpc,
                   stop->optional = TRUE;
            );
        crm_free(key);
    }

    g_hash_table_destroy(meta_hash);
}

/* create active recurring operations as optional */
static void
process_recurring(node_t *node, resource_t *rsc,
                  int start_index, int stop_index,
                  GListPtr sorted_op_list, pe_working_set_t *data_set)
{
    const char *task = NULL;
    const char *status = NULL;

    crm_debug_3("%s: Start index %d, stop index = %d",
                rsc->id, start_index, stop_index);

    slist_iter(rsc_op, crm_data_t, sorted_op_list, lpc,

               int interval = 0;
               char *key = NULL;
               const char *id = ID(rsc_op);
               const char *interval_s = NULL;

               if(node->details->online == FALSE) {
                   crm_debug_4("Skipping %s/%s: node is offline",
                               rsc->id, node->details->uname);
                   break;

               } else if(start_index < stop_index) {
                   /* stopped after its last start: not active here */
                   crm_debug_4("Skipping %s/%s: not active",
                               rsc->id, node->details->uname);
                   break;

               } else if(lpc <= start_index) {
                   /* the operation predates the last start */
                   crm_debug_4("Skipping %s/%s: old",
                               id, node->details->uname);
                   continue;
               }

               interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL);
               interval = crm_parse_int(interval_s, "0");
               if(interval == 0) {
                   crm_debug_4("Skipping %s/%s: non-recurring",
                               id, node->details->uname);
                   continue;
               }

               status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
               if(safe_str_eq(status, "-1")) {
                   crm_debug_4("Skipping %s/%s: status",
                               id, node->details->uname);
                   continue;
               }

               task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);

               /* create the action */
               key = generate_op_key(rsc->id, task, interval);
               crm_debug_3("Creating %s/%s", key, node->details->uname);
               custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
        );
}

/* Find the positions of the most recent stop and start in the sorted
 * operation history; anything at or before start_index no longer
 * contributes to the resource's current state. */
void
calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
{
    const char *task = NULL;
    const char *status = NULL;

    *stop_index = -1;
    *start_index = -1;

    slist_iter(
        rsc_op, crm_data_t, sorted_op_list, lpc,

        task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
        status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);

        if(safe_str_eq(task, CRMD_ACTION_STOP)
           && safe_str_eq(status, "0")) {
            *stop_index = lpc;

        } else if(safe_str_eq(task, CRMD_ACTION_START)) {
            *start_index = lpc;

        } else if(*start_index <= *stop_index
                  && safe_str_eq(task, CRMD_ACTION_STATUS)) {
            const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
            if(safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
                /* a probe that found the resource running (or running
                 * as master) counts as evidence of a start */
                *start_index = lpc;
            }
        }
        );
}

static void
unpack_lrm_rsc_state(
    node_t *node, crm_data_t *rsc_entry, pe_working_set_t *data_set)
{
    int stop_index = -1;
    int start_index = -1;
    int max_call_id = -1;

    const char *task = NULL;
    const char *value = NULL;
    const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);

    resource_t *rsc = NULL;
    GListPtr op_list = NULL;
    GListPtr sorted_op_list = NULL;

    crm_data_t *migrate_op = NULL;

    enum action_fail_response on_fail = action_fail_ignore;
    enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;

    crm_debug_3("[%s] Processing %s on %s",
                crm_element_name(rsc_entry), rsc_id, node->details->uname);

    /* extract operations */
    op_list = NULL;
    sorted_op_list = NULL;

    xml_child_iter_filter(
        rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP,
        op_list = g_list_append(op_list, rsc_op);
        );

    if(op_list == NULL) {
        /* if there are no operations, there is nothing to do */
        return;
    }

    /* find the resource */
    rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
    if(rsc == NULL) {
        rsc = process_orphan_resource(rsc_entry, node, data_set);
    }
    CRM_ASSERT(rsc != NULL);

    /* process operations */
    max_call_id = -1;
    saved_role = rsc->role;
    on_fail = action_fail_ignore;
    rsc->role = RSC_ROLE_UNKNOWN;
    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);

    slist_iter(
        rsc_op, crm_data_t, sorted_op_list, lpc,

        task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
        if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
            migrate_op = rsc_op;
        }

        unpack_rsc_op(rsc, node, rsc_op, &max_call_id, &on_fail, data_set);
        );

    /* create active recurring operations as optional */
    calculate_active_ops(sorted_op_list, &start_index, &stop_index);
    process_recurring(node, rsc, start_index, stop_index,
                      sorted_op_list, data_set);

    /* no need to free the contents */
    g_list_free(sorted_op_list);

    process_rsc_state(rsc, node, on_fail, migrate_op, data_set);

    value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
    if(value != NULL && safe_str_neq("default", value)) {
        enum rsc_role_e req_role = text2role(value);
        if(req_role != RSC_ROLE_UNKNOWN && req_role != rsc->next_role) {
            if(rsc->next_role != RSC_ROLE_UNKNOWN) {
                crm_debug("%s: Overwriting calculated next role %s"
                          " with requested next role %s",
                          rsc->id, role2text(rsc->next_role),
                          role2text(req_role));
            }
            rsc->next_role = req_role;
        }
    }

    if(saved_role > rsc->role) {
        rsc->role = saved_role;
    }
}

gboolean
unpack_lrm_resources(
    node_t *node, crm_data_t *lrm_rsc_list, pe_working_set_t *data_set)
{
    CRM_CHECK(node != NULL, return FALSE);

    crm_debug_3("Unpacking resources on %s", node->details->uname);

    xml_child_iter_filter(
        lrm_rsc_list, rsc_entry, XML_LRM_TAG_RESOURCE,
        unpack_lrm_rsc_state(node, rsc_entry, data_set);
        );

    return TRUE;
}

gboolean
unpack_rsc_op(resource_t *rsc, node_t *node, crm_data_t *xml_op,
              int *max_call_id, enum action_fail_response *on_fail,
              pe_working_set_t *data_set)
{
    const char *id          = NULL;
    const char *task        = NULL;
    const char *task_id     = NULL;
    const char *actual_rc   = NULL;
/*  const char *target_rc   = NULL; */
    const char *task_status = NULL;
    const char *interval_s  = NULL;
    const char *op_digest   = NULL;

    int interval = 0;
    int task_id_i = -1;
    int task_status_i = -2;
    int actual_rc_i = 0;

    action_t *action = NULL;
    gboolean is_probe = FALSE;
    gboolean is_stop_action = FALSE;

    CRM_CHECK(rsc    != NULL, return FALSE);
    CRM_CHECK(node   != NULL, return FALSE);
    CRM_CHECK(xml_op != NULL, return FALSE);

    id          = ID(xml_op);
    task        = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
    task_id     = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
    task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS);
    op_digest   = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST);

    CRM_CHECK(id != NULL, return FALSE);
    CRM_CHECK(task != NULL, return FALSE);
    CRM_CHECK(task_status != NULL, return FALSE);

    task_status_i = crm_parse_int(task_status, NULL);

    CRM_CHECK(task_status_i <= LRM_OP_ERROR, return FALSE);
    CRM_CHECK(task_status_i >= LRM_OP_PENDING, return FALSE);

    if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
        /* safe to ignore these */
        return TRUE;
    }

    crm_debug_2("Unpacking task %s/%s (call_id=%s, status=%s) on %s (role=%s)",
                id, task, task_id, task_status, node->details->uname,
                role2text(rsc->role));

    interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
    interval = crm_parse_int(interval_s, "0");

    if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
        is_probe = TRUE;
    }

    if(safe_str_eq(task, CRMD_ACTION_STOP)) {
        is_stop_action = TRUE;
    }

    if(task_status_i != LRM_OP_PENDING) {
        task_id_i = crm_parse_int(task_id, "-1");
        CRM_CHECK(task_id != NULL, return FALSE);
        CRM_CHECK(task_id_i >= 0, return FALSE);
        CRM_CHECK(task_id_i > *max_call_id, return FALSE);
    }

    if(*max_call_id < task_id_i) {
        *max_call_id = task_id_i;
    }

    if(node->details->unclean) {
        crm_debug_2("Node %s (where %s is running) is unclean."
                    " Further action depends on the value of %s",
                    node->details->uname, rsc->id, XML_RSC_ATTR_STOPFAIL);
    }

    actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC);
    CRM_CHECK(actual_rc != NULL, return FALSE);
    actual_rc_i = crm_parse_int(actual_rc, NULL);

    if(EXECRA_NOT_INSTALLED == actual_rc_i) {
        resource_location(rsc, node, -INFINITY, "not-installed", data_set);
        if(is_probe) {
            /* treat these like stops */
            is_stop_action = TRUE;
            task_status_i = LRM_OP_DONE;
            actual_rc_i = EXECRA_NOT_RUNNING;
        } else {
            task_status_i = LRM_OP_ERROR;
        }

    } else if(EXECRA_NOT_RUNNING == actual_rc_i) {
        if(is_probe) {
            /* treat these like stops */
            is_stop_action = TRUE;
        }

        if(is_stop_action) {
            task_status_i = LRM_OP_DONE;
        } else {
            task_status_i = LRM_OP_ERROR;
        }

    } else if(EXECRA_RUNNING_MASTER == actual_rc_i) {
        if(is_probe
           || (rsc->role == RSC_ROLE_MASTER
               && safe_str_eq(task, CRMD_ACTION_STATUS))) {
            task_status_i = LRM_OP_DONE;

        } else {
            task_status_i = LRM_OP_ERROR;
            if(rsc->role != RSC_ROLE_MASTER) {
                crm_err("%s reported %s in master mode on %s",
                        id, rsc->id, node->details->uname);
            }
        }
        rsc->role = RSC_ROLE_MASTER;

    } else if(EXECRA_FAILED_MASTER == actual_rc_i) {
        rsc->role = RSC_ROLE_MASTER;
        task_status_i = LRM_OP_ERROR;

    } else if(EXECRA_OK == actual_rc_i
              && interval > 0
              && rsc->role == RSC_ROLE_MASTER) {
        /* catch status ops that return 0 instead of 8 while they
         * are supposed to be in master mode */
        task_status_i = LRM_OP_ERROR;
+
+    } else if(task_status_i == LRM_OP_DONE && EXECRA_OK != actual_rc_i) {
+        crm_err("Remapping %s (rc=%d) on %s to an ERROR",
+                id, actual_rc_i, node->details->uname);
+        task_status_i = LRM_OP_ERROR;
    }

    if(task_status_i == LRM_OP_ERROR
       || task_status_i == LRM_OP_TIMEOUT
       || task_status_i == LRM_OP_NOTSUPPORTED) {
        action = custom_action(rsc, crm_strdup(id), task, NULL,
                               TRUE, FALSE, data_set);
        if(action->on_fail == action_fail_ignore) {
            task_status_i = LRM_OP_DONE;
        }
    }

    switch(task_status_i) {
        case LRM_OP_PENDING:
            if(safe_str_eq(task, CRMD_ACTION_START)) {
                rsc->start_pending = TRUE;
                rsc->role = RSC_ROLE_STARTED;

            } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
                rsc->role = RSC_ROLE_MASTER;
            }
            break;

        case LRM_OP_DONE:
            crm_debug_3("%s/%s completed on %s",
                        rsc->id, task, node->details->uname);

            if(is_stop_action) {
                rsc->role = RSC_ROLE_STOPPED;

                /* clear any previous failure actions */
                *on_fail = action_fail_ignore;
                rsc->next_role = RSC_ROLE_UNKNOWN;

            } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
                rsc->role = RSC_ROLE_MASTER;
            } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
                rsc->role = RSC_ROLE_SLAVE;

            } else if(rsc->role < RSC_ROLE_STARTED) {
                crm_debug_3("%s active on %s",
                            rsc->id, node->details->uname);
                rsc->role = RSC_ROLE_STARTED;
            }
            break;

        case LRM_OP_ERROR:
        case LRM_OP_TIMEOUT:
        case LRM_OP_NOTSUPPORTED:
            crm_warn("Processing failed op (%s) on %s",
                     id, node->details->uname);

            if(*on_fail < action->on_fail) {
                *on_fail = action->on_fail;
            }

            if(task_status_i == LRM_OP_NOTSUPPORTED
               || is_stop_action
               || safe_str_eq(task, CRMD_ACTION_START)) {
                crm_warn("Handling failed %s for %s on %s",
                         task, rsc->id, node->details->uname);
                resource_location(rsc, node, -INFINITY,
                                  "__dont_run__failed_stopstart__",
                                  data_set);
            }

            if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
                rsc->role = RSC_ROLE_MASTER;

            } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
                /* a failed demote leaves the resource in the
                 * master role */
                rsc->role = RSC_ROLE_MASTER;

            } else if(rsc->role < RSC_ROLE_STARTED) {
                rsc->role = RSC_ROLE_STARTED;
            }

            crm_debug_2("Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
                        rsc->id, role2text(rsc->role),
                        node->details->unclean?"true":"false",
                        fail2text(action->on_fail),
                        role2text(action->fail_role));

            if(action->fail_role != RSC_ROLE_STARTED
               && rsc->next_role < action->fail_role) {
                rsc->next_role = action->fail_role;
            }

            if(action->fail_role == RSC_ROLE_STOPPED) {
                crm_err("Making sure %s doesn't come up again", rsc->id);
                /* make sure it doesn't come up again */
                pe_free_shallow_adv(rsc->allowed_nodes, TRUE);
                rsc->allowed_nodes = node_list_dup(
                    data_set->nodes, FALSE, FALSE);
                slist_iter(
                    node, node_t, rsc->allowed_nodes, lpc,
                    node->weight = -INFINITY;
                    );
            }

            pe_free_action(action);
            action = NULL;
            break;

        case LRM_OP_CANCELLED:
            /* do nothing?? */
            pe_err("Dont know what to do for cancelled ops yet");
            break;
    }

    crm_debug_3("Resource %s after %s: role=%s",
                rsc->id, task, role2text(rsc->role));

    pe_free_action(action);

    return TRUE;
}

gboolean
add_node_attrs(crm_data_t *xml_obj, node_t *node, pe_working_set_t *data_set)
{
    g_hash_table_insert(node->details->attrs,
                        crm_strdup("#"XML_ATTR_UNAME),
                        crm_strdup(node->details->uname));
    g_hash_table_insert(node->details->attrs,
                        crm_strdup("#"XML_ATTR_ID),
                        crm_strdup(node->details->id));

    if(safe_str_eq(node->details->id, data_set->dc_uuid)) {
        data_set->dc_node = node;
        node->details->is_dc = TRUE;
        g_hash_table_insert(node->details->attrs,
                            crm_strdup("#"XML_ATTR_DC),
                            crm_strdup(XML_BOOLEAN_TRUE));
    } else {
        g_hash_table_insert(node->details->attrs,
                            crm_strdup("#"XML_ATTR_DC),
                            crm_strdup(XML_BOOLEAN_FALSE));
    }

    unpack_instance_attributes(
        xml_obj, XML_TAG_ATTR_SETS, NULL,
        node->details->attrs, NULL, data_set->now);

    return TRUE;
}
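
/* Sketch of the built-in node attributes populated by add_node_attrs()
 * above, assuming the usual macro values ("#" + uname/id/is_dc) and a
 * node "node1" that is currently the DC:
 *
 *     #uname  -> "node1"
 *     #id     -> the node's UUID
 *     #is_dc  -> "true" (or "false" on every other node)
 *
 * They sit in the same hash table as the user-defined instance
 * attributes, so location rules can match on them like any other
 * node attribute.
 */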