diff --git a/crmd/control.c b/crmd/control.c index cee8a70588..242eaa2c57 100644 --- a/crmd/control.c +++ b/crmd/control.c @@ -1,916 +1,919 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include +#include "../lib/common/stack.h" #include #include #include #include #include #include #include #include #include char *ipc_server = NULL; extern gboolean crm_connect_corosync(void); extern void crmd_ha_connection_destroy(gpointer user_data); void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; GHashTable *ipc_clients = NULL; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean registered = FALSE; if(action & A_HA_DISCONNECT) { if(is_openais_cluster()) { crm_peer_destroy(); + terminate_ais_connection(); crm_info("Disconnected from OpenAIS"); + #if SUPPORT_HEARTBEAT } else if(fsa_cluster_conn != NULL) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE); crm_info("Disconnected from Heartbeat"); #endif } } if(action & A_HA_CONNECT) { crm_set_status_callback(&ais_status_callback); if(is_openais_cluster()) { #if SUPPORT_COROSYNC registered = crm_connect_corosync(); #endif } else if(is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT registered = crm_cluster_connect( &fsa_our_uname, &fsa_our_uuid, crmd_ha_msg_callback, crmd_ha_connection_destroy, &fsa_cluster_conn); #endif } #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { crm_debug_3("Be informed of Node Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_nstatus_callback( fsa_cluster_conn, crmd_ha_status_callback, fsa_cluster_conn) != HA_OK){ crm_err("Cannot set nstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } crm_debug_3("Be informed of CRM Client Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_cstatus_callback( fsa_cluster_conn, crmd_client_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set cstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } if(registered) { crm_debug_3("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status( fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } } #endif if(registered == FALSE) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } clear_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if(action & ~(A_HA_CONNECT|A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", 
fsa_action2string(action), __FUNCTION__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { /* just in case */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); if(is_heartbeat_cluster()) { if(is_set(fsa_input_register, pe_subsystem->flag_connected)) { crm_info("Terminating the %s", pe_subsystem->name); if(stop_subsystem(pe_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", pe_subsystem->name); clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(NULL); } } crm_info("All subsystems stopped, continuing"); } if(stonith_api) { /* Prevent it from coming up again */ clear_bit_inplace(fsa_input_register, R_ST_REQUIRED); crm_info("Disconnecting STONITH..."); stonith_api->cmds->disconnect(stonith_api); } } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { xmlNode *msg = NULL; crm_info("Sending shutdown request to DC: %s", crm_str(fsa_our_dc)); msg = create_request( CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); /* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ if(send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resources; extern GHashTable *voted; extern GHashTable *reload_hash; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *client = value; crm_err("%s is still connected at exit", client->table_key); } static void free_mem(fsa_data_t *msg_data) { g_main_loop_quit(crmd_mainloop); g_main_loop_unref(crmd_mainloop); #if SUPPORT_HEARTBEAT if(fsa_cluster_conn) { fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } #endif slist_destroy(fsa_data_t, fsa_data, fsa_message_queue, crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); ); delete_fsa_input(msg_data); if(ipc_clients) { crm_debug("Number of connected clients: %d", g_hash_table_size(ipc_clients)); /* g_hash_table_foreach(ipc_clients, log_connected_client, NULL); */ g_hash_table_destroy(ipc_clients); } empty_uuid_cache(); crm_peer_destroy(); clear_bit_inplace(fsa_input_register, R_CCM_DATA); if(te_subsystem->client && te_subsystem->client->client_source) { crm_debug("Full destroy: TE"); G_main_del_IPC_Channel(te_subsystem->client->client_source); } else { crm_debug("Partial destroy: TE"); crmd_ipc_connection_destroy(te_subsystem->client); } crm_free(te_subsystem); if(pe_subsystem->client && pe_subsystem->client->client_source) { crm_debug("Full destroy: PE"); G_main_del_IPC_Channel(pe_subsystem->client->client_source); } else { crm_debug("Partial destroy: PE"); crmd_ipc_connection_destroy(pe_subsystem->client); } crm_free(pe_subsystem); crm_free(cib_subsystem); if(integrated_nodes) { g_hash_table_destroy(integrated_nodes); } if(finalized_nodes) { g_hash_table_destroy(finalized_nodes); } if(confirmed_nodes) { g_hash_table_destroy(confirmed_nodes); } if(reload_hash) {
g_hash_table_destroy(reload_hash); } if(resources) { g_hash_table_destroy(resources); } if(voted) { g_hash_table_destroy(voted); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->delete(fsa_lrm_conn); } crm_free(integration_timer); crm_free(finalization_timer); crm_free(election_trigger); crm_free(election_timeout); crm_free(shutdown_escalation_timer); crm_free(wait_timer); crm_free(recheck_timer); crm_free(fsa_our_dc_version); crm_free(fsa_our_uname); crm_free(fsa_our_uuid); crm_free(fsa_our_dc); crm_free(ipc_server); crm_free(max_generation_from); free_xml(max_generation_xml); crm_xml_cleanup(); } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int exit_code = 0; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if(action & A_EXIT_1) { exit_code = 1; log_level = LOG_ERR; exit_type = "forcefully"; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); if(is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = 2; } if(is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn by Heartbeat"); exit_code = 100; } free_mem(msg_data); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); cl_flush_logs(); exit(exit_code); } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); ipc_clients = g_hash_table_new(g_str_hash, g_str_equal); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM); /* set up the timers */ crm_malloc0(integration_timer, sizeof(fsa_timer_t)); crm_malloc0(finalization_timer, sizeof(fsa_timer_t)); crm_malloc0(election_trigger, sizeof(fsa_timer_t)); crm_malloc0(election_timeout, sizeof(fsa_timer_t)); crm_malloc0(shutdown_escalation_timer, sizeof(fsa_timer_t)); crm_malloc0(wait_timer, sizeof(fsa_timer_t)); crm_malloc0(recheck_timer, sizeof(fsa_timer_t)); interval = interval * 1000; if(election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if(election_timeout != NULL) { election_timeout->source_id = 0; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if(integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if(finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible 
enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if(shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if(wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if(recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ crm_malloc0(cib_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(te_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(pe_subsystem, sizeof(struct crm_subsystem_s)); if(cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->path = CRM_DAEMON_DIR; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->command = CRM_DAEMON_DIR"/"CRM_SYSTEM_CIB; cib_subsystem->args = "-VVc"; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if(te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->path = CRM_DAEMON_DIR; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->command = CRM_DAEMON_DIR"/"CRM_SYSTEM_TENGINE; te_subsystem->args = NULL; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if(pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = CRM_DAEMON_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = CRM_DAEMON_DIR"/"CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if(was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(cur_state != S_STARTING) { crm_err("Start cancelled... 
%s", fsa_state2string(cur_state)); return; } else if(is_set(fsa_input_register, R_CCM_DATA) == FALSE) { crm_info("Delaying start, no membership data (%.16llx)", R_CCM_DATA); crmd_fsa_stall(NULL); return; } else if(is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(NULL); return; } else if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(NULL); return; } else if(is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(NULL); return; } else if(is_set(fsa_input_register, R_PEER_DATA) == FALSE) { HA_Message *msg = NULL; /* try reading from HA */ crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); crm_debug_3("Looking for a HA message"); #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); } #endif if(msg != NULL) { crm_debug_3("There was a HA message"); crm_msg_del(msg); } crmd_fsa_stall(NULL); return; } crm_debug("Init server comms"); if(ipc_server == NULL) { ipc_server = crm_strdup(CRM_SYSTEM_CRMD); } if(init_server_ipc_comms(ipc_server, crmd_client_connect, default_ipc_connection_destroy)) { crm_err("Couldn't start IPC server"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } if(stonith_reconnect == NULL) { int dummy; stonith_reconnect = mainloop_add_trigger( G_PRIORITY_LOW, te_connect_stonith, &dummy); } set_bit_inplace(fsa_input_register, R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); crm_info("The local CRM is operational"); clear_bit_inplace(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { set_bit_inplace(fsa_input_register, R_IN_RECOVERY); crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } pe_cluster_option crmd_opts[] = { /* name, old-name, validate, default, description */ { "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact Mercurial changeset it was built from. Used for diagnostic purposes." }, { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." }, { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "60s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "15min", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time." " To ensure these changes take effect, we can optionally poll the cluster's status for changes." 
}, { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." }, }; void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable *options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable *options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; ha_time_t *now = new_ha_date(TRUE); if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", cib_error2string(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if(rc == cib_bad_permissions || rc == cib_dtd_validation || rc == cib_bad_digest || rc == cib_bad_config) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit_inplace(fsa_input_register, R_STAYDOWN); } goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); unpack_instance_attributes( output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now); verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); crm_info("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); crm_info("Checking for expired actions every %dms", recheck_timer->period_ms); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); #if SUPPORT_COROSYNC if(is_classic_ais_cluster()) { value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES); crm_info("Sending expected-votes=%s to corosync", value); send_ais_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais); } #endif set_bit_inplace(fsa_input_register, R_READ_CONFIG); crm_debug_3("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: free_ha_date(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, config_query_callback); crm_debug_2("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { crm_info("Requesting shutdown"); set_bit_inplace(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN,I_SHUTDOWN,NULL); if(shutdown_escalation_timer->period_ms < 1) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_info("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; } /* cant rely on this... */ crm_notice("Forcing shutdown in: %dms", shutdown_escalation_timer->period_ms); crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); exit(LSB_EXIT_OK); } } static void default_cib_update_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("CIB Update failed: %s", cib_error2string(rc)); crm_log_xml_warn(output, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } #if SUPPORT_HEARTBEAT static void populate_cib_nodes_ha(gboolean with_client_status) { int call_id = 0; const char *ha_node = NULL; xmlNode *cib_node_list = NULL; if(fsa_cluster_conn == NULL) { crm_debug("Not connected"); return; } /* Async get client status information in the cluster */ crm_info("Requesting the list of configured nodes"); fsa_cluster_conn->llc_ops->init_nodewalk(fsa_cluster_conn); cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); do { const char *ha_node_type = NULL; const char *ha_node_uuid = NULL; xmlNode *cib_new_node = NULL; ha_node = fsa_cluster_conn->llc_ops->nextnode(fsa_cluster_conn); if(ha_node == NULL) { continue; } ha_node_type = fsa_cluster_conn->llc_ops->node_type( fsa_cluster_conn, ha_node); if(safe_str_neq(NORMALNODE, ha_node_type)) { crm_debug("Node %s: skipping '%s'", ha_node, ha_node_type); continue; } ha_node_uuid = get_uuid(ha_node); if(ha_node_uuid == NULL) { crm_warn("Node %s: no uuid found", ha_node); continue; } crm_debug("Node: %s (uuid: %s)", ha_node, ha_node_uuid); cib_new_node = create_xml_node(cib_node_list, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, ha_node_uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, ha_node); crm_xml_add(cib_new_node, XML_ATTR_TYPE, ha_node_type); } while(ha_node != NULL); fsa_cluster_conn->llc_ops->end_nodewalk(fsa_cluster_conn); /* Now update the CIB with the list of nodes */ fsa_cib_update( XML_CIB_TAG_NODES, cib_node_list, cib_scope_local|cib_quorum_override, call_id, NULL); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_debug_2("Complete"); } #endif static void create_cib_node_definition( gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; xmlNode *cib_nodes = user_data; xmlNode 
*cib_new_node = NULL; cib_new_node = create_xml_node(cib_nodes, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, node->uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, node->uname); crm_xml_add(cib_new_node, XML_ATTR_TYPE, NORMALNODE); } void populate_cib_nodes(gboolean with_client_status) { int call_id = 0; xmlNode *cib_node_list = NULL; #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { populate_cib_nodes_ha(with_client_status); return; } #endif cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); g_hash_table_foreach( crm_peer_cache, create_cib_node_definition, cib_node_list); fsa_cib_update( XML_CIB_TAG_NODES, cib_node_list, cib_scope_local|cib_quorum_override, call_id, NULL); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_debug_2("Complete"); } diff --git a/crmd/te_utils.c b/crmd/te_utils.c index 06cbc3f0d3..fb78a5376b 100644 --- a/crmd/te_utils.c +++ b/crmd/te_utils.c @@ -1,393 +1,390 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include GCHSource *stonith_src = NULL; crm_trigger_t *stonith_reconnect = NULL; static gboolean fail_incompletable_stonith(crm_graph_t *graph) { GListPtr lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if(graph == NULL) { return FALSE; } for(lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GListPtr lpc2 = NULL; synapse_t *synapse = (synapse_t*)lpc->data; if (synapse->confirmed) { continue; } for(lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { crm_action_t *action = (crm_action_t*)lpc2->data; if(action->type != action_type_crm || action->confirmed) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if(task && safe_str_eq(task, CRM_OP_FENCE)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Failing action %d (%s): STONITHd terminated", action->id, ID(action->xml)); } } } if(last_action != NULL) { crm_warn("STONITHd failure resulted in un-runnable actions"); abort_transition(INFINITY, tg_restart, "Stonith failure", last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t *st, const char *event, xmlNode *msg) { if(is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencing daemon disconnected"); } /* cbchan will be garbage at this point, arrange for it to be reset */ stonith_api->state = stonith_disconnected; if(AM_I_DC) { fail_incompletable_stonith(transition_graph); trigger_graph(); } } /* */ #ifdef SUPPORT_CMAN # include # include "../lib/common/stack.h" #endif static void tengine_stonith_notify(stonith_t *st, const char *event, xmlNode *msg) { int rc = -99; const char 
*origin = NULL; const char *target = NULL; const char *executioner = NULL; xmlNode *action = get_xpath_object("//st-data", msg, LOG_ERR); if(action == NULL) { crm_log_xml(LOG_ERR, "Notify data not found", msg); return; } crm_log_xml(LOG_DEBUG, "stonith_notify", msg); crm_element_value_int(msg, F_STONITH_RC, &rc); origin = crm_element_value(action, F_STONITH_ORIGIN); target = crm_element_value(action, F_STONITH_TARGET); executioner = crm_element_value(action, F_STONITH_DELEGATE); - - if(rc == stonith_ok) { + + if(rc == stonith_ok && crm_str_eq(target, fsa_our_uname, TRUE)) { + crm_err("We were allegedly just fenced by %s for %s!", executioner, origin); + register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); + + } else if(rc == stonith_ok) { crm_info("Peer %s was terminated (%s) by %s for %s (ref=%s): %s", target, crm_element_value(action, F_STONITH_OPERATION), executioner, origin, crm_element_value(action, F_STONITH_REMOTE), stonith_error2string(rc)); } else { crm_err("Peer %s could not be terminated (%s) by %s for %s (ref=%s): %s", target, crm_element_value(action, F_STONITH_OPERATION), executioner?executioner:"", origin, crm_element_value(action, F_STONITH_REMOTE), stonith_error2string(rc)); } #ifdef SUPPORT_CMAN if(rc == stonith_ok && is_cman_cluster()) { int rc = 0; char *target_copy = crm_strdup(target); crm_info("Notifying CMAN that '%s' is now fenced", target); - rc = fenced_join(); - if(rc != 0) { - crm_notice("Could not connect to fenced: rc=%d", rc); - - } else { - rc = fenced_external(target_copy); - if(rc != 0) { - crm_err("Could not notify fenced: rc=%d", rc); - } - fenced_leave(); - } - crm_free(target_copy); + rc = fenced_external(target_copy); + if(rc != 0) { + crm_err("Could not notify fenced that '%s' is down: rc=%d", target, rc); + } + crm_free(target_copy); } #endif if(rc == stonith_ok && safe_str_eq(target, origin)) { if(fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, target)) { const char *uuid = get_uuid(target); crm_notice("Target was our leader %s/%s (recorded leader: %s)", target, uuid, fsa_our_dc?fsa_our_dc:""); /* There's no need for everyone to update the cib. * Have the node that performed the op do the update too. * In the unlikely event that both die, the DC would be * shot a second time which is not ideal but safe. */ if(safe_str_eq(executioner, fsa_our_uname)) { send_stonith_update(NULL, target, uuid); } } } } gboolean te_connect_stonith(gpointer user_data) { int lpc = 0; int rc = stonith_ok; if(stonith_api == NULL) { stonith_api = stonith_api_new(); } if(stonith_api->state != stonith_disconnected) { crm_debug_2("Still connected"); return TRUE; } for(lpc = 0; lpc < 30; lpc++) { crm_info("Attempting connection to fencing daemon..."); sleep(1); rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL, NULL); if(rc == stonith_ok) { break; } if(user_data != NULL) { crm_err("Sign-in failed: triggered a retry"); mainloop_set_trigger(stonith_reconnect); return TRUE; } crm_err("Sign-in failed: pausing and trying again in 2s..."); sleep(1); } CRM_CHECK(rc == stonith_ok, return TRUE); /* If not, we failed 30 times...
just get out */ stonith_api->cmds->register_notification( stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); stonith_api->cmds->register_notification( stonith_api, STONITH_OP_FENCE, tengine_stonith_notify); crm_info("Connected"); return TRUE; } gboolean stop_te_timer(crm_action_timer_t *timer) { const char *timer_desc = "action timer"; if(timer == NULL) { return FALSE; } if(timer->reason == timeout_abort) { timer_desc = "global timer"; crm_debug_2("Stopping %s", timer_desc); } if(timer->source_id != 0) { crm_debug_2("Stopping %s", timer_desc); g_source_remove(timer->source_id); timer->source_id = 0; } else { crm_debug_2("%s was already stopped", timer_desc); return FALSE; } return TRUE; } gboolean te_graph_trigger(gpointer user_data) { enum transition_status graph_rc = -1; if(transition_graph == NULL) { crm_debug("Nothing to do"); return TRUE; } crm_debug_2("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state)); switch(fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: return TRUE; break; default: break; } if(transition_graph->complete == FALSE) { graph_rc = run_graph(transition_graph); print_graph(LOG_DEBUG_3, transition_graph); if(graph_rc == transition_active) { crm_debug_3("Transition not yet complete"); return TRUE; } else if(graph_rc == transition_pending) { crm_debug_3("Transition not yet complete - no actions fired"); return TRUE; } if(graph_rc != transition_complete) { crm_err("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_WARNING, transition_graph); } } crm_info("Transition %d is now complete", transition_graph->id); transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } void trigger_graph_processing(const char *fn, int line) { mainloop_set_trigger(transition_trigger); crm_debug_2("%s:%d - Triggered graph processing", fn, line); } void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode *reason, const char *fn, int line) { int log_level = LOG_INFO; const char *magic = NULL; CRM_CHECK(transition_graph != NULL, return); if(reason) { int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; xmlNode *diff = get_xpath_object("//"F_CIB_UPDATE_RESULT"//diff", reason, LOG_DEBUG_2); magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); if(diff) { cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); do_crm_log(log_level, "%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s", fn, line, transition_graph->complete, TYPE(reason), ID(reason), magic?magic:"NA", diff_add_admin_epoch,diff_add_epoch,diff_add_updates, abort_text); } else { do_crm_log(log_level, "%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s) : %s", fn, line, transition_graph->complete, TYPE(reason), ID(reason), magic?magic:"NA", abort_text); } } else { do_crm_log(log_level, "%s:%d - Triggered transition abort (complete=%d) : %s", fn, line, transition_graph->complete, abort_text); } switch(fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: do_crm_log(log_level, "Abort suppressed: state=%s (complete=%d)", fsa_state2string(fsa_state), 
transition_graph->complete); return; default: break; } if(magic == NULL && reason != NULL) { crm_log_xml(log_level+1, "Cause", reason); } /* Make sure any queued calculations are discarded ASAP */ crm_free(fsa_pe_ref); fsa_pe_ref = NULL; if(transition_graph->complete) { register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); return; } update_abort_priority( transition_graph, abort_priority, abort_action, abort_text); mainloop_set_trigger(transition_trigger); } diff --git a/cts/CM_ais.py b/cts/CM_ais.py index ead4555753..b22c5b91a5 100644 --- a/cts/CM_ais.py +++ b/cts/CM_ais.py @@ -1,353 +1,351 @@ '''CTS: Cluster Testing System: AIS dependent modules... ''' __copyright__=''' Copyright (C) 2007 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import os, sys, warnings from cts.CTSvars import * from cts.CM_lha import crm_lha from cts.CTS import Process ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class crm_ais(crm_lha): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_lha.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-ais", - "UUIDQueryCmd" : "crmadmin -N --openais", "EpocheCmd" : "crm_node -e --openais", "QuorumCmd" : "crm_node -q --openais", "ParitionCmd" : "crm_node -p --openais", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:ChildExit" : "Child process .* exited", # Bad news Regexes. Should never occur. 
"BadRegexes" : ( r"ERROR:", r"CRIT:", r"TRACE:", r"Shutting down\.", r"Forcing shutdown\.", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", r"Child process .* terminated with signal 11", r"Executing .* fencing operation", # Not inherently bad, but worth tracking r"No need to invoke the TE", r"ping.*: DEBUG: Updated connected = 0", r"Digest mis-match:", ), }) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "crm_mon:", "crmadmin:", "update_trace_data", "async_notify: strange, client not found", "ERROR: Message hist queue is filling up" ] return [] def NodeUUID(self, node): return node def ais_components(self): fullcomplist = {} self.complist = [] self.common_ignore = [ "Pending action:", "ERROR: crm_log_message_adv:", "ERROR: MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "apply_xml_diff: Diff application failed!", "crmd: .*Action A_RECOVER .* not supported", "pingd: .*ERROR: send_update: Could not send update", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", "crmd:.*do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped: Resource .* was active at shutdown. 
You may ignore this error if it is unmanaged.", "ERROR: attrd_connection_destroy: Lost connection to attrd", "nfo: te_fence_node: Executing .* fencing operation", ] fullcomplist["cib"] = Process(self, "cib", pats = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "Respawning .* attrd", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Child process crmd exited .* rc=2", "Child process attrd exited .* rc=1", "crmd: .*Input I_TERMINATE from do_recover", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "crmd:.*do_exit: Could not recover from internal error", ], badnews_ignore = self.common_ignore) fullcomplist["lrmd"] = Process(self, "lrmd", pats = [ "State transition .* S_RECOVERY", "LRM Connection failed", "Respawning .* crmd", "crmd: .*I_ERROR.*lrm_connection_destroy", "Child process crmd exited .* rc=2", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", ], badnews_ignore = self.common_ignore) fullcomplist["crmd"] = Process(self, "crmd", pats = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # Only if the node wasn't the DC: "State transition S_IDLE", "State transition .* -> S_IDLE", ], badnews_ignore = self.common_ignore) fullcomplist["attrd"] = Process(self, "attrd", pats = [ "crmd: .*ERROR: attrd_connection_destroy: Lost connection to attrd" ], badnews_ignore = self.common_ignore) fullcomplist["pengine"] = Process(self, "pengine", dc_pats = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "Child process crmd exited .* rc=2", "crmd: .*pe_connection_destroy: Connection to the Policy Engine failed", "crmd: .*I_ERROR.*save_cib_contents", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", ], badnews_ignore = self.common_ignore) stonith_ignore = [ "update_failcount: Updating failcount for child_DoFencing", "ERROR: te_connect_stonith: Sign-in failed: triggered a retry", ] stonith_ignore.extend(self.common_ignore) fullcomplist["stonith-ng"] = Process(self, "stonith-ng", process="stonithd", pats = [ "CRIT: stonith_dispatch: Lost connection to the STONITH service", "tengine_stonith_connection_destroy: Fencing daemon connection failed", "Attempting connection to fencing daemon", "te_connect_stonith: Connected", ], badnews_ignore = stonith_ignore) vgrind = self.Env["valgrind-procs"].split() for key in fullcomplist.keys(): if self.Env["valgrind-tests"]: if key in vgrind: # Processes running under valgrind can't be shot with "killall -9 processname" self.log("Filtering %s from the component list as it is being profiled by valgrind" % key) continue if key == "stonith-ng" and not self.Env["DoFencing"]: continue self.complist.append(fullcomplist[key]) #self.complist = [ fullcomplist["pengine"] ] return self.complist class crm_whitetank(crm_ais): ''' The crm version 3 cluster manager class. 
It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_ais.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-whitetank", "StartCmd" : CTSvars.INITDIR+"/openais start", "StopCmd" : CTSvars.INITDIR+"/openais stop", "Pat:We_stopped" : "%s.*openais.*pcmk_shutdown: Shutdown complete", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:They_dead" : "openais:.*Node %s is now: lost", "Pat:ChildKilled" : "%s openais.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s openais.*Respawning failed child process: %s", "Pat:ChildExit" : "Child process .* exited", }) def Components(self): self.ais_components() aisexec_ignore = [ "ERROR: ais_dispatch: Receiving message .* failed", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "cib: .*ERROR: cib_ais_destroy: AIS connection terminated", #"crmd: .*ERROR: crm_ais_destroy: AIS connection terminated", "crmd:.*do_exit: Could not recover from internal error", "crmd: .*I_TERMINATE.*do_recover", "attrd: .*CRIT: attrd_ais_destroy: Lost connection to OpenAIS service!", "stonithd: .*ERROR: AIS connection terminated", ] aisexec_ignore.extend(self.common_ignore) self.complist.append(Process(self, "aisexec", pats = [ "ERROR: ais_dispatch: AIS connection failed", "crmd: .*ERROR: do_exit: Could not recover from internal error", "pengine: .*Scheduling Node .* for STONITH", "stonithd: .*requests a STONITH operation RESET on node", "stonithd: .*Succeeded to STONITH the node", ], badnews_ignore = aisexec_ignore)) class crm_flatiron(crm_ais): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_ais.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-flatiron", "StartCmd" : "service corosync start", "StopCmd" : "service corosync stop", # The next pattern is too early # "Pat:We_stopped" : "%s.*Service engine unloaded: Pacemaker Cluster Manager", # The next pattern would be preferred, but it doesn't always come out # "Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting with status", "Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:They_dead" : "corosync:.*Node %s is now: lost", "Pat:ChildKilled" : "%s corosync.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s corosync.*Respawning failed child process: %s", }) def Components(self): self.ais_components() corosync_ignore = [ "ERROR: ais_dispatch: Receiving message .* failed", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "cib: .*ERROR: cib_ais_destroy: AIS connection terminated", #"crmd: .*ERROR: crm_ais_destroy: AIS connection terminated", "crmd:.*do_exit: Could not recover from internal error", "crmd: .*I_TERMINATE.*do_recover", "attrd: .*CRIT: attrd_ais_destroy: Lost connection to Corosync service!", "stonithd: .*ERROR: AIS connection terminated", ] # corosync_ignore.extend(self.common_ignore) # self.complist.append(Process(self, "corosync", pats = [ # "ERROR: ais_dispatch: AIS connection failed", # "crmd: .*ERROR: do_exit: Could not recover from internal error", # "pengine: .*Scheduling Node .* for STONITH", # "stonithd: .*requests a STONITH operation RESET on node", # "stonithd: .*Succeeded to STONITH the node", # ], badnews_ignore = corosync_ignore)) return self.complist class 
crm_mcp(crm_flatiron): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_flatiron.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-mcp", "StartCmd" : "service corosync start; service pacemaker start", "StopCmd" : "service pacemaker stop; service corosync stop", "Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:They_dead" : "crmd:.*Node %s: .* state=lost .new", "Pat:ChildKilled" : "%s pacemakerd.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s pacemakerd.*Respawning failed child process: %s", }) class crm_cman(crm_flatiron): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_flatiron.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-cman", - "StartCmd" : "service corosync start; service pacemaker start", - "StopCmd" : "service pacemaker stop; cman_tool leave", + "StartCmd" : "service cman start; service pacemaker start", + "StopCmd" : "service pacemaker stop; service cman stop;", - "UUIDQueryCmd" : "crmadmin -N --cman", "EpocheCmd" : "crm_node -e --cman", "QuorumCmd" : "crm_node -q --cman", "ParitionCmd" : "crm_node -p --cman", "Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:They_dead" : "crmd:.*Node %s: .* state=lost .new", "Pat:ChildKilled" : "%s pacemakerd.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s pacemakerd.*Respawning failed child process: %s", }) diff --git a/cts/CTSlab.py b/cts/CTSlab.py index b8f71ff972..5493bf9bc8 100755 --- a/cts/CTSlab.py +++ b/cts/CTSlab.py @@ -1,461 +1,468 @@ #!/usr/bin/python '''CTS: Cluster Testing System: Lab environment module ''' __copyright__=''' Copyright (C) 2001,2005 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
from UserDict import UserDict import sys, types, string, string, signal, os, socket pdir=os.path.dirname(sys.path[0]) sys.path.insert(0, pdir) # So that things work from the source directory try: from cts.CTSvars import * from cts.CM_ais import * from cts.CM_lha import crm_lha from cts.CTSaudits import AuditList from cts.CTStests import TestList from cts.CTSscenarios import * except ImportError: sys.stderr.write("abort: couldn't find cts libraries in [%s]\n" % ' '.join(sys.path)) sys.stderr.write("(check your install and PYTHONPATH)\n") sys.exit(-1) cm = None Tests = [] Chosen = [] scenario = None # Not really used, the handler in def sig_handler(signum, frame) : if cm: cm.log("Interrupted by signal %d"%signum) if scenario: scenario.summarize() if signum == 15 : if scenario: scenario.TearDown() sys.exit(1) class LabEnvironment(CtsLab): def __init__(self): CtsLab.__init__(self) # Get a random seed for the random number generator. - self["DoStonith"] = 1 self["DoStandby"] = 1 self["DoFencing"] = 1 self["XmitLoss"] = "0.0" self["RecvLoss"] = "0.0" self["IPBase"] = "127.0.0.10" self["ConnectivityHost"] = socket.gethostname() self["ClobberCIB"] = 0 self["CIBfilename"] = None self["CIBResource"] = 0 self["DoBSC"] = 0 self["use_logd"] = 0 self["oprofile"] = [] self["warn-inactive"] = 0 self["ListTests"] = 0 self["benchmark"] = 0 self["Schema"] = "pacemaker-1.0" self["Stack"] = "openais" self["stonith-type"] = "external/ssh" self["stonith-params"] = "hostlist=all,livedangerously=yes" self["at-boot"] = 1 # Does the cluster software start automatically when the node boots self["logger"] = ([StdErrLog(self)]) self["loop-minutes"] = 60 self["valgrind-prefix"] = None self["valgrind-procs"] = "cib crmd attrd pengine stonith-ng" self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp""" #self["valgrind-opts"] = """--trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp""" self["experimental-tests"] = 0 self["valgrind-tests"] = 0 self["unsafe-tests"] = 1 self["loop-tests"] = 1 self["scenario"] = "random" def usage(arg, status=1): print "Illegal argument " + arg print "usage: " + sys.argv[0] +" [options] number-of-iterations" print "\nCommon options: " print "\t [--at-boot (1|0)], does the cluster software start at boot time" print "\t [--nodes 'node list'], list of cluster nodes separated by whitespace" print "\t [--limit-nodes max], only use the first 'max' cluster nodes supplied with --nodes" print "\t [--stack (heartbeat|ais)], which cluster stack is installed" print "\t [--logfile path], where should the test software look for logs from cluster nodes" print "\t [--outputfile path], optional location for the test software to write logs to" print "\t [--syslog-facility name], which syslog facility should the test software log to" print "\t [--choose testcase-name], run only the named test" print "\t [--list-tests], list the valid tests" print "\t [--benchmark], add the timing information" print "\t " print "Options for release testing: " print "\t [--clobber-cib | -c ] Erase any existing configuration" print "\t [--populate-resources | -r] Generate a sample configuration" print "\t [--test-ip-base ip] Offset for generated IP address resources" print "\t " print "Additional (less common) options: " print "\t [--trunc (truncate logfile before starting)]" print "\t [--xmit-loss lost-rate(0.0-1.0)]" print "\t [--recv-loss 
lost-rate(0.0-1.0)]" print "\t [--standby (1 | 0 | yes | no)]" print "\t [--fencing (1 | 0 | yes | no)]" print "\t [--stonith (1 | 0 | yes | no)]" print "\t [--stonith-type type]" print "\t [--stonith-args name=value]" print "\t [--bsc]" print "\t [--once], run all valid tests once" print "\t [--no-loop-tests], don't run looping/time-based tests" print "\t [--no-unsafe-tests], don't run tests that are unsafe for use with ocfs2/drbd" print "\t [--valgrind-tests], include tests using valgrind" print "\t [--experimental-tests], include experimental tests" print "\t [--oprofile 'node list'], list of cluster nodes to run oprofile on]" print "\t [--qarsh] Use the QARSH backdoor to access nodes instead of SSH" print "\t [--seed random_seed]" print "\t [--set option=value]" sys.exit(status) # # A little test code... # if __name__ == '__main__': Environment = LabEnvironment() NumIter = 0 Version = 1 LimitNodes = 0 TruncateLog = 0 ListTests = 0 HaveSeed = 0 node_list = '' # Set the signal handler signal.signal(15, sig_handler) signal.signal(10, sig_handler) # Process arguments... skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-l" or args[i] == "--limit-nodes": skipthis=1 LimitNodes = int(args[i+1]) elif args[i] == "-r" or args[i] == "--populate-resources": Environment["CIBResource"] = 1 elif args[i] == "-L" or args[i] == "--logfile": skipthis=1 Environment["LogFileName"] = args[i+1] elif args[i] == "--outputfile": skipthis=1 Environment["OutputFile"] = args[i+1] elif args[i] == "--test-ip-base": skipthis=1 Environment["IPBase"] = args[i+1] elif args[i] == "--oprofile": skipthis=1 Environment["oprofile"] = args[i+1].split(' ') elif args[i] == "--trunc": Environment["TruncateLog"]=1 elif args[i] == "--list-tests" or args[i] == "--list" : Environment["ListTests"]=1 elif args[i] == "--benchmark": Environment["benchmark"]=1 elif args[i] == "--bsc": Environment["DoBSC"] = 1 Environment["scenario"] = "basic-sanity" elif args[i] == "--qarsh": Environment.rsh.enable_qarsh() elif args[i] == "--fencing": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["DoFencing"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["DoFencing"] = 0 else: usage(args[i+1]) elif args[i] == "--stonith": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": - Environment["DoStonith"]=1 + Environment["DoFencing"]=1 elif args[i+1] == "0" or args[i+1] == "no": - Environment["DoStonith"]=0 + Environment["DoFencing"]=0 + elif args[i+1] == "rhcs": + Environment["DoFencing"]=1 + Environment["stonith-type"] = "fence_xvm" + Environment["stonith-params"] = "pcmk_arg_map=domain:uname" + elif args[i+1] == "lha": + Environment["DoFencing"]=1 + Environment["stonith-type"] = "external/ssh" + Environment["stonith-params"] = "hostlist=all,livedangerously=yes" else: usage(args[i+1]) elif args[i] == "--stonith-type": Environment["stonith-type"] = args[i+1] skipthis=1 elif args[i] == "--stonith-args": Environment["stonith-params"] = args[i+1] skipthis=1 elif args[i] == "--standby": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["DoStandby"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["DoStandby"] = 0 else: usage(args[i+1]) elif args[i] == "--clobber-cib" or args[i] == "-c": Environment["ClobberCIB"] = 1 elif args[i] == "--cib-filename": skipthis=1 Environment["CIBfilename"] = args[i+1] elif args[i] == "--xmit-loss": try: float(args[i+1]) except ValueError: print ("--xmit-loss parameter should be float") usage(args[i+1])
skipthis=1 Environment["XmitLoss"] = args[i+1] elif args[i] == "--recv-loss": try: float(args[i+1]) except ValueError: print ("--recv-loss parameter should be float") usage(args[i+1]) skipthis=1 Environment["RecvLoss"] = args[i+1] elif args[i] == "--choose": skipthis=1 Chosen.append(args[i+1]) Environment["scenario"] = "sequence" elif args[i] == "--nodes": skipthis=1 node_list = args[i+1].split(' ') elif args[i] == "--syslog-facility" or args[i] == "--facility": skipthis=1 Environment["SyslogFacility"] = args[i+1] elif args[i] == "--seed": skipthis=1 Environment.SeedRandom(args[i+1]) elif args[i] == "--warn-inactive": Environment["warn-inactive"] = 1 elif args[i] == "--schema": skipthis=1 Environment["Schema"] = args[i+1] elif args[i] == "--ais": Environment["Stack"] = "openais" elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["at-boot"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["at-boot"] = 0 else: usage(args[i+1]) elif args[i] == "--heartbeat" or args[i] == "--lha": Environment["Stack"] = "heartbeat" elif args[i] == "--hae": Environment["Stack"] = "openais" Environment["Schema"] = "hae" elif args[i] == "--stack": Environment["Stack"] = args[i+1] skipthis=1 elif args[i] == "--once": Environment["scenario"] = "all-once" elif args[i] == "--valgrind-tests": Environment["valgrind-tests"] = 1 elif args[i] == "--no-loop-tests": Environment["loop-tests"] = 0 elif args[i] == "--loop-minutes": skipthis=1 try: Environment["loop-minutes"]=int(args[i+1]) except ValueError: usage(args[i]) elif args[i] == "--no-unsafe-tests": Environment["unsafe-tests"] = 0 elif args[i] == "--experimental-tests": Environment["experimental-tests"] = 1 elif args[i] == "--set": skipthis=1 (name, value) = args[i+1].split('=') Environment[name] = value else: try: NumIter=int(args[i]) except ValueError: usage(args[i]) if Environment["DoBSC"]: NumIter = 2 LimitNodes = 1 Chosen.append("AddResource") Environment["ClobberCIB"] = 1 Environment["CIBResource"] = 0 Environment["logger"].append(FileLog(Environment, Environment["LogFileName"])) elif Environment["OutputFile"]: Environment["logger"].append(FileLog(Environment, Environment["OutputFile"])) elif Environment["SyslogFacility"]: Environment["logger"].append(SysLog(Environment)) if Environment["Stack"] == "heartbeat" or Environment["Stack"] == "lha": Environment["Stack"] = "heartbeat" Environment['CMclass'] = crm_lha elif Environment["Stack"] == "openais" or Environment["Stack"] == "ais" or Environment["Stack"] == "whitetank": Environment["Stack"] = "openais (whitetank)" Environment['CMclass'] = crm_whitetank Environment["use_logd"] = 0 elif Environment["Stack"] == "corosync" or Environment["Stack"] == "cs" or Environment["Stack"] == "flatiron": Environment["Stack"] = "corosync (flatiron)" Environment['CMclass'] = crm_flatiron Environment["use_logd"] = 0 elif Environment["Stack"] == "cman": Environment["Stack"] = "corosync (cman)" Environment['CMclass'] = crm_cman Environment["use_logd"] = 0 elif Environment["Stack"] == "mcp": Environment["Stack"] = "corosync (mcp)" Environment['CMclass'] = crm_mcp Environment["use_logd"] = 0 else: print "Unknown stack: "+Environment["Stack"] sys.exit(1) if len(node_list) < 1: print "No nodes specified!" 
    sys.exit(1)

    if LimitNodes > 0:
        if len(node_list) > LimitNodes:
            print("Limiting the number of nodes: configured=%d, max=%d"
                  %(len(node_list), LimitNodes))
            while len(node_list) > LimitNodes:
                node_list.pop(len(node_list)-1)

    Environment["nodes"] = node_list

    # Create the Cluster Manager object
    cm = Environment['CMclass'](Environment)

    if TruncateLog:
        Environment.log("Truncating %s" % LogFile)
        lf = open(LogFile, "w")
        if lf is not None:
            lf.truncate(0)
            lf.close()

    Audits = AuditList(cm)

    if Environment["ListTests"] == 1:
        Tests = TestList(cm, Audits)
        Environment.log("Total %d tests" % len(Tests))
        for test in Tests:
            Environment.log(str(test.name))
        sys.exit(0)

    if len(Chosen) == 0:
        Tests = TestList(cm, Audits)
    else:
        for TestCase in Chosen:
            match = None
            for test in TestList(cm, Audits):
                if test.name == TestCase:
                    match = test

            if not match:
                usage("--choose: No applicable/valid tests chosen")
            else:
                Tests.append(match)

    # Scenario selection
    if Environment["scenario"] == "basic-sanity":
        scenario = RandomTests(cm, [ BasicSanityCheck(Environment) ], Audits, Tests)

    elif Environment["scenario"] == "all-once":
        NumIter = len(Tests)
        scenario = AllOnce(
            cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests)
    elif Environment["scenario"] == "sequence":
        scenario = Sequence(
            cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests)
    else:
        scenario = RandomTests(
            cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests)

    Environment.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ")
    Environment.log("Stack: %s" % Environment["Stack"])
    Environment.log("Schema: %s" % Environment["Schema"])
    Environment.log("Scenario: %s" % scenario.__doc__)
    Environment.log("Random Seed: %s" % Environment["RandSeed"])
    Environment.log("System log files: %s" % Environment["LogFileName"])

    Environment.dump()
    rc = Environment.run(scenario, NumIter)
    sys.exit(rc)

diff --git a/cts/CTStests.py b/cts/CTStests.py
index 2287b0efd9..6661e37fd7 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -1,2223 +1,2223 @@
'''CTS: Cluster Testing System: Tests module

There are a few things we want to do here:
'''

__copyright__='''
Copyright (C) 2000, 2001 Alan Robertson
Licensed under the GNU GPL.

Add ResourceRecover testcase Zhao Kai
'''

#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# SPECIAL NOTE:
#
# Tests may NOT implement any cluster-manager-specific code in them.
# EXTEND the ClusterManager object to provide the base capabilities
# the test needs if you need to do something that the current CM classes
# do not.  Otherwise you screw up the whole point of the object structure
# in CTS.
#
# Thank you.
#

import time, os, re, types, string, tempfile, sys
from stat import *
from cts import CTS
from cts.CTSaudits import *

AllTestClasses = [ ]

class CTSTest:
    '''
    A Cluster test.
We implement the basic set of properties and behaviors for a generic cluster test. Cluster tests track their own statistics. We keep each of the kinds of counts we track as separate {name,value} pairs. ''' def __init__(self, cm): #self.name="the unnamed test" self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} # if not issubclass(cm.__class__, ClusterManager): # raise ValueError("Must be a ClusterManager object") self.CM = cm self.Audits = [] self.timeout=120 self.passed = 1 self.is_loop = 0 self.is_unsafe = 0 self.is_experimental = 0 self.is_valgrind = 0 self.benchmark = 0 # which tests to benchmark self.timer = {} # timers def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def log_mark(self, msg): self.CM.debug("MARK: test %s %s %d" % (self.name,msg,time.time())) return def get_timer(self,key = "test"): try: return self.timer[key] except: return 0 def set_timer(self,key = "test"): self.timer[key] = time.time() return self.timer[key] def log_timer(self,key = "test"): elapsed = 0 if key in self.timer: elapsed = time.time() - self.timer[key] s = key == "test" and self.name or "%s:%s" %(self.name,key) self.CM.debug("%s runtime: %.2f" % (s, elapsed)) del self.timer[key] return elapsed def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 # Reset the test passed boolean if name == "calls": self.passed = 1 def failure(self, reason="none"): '''Increment the failure count''' self.passed = 0 self.incr("failure") self.CM.log(("Test %s" % self.name).ljust(35) +" FAILED: %s" % reason) return None def success(self): '''Increment the success count''' self.incr("success") return 1 def skipped(self): '''Increment the skipped count''' self.incr("skipped") return 1 def __call__(self, node): '''Perform the given test''' raise ValueError("Abstract Class member (__call__)") self.incr("calls") return self.failure() def audit(self): passed = 1 if len(self.Audits) > 0: for audit in self.Audits: if not audit(): self.CM.log("Internal %s Audit %s FAILED." 
% (self.name, audit.name())) self.incr("auditfail") passed = 0 return passed def setup(self, node): '''Setup the given test''' return self.success() def teardown(self, node): '''Tear down the given test''' return self.success() def create_watch(self, patterns, timeout, name=None): if not name: name = self.name return CTS.LogWatcher(self.CM.Env, self.CM["LogFileName"], patterns, name, timeout) def local_badnews(self, prefix, watch, local_ignore=[]): errcount = 0 if not prefix: prefix = "LocalBadNews:" ignorelist = [] ignorelist.append(" CTS: ") ignorelist.append(prefix) ignorelist.extend(local_ignore) while errcount < 100: match=watch.look(0) if match: add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): add_err = 0 if add_err == 1: self.CM.log(prefix + " " + match) errcount=errcount+1 else: break else: self.CM.log("Too many errors!") return errcount def is_applicable(self): return self.is_applicable_common() def is_applicable_common(self): '''Return TRUE if we are applicable in the current test configuration''' #raise ValueError("Abstract Class member (is_applicable)") if self.is_loop and not self.CM.Env["loop-tests"]: return 0 elif self.is_unsafe and not self.CM.Env["unsafe-tests"]: return 0 elif self.is_valgrind and not self.CM.Env["valgrind-tests"]: return 0 elif self.is_experimental and not self.CM.Env["experimental-tests"]: return 0 elif self.CM.Env["benchmark"] and self.benchmark == 0: return 0 return 1 def find_ocfs2_resources(self, node): self.r_o2cb = None self.r_ocfs2 = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "o2cb" and r.parent != "NA": self.CM.debug("Found o2cb: %s" % self.r_o2cb) self.r_o2cb = r.parent if re.search("^Constraint", line): c = AuditConstraint(self.CM, line) if c.type == "rsc_colocation" and c.target == self.r_o2cb: self.r_ocfs2.append(c.rsc) self.CM.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2)) return len(self.r_ocfs2) def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' return 1 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] ################################################################### class StopTest(CTSTest): ################################################################### '''Stop (deactivate) the cluster manager on a node''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stop" def __call__(self, node): '''Perform the 'stop' test. 
''' self.incr("calls") if self.CM.ShouldBeStatus[node] != "up": return self.skipped() patterns = [] # Technically we should always be able to notice ourselves stopping patterns.append(self.CM["Pat:We_stopped"] % node) #if self.CM.Env["use_logd"]: # patterns.append(self.CM["Pat:Logd_stopped"] % node) # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions for other in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[other] == "up" and other != node: patterns.append(self.CM["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) #self.debug("Checking %s will notice %s left"%(other, node)) watch = self.create_watch(patterns, self.CM["DeadTime"]) watch.setwatch() if node == self.CM.OurNode: self.incr("us") else: if self.CM.upcount() <= 1: self.incr("all") else: self.incr("them") self.CM.StopaCM(node) watch_result = watch.lookforall() failreason=None UnmatchedList = "||" if watch.unmatched: (rc, output) = self.CM.rsh(node, "/bin/ps axf", None) for line in output: self.CM.debug(line) for regex in watch.unmatched: self.CM.log ("ERROR: Shutdown pattern not found: %s" % (regex)) UnmatchedList += regex + "||"; failreason="Missing shutdown pattern" self.CM.cluster_stable(self.CM["DeadTime"]) if not watch.unmatched or self.CM.upcount() == 0: return self.success() if len(watch.unmatched) >= self.CM.upcount(): return self.failure("no match against (%s)" % UnmatchedList) if failreason == None: return self.success() else: return self.failure(failreason) # # We don't register StopTest because it's better when called by # another test... # ################################################################### class StartTest(CTSTest): ################################################################### '''Start (activate) the cluster manager on a node''' def __init__(self, cm, debug=None): CTSTest.__init__(self,cm) self.name="start" self.debug = debug def __call__(self, node): '''Perform the 'start' test. ''' self.incr("calls") if self.CM.upcount() == 0: self.incr("us") else: self.incr("them") if self.CM.ShouldBeStatus[node] != "down": return self.skipped() elif self.CM.StartaCM(node): return self.success() else: return self.failure("Startup %s on node %s failed" %(self.CM["Name"], node)) # # We don't register StartTest because it's better when called by # another test... # ################################################################### class FlipTest(CTSTest): ################################################################### '''If it's running, stop it. If it's stopped start it. Overthrow the status quo... ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Flip" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'Flip' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == "up": self.incr("stopped") ret = self.stop(node) type="up->down" # Give the cluster time to recognize it's gone... 
time.sleep(self.CM["StableTime"]) elif self.CM.ShouldBeStatus[node] == "down": self.incr("started") ret = self.start(node) type="down->up" else: return self.skipped() self.incr(type) if ret: return self.success() else: return self.failure("%s failure" % type) # Register FlipTest as a good test to run AllTestClasses.append(FlipTest) ################################################################### class RestartTest(CTSTest): ################################################################### '''Stop and restart a node''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Restart" self.start = StartTest(cm) self.stop = StopTest(cm) self.benchmark = 1 def __call__(self, node): '''Perform the 'restart' test. ''' self.incr("calls") self.incr("node:" + node) ret1 = 1 if self.CM.StataCM(node): self.incr("WasStopped") if not self.start(node): return self.failure("start (setup) failure: "+node) self.set_timer() if not self.stop(node): return self.failure("stop failure: "+node) if not self.start(node): return self.failure("start failure: "+node) return self.success() # Register RestartTest as a good test to run AllTestClasses.append(RestartTest) ################################################################### class StonithdTest(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stonithd" self.startall = SimulStartLite(cm) self.benchmark = 1 def __call__(self, node): self.incr("calls") if len(self.CM.Env["nodes"]) < 2: return self.skipped() ret = self.startall(None) if not ret: return self.failure("Setup failed") is_dc = self.CM.is_node_dc(node) watchpats = [] watchpats.append("stonith-ng:.*Operation .* for host '%s' with device .* returned: 0" % node) watchpats.append("tengine_stonith_notify: Peer %s was terminated .*: OK" % node) if is_dc: watchpats.append("tengine_stonith_notify: Target was our leader .*%s" % node) else: watchpats.append("tengine_stonith_callback: .*: OK ") if self.CM.Env["LogWatcher"] != "remote" or not is_dc: # Often remote logs aren't flushed to disk by the time the node is shot, # so we wont be able to find them # Remote syslog doesn't suffer this problem because they're already on # the loghost when the node is shot watchpats.append("Node %s will be fenced because termination was requested" % node) watchpats.append("Scheduling Node %s for STONITH" % node) watchpats.append("Executing .* fencing operation") if self.CM.Env["at-boot"] == 0: self.CM.debug("Expecting %s to stay down" % node) self.CM.ShouldBeStatus[node]="down" else: self.CM.debug("Expecting %s to come up again %d" % (node, self.CM.Env["at-boot"])) watchpats.append("%s crmd: .* S_STARTING -> S_PENDING" % node) watchpats.append("%s crmd: .* S_PENDING -> S_NOT_DC" % node) watch = self.create_watch(watchpats, 30 + self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() self.CM.rsh(node, "crm_attribute --node %s --type status --attr-name terminate --attr-value true" % node) self.set_timer("fence") matched = watch.lookforall() self.log_timer("fence") self.set_timer("reform") if watch.unmatched: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.CM["StartTime"]) if not matched: 
return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") self.log_timer("reform") return self.success() def errorstoignore(self): return [ "Executing .* fencing operation" ] def is_applicable(self): if not self.is_applicable_common(): return 0 - if self.CM.Env.has_key("DoStonith"): - return self.CM.Env["DoStonith"] + if self.CM.Env.has_key("DoFencing"): + return self.CM.Env["DoFencing"] return 1 AllTestClasses.append(StonithdTest) ################################################################### class StartOnebyOne(CTSTest): ################################################################### '''Start all the nodes ~ one by one''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StartOnebyOne" self.stopall = SimulStopLite(cm) self.start = StartTest(cm) self.ns=CTS.NodeStatus(cm.Env) def __call__(self, dummy): '''Perform the 'StartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Test setup failed") failed=[] self.set_timer() for node in self.CM.Env["nodes"]: if not self.start(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to start: " + repr(failed)) return self.success() # Register StartOnebyOne as a good test to run AllTestClasses.append(StartOnebyOne) ################################################################### class SimulStart(CTSTest): ################################################################### '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStart" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() # Register SimulStart as a good test to run AllTestClasses.append(SimulStart) ################################################################### class SimulStop(CTSTest): ################################################################### '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, dummy): '''Perform the 'SimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() # Register SimulStop as a good test to run AllTestClasses.append(SimulStop) ################################################################### class StopOnebyOne(CTSTest): ################################################################### '''Stop all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StopOnebyOne" self.startall = SimulStartLite(cm) self.stop = StopTest(cm) def __call__(self, dummy): '''Perform the 'StopOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... 
        ret = self.startall(None)
        if not ret:
            return self.failure("Setup failed")

        failed=[]
        self.set_timer()
        for node in self.CM.Env["nodes"]:
            if not self.stop(node):
                failed.append(node)

        if len(failed) > 0:
            return self.failure("Some node failed to stop: " + repr(failed))

        self.CM.clear_all_caches()
        return self.success()

# Register StopOnebyOne as a good test to run
AllTestClasses.append(StopOnebyOne)

###################################################################
class RestartOnebyOne(CTSTest):
###################################################################
    '''Restart all the nodes in order'''
    def __init__(self, cm):
        CTSTest.__init__(self,cm)
        self.name="RestartOnebyOne"
        self.startall = SimulStartLite(cm)

    def __call__(self, dummy):
        '''Perform the 'RestartOnebyOne' test. '''
        self.incr("calls")

        # We ignore the "node" parameter...

        # Start up all the nodes...
        ret = self.startall(None)
        if not ret:
            return self.failure("Setup failed")

        did_fail=[]
        self.set_timer()
        self.restart = RestartTest(self.CM)
        for node in self.CM.Env["nodes"]:
            if not self.restart(node):
                did_fail.append(node)

        if did_fail:
            return self.failure("Could not restart %d nodes: %s"
                                %(len(did_fail), repr(did_fail)))
        return self.success()

# Register RestartOnebyOne as a good test to run
AllTestClasses.append(RestartOnebyOne)

###################################################################
class PartialStart(CTSTest):
###################################################################
    '''Start a node - but tell it to stop before it finishes starting up'''
    def __init__(self, cm):
        CTSTest.__init__(self,cm)
        self.name="PartialStart"
        self.startall = SimulStartLite(cm)
        self.stopall = SimulStopLite(cm)
        #self.is_unsafe = 1

    def __call__(self, node):
        '''Perform the 'PartialStart' test. '''
        self.incr("calls")

        ret = self.stopall(None)
        if not ret:
            return self.failure("Setup failed")

        # FIXME!  This should use the CM class to get the pattern
        #         then it would be applicable in general
        watchpats = []
        watchpats.append("Starting crmd")
        watch = self.create_watch(watchpats, self.CM["DeadTime"]+10)
        watch.setwatch()

        self.CM.StartaCMnoBlock(node)
        ret = watch.lookforall()
        if not ret:
            self.CM.log("Patterns not found: " + repr(watch.unmatched))
            return self.failure("Setup of %s failed" % node)

        ret = self.stopall(None)
        if not ret:
            return self.failure("%s did not stop in time" % node)

        return self.success()

# Register PartialStart as a good test to run
AllTestClasses.append(PartialStart)

#######################################################################
class StandbyTest(CTSTest):
#######################################################################
    def __init__(self, cm):
        CTSTest.__init__(self,cm)
        self.name="Standby"
        self.benchmark = 1

        self.start = StartTest(cm)
        self.startall = SimulStartLite(cm)

    # make sure the node is active
    # set the node to standby mode
    # check resources, no resources should be running on the node
    # set the node to active mode
    # check resources, resources should have been migrated back (SHOULD THEY?)
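    # Roughly, the steps above correspond to the standby CLI (the exact
    # commands are hidden behind the CM class, so this mapping is an
    # assumption, not what the test literally runs):
    #
    #   crm_standby -U <node> -v on     # put the node into standby
    #   crm_resource -W                 # confirm nothing runs there
    #   crm_standby -U <node> -v off    # reactivate the node
    #
    # active_resources(node) returning a non-empty list while standby is
    # "on" is what __call__ below treats as a failure.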
def __call__(self, node): self.incr("calls") ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") self.CM.debug("Make sure node %s is active" % node) if self.CM.StandbyStatus(node) != "off": if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.CM.debug("Getting resources running on node %s" % node) rsc_on_node = self.CM.active_resources(node) self.CM.debug("Setting node %s to standby mode" % node) if not self.CM.SetStandbyMode(node, "on"): return self.failure("can't set node %s to standby mode" % node) self.set_timer("on") time.sleep(1) # Allow time for the update to be applied and cause something self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "on": return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status)) self.log_timer("on") self.CM.debug("Checking resources") bad_run = self.CM.active_resources(node) if len(bad_run) > 0: rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run))) self.CM.debug("Setting node %s to active mode" % node) self.CM.SetStandbyMode(node, "off") return rc self.CM.debug("Setting node %s to active mode" % node) if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.set_timer("off") self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.log_timer("off") return self.success() AllTestClasses.append(StandbyTest) ####################################################################### class ValgrindTest(CTSTest): ####################################################################### '''Check for memory leaks''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Valgrind" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_valgrind = 1 self.is_loop = 1 def setup(self, node): self.incr("calls") ret=self.stopall(None) if not ret: return self.failure("Stop all nodes failed") # Enable valgrind self.logPat = "/tmp/%s-*.valgrind" % self.name self.CM.Env["valgrind-prefix"] = self.name self.CM.rsh(node, "rm -f %s" % self.logPat, None) ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") for node in self.CM.Env["nodes"]: (rc, output) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None) for line in output: self.CM.debug(line) return self.success() def teardown(self, node): # Disable valgrind self.CM.Env["valgrind-prefix"] = None # Return all nodes to normal ret=self.stopall(None) if not ret: return self.failure("Stop all nodes failed") return self.success() def find_leaks(self): # Check for leaks leaked = [] self.stop = StopTest(self.CM) for node in self.CM.Env["nodes"]: (rc, ps_out) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None) rc = self.stop(node) if not rc: self.failure("Couldn't shut down %s" % node) rc = self.CM.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e ERROR.*SUMMARY:.*[1-9].*errors %s" % self.logPat, 0) if rc != 1: leaked.append(node) self.failure("Valgrind errors detected on %s" % node) for line in ps_out: self.CM.log(line) (rc, output) = self.CM.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logPat, None) for line in output: 
self.CM.log(line) (rc, output) = self.CM.rsh(node, "cat %s" % self.logPat, None) for line in output: self.CM.debug(line) self.CM.rsh(node, "rm -f %s" % self.logPat, None) return leaked def __call__(self, node): leaked = self.find_leaks() if len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """cib:.*readCibXmlFile:""", """HA_VALGRIND_ENABLED""" ] ####################################################################### class StandbyLoopTest(ValgrindTest): ####################################################################### '''Check for memory leaks by putting a node in and out of standby for an hour''' def __init__(self, cm): ValgrindTest.__init__(self,cm) self.name="StandbyLoop" def __call__(self, node): lpc = 0 delay = 2 failed = 0 done=time.time() + self.CM.Env["loop-minutes"]*60 while time.time() <= done and not failed: lpc = lpc + 1 time.sleep(delay) if not self.CM.SetStandbyMode(node, "on"): self.failure("can't set node %s to standby mode" % node) failed = lpc time.sleep(delay) if not self.CM.SetStandbyMode(node, "off"): self.failure("can't set node %s to active mode" % node) failed = lpc leaked = self.find_leaks() if failed: return self.failure("Iteration %d failed" % failed) elif len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() AllTestClasses.append(StandbyLoopTest) ############################################################################## class BandwidthTest(CTSTest): ############################################################################## # Tests should not be cluster-manager-specific # If you need to find out cluster manager configuration to do this, then # it should be added to the generic cluster manager API. '''Test the bandwidth which heartbeat uses''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Bandwidth" self.start = StartTest(cm) self.__setitem__("min",0) self.__setitem__("max",0) self.__setitem__("totalbandwidth",0) self.tempfile = tempfile.mktemp(".cts") self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform the Bandwidth test''' self.incr("calls") if self.CM.upcount()<1: return self.skipped() Path = self.CM.InternalCommConfig() if "ip" not in Path["mediatype"]: return self.skipped() port = Path["port"][0] port = int(port) ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(5) # We get extra messages right after startup. fstmpfile = "/var/run/band_estimate" dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \ % (port, fstmpfile) rc = self.CM.rsh(node, dumpcmd) if rc == 0: farfile = "root@%s:%s" % (node, fstmpfile) self.CM.rsh.cp(farfile, self.tempfile) Bandwidth = self.countbandwidth(self.tempfile) if not Bandwidth: self.CM.log("Could not compute bandwidth.") return self.success() intband = int(Bandwidth + 0.5) self.CM.log("...bandwidth: %d bits/sec" % intband) self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth if self.Stats["min"] == 0: self.Stats["min"] = Bandwidth if Bandwidth > self.Stats["max"]: self.Stats["max"] = Bandwidth if Bandwidth < self.Stats["min"]: self.Stats["min"] = Bandwidth self.CM.rsh(node, "rm -f %s" % fstmpfile) os.unlink(self.tempfile) return self.success() else: return self.failure("no response from tcpdump command [%d]!" 
                                % rc)

    def countbandwidth(self, file):
        fp = open(file, "r")
        fp.seek(0)
        count = 0
        sum = 0

        # Find the first UDP packet, record its length and timestamp
        while 1:
            line = fp.readline()
            if not line:
                return None
            if re.search("udp",line) or re.search("UDP,", line):
                count=count+1
                linesplit = string.split(line," ")
                for j in range(len(linesplit)-1):
                    if linesplit[j]=="udp": break
                    if linesplit[j]=="length:": break

                try:
                    sum = sum + int(linesplit[j+1])
                except ValueError:
                    self.CM.log("Invalid tcpdump line: %s" % line)
                    return None
                T1 = linesplit[0]
                timesplit = string.split(T1,":")
                time2split = string.split(timesplit[2],".")
                time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001
                break

        # Accumulate the next ~100 UDP packets the same way
        while count < 100:
            line = fp.readline()
            if not line:
                return None
            if re.search("udp",line) or re.search("UDP,", line):
                count = count+1
                linessplit = string.split(line," ")
                for j in range(len(linessplit)-1):
                    if linessplit[j]=="udp": break
                    if linessplit[j]=="length:": break

                try:
                    sum=int(linessplit[j+1])+sum
                except ValueError:
                    self.CM.log("Invalid tcpdump line: %s" % line)
                    return None

        T2 = linessplit[0]
        timesplit = string.split(T2,":")
        time2split = string.split(timesplit[2],".")
        time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001
        time = time2-time1
        if (time <= 0):
            return 0
        return (sum*8)/time

    def is_applicable(self):
        '''BandwidthTest never applicable'''
        return 0

AllTestClasses.append(BandwidthTest)

###################################################################
class ResourceRecover(CTSTest):
###################################################################
    def __init__(self, cm):
        CTSTest.__init__(self,cm)
        self.name="ResourceRecover"
        self.start = StartTest(cm)
        self.startall = SimulStartLite(cm)
        self.max=30
        self.rid=None
        #self.is_unsafe = 1
        self.benchmark = 1

        # these are the values used for the new LRM API call
        self.action = "asyncmon"
        self.interval = 0

    def __call__(self, node):
        '''Perform the 'ResourceRecover' test. '''
        self.incr("calls")

        ret = self.startall(None)
        if not ret:
            return self.failure("Setup failed")

        resourcelist = self.CM.active_resources(node)
        # if there are no active resources, return directly
        if len(resourcelist)==0:
            self.CM.log("No active resources on %s" % node)
            return self.skipped()

        self.rid = self.CM.Env.RandomGen.choice(resourcelist)

        rsc = None
        (rc, lines) = self.CM.rsh(node, "crm_resource -c", None)
        for line in lines:
            if re.search("^Resource", line):
                tmp = AuditResource(self.CM, line)
                if tmp.id == self.rid:
                    rsc = tmp
                    # Handle anonymous clones that get renamed
                    self.rid = rsc.clone_id
                    break

        if not rsc:
            return self.failure("Could not find %s in the resource list" % self.rid)

        self.CM.debug("Shooting %s aka.
%s" % (rsc.clone_id, rsc.id)) pats = [] pats.append("Updating failcount for %s on .* after .* %s" % (self.rid, self.action)) if rsc.managed(): pats.append("crmd:.* Performing .* op=%s_stop_0" % self.rid) if rsc.unique(): pats.append("crmd:.* Performing .* op=%s_start_0" % self.rid) pats.append("crmd:.* LRM operation %s_start_0.*confirmed.*ok" % self.rid) else: # Anonymous clones may get restarted with a different clone number pats.append("crmd:.* Performing .* op=.*_start_0") pats.append("crmd:.* LRM operation .*_start_0.*confirmed.*ok") watch = self.create_watch(pats, 60) watch.setwatch() self.CM.rsh(node, "crm_resource -F -r %s -H %s &>/dev/null" % (self.rid, node)) self.set_timer("recover") watch.lookforall() self.log_timer("recover") self.CM.cluster_stable() recovered=self.CM.ResourceLocation(self.rid) if watch.unmatched: return self.failure("Patterns not found: %s" % repr(watch.unmatched)) elif rsc.unique() and len(recovered) > 1: return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered))) elif len(recovered) > 0: self.CM.debug("%s is running on: %s" %(self.rid, repr(recovered))) elif rsc.managed(): return self.failure("%s was not recovered and is inactive" % self.rid) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """Updating failcount for %s""" % self.rid, """Unknown operation: fail""", """ERROR: sending stonithRA op to stonithd failed.""", """ERROR: process_lrm_event: LRM operation %s_%s_%d""" % (self.rid, self.action, self.interval), """ERROR: process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval), ] AllTestClasses.append(ResourceRecover) ################################################################### class ComponentFail(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ComponentFail" self.startall = SimulStartLite(cm) self.complist = cm.Components() self.patterns = [] self.okerrpatterns = [] self.is_unsafe = 1 def __call__(self, node): '''Perform the 'ComponentFail' test. ''' self.incr("calls") self.patterns = [] self.okerrpatterns = [] # start all nodes ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.CM.cluster_stable(self.CM["StableTime"]): return self.failure("Setup failed - unstable") node_is_dc = self.CM.is_node_dc(node, None) # select a component to kill chosen = self.CM.Env.RandomGen.choice(self.complist) while chosen.dc_only == 1 and node_is_dc == 0: chosen = self.CM.Env.RandomGen.choice(self.complist) self.CM.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot)) self.incr(chosen.name) if chosen.name != "aisexec": if self.CM["Name"] != "crm-lha" or chosen.name != "pengine": self.patterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name)) self.patterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name)) self.patterns.extend(chosen.pats) if node_is_dc: self.patterns.extend(chosen.dc_pats) # In an ideal world, this next stuff should be in the "chosen" object as a member function if self.CM["Name"] == "crm-lha" and chosen.triggersreboot: # Make sure the node goes down and then comes back up if it should reboot... 
for other in self.CM.Env["nodes"]: if other != node: self.patterns.append(self.CM["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) self.patterns.append(self.CM["Pat:Slave_started"] % node) self.patterns.append(self.CM["Pat:Local_started"] % node) if chosen.dc_only: # Sometimes these will be in the log, and sometimes they won't... self.okerrpatterns.append("%s crmd:.*Process %s:.* exited" %(node, chosen.name)) self.okerrpatterns.append("%s crmd:.*I_ERROR.*crmdManagedChildDied" %node) self.okerrpatterns.append("%s crmd:.*The %s subsystem terminated unexpectedly" %(node, chosen.name)) self.okerrpatterns.append("ERROR: Client .* exited with return code") else: # Sometimes this won't be in the log... self.okerrpatterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name)) self.okerrpatterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name)) self.okerrpatterns.append(self.CM["Pat:ChildExit"]) # supply a copy so self.patterns doesnt end up empty tmpPats = [] tmpPats.extend(self.patterns) self.patterns.extend(chosen.badnews_ignore) # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status stonithPats = [] stonithPats.append("stonith-ng:.*Operation .* for host '%s' with device .* returned: 0" % node) stonith = self.create_watch(stonithPats, 0) stonith.setwatch() # set the watch for stable watch = self.create_watch( tmpPats, self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() # kill the component chosen.kill(node) # check to see Heartbeat noticed matched = watch.lookforall(allow_multiple_matches=1) if watch.unmatched: self.CM.log("Patterns not found: " + repr(watch.unmatched)) if self.CM.Env["at-boot"] == 0: self.CM.debug("Checking if %s was shot" % node) shot = stonith.look(60) if shot: self.CM.debug("Found: "+ repr(shot)) self.CM.ShouldBeStatus[node]="down" self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting for any STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.CM["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Note that okerrpatterns refers to the last time we ran this test # The good news is that this works fine for us... self.okerrpatterns.extend(self.patterns) return self.okerrpatterns AllTestClasses.append(ComponentFail) #################################################################### class SplitBrainTest(CTSTest): #################################################################### '''It is used to test split-brain. 
    when the path between the two nodes breaks, check whether both nodes
    try to take over the resources'''
    def __init__(self,cm):
        CTSTest.__init__(self,cm)
        self.name = "SplitBrain"
        self.start = StartTest(cm)
        self.startall = SimulStartLite(cm)
        self.is_experimental = 1

    def isolate_partition(self, partition):
        other_nodes = []
        other_nodes.extend(self.CM.Env["nodes"])

        for node in partition:
            try:
                other_nodes.remove(node)
            except ValueError:
                self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"]) + " from " +repr(partition))

        if len(other_nodes) == 0:
            return 1

        self.CM.debug("Creating partition: " + repr(partition))
        self.CM.debug("Everyone else: " + repr(other_nodes))

        for node in partition:
            if not self.CM.isolate_node(node, other_nodes):
                self.CM.log("Could not isolate %s" % node)
                return 0

        return 1

    def heal_partition(self, partition):
        other_nodes = []
        other_nodes.extend(self.CM.Env["nodes"])

        for node in partition:
            try:
                other_nodes.remove(node)
            except ValueError:
                self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"]))

        if len(other_nodes) == 0:
            return 1

        self.CM.debug("Healing partition: " + repr(partition))
        self.CM.debug("Everyone else: " + repr(other_nodes))

        for node in partition:
            self.CM.unisolate_node(node, other_nodes)

    def __call__(self, node):
        '''Perform split-brain test'''
        self.incr("calls")
        self.passed = 1
        partitions = {}

        ret = self.startall(None)
        if not ret:
            return self.failure("Setup failed")

        while 1:
            # Retry until we get multiple partitions
            partitions = {}
            p_max = len(self.CM.Env["nodes"])
            for node in self.CM.Env["nodes"]:
                p = self.CM.Env.RandomGen.randint(1, p_max)
                if not partitions.has_key(p):
                    partitions[p] = []
                partitions[p].append(node)
            p_max = len(partitions.keys())
            if p_max > 1:
                break
            # else, try again

        self.CM.debug("Created %d partitions" % p_max)
        for key in partitions.keys():
            self.CM.debug("Partition["+str(key)+"]:\t"+repr(partitions[key]))

        # Disabling STONITH to reduce test complexity for now
        self.CM.rsh(node, "crm_attribute -n stonith-enabled -v false")

        for key in partitions.keys():
            self.isolate_partition(partitions[key])

        count = 30
        while count > 0:
            if len(self.CM.find_partitions()) != p_max:
                time.sleep(10)
                count -= 1
            else:
                break
        else:
            self.failure("Expected partitions were not created")

        # Target number of partitions formed - wait for stability
        if not self.CM.cluster_stable():
            self.failure("Partitioned cluster not stable")

        # Now audit the cluster state
        self.CM.partitions_expected = p_max
        if not self.audit():
            self.failure("Audits failed")
        self.CM.partitions_expected = 1

        # And heal them again
        for key in partitions.keys():
            self.heal_partition(partitions[key])

        # Wait for a single partition to form
        count = 30
        while count > 0:
            if len(self.CM.find_partitions()) != 1:
                time.sleep(10)
                count -= 1
            else:
                break
        else:
            self.failure("Cluster did not reform")

        # Wait for it to have the right number of members
        count = 30
        while count > 0:
            members = []

            partitions = self.CM.find_partitions()
            if len(partitions) > 0:
                members = partitions[0].split()

            if len(members) != len(self.CM.Env["nodes"]):
                time.sleep(10)
                count -= 1
            else:
                break
        else:
            self.failure("Cluster did not completely reform")

        # Wait up to 20 minutes - the delay is preferable to
        # trying to continue in a messed-up state
        if not self.CM.cluster_stable(1200):
            self.failure("Reformed cluster not stable")
            answer = raw_input('Continue?
[nY]') if answer and answer == "n": raise ValueError("Reformed cluster not stable") # Turn fencing back on if self.CM.Env["DoFencing"]: self.CM.rsh(node, "crm_attribute -D -n stonith-enabled") self.CM.cluster_stable() if self.passed: return self.success() return self.failure("See previous errors") def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ "Another DC detected:", "ERROR: attrd_cib_callback: .*Application of an update diff failed", "crmd_ha_msg_callback:.*not in our membership list", "CRIT:.*node.*returning after partition", ] def is_applicable(self): if not self.is_applicable_common(): return 0 return len(self.CM.Env["nodes"]) > 2 AllTestClasses.append(SplitBrainTest) #################################################################### class Reattach(CTSTest): #################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Reattach" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) self.is_unsafe = 0 # Handled by canrunnow() def setup(self, node): attempt=0 if not self.startall(None): return None # Make sure we are really _really_ stable and that all # resources, including those that depend on transient node # attributes, are started while not self.CM.cluster_stable(double_check=True): if attempt < 5: attempt += 1 self.CM.debug("Not stable yet, re-testing") else: self.CM.log("Cluster is not stable") return None return 1 def teardown(self, node): # Make sure 'node' is up start = StartTest(self.CM) start(node) is_managed = self.CM.rsh(node, "crm_attribute -GQ -t crm_config -n is-managed-default -d true", 1) is_managed = is_managed[:-1] # Strip off the newline if is_managed != "true": self.CM.log("Attempting to re-enable resource management on %s (%s)" % (node, is_managed)) managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.rsh(node, "crm_attribute -D -n is-managed-default") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) self.CM.log("Could not re-enable resource management") return 0 return 1 def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' if self.find_ocfs2_resources(node): self.CM.log("Detach/Reattach scenarios are not possible with OCFS2 services present") return 0 return 1 def __call__(self, node): self.incr("calls") pats = [] managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.debug("Disable resource management") self.CM.rsh(node, "crm_attribute -n is-managed-default -v false") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not disabled") pats = [] pats.append("crmd:.*Performing.*_stop_0") pats.append("crmd:.*Performing.*_start_0") pats.append("crmd:.*Performing.*_promote_0") pats.append("crmd:.*Performing.*_demote_0") pats.append("crmd:.*Performing.*_migrate_.*_0") watch = self.create_watch(pats, 60, "ShutdownActivity") watch.setwatch() self.CM.debug("Shutting down the cluster") ret = self.stopall(None) if not ret: self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") return self.failure("Couldn't shut down the cluster") self.CM.debug("Bringing the cluster back up") ret = self.startall(None) time.sleep(5) # allow ping to update the CIB if not ret: self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n 
is-managed-default") return self.failure("Couldn't restart the cluster") if self.local_badnews("ResourceActivity:", watch): self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") return self.failure("Resources stopped or started during cluster restart") watch = self.create_watch(pats, 60, "StartupActivity") watch.setwatch() managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not enabled") self.CM.cluster_stable() # Ignore actions for STONITH resources ignore = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.CM.debug("Ignoring: crmd:.*Performing.*op=%s_.*_0" % r.id) ignore.append("crmd:.*Performing.*op=%s_.*_0" % r.id) if self.local_badnews("ResourceActivity:", watch, ignore): return self.failure("Resources stopped or started after resource management was re-enabled") return ret def errorstoignore(self): '''Return list of errors which should be ignored''' return [ "You may ignore this error if it is unmanaged.", "pingd: .*ERROR: send_ipc_message:", "pingd: .*ERROR: send_update:", "lrmd: .*ERROR: notify_client:", ] def is_applicable(self): if self.CM["Name"] == "crm-lha": return None return 1 AllTestClasses.append(Reattach) #################################################################### class SpecialTest1(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SpecialTest1" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'SpecialTest1' test for Andrew. ''' self.incr("calls") # Shut down all the nodes... 
ret = self.stopall(None) if not ret: return self.failure("Could not stop all nodes") # Start the selected node ret = self.restart1(node) if not ret: return self.failure("Could not start "+node) # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Could not start the remaining nodes") return self.success() AllTestClasses.append(SpecialTest1) #################################################################### class HAETest(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="HAETest" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_loop = 1 def setup(self, node): # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") return self.success() def wait_on_state(self, node, resource, expected_clones, attempts=240): while attempts > 0: active=0 (rc, lines) = self.CM.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None) # Hack until crm_resource does the right thing if rc == 0 and lines: active = len(lines) if len(lines) == expected_clones: return 1 elif rc == 1: self.CM.debug("Resource %s is still inactive" % resource) elif rc == 234: self.CM.log("Unknown resource %s" % resource) return 0 elif rc == 246: self.CM.log("Cluster is inactive") return 0 elif rc != 0: self.CM.log("Call to crm_resource failed, rc=%d" % rc) return 0 else: self.CM.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones)) attempts -= 1 time.sleep(1) return 0 def find_dlm(self, node): self.r_dlm = None (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "controld" and r.parent != "NA": self.CM.debug("Found dlm: %s" % self.r_dlm) self.r_dlm = r.parent return 1 return 0 def find_hae_resources(self, node): self.r_dlm = None self.r_o2cb = None self.r_ocfs2 = [] if self.find_dlm(node): self.find_ocfs2_resources(node) def is_applicable(self): if not self.is_applicable_common(): return 0 if self.CM.Env["Schema"] == "hae": return 1 return None #################################################################### class HAERoleTest(HAETest): #################################################################### def __init__(self, cm): '''Lars' mount/unmount test for the HA extension. 
''' HAETest.__init__(self,cm) self.name="HAERoleTest" def change_state(self, node, resource, target): rc = self.CM.rsh(node, "crm_resource -r %s -p target-role -v %s --meta" % (resource, target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 delay = 2 done=time.time() + self.CM.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.CM.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "Stopped") if not self.wait_on_state(node, self.r_dlm, 0): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "Started") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAERoleTest) #################################################################### class HAEStandbyTest(HAETest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): HAETest.__init__(self,cm) self.name="HAEStandbyTest" def change_state(self, node, resource, target): rc = self.CM.rsh(node, "crm_standby -l reboot -v %s" % (target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 done=time.time() + self.CM.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.CM.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "true") if not self.wait_on_state(node, self.r_dlm, clone_max-1): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "false") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAEStandbyTest) ################################################################### class NearQuorumPointTest(CTSTest): ################################################################### ''' This test brings larger clusters near the quorum point (50%). In addition, it will test doing starts and stops at the same time. Here is how I think it should work: - loop over the nodes and decide randomly which will be up and which will be down Use a 50% probability for each of up/down. - figure out what to do to get into that state from the current state - in parallel, bring up those going up and bring those going down. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="NearQuorumPoint" def __call__(self, dummy): '''Perform the 'NearQuorumPoint' test. 
''' self.incr("calls") startset = [] stopset = [] #decide what to do with each node for node in self.CM.Env["nodes"]: action = self.CM.Env.RandomGen.choice(["start","stop"]) #action = self.CM.Env.RandomGen.choice(["start","stop","no change"]) if action == "start" : startset.append(node) elif action == "stop" : stopset.append(node) self.CM.debug("start nodes:" + repr(startset)) self.CM.debug("stop nodes:" + repr(stopset)) #add search patterns watchpats = [ ] for node in stopset: if self.CM.ShouldBeStatus[node] == "up": watchpats.append(self.CM["Pat:We_stopped"] % node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": #watchpats.append(self.CM["Pat:Slave_started"] % node) watchpats.append(self.CM["Pat:Local_started"] % node) else: for stopping in stopset: if self.CM.ShouldBeStatus[stopping] == "up": watchpats.append(self.CM["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping))) if len(watchpats) == 0: return self.skipped() if len(startset) != 0: watchpats.append(self.CM["Pat:DC_IDLE"]) watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() #begin actions for node in stopset: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": self.CM.StartaCMnoBlock(node) #get the result if watch.lookforall(): self.CM.cluster_stable() return self.success() self.CM.log("Warn: Patterns not found: " + repr(watch.unmatched)) #get the "bad" nodes upnodes = [] for node in stopset: if self.CM.StataCM(node) == 1: upnodes.append(node) downnodes = [] for node in startset: if self.CM.StataCM(node) == 0: downnodes.append(node) if upnodes == [] and downnodes == []: self.CM.cluster_stable() # Make sure they're completely down with no residule for node in stopset: self.CM.rsh(node, self.CM["StopCmd"]) return self.success() if len(upnodes) > 0: self.CM.log("Warn: Unstoppable nodes: " + repr(upnodes)) if len(downnodes) > 0: self.CM.log("Warn: Unstartable nodes: " + repr(downnodes)) return self.failure() AllTestClasses.append(NearQuorumPointTest) ################################################################### class RollingUpgradeTest(CTSTest): ################################################################### '''Perform a rolling upgrade of the cluster''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RollingUpgrade" self.start = StartTest(cm) self.stop = StopTest(cm) self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def setup(self, node): # Start all remaining nodes ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.CM.Env["nodes"]: if not self.downgrade(node, None): return self.failure("Couldn't downgrade %s" % node) ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.CM.Env["nodes"]: if not self.upgrade(node, None): return self.failure("Couldn't upgrade %s" % node) return self.success() def install(self, node, version, start=1, flags="--force"): target_dir = "/tmp/rpm-%s" % version src_dir = "%s/%s" % (self.CM.Env["rpm-dir"], version) self.CM.log("Installing %s on %s with %s" % (version, node, flags)) if not self.stop(node): return self.failure("stop failure: "+node) rc = self.CM.rsh(node, "mkdir -p %s" % target_dir) rc = self.CM.rsh(node, "rm -f %s/*.rpm" % target_dir) (rc, lines) = self.CM.rsh(node, "ls -1 
%s/*.rpm" % src_dir, None) for line in lines: line = line[:-1] rc = self.CM.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir)) rc = self.CM.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir)) if start and not self.start(node): return self.failure("start failure: "+node) return self.success() def upgrade(self, node, start=1): return self.install(node, self.CM.Env["current-version"], start) def downgrade(self, node, start=1): return self.install(node, self.CM.Env["previous-version"], start, "--force --nodeps") def __call__(self, node): '''Perform the 'Rolling Upgrade' test. ''' self.incr("calls") for node in self.CM.Env["nodes"]: if self.upgrade(node): return self.failure("Couldn't upgrade %s" % node) self.CM.cluster_stable() return self.success() def is_applicable(self): if not self.is_applicable_common(): return None if not self.CM.Env.has_key("rpm-dir"): return None if not self.CM.Env.has_key("current-version"): return None if not self.CM.Env.has_key("previous-version"): return None return 1 # Register RestartTest as a good test to run AllTestClasses.append(RollingUpgradeTest) ################################################################### class BSC_AddResource(CTSTest): ################################################################### '''Add a resource to the cluster''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="AddResource" self.resource_offset = 0 self.cib_cmd="""cibadmin -C -o %s -X '%s' """ def __call__(self, node): self.incr("calls") self.resource_offset = self.resource_offset + 1 r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset) start_pat = "crmd.*%s_start_0.*confirmed.*ok" patterns = [] patterns.append(start_pat % r_id) watch = self.create_watch(patterns, self.CM["DeadTime"]) watch.setwatch() fields = string.split(self.CM.Env["IPBase"], '.') fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') self.CM.Env["IPBase"] = ip if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip): return self.failure("Make resource %s failed" % r_id) failed = 0 watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Pattern not found: %s" % (regex)) failed = 1 if failed: return self.failure("Resource pattern(s) not found") if not self.CM.cluster_stable(self.CM["DeadTime"]): return self.failure("Unstable cluster") return self.success() def make_ip_resource(self, node, id, rclass, type, ip): self.CM.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node)) rsc_xml=""" """ % (id, rclass, type, id, id, ip) node_constraint=""" """ % (id, id, id, id, node) rc = 0 (rc, lines) = self.CM.rsh(node, self.cib_cmd % ("constraints", node_constraint), None) if rc != 0: self.CM.log("Constraint creation failed: %d" % rc) return None (rc, lines) = self.CM.rsh(node, self.cib_cmd % ("resources", rsc_xml), None) if rc != 0: self.CM.log("Resource creation failed: %d" % rc) return None return 1 def is_applicable(self): if self.CM.Env["DoBSC"]: return 1 return None AllTestClasses.append(BSC_AddResource) class SimulStopLite(CTSTest): ################################################################### '''Stop any active nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStopLite" def __call__(self, dummy): '''Perform the 'SimulStopLite' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... 
watchpats = [ ] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.incr("WasStarted") watchpats.append(self.CM["Pat:We_stopped"] % node) #if self.CM.Env["use_logd"]: # watchpats.append(self.CM["Pat:Logd_stopped"] % node) if len(watchpats) == 0: self.CM.clear_all_caches() return self.success() # Stop all the nodes - at about the same time... watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.set_timer() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) if watch.lookforall(): self.CM.clear_all_caches() # Make sure they're completely down with no residue for node in self.CM.Env["nodes"]: self.CM.rsh(node, self.CM["StopCmd"]) return self.success() did_fail=0 up_nodes = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 1: did_fail=1 up_nodes.append(node) if did_fail: return self.failure("Active nodes exist: " + repr(up_nodes)) self.CM.log("Warn: All nodes stopped but CTS didn't detect: " + repr(watch.unmatched)) self.CM.clear_all_caches() return self.failure("Missing log message: "+repr(watch.unmatched)) def is_applicable(self): '''SimulStopLite is a setup test and never applicable''' return 0 ################################################################### class SimulStartLite(CTSTest): ################################################################### '''Start any stopped nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStartLite" def __call__(self, dummy): '''Perform the 'SimulStartLite' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] uppat = self.CM["Pat:Slave_started"] if self.CM.upcount() == 0: uppat = self.CM["Pat:Local_started"] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "down": self.incr("WasStopped") watchpats.append(uppat % node) if len(watchpats) == 0: return self.success() watchpats.append(self.CM["Pat:DC_IDLE"]) # Start all the nodes - at about the same time... 
watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.set_timer() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "down": self.CM.StartaCMnoBlock(node) if watch.lookforall(): for attempt in (1, 2, 3, 4, 5): if self.CM.cluster_stable(): return self.success() return self.failure("Cluster did not stabilize") did_fail=0 unstable = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 0: did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstarted nodes exist: " + repr(unstable)) unstable = [] for node in self.CM.Env["nodes"]: if not self.CM.node_stable(node): did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstable cluster nodes exist: " + repr(unstable)) self.CM.log("ERROR: All nodes started but CTS didn't detect: " + repr(watch.unmatched)) return self.failure() def is_applicable(self): '''SimulStartLite is a setup test and never applicable''' return 0 def TestList(cm, audits): result = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): bound_test.Audits = audits result.append(bound_test) return result # vim:ts=4:sw=4:et: diff --git a/doc/Clusters_from_Scratch/en-US/Ap-Cman.xml b/doc/Clusters_from_Scratch/en-US/Ap-Cman.xml index e70080312e..af27515cab 100644 --- a/doc/Clusters_from_Scratch/en-US/Ap-Cman.xml +++ b/doc/Clusters_from_Scratch/en-US/Ap-Cman.xml @@ -1,131 +1,125 @@ %BOOK_ENTITIES; ]> Using CMAN for Cluster Membership and Quorum
Background CMAN v3 is a Corosync plugin that monitors the names and number of active cluster nodes in order to deliver membership and quorum information to clients (such as the Pacemaker daemons). In a traditional Corosync-Pacemaker cluster, a Pacemaker plugin is loaded to provide membership and quorum information. The motivation for using CMAN for this instead is to ensure that all elements of the cluster stack are making decisions based on the same membership and quorum data. A failure to do this can lead to what is called internal split-brain - a situation where different parts of the stack disagree about whether some nodes are alive or dead - which quickly leads to unnecessary downtime and/or data corruption. CMAN has been around longer than Pacemaker and is part of the Red Hat cluster stack, so it is available and supported by many distributions and other pieces of software (such as OCFS2 and GFS2). For this reason it makes sense to support it.
Adding CMAN Support Be sure to disable the Pacemaker plugin before continuing with this section. In most cases, this can be achieved by removing /etc/corosync/service.d/pcmk and stopping Corosync.
Adding CMAN Support - cluster.conf The preferred approach for enabling CMAN is to configure cluster.conf and use the /etc/init.d/cman script to start Corosync. It's far easier to maintain and automatically starts the necessary pieces for using GFS2. You can find some documentation on Installing CMAN and Creating a Basic Cluster Configuration File at the Red Hat website. However, please ignore the parts about Fencing, Failover Domains, or HA Services and anything to do with rgmanager and fenced; all of these continue to be handled by Pacemaker in the normal manner. Sample cluster.conf for a two-node cluster
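A minimal cluster.conf along these lines might look like the following sketch. The cluster name (beekhof) and node names (pcmk-1, pcmk-2) mirror the corosync.conf example in the next section, and the two_node/expected_votes pairing follows the usual CMAN convention for two-node clusters - treat all of it as an assumption to be adapted to your environment. No fence devices are listed, since fencing remains Pacemaker's job.

<?xml version="1.0"?>
<!-- Hypothetical minimal /etc/cluster/cluster.conf for a two-node cluster -->
<cluster config_version="1" name="beekhof">
  <clusternodes>
    <clusternode name="pcmk-1" nodeid="1"/>
    <clusternode name="pcmk-2" nodeid="2"/>
  </clusternodes>
  <!-- two_node="1" requires expected_votes="1" so either node alone retains quorum -->
  <cman two_node="1" expected_votes="1"/>
</cluster>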
Adding CMAN Support - corosync.conf The alternative is to add the necessary cman configuration elements to corosync.conf. We recommend you place these directives in /etc/corosync/service.d/cman as they will differ between machines. If you choose this approach, you would continue to start and stop Corosync with its init script as previously described in this document. Sample corosync.conf extensions for a two-node cluster [root@pcmk-1 ~]# cat <<-END >>/etc/corosync/service.d/cman cluster { name: beekhof clusternodes { clusternode { votes: 1 nodeid: 1 name: pcmk-1 } clusternode { votes: 1 nodeid: 2 name: pcmk-2 } } cman { expected_votes: 2 cluster_id: 123 nodename: `uname -n` two_node: 1 max_queued: 10 } } service { name: corosync_cman ver: 0 } quorum { provider: quorum_cman } END Verify that nodename was set appropriately on each host.
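Because the heredoc above is unquoted, the shell expands the backticks when the file is written, so each host's copy should already contain its own name. A quick sanity check might look like this sketch (the exact output will vary):

[root@pcmk-1 ~]# grep nodename /etc/corosync/service.d/cman
nodename: pcmk-1
[root@pcmk-1 ~]# uname -n
pcmk-1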
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Active-Active.xml b/doc/Clusters_from_Scratch/en-US/Ch-Active-Active.xml index 329cfd7178..8f709287dd 100644 --- a/doc/Clusters_from_Scratch/en-US/Ch-Active-Active.xml +++ b/doc/Clusters_from_Scratch/en-US/Ch-Active-Active.xml @@ -1,721 +1,721 @@ %BOOK_ENTITIES; ]> Conversion to Active/Active
Requirements The primary requirement for an Active/Active cluster is that the data required for your services is available, simultaneously, on both machines. Pacemaker makes no requirement on how this is achieved; you could use a SAN if you had one available, but since DRBD supports multiple Primaries, we can also use that. The only hitch is that we need to use a cluster-aware filesystem (and the one we used earlier with DRBD, ext4, is not one of those). Both OCFS2 and GFS2 are supported; however, here we will use GFS2, which comes with &DISTRO; &DISTRO_VERSION; .
Install a Cluster Filesystem - GFS2 - The first thing to do is install gfs2-utils on each machine. + The first thing to do is install gfs2-utils and gfs2-cluster on each machine. -[root@pcmk-1 ~]# yum install -y gfs2-utils gfs-pcmk +[root@pcmk-1 ~]# yum install -y gfs2-utils gfs2-cluster gfs-pcmk Setting up Install Process Resolving Dependencies --> Running transaction check ---> Package gfs-pcmk.x86_64 0:3.0.5-2.fc12 set to be updated --> Processing Dependency: libSaCkpt.so.3(OPENAIS_CKPT_B.01.01)(64bit) for package: gfs-pcmk-3.0.5-2.fc12.x86_64 --> Processing Dependency: dlm-pcmk for package: gfs-pcmk-3.0.5-2.fc12.x86_64 --> Processing Dependency: libccs.so.3()(64bit) for package: gfs-pcmk-3.0.5-2.fc12.x86_64 --> Processing Dependency: libdlmcontrol.so.3()(64bit) for package: gfs-pcmk-3.0.5-2.fc12.x86_64 --> Processing Dependency: liblogthread.so.3()(64bit) for package: gfs-pcmk-3.0.5-2.fc12.x86_64 --> Processing Dependency: libSaCkpt.so.3()(64bit) for package: gfs-pcmk-3.0.5-2.fc12.x86_64 ---> Package gfs2-utils.x86_64 0:3.0.5-2.fc12 set to be updated --> Running transaction check ---> Package clusterlib.x86_64 0:3.0.5-2.fc12 set to be updated ---> Package dlm-pcmk.x86_64 0:3.0.5-2.fc12 set to be updated ---> Package openaislib.x86_64 0:1.1.0-1.fc12 set to be updated --> Finished Dependency Resolution Dependencies Resolved ===========================================================================================  Package                Arch               Version                   Repository        Size =========================================================================================== Installing:  gfs-pcmk               x86_64             3.0.5-2.fc12              custom           101 k  gfs2-utils             x86_64             3.0.5-2.fc12              custom           208 k Installing for dependencies:  clusterlib             x86_64             3.0.5-2.fc12              custom            65 k  dlm-pcmk               x86_64             3.0.5-2.fc12              custom            93 k  openaislib             x86_64             1.1.0-1.fc12              fedora            76 k Transaction Summary =========================================================================================== Install       5 Package(s) Upgrade       0 Package(s) Total download size: 541 k Downloading Packages: (1/5): clusterlib-3.0.5-2.fc12.x86_64.rpm                                |  65 kB     00:00 (2/5): dlm-pcmk-3.0.5-2.fc12.x86_64.rpm                                  |  93 kB     00:00 (3/5): gfs-pcmk-3.0.5-2.fc12.x86_64.rpm                                  | 101 kB     00:00 (4/5): gfs2-utils-3.0.5-2.fc12.x86_64.rpm                                | 208 kB     00:00 (5/5): openaislib-1.1.0-1.fc12.x86_64.rpm                                |  76 kB     00:00 ------------------------------------------------------------------------------------------- Total                                                           992 kB/s | 541 kB     00:00 Running rpm_check_debug Running Transaction Test Finished Transaction Test Transaction Test Succeeded Running Transaction   Installing     : clusterlib-3.0.5-2.fc12.x86_64                                       1/5   Installing     : openaislib-1.1.0-1.fc12.x86_64                                       2/5   Installing     : dlm-pcmk-3.0.5-2.fc12.x86_64                                         3/5   Installing     : gfs-pcmk-3.0.5-2.fc12.x86_64                                         4/5   Installing     : gfs2-utils-3.0.5-2.fc12.x86_64                                
       5/5 Installed:   gfs-pcmk.x86_64 0:3.0.5-2.fc12                    gfs2-utils.x86_64 0:3.0.5-2.fc12 Dependency Installed:   clusterlib.x86_64 0:3.0.5-2.fc12   dlm-pcmk.x86_64 0:3.0.5-2.fc12   openaislib.x86_64 0:1.1.0-1.fc12   Complete! [root@pcmk-1 x86_64]# If this step fails, it is likely that your version/distribution does not ship the "Pacemaker" versions of dlm_controld and/or gfs_controld. Normally these files would be called dlm_controld.pcmk and gfs_controld.pcmk and live in the /usr/sbin directory. If you cannot locate an installation source for these files, you will need to install a package called cman and reconfigure Corosync to use it as outlined in . - When using CMAN, you can skip where dlm-clone and gfs-clone are created, and proceed directly to . + When using CMAN, you can skip where dlm-clone and gfs-clone are created, and proceed directly to after ensuring that gfs2-utils and gfs2-cluster were installed.
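Before falling back to CMAN, it is worth confirming whether the Pacemaker variants are simply missing from the expected location; a quick check along these lines should settle it:

[root@pcmk-1 ~]# ls -l /usr/sbin/dlm_controld.pcmk /usr/sbin/gfs_controld.pcmk

If ls reports that neither file exists and you cannot locate packages that provide them, proceed with the CMAN-based setup instead.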
Setup Pacemaker-GFS2 Integration GFS2 needs two services to be running. The first is the user-space interface to the kernel’s distributed lock manager (DLM). The DLM is used to co-ordinate which node(s) can access a given file (and when), and it integrates with Pacemaker to obtain node membership information (the list of nodes the cluster considers to be available) and fencing capabilities. The second service is GFS2’s own control daemon, which also integrates with Pacemaker to obtain node membership data.
Add the DLM service The DLM control daemon needs to run on all active cluster nodes, so we will use the shells interactive mode to create a cloned resource. [root@pcmk-1 ~]# crm crm(live)# cib new stack-glue INFO: stack-glue shadow CIB created crm(stack-glue)# configure primitive dlm ocf:pacemaker:controld op monitor interval=120s crm(stack-glue)# configure clone dlm-clone dlm meta interleave=true crm(stack-glue)# configure show xml crm(stack-glue)# configure show node pcmk-1 node pcmk-2 primitive WebData ocf:linbit:drbd \         params drbd_resource="wwwdata" \         op monitor interval="60s" primitive WebFS ocf:heartbeat:Filesystem \         params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4" primitive WebSite ocf:heartbeat:apache \         params configfile="/etc/httpd/conf/httpd.conf" \         op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" \         op monitor interval="30s" primitive dlm ocf:pacemaker:controld \ op monitor interval="120s" ms WebDataClone WebData \         meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" clone dlm-clone dlm \ meta interleave="true" location prefer-pcmk-1 WebSite 50: pcmk-1 colocation WebSite-with-WebFS inf: WebSite WebFS colocation fs_on_drbd inf: WebFS WebDataClone:Master colocation website-with-ip inf: WebSite ClusterIP order WebFS-after-WebData inf: WebDataClone:promote WebFS:start order WebSite-after-WebFS inf: WebFS WebSite order apache-after-ip inf: ClusterIP WebSite property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes=â€2†\         stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness=â€100†TODO: Explain the meaning of the interleave option Review the configuration before uploading it to the cluster, quitting the shell and watching the cluster’s response crm(stack-glue)# cib commit stack-glue INFO: commited 'stack-glue' shadow CIB to the cluster crm(stack-glue)# quit bye [root@pcmk-1 ~]# crm_mon ============ Last updated: Thu Sep  3 20:49:54 2009 Stack: openais Current DC: pcmk-2 - partition with quorum Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f 2 Nodes configured, 2 expected votes 5 Resources configured. ============ Online: [ pcmk-1 pcmk-2 ] WebSite (ocf::heartbeat:apache):        Started pcmk-2 Master/Slave Set: WebDataClone         Masters: [ pcmk-1 ]         Slaves: [ pcmk-2 ] ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2 Clone Set: dlm-clone Started: [ pcmk-2 pcmk-1 ] WebFS   (ocf::heartbeat:Filesystem):    Started pcmk-2
Add the GFS2 service Once the DLM is active, we can add the GFS2 control daemon. Use the crm shell to create the gfs-control cluster resource: [root@pcmk-1 ~]# crm crm(live)# cib new gfs-glue --force INFO: gfs-glue shadow CIB created crm(gfs-glue)# configure primitive gfs-control ocf:pacemaker:controld params daemon=gfs_controld.pcmk args="-g 0" op monitor interval=120s crm(gfs-glue)# configure clone gfs-clone gfs-control meta interleave=true Now ensure Pacemaker only starts the gfs-control service on nodes that also have a copy of the dlm service (created above) already running crm(gfs-glue)# configure colocation gfs-with-dlm INFINITY: gfs-clone dlm-clone crm(gfs-glue)# configure order start-gfs-after-dlm mandatory: dlm-clone gfs-clone Review the configuration before uploading it to the cluster, quitting the shell and watching the cluster’s response crm(gfs-glue)# configure show node pcmk-1 node pcmk-2 primitive WebData ocf:linbit:drbd \         params drbd_resource="wwwdata" \         op monitor interval="60s" primitive WebFS ocf:heartbeat:Filesystem \         params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4" primitive WebSite ocf:heartbeat:apache \         params configfile="/etc/httpd/conf/httpd.conf" \         op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" \         op monitor interval="30s" primitive dlm ocf:pacemaker:controld \         op monitor interval="120s" primitive gfs-control ocf:pacemaker:controld \ params daemon=â€gfs_controld.pcmk†args=â€-g 0†\ op monitor interval="120s" ms WebDataClone WebData \         meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" clone dlm-clone dlm \         meta interleave="true" clone gfs-clone gfs-control \ meta interleave="true" location prefer-pcmk-1 WebSite 50: pcmk-1 colocation WebSite-with-WebFS inf: WebSite WebFS colocation fs_on_drbd inf: WebFS WebDataClone:Master colocation gfs-with-dlm inf: gfs-clone dlm-clone colocation website-with-ip inf: WebSite ClusterIP order WebFS-after-WebData inf: WebDataClone:promote WebFS:start order WebSite-after-WebFS inf: WebFS WebSite order apache-after-ip inf: ClusterIP WebSite order start-gfs-after-dlm inf: dlm-clone gfs-clone property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes=â€2†\         stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness=â€100†crm(gfs-glue)# cib commit gfs-glue INFO: commited 'gfs-glue' shadow CIB to the cluster crm(gfs-glue)# quit bye [root@pcmk-1 ~]# crm_mon ============ Last updated: Thu Sep  3 20:49:54 2009 Stack: openais Current DC: pcmk-2 - partition with quorum Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f 2 Nodes configured, 2 expected votes 6 Resources configured. ============ Online: [ pcmk-1 pcmk-2 ] WebSite (ocf::heartbeat:apache):        Started pcmk-2 Master/Slave Set: WebDataClone         Masters: [ pcmk-1 ]         Slaves: [ pcmk-2 ] ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2 Clone Set: dlm-clone         Started: [ pcmk-2 pcmk-1 ] Clone Set: gfs-clone Started: [ pcmk-2 pcmk-1 ] WebFS   (ocf::heartbeat:Filesystem):    Started pcmk-1
Create a GFS2 Filesystem
Preparation Before we do anything to the existing partition, we need to make sure it is unmounted. We do this by telling the cluster to stop the WebFS resource. This will ensure that other resources (in our case, Apache) using WebFS are not only stopped, but stopped in the correct order. [root@pcmk-1 ~]# crm_resource --resource WebFS --set-parameter target-role --meta --parameter-value Stopped [root@pcmk-1 ~]# crm_mon ============ Last updated: Thu Sep  3 15:18:06 2009 Stack: openais Current DC: pcmk-1 - partition with quorum Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f 2 Nodes configured, 2 expected votes 6 Resources configured. ============ Online: [ pcmk-1 pcmk-2 ] Master/Slave Set: WebDataClone         Masters: [ pcmk-1 ]         Slaves: [ pcmk-2 ] ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1 Clone Set: dlm-clone         Started: [ pcmk-2 pcmk-1 ] Clone Set: gfs-clone         Started: [ pcmk-2 pcmk-1 ] Note that both Apache and WebFS have been stopped.
Create and Populate a GFS2 Partition Now that the cluster stack and integration pieces are running smoothly, we can create a GFS2 partition. This will erase all previous content stored on the DRBD device. Ensure you have a copy of any important data. We need to specify a number of additional parameters when creating a GFS2 partition. First we must use the -p option to specify that we want to use the kernel’s DLM. Next we use -j to indicate that it should reserve enough space for two journals (one per node accessing the filesystem). Lastly, we use -t to specify the lock table name. The format for this field is clustername:fsname. For the fsname, we just need to pick something unique and descriptive, and since we haven’t specified a clustername yet, we will use the default (pcmk). To specify an alternate name for the cluster, locate the service section containing "name: pacemaker" in corosync.conf and insert the following line anywhere inside the block: clustername: myname (a sketch of the resulting block appears at the end of this section). Do this on each node in the cluster and be sure to restart them before continuing. [root@pcmk-1 ~]# mkfs.gfs2 -p lock_dlm -j 2 -t pcmk:web /dev/drbd1 This will destroy any data on /dev/drbd1. It appears to contain: data Are you sure you want to proceed? [y/n] y Device:                    /dev/drbd1 Blocksize:                 4096 Device Size                1.00 GB (131072 blocks) Filesystem Size:           1.00 GB (131070 blocks) Journals:                  2 Resource Groups:           2 Locking Protocol:          "lock_dlm" Lock Table:                "pcmk:web" UUID:                      6B776F46-177B-BAF8-2C2B-292C0E078613 [root@pcmk-1 ~]# Then (re)populate the new filesystem with data (web pages). For now we’ll create another variation on our home page. [root@pcmk-1 ~]# mount /dev/drbd1 /mnt/ [root@pcmk-1 ~]# cat <<-END >/mnt/index.html <html> <body>My Test Site - GFS2</body> </html> END [root@pcmk-1 ~]# umount /dev/drbd1 [root@pcmk-1 ~]# drbdadm verify wwwdata [root@pcmk-1 ~]#
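As promised, here is a sketch of what the service block might look like after adding a custom cluster name. The ver value and surrounding directives are assumptions based on a typical plugin-based setup, and myname is an example value:

service {
    # Load the Pacemaker plugin
    name: pacemaker
    ver: 0
    # Added line: GFS2 lock tables would then use myname:fsname
    clustername: myname
}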
Reconfigure the Cluster for GFS2 [root@pcmk-1 ~]# crm crm(live)# cib new GFS2 INFO: GFS2 shadow CIB created crm(GFS2)# configure delete WebFS crm(GFS2)# configure primitive WebFS ocf:heartbeat:Filesystem params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype=â€gfs2†Now that we’ve recreated the resource, we also need to recreate all the constraints that used it. This is because the shell will automatically remove any constraints that referenced WebFS. crm(GFS2)# configure colocation WebSite-with-WebFS inf: WebSite WebFS crm(GFS2)# configure colocation fs_on_drbd inf: WebFS WebDataClone:Master crm(GFS2)# configure order WebFS-after-WebData inf: WebDataClone:promote WebFS:start crm(GFS2)# configure order WebSite-after-WebFS inf: WebFS WebSite crm(GFS2)# configure colocation WebFS-with-gfs-control INFINITY: WebFS gfs-clone crm(GFS2)# configure order start-WebFS-after-gfs-control mandatory: gfs-clone WebFS crm(GFS2)# configure show node pcmk-1 node pcmk-2 primitive WebData ocf:linbit:drbd \         params drbd_resource="wwwdata" \         op monitor interval="60s" primitive WebFS ocf:heartbeat:Filesystem \ params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype=â€gfs2†primitive WebSite ocf:heartbeat:apache \         params configfile="/etc/httpd/conf/httpd.conf" \         op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" \         op monitor interval="30s" primitive dlm ocf:pacemaker:controld \         op monitor interval="120s" primitive gfs-control ocf:pacemaker:controld \    params daemon=â€gfs_controld.pcmk†args=â€-g 0†\         op monitor interval="120s" ms WebDataClone WebData \         meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" clone dlm-clone dlm \         meta interleave="true" clone gfs-clone gfs-control \         meta interleave="true" colocation WebFS-with-gfs-control inf: WebFS gfs-clone colocation WebSite-with-WebFS inf: WebSite WebFS colocation fs_on_drbd inf: WebFS WebDataClone:Master colocation gfs-with-dlm inf: gfs-clone dlm-clone colocation website-with-ip inf: WebSite ClusterIP order WebFS-after-WebData inf: WebDataClone:promote WebFS:start order WebSite-after-WebFS inf: WebFS WebSite order apache-after-ip inf: ClusterIP WebSite order start-WebFS-after-gfs-control inf: gfs-clone WebFS order start-gfs-after-dlm inf: dlm-clone gfs-clone property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes=â€2†\         stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness=â€100†Review the configuration before uploading it to the cluster, quitting the shell and watching the cluster’s response crm(GFS2)# cib commit GFS2 INFO: commited 'GFS2' shadow CIB to the cluster crm(GFS2)# quit bye [root@pcmk-1 ~]# crm_mon ============ Last updated: Thu Sep  3 20:49:54 2009 Stack: openais Current DC: pcmk-2 - partition with quorum Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f 2 Nodes configured, 2 expected votes 6 Resources configured. 
============ Online: [ pcmk-1 pcmk-2 ] WebSite (ocf::heartbeat:apache):        Started pcmk-2 Master/Slave Set: WebDataClone         Masters: [ pcmk-1 ]         Slaves: [ pcmk-2 ] ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2 Clone Set: dlm-clone         Started: [ pcmk-2 pcmk-1 ] Clone Set: gfs-clone         Started: [ pcmk-2 pcmk-1 ] WebFS (ocf::heartbeat:Filesystem): Started pcmk-1
Reconfigure Pacemaker for Active/Active Almost everything is in place. Recent versions of DRBD are capable of operating in Primary/Primary mode and the filesystem we’re using is cluster aware. All we need to do now is reconfigure the cluster to take advantage of this. This will involve a number of changes, so we’ll again use interactive mode. [root@pcmk-1 ~]# crm [root@pcmk-1 ~]# cib new active There’s no point making the services active on both locations if we can’t reach them, so let’s first clone the IP address. Cloned IPaddr2 resources use an iptables rule to ensure that each request is only processed by one of the two clone instances. The additional meta options tell the cluster how many instances of the clone we want (one "request bucket" for each node) and that if all other nodes fail, then the remaining node should hold all of them. Otherwise the requests would simply be discarded. [root@pcmk-1 ~]# configure clone WebIP ClusterIP  \         meta globally-unique="true" clone-max="2" clone-node-max="2" Now we must tell the ClusterIP how to decide which requests are processed by which hosts. To do this we must specify the clusterip_hash parameter. Open the ClusterIP resource [root@pcmk-1 ~]# configure edit  ClusterIP and add the following to the params line: clusterip_hash="sourceip" so that the complete definition looks like: primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \         op monitor interval="30s" Here is the full transcript [root@pcmk-1 ~]# crm crm(live)# cib new active INFO: active shadow CIB created crm(active)# configure clone WebIP ClusterIP  \         meta globally-unique="true" clone-max="2" clone-node-max="2" crm(active)# configure show node pcmk-1 node pcmk-2 primitive WebData ocf:linbit:drbd \         params drbd_resource="wwwdata" \         op monitor interval="60s" primitive WebFS ocf:heartbeat:Filesystem \         params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2" primitive WebSite ocf:heartbeat:apache \         params configfile="/etc/httpd/conf/httpd.conf" \         op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \         op monitor interval="30s" primitive dlm ocf:pacemaker:controld \         op monitor interval="120s" primitive gfs-control ocf:pacemaker:controld \    params daemon="gfs_controld.pcmk" args="-g 0" \         op monitor interval="120s" ms WebDataClone WebData \         meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" clone WebIP ClusterIP \ meta globally-unique="true" clone-max="2" clone-node-max="2" clone dlm-clone dlm \         meta interleave="true" clone gfs-clone gfs-control \         meta interleave="true" colocation WebFS-with-gfs-control inf: WebFS gfs-clone colocation WebSite-with-WebFS inf: WebSite WebFS colocation fs_on_drbd inf: WebFS WebDataClone:Master colocation gfs-with-dlm inf: gfs-clone dlm-clone colocation website-with-ip inf: WebSite WebIP order WebFS-after-WebData inf: WebDataClone:promote WebFS:start order WebSite-after-WebFS inf: WebFS WebSite order apache-after-ip inf: WebIP WebSite order start-WebFS-after-gfs-control inf: gfs-clone WebFS order start-gfs-after-dlm inf: dlm-clone gfs-clone property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes="2" \         
stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness=â€100†Notice how any constraints that referenced ClusterIP have been updated to use WebIP instead. This is an additional benefit of using the crm shell. Next we need to convert the filesystem and Apache resources into clones. Again, the shell will automatically update any relevant constraints. crm(active)# configure clone WebFSClone WebFS crm(active)# configure clone WebSiteClone WebSite The last step is to tell the cluster that it is now allowed to promote both instances to be Primary (aka. Master). crm(active)# configure edit WebDataClone Change master-max to 2 crm(active)# configure show node pcmk-1 node pcmk-2 primitive WebData ocf:linbit:drbd \         params drbd_resource="wwwdata" \         op monitor interval="60s" primitive WebFS ocf:heartbeat:Filesystem \         params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype=â€gfs2†primitive WebSite ocf:heartbeat:apache \         params configfile="/etc/httpd/conf/httpd.conf" \         op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip=â€192.168.122.101†cidr_netmask=â€32†clusterip_hash=â€sourceip†\         op monitor interval="30s" primitive dlm ocf:pacemaker:controld \         op monitor interval="120s" primitive gfs-control ocf:pacemaker:controld \    params daemon=â€gfs_controld.pcmk†args=â€-g 0†\         op monitor interval="120s" ms WebDataClone WebData \         meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" clone WebFSClone WebFS clone WebIP ClusterIP  \         meta globally-unique=â€true†clone-max=â€2†clone-node-max=â€2†clone WebSiteClone WebSite clone dlm-clone dlm \         meta interleave="true" clone gfs-clone gfs-control \         meta interleave="true" colocation WebFS-with-gfs-control inf: WebFSClone gfs-clone colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone colocation fs_on_drbd inf: WebFSClone WebDataClone:Master colocation gfs-with-dlm inf: gfs-clone dlm-clone colocation website-with-ip inf: WebSiteClone WebIP order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start order WebSite-after-WebFS inf: WebFSClone WebSiteClone order apache-after-ip inf: WebIP WebSiteClone order start-WebFS-after-gfs-control inf: gfs-clone WebFSClone order start-gfs-after-dlm inf: dlm-clone gfs-clone property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes=â€2†\         stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness=â€100†Review the configuration before uploading it to the cluster, quitting the shell and watching the cluster’s response crm(active)# cib commit active INFO: commited 'active' shadow CIB to the cluster crm(active)# quit bye [root@pcmk-1 ~]# crm_mon ============ Last updated: Thu Sep  3 21:37:27 2009 Stack: openais Current DC: pcmk-2 - partition with quorum Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f 2 Nodes configured, 2 expected votes 6 Resources configured. 
============ Online: [ pcmk-1 pcmk-2 ] Master/Slave Set: WebDataClone         Masters: [ pcmk-1 pcmk-2 ] Clone Set: dlm-clone         Started: [ pcmk-2 pcmk-1 ] Clone Set: gfs-clone         Started: [ pcmk-2 pcmk-1 ] Clone Set: WebIP Started: [ pcmk-1 pcmk-2 ] Clone Set: WebFSClone Started: [ pcmk-1 pcmk-2 ] Clone Set: WebSiteClone Started: [ pcmk-1 pcmk-2 ]
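If you are curious how the cloned address distributes requests, the CLUSTERIP rule that the IPaddr2 agent creates can be inspected on each node; a sketch (the exact rule text varies):

[root@pcmk-1 ~]# iptables -L INPUT -n | grep CLUSTERIP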
Testing Recovery TODO: Put one node into standby to demonstrate failover
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml b/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml index 667c2c391d..8377f05620 100644 --- a/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml +++ b/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml @@ -1,472 +1,490 @@ %BOOK_ENTITIES; ]> Apache - Adding More Services Now that we have a basic but functional active/passive two-node cluster, we’re ready to add some real services. We’re going to start with Apache because it’s a feature of many clusters and relatively simple to configure.
Installation Before continuing, we need to make sure Apache is installed on both hosts. [root@ppcmk-1 ~]# yum install -y httpd Setting up Install Process Resolving Dependencies --> Running transaction check ---> Package httpd.x86_64 0:2.2.13-2.fc12 set to be updated --> Processing Dependency: httpd-tools = 2.2.13-2.fc12 for package: httpd-2.2.13-2.fc12.x86_64 --> Processing Dependency: apr-util-ldap for package: httpd-2.2.13-2.fc12.x86_64 --> Processing Dependency: /etc/mime.types for package: httpd-2.2.13-2.fc12.x86_64 --> Processing Dependency: libaprutil-1.so.0()(64bit) for package: httpd-2.2.13-2.fc12.x86_64 --> Processing Dependency: libapr-1.so.0()(64bit) for package: httpd-2.2.13-2.fc12.x86_64 --> Running transaction check ---> Package apr.x86_64 0:1.3.9-2.fc12 set to be updated ---> Package apr-util.x86_64 0:1.3.9-2.fc12 set to be updated ---> Package apr-util-ldap.x86_64 0:1.3.9-2.fc12 set to be updated ---> Package httpd-tools.x86_64 0:2.2.13-2.fc12 set to be updated ---> Package mailcap.noarch 0:2.1.30-1.fc12 set to be updated --> Finished Dependency Resolution Dependencies Resolved =======================================================================================  Package               Arch             Version                Repository         Size ======================================================================================= Installing:  httpd               x86_64           2.2.13-2.fc12            rawhide           735 k Installing for dependencies:  apr                 x86_64           1.3.9-2.fc12             rawhide           117 k  apr-util            x86_64           1.3.9-2.fc12             rawhide            84 k  apr-util-ldap       x86_64           1.3.9-2.fc12             rawhide            15 k  httpd-tools         x86_64           2.2.13-2.fc12            rawhide            63 k  mailcap             noarch           2.1.30-1.fc12            rawhide            25 k Transaction Summary ======================================================================================= Install       6 Package(s) Upgrade       0 Package(s) Total download size: 1.0 M Downloading Packages: (1/6): apr-1.3.9-2.fc12.x86_64.rpm                                   | 117 kB     00:00     (2/6): apr-util-1.3.9-2.fc12.x86_64.rpm                             |  84 kB     00:00     (3/6): apr-util-ldap-1.3.9-2.fc12.x86_64.rpm                         |  15 kB     00:00     (4/6): httpd-2.2.13-2.fc12.x86_64.rpm                               | 735 kB     00:00     (5/6): httpd-tools-2.2.13-2.fc12.x86_64.rpm                         |  63 kB     00:00     (6/6): mailcap-2.1.30-1.fc12.noarch.rpm                             |  25 kB     00:00     ---------------------------------------------------------------------------------------- Total                                                       875 kB/s | 1.0 MB     00:01     Running rpm_check_debug Running Transaction Test Finished Transaction Test Transaction Test Succeeded Running Transaction   Installing     : apr-1.3.9-2.fc12.x86_64                                         1/6   Installing     : apr-util-1.3.9-2.fc12.x86_64                                     2/6   Installing     : apr-util-ldap-1.3.9-2.fc12.x86_64                               3/6   Installing     : httpd-tools-2.2.13-2.fc12.x86_64                                 4/6   Installing     : mailcap-2.1.30-1.fc12.noarch                                     5/6   Installing     : httpd-2.2.13-2.fc12.x86_64                                       6/6 Installed:   httpd.x86_64 
0:2.2.13-2.fc12                                                         Dependency Installed:   apr.x86_64 0:1.3.9-2.fc12            apr-util.x86_64 0:1.3.9-2.fc12   apr-util-ldap.x86_64 0:1.3.9-2.fc12  httpd-tools.x86_64 0:2.2.13-2.fc12   mailcap.noarch 0:2.1.30-1.fc12   Complete! [root@pcmk-1 ~]# Also, we need the wget tool in order for the cluster to be able to check the status of the Apache server. [root@pcmk-1 ~]# yum install -y wget Setting up Install Process Resolving Dependencies --> Running transaction check ---> Package wget.x86_64 0:1.11.4-5.fc12 set to be updated --> Finished Dependency Resolution Dependencies Resolved ===========================================================================================  Package        Arch             Version                      Repository               Size =========================================================================================== Installing:  wget         x86_64          1.11.4-5.fc12                   rawhide                393 k Transaction Summary =========================================================================================== Install       1 Package(s) Upgrade       0 Package(s) Total download size: 393 k Downloading Packages: wget-1.11.4-5.fc12.x86_64.rpm                                            | 393 kB     00:00     Running rpm_check_debug Running Transaction Test Finished Transaction Test Transaction Test Succeeded Running Transaction   Installing     : wget-1.11.4-5.fc12.x86_64                                            1/1 Installed:   wget.x86_64 0:1.11.4-5.fc12 Complete! [root@pcmk-1 ~]#
Preparation First we need to create a page for Apache to serve up. On Fedora the default Apache docroot is /var/www/html, so we’ll create an index file there. [root@pcmk-1 ~]# cat <<-END >/var/www/html/index.html <html> <body>My Test Site - pcmk-1</body> </html> END [root@pcmk-1 ~]# For the moment, we will simplify things by serving up only a static site and manually syncing the data between the two nodes. So run the command again on pcmk-2. [root@pcmk-2 ~]# cat <<-END >/var/www/html/index.html <html> <body>My Test Site - pcmk-2</body> </html> END [root@pcmk-2 ~]#
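Until DRBD takes over this job in a later chapter, any change to the site has to be copied to the other node by hand. One way to do that, assuming root SSH access between the nodes as used elsewhere in this guide, is a sketch like:

[root@pcmk-1 ~]# rsync -av /var/www/html/ pcmk-2:/var/www/html/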
Enable the Apache status URL In order to monitor the health of your Apache instance, and recover it if it fails, the resource agent used by Pacemaker assumes the server-status URL is available. Look for the following in /etc/httpd/conf/httpd.conf and make sure it is not disabled or commented out: <Location /server-status> SetHandler server-status Order deny,allow Deny from all Allow from 127.0.0.1 </Location>
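Before handing Apache over to the cluster, you can confirm the URL actually responds. A quick manual check might look like the following sketch: start httpd by hand, fetch the page from the loopback address permitted above, then stop it again so the cluster can manage it.

[root@pcmk-1 ~]# apachectl start
[root@pcmk-1 ~]# wget --quiet -O - http://127.0.0.1/server-status | head -n 1
[root@pcmk-1 ~]# apachectl stop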
Update the Configuration At this point, Apache is ready to go; all that needs to be done is to add it to the cluster. Let's call the resource WebSite. We need to use an OCF script called apache in the heartbeat namespace (compare the key used here, ocf:heartbeat:apache, with the one we used earlier for the IP address: ocf:heartbeat:IPaddr2). The only required parameter is the path to the main Apache configuration file, and we’ll tell the cluster to check once a minute that apache is still running. [root@pcmk-1 ~]# crm configure primitive WebSite ocf:heartbeat:apache params configfile=/etc/httpd/conf/httpd.conf op monitor interval=1min [root@pcmk-1 ~]# crm configure show node pcmk-1 node pcmk-2 primitive WebSite ocf:heartbeat:apache \ params configfile="/etc/httpd/conf/httpd.conf" \ op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" \         op monitor interval="30s" property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes="2" \         stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness="100" After a short delay, we should see the cluster start apache [root@pcmk-1 ~]# crm_mon ============ Last updated: Fri Aug 28 16:12:49 2009 Stack: openais Current DC: pcmk-2 - partition with quorum Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f 2 Nodes configured, 2 expected votes 2 Resources configured. ============ Online: [ pcmk-1 pcmk-2 ] ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2 WebSite        (ocf::heartbeat:apache):        Started pcmk-1 Wait a moment, the WebSite resource isn’t running on the same host as our IP address!
Ensuring Resources Run on the Same Host - To reduce the load on any one machine, Pacemaker will generally try to spread the configured resources across the cluster nodes. However we can tell the cluster that two resources are related and need to run on the same host (or not at all). Here we instruct the cluster that WebSite can only run on the host that ClusterIP is active on. If ClusterIP is not active anywhere, WebSite will not be permitted to run anywhere. + To reduce the load on any one machine, Pacemaker will generally try to spread the configured resources across the cluster nodes. + However, we can tell the cluster that two resources are related and need to run on the same host (or not at all). + Here we instruct the cluster that WebSite can only run on the host that ClusterIP is active on. - + + For the constraint, we need a name (choose something descriptive like website-with-ip), indicate that it's mandatory (so that if ClusterIP is not active anywhere, WebSite will not be permitted to run anywhere either) by specifying a score of INFINITY and finally list the two resources. + + + + If ClusterIP is not active anywhere, WebSite will not be permitted to run anywhere. + + + + + Colocation constraints are "directional", in that they imply certain things about the order in which the two resources will have a location chosen. + In this case we're saying WebSite needs to be placed on the same machine as ClusterIP; this implies that we must know the location of ClusterIP before choosing a location for WebSite. + + [root@pcmk-1 ~]# crm configure colocation website-with-ip INFINITY: WebSite ClusterIP [root@pcmk-1 ~]# crm configure show node pcmk-1 node pcmk-2 primitive WebSite ocf:heartbeat:apache \         params configfile="/etc/httpd/conf/httpd.conf" \         op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" \         op monitor interval="30s" colocation website-with-ip inf: WebSite ClusterIP property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes="2" \         stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness="100" [root@pcmk-1 ~]# crm_mon ============ Last updated: Fri Aug 28 16:14:34 2009 Stack: openais Current DC: pcmk-2 - partition with quorum Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f 2 Nodes configured, 2 expected votes 2 Resources configured. ============ Online: [ pcmk-1 pcmk-2 ] ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2 WebSite        (ocf::heartbeat:apache):        Started pcmk-2
- Controlling Resource Start/Stop Ordering - - When Apache starts, it binds to the available IP addresses. It doesn’t know about any addresses we add afterwards, so not only do they need to run on the same node, but we need to make sure ClusterIP is already active before we start WebSite. We do this by adding an ordering constraint. We need to give it a name (chose something descriptive like apache-after-ip), indicate that its mandatory (so that any recovery for ClusterIP will also trigger recovery of WebSite) and list the two resources in the order we need them to start. - + Controlling Resource Start/Stop Ordering + + When Apache starts, it binds to the available IP addresses. + It doesn’t know about any addresses we add afterwards, so not only do they need to run on the same node, but we need to make sure ClusterIP is already active before we start WebSite. + We do this by adding an ordering constraint. + We need to give it a name (choose something descriptive like apache-after-ip), indicate that its mandatory (so that any recovery for ClusterIP will also trigger recovery of WebSite) and list the two resources in the order we need them to start. + [root@pcmk-1 ~]# crm configure order apache-after-ip mandatory: ClusterIP WebSite [root@pcmk-1 ~]# crm configure show node pcmk-1 node pcmk-2 primitive WebSite ocf:heartbeat:apache \         params configfile="/etc/httpd/conf/httpd.conf" \         op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" \         op monitor interval="30s" colocation website-with-ip inf: WebSite ClusterIP order apache-after-ip inf: ClusterIP WebSite property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes="2" \         stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness="100"
Specifying a Preferred Location Pacemaker does not rely on any sort of hardware symmetry between nodes, so it may well be that one machine is more powerful than the other. In such cases it makes sense to host the resources there if it is available. To do this we create a location constraint. Again we give it a descriptive name (prefer-pcmk-1), specify the resource we want to run there (WebSite), how badly we’d like it to run there (we’ll use 50 for now, but in a two-node situation almost any value above 0 will do) and the host’s name. [root@pcmk-1 ~]# crm configure location prefer-pcmk-1 WebSite 50: pcmk-1 [root@pcmk-1 ~]# crm configure show node pcmk-1 node pcmk-2 primitive WebSite ocf:heartbeat:apache \         params configfile="/etc/httpd/conf/httpd.conf" \         op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" \         op monitor interval="30s" location prefer-pcmk-1 WebSite 50: pcmk-1 colocation website-with-ip inf: WebSite ClusterIP property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes="2" \         stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness="100" [root@pcmk-1 ~]# crm_mon ============ Last updated: Fri Aug 28 16:17:35 2009 Stack: openais Current DC: pcmk-2 - partition with quorum Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f 2 Nodes configured, 2 expected votes 2 Resources configured. ============ Online: [ pcmk-1 pcmk-2 ] ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2 WebSite        (ocf::heartbeat:apache):        Started pcmk-2 Wait a minute, the resources are still on pcmk-2! Even though we now prefer pcmk-1 over pcmk-2, that preference is (intentionally) less than the resource stickiness (how much we preferred not to have unnecessary downtime). To see the current placement scores, you can use a tool called ptest ptest -sL Include output There is a way to force them to move though...
Manually Moving Resources Around the Cluster There are always times when an administrator needs to override the cluster and force resources to move to a specific location. Underneath we use location constraints like the one we created above, happily you don’t need to care. Just provide the name of the resource and the intended location, we’ll do the rest. [root@pcmk-1 ~]# crm resource move WebSite pcmk-1 [root@pcmk-1 ~]# crm_mon ============ Last updated: Fri Aug 28 16:19:24 2009 Stack: openais Current DC: pcmk-2 - partition with quorum Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f 2 Nodes configured, 2 expected votes 2 Resources configured. ============ Online: [ pcmk-1 pcmk-2 ] ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1 WebSite        (ocf::heartbeat:apache):        Started pcmk-1 Notice how the colocation rule we created has ensured that ClusterIP was also moved to pcmk-1. For the curious, we can see the effect of this command by examining the configuration crm configure show [root@pcmk-1 ~]# crm configure show node pcmk-1 node pcmk-2 primitive WebSite ocf:heartbeat:apache \         params configfile="/etc/httpd/conf/httpd.conf" \         op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" \         op monitor interval="30s" location cli-prefer-WebSite WebSite \ rule $id="cli-prefer-rule-WebSite" inf: #uname eq pcmk-1 location prefer-pcmk-1 WebSite 50: pcmk-1 colocation website-with-ip inf: WebSite ClusterIP property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes="2" \         stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness="100" Highlighted is the automated constraint used to move the resources to pcmk-1
Giving Control Back to the Cluster Once we’ve finished whatever activity that required us to move the resources to pcmk-1, in our case nothing, we can then allow the cluster to resume normal operation with the unmove command. Since we previously configured a default stickiness, the resources will remain on pcmk-1. [root@pcmk-1 ~]# crm resource unmove WebSite [root@pcmk-1 ~]# crm configure show node pcmk-1 node pcmk-2 primitive WebSite ocf:heartbeat:apache \         params configfile="/etc/httpd/conf/httpd.conf" \         op monitor interval="1min" primitive ClusterIP ocf:heartbeat:IPaddr2 \         params ip="192.168.122.101" cidr_netmask="32" \         op monitor interval="30s" location prefer-pcmk-1 WebSite 50: pcmk-1 colocation website-with-ip inf: WebSite ClusterIP property $id="cib-bootstrap-options" \         dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \         cluster-infrastructure="openais" \         expected-quorum-votes="2" \         stonith-enabled="false" \         no-quorum-policy="ignore" rsc_defaults $id="rsc-options" \         resource-stickiness="100" Note that the automated constraint is now gone. If we check the cluster status, we can also see that as expected the resources are still active on pcmk-1. [root@pcmk-1 ~]# crm_mon ============ Last updated: Fri Aug 28 16:20:53 2009 Stack: openais Current DC: pcmk-2 - partition with quorum Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f 2 Nodes configured, 2 expected votes 2 Resources configured. ============ Online: [ pcmk-1 pcmk-2 ] ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1 WebSite        (ocf::heartbeat:apache):        Started pcmk-1
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml b/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml index 03d974b410..5fc6805de8 100644 --- a/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml +++ b/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml @@ -1,528 +1,528 @@ %BOOK_ENTITIES; ]> Replicated Storage with DRBD Even if you’re serving up static websites, having to manually synchronize the contents of that website to all the machines in the cluster is not ideal. For dynamic websites, such as a wiki, it’s not even an option. Not everyone can afford network-attached storage, but somehow the data needs to be kept in sync. Enter DRBD, which can be thought of as network-based RAID-1. See http://www.drbd.org/ for more details.
Install the DRBD Packages Since its inclusion in the upstream 2.6.33 kernel, everything needed to use DRBD ships with &DISTRO; &DISTRO_VERSION;. All you need to do is install it: -[root@pcmk-1 ~]# yum install -y drbd-pacemaker +[root@pcmk-1 ~]# yum install -y drbd-pacemaker drbd-udev Loaded plugins: presto, refresh-packagekit Setting up Install Process Resolving Dependencies --> Running transaction check ---> Package drbd-pacemaker.x86_64 0:8.3.7-2.fc13 set to be updated --> Processing Dependency: drbd-utils = 8.3.7-2.fc13 for package: drbd-pacemaker-8.3.7-2.fc13.x86_64 --> Running transaction check ---> Package drbd-utils.x86_64 0:8.3.7-2.fc13 set to be updated --> Finished Dependency Resolution Dependencies Resolved ================================================================================= Package Arch Version Repository Size ================================================================================= Installing: drbd-pacemaker x86_64 8.3.7-2.fc13 fedora 19 k Installing for dependencies: drbd-utils x86_64 8.3.7-2.fc13 fedora 165 k Transaction Summary ================================================================================= Install 2 Package(s) Upgrade 0 Package(s) Total download size: 184 k Installed size: 427 k Downloading Packages: Setting up and reading Presto delta metadata fedora/prestodelta | 1.7 kB 00:00 Processing delta metadata Package(s) data still to download: 184 k (1/2): drbd-pacemaker-8.3.7-2.fc13.x86_64.rpm | 19 kB 00:01 (2/2): drbd-utils-8.3.7-2.fc13.x86_64.rpm | 165 kB 00:02 --------------------------------------------------------------------------------- Total 45 kB/s | 184 kB 00:04 Running rpm_check_debug Running Transaction Test Transaction Test Succeeded Running Transaction Installing : drbd-utils-8.3.7-2.fc13.x86_64 1/2 Installing : drbd-pacemaker-8.3.7-2.fc13.x86_64 2/2 Installed: drbd-pacemaker.x86_64 0:8.3.7-2.fc13 Dependency Installed: drbd-utils.x86_64 0:8.3.7-2.fc13 Complete! [root@pcmk-1 ~]#
Configure DRBD Before we configure DRBD, we need to set aside some disk for it to use.
Create A Partition for DRBD If you have more than 1Gb free, feel free to use it. For this guide however, 1Gb is plenty of space for a single html file and sufficient for later holding the GFS2 metadata. [root@pcmk-1 ~]# lvcreate -n drbd-demo -L 1G VolGroup   Logical volume "drbd-demo" created [root@pcmk-1 ~]# lvs   LV        VG       Attr   LSize   Origin Snap%  Move Log Copy%  Convert   drbd-demo VolGroup -wi-a- 1.00G                                         lv_root   VolGroup -wi-ao   7.30G                                         lv_swap   VolGroup -wi-ao 500.00M Repeat this on the second node, be sure to use the same size partition. [root@pcmk-2 ~]# lvs   LV      VG       Attr   LSize   Origin Snap%  Move Log Copy%  Convert   lv_root VolGroup -wi-ao   7.30G                                         lv_swap VolGroup -wi-ao 500.00M                                       [root@pcmk-2 ~]# lvcreate -n drbd-demo -L 1G VolGroup   Logical volume "drbd-demo" created [root@pcmk-2 ~]# lvs   LV        VG       Attr   LSize   Origin Snap%  Move Log Copy%  Convert   drbd-demo VolGroup -wi-a- 1.00G                                         lv_root   VolGroup -wi-ao   7.30G                                         lv_swap   VolGroup -wi-ao 500.00M
Write the DRBD Config There is no series of commands for building a DRBD configuration, so simply copy the configuration below to /etc/drbd.conf. Detailed information on the directives used in this configuration (and other alternatives) is available from http://www.drbd.org/users-guide/ch-configure.html. Be sure to use the names and addresses of your nodes if they differ from the ones used in this guide. global {   usage-count yes; } common {   protocol C; } resource wwwdata {   meta-disk internal;   device    /dev/drbd1;   syncer {     verify-alg sha1;   }   net {     allow-two-primaries;   }   on pcmk-1 {     disk      /dev/mapper/VolGroup-drbd--demo;     address   192.168.122.101:7789;   }   on pcmk-2 {     disk      /dev/mapper/VolGroup-drbd--demo;     address   192.168.122.102:7789;   } } TODO: Explain the reason for the allow-two-primaries option
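Since the file is written by hand, it is worth letting drbdadm parse it back before going further; a sketch of such a check:

[root@pcmk-1 ~]# drbdadm dump wwwdata

If the configuration contains a syntax error, drbdadm will complain about it; otherwise it simply echoes back the parsed resource definition.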
Initialize and Load DRBD With the configuration in place, we can now perform the DRBD initialization [root@pcmk-1 ~]# drbdadm create-md wwwdata md_offset 12578816 al_offset 12546048 bm_offset 12541952 Found some data  ==> This might destroy existing data! <== Do you want to proceed? [need to type 'yes' to confirm] yes Writing meta data... initializing activity log NOT initialized bitmap New drbd meta data block successfully created. success Now load the DRBD kernel module and confirm that everything is sane [root@pcmk-1 ~]# modprobe drbd [root@pcmk-1 ~]# drbdadm up wwwdata [root@pcmk-1 ~]# cat /proc/drbd version: 8.3.6 (api:88/proto:86-90) GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57 1: cs:WFConnection ro:Secondary/Unknown ds:Inconsistent/DUnknown C r----     ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:12248 [root@pcmk-1 ~]# Repeat on the second node drbdadm --force create-md wwwdata modprobe drbd drbdadm up wwwdata cat /proc/drbd [root@pcmk-2 ~]# drbdadm --force create-md wwwdata Writing meta data... initializing activity log NOT initialized bitmap New drbd meta data block successfully created. success [root@pcmk-2 ~]# modprobe drbd WARNING: Deprecated config file /etc/modprobe.conf, all config files belong into /etc/modprobe.d/. [root@pcmk-2 ~]# drbdadm up wwwdata [root@pcmk-2 ~]# cat /proc/drbd version: 8.3.6 (api:88/proto:86-90) GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57 1: cs:Connected ro:Secondary/Secondary ds:Inconsistent/Inconsistent C r----     ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:12248 Now we need to tell DRBD which set of data to use. Since both sides contain garbage, we can run the following on pcmk-1: [root@pcmk-1 ~]# drbdadm -- --overwrite-data-of-peer primary wwwdata [root@pcmk-1 ~]# cat /proc/drbd version: 8.3.6 (api:88/proto:86-90) GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57  1: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r----     ns:2184 nr:0 dw:0 dr:2472 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:10064         [=====>..............] sync'ed: 33.4% (10064/12248)K         finish: 0:00:37 speed: 240 (240) K/sec [root@pcmk-1 ~]# cat /proc/drbd version: 8.3.6 (api:88/proto:86-90) GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57  1: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----     ns:12248 nr:0 dw:0 dr:12536 al:0 bm:1 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0 pcmk-1 is now in the Primary state which allows it to be written to. Which means its a good point at which to create a filesystem and populate it with some data to serve up via our WebSite resource.
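The initial synchronization can take a while on larger devices. If you want to follow it through to completion before moving on, something like this works:

[root@pcmk-1 ~]# watch -n1 cat /proc/drbd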
Populate DRBD with Data

[root@pcmk-1 ~]# mkfs.ext4 /dev/drbd1
mke2fs 1.41.4 (27-Jan-2009)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
3072 inodes, 12248 blocks
612 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=12582912
2 block groups
8192 blocks per group, 8192 fragments per group
1536 inodes per group
Superblock backups stored on blocks:
        8193

Writing inode tables: done
Creating journal (1024 blocks): done
Writing superblocks and filesystem accounting information: done

This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first.  Use tune2fs -c or -i to override.

Now mount the newly created filesystem so we can create our index file:

mount /dev/drbd1 /mnt/
cat <<-END >/mnt/index.html
<html>
<body>My Test Site - drbd</body>
</html>
END
umount /dev/drbd1

[root@pcmk-1 ~]# mount /dev/drbd1 /mnt/
[root@pcmk-1 ~]# cat <<-END >/mnt/index.html
> <html>
> <body>My Test Site - drbd</body>
> </html>
> END
[root@pcmk-1 ~]# umount /dev/drbd1
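If you want to confirm the page actually landed on the DRBD device before handing it over to the cluster, remount the device briefly and inspect the file. This verification step is optional and not part of the original transcript:

[root@pcmk-1 ~]# mount /dev/drbd1 /mnt/
[root@pcmk-1 ~]# cat /mnt/index.html
[root@pcmk-1 ~]# umount /dev/drbd1

The cat should print back the three-line HTML document created above.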
Configure the Cluster for DRBD

One handy feature of the crm shell is that you can use it in interactive mode to make several changes atomically.

First we launch the shell. The prompt will change to indicate you're in interactive mode.

[root@pcmk-1 ~]# crm
crm(live)#

Next we must create a working copy of the current configuration. This is where all our changes will go. The cluster will not see any of them until we say it's OK. Notice again how the prompt changes, this time to indicate that we're no longer looking at the live cluster.

crm(live)# cib new drbd
INFO: drbd shadow CIB created
crm(drbd)#

Now we can create our DRBD clone and display the revised configuration.

crm(drbd)# configure primitive WebData ocf:linbit:drbd params drbd_resource=wwwdata \
        op monitor interval=60s
crm(drbd)# configure ms WebDataClone WebData meta master-max=1 master-node-max=1 \
        clone-max=2 clone-node-max=1 notify=true
crm(drbd)# configure show
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
ms WebDataClone WebData \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
order apache-after-ip inf: ClusterIP WebSite
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"

Once we're happy with the changes, we can tell the cluster to start using them and use crm_mon to check that everything is functioning.

crm(drbd)# cib commit drbd
INFO: commited 'drbd' shadow CIB to the cluster
crm(drbd)# quit
bye
[root@pcmk-1 ~]# crm_mon
============
Last updated: Tue Sep  1 09:37:13 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
3 Resources configured.
============

Online: [ pcmk-1 pcmk-2 ]

ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1
WebSite (ocf::heartbeat:apache):        Started pcmk-1
Master/Slave Set: WebDataClone
        Masters: [ pcmk-2 ]
        Slaves: [ pcmk-1 ]

TODO: Include details on adding a second DRBD resource

Now that DRBD is functioning we can configure a Filesystem resource to use it. In addition to the filesystem's definition, we also need to tell the cluster where it can be located (only on the DRBD Primary) and when it is allowed to start (after the Primary was promoted). Once again we'll use the shell's interactive mode.

[root@pcmk-1 ~]# crm
crm(live)# cib new fs
INFO: fs shadow CIB created
crm(fs)# configure primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4"
crm(fs)# configure colocation fs_on_drbd inf: WebFS WebDataClone:Master
crm(fs)# configure order WebFS-after-WebData inf: WebDataClone:promote WebFS:start

We also need to tell the cluster that Apache needs to run on the same machine as the filesystem and that it must be active before Apache can start.
crm(fs)# configure colocation WebSite-with-WebFS inf: WebSite WebFS
crm(fs)# configure order WebSite-after-WebFS inf: WebFS WebSite

Time to review the updated configuration:

[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4"
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
ms WebDataClone WebData \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation WebSite-with-WebFS inf: WebSite WebFS
colocation fs_on_drbd inf: WebFS WebDataClone:Master
colocation website-with-ip inf: WebSite ClusterIP
order WebFS-after-WebData inf: WebDataClone:promote WebFS:start
order WebSite-after-WebFS inf: WebFS WebSite
order apache-after-ip inf: ClusterIP WebSite
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"

After reviewing the new configuration, we again upload it and watch the cluster put it into effect.

crm(fs)# cib commit fs
INFO: commited 'fs' shadow CIB to the cluster
crm(fs)# quit
bye
[root@pcmk-1 ~]# crm_mon
============
Last updated: Tue Sep  1 10:08:44 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============

Online: [ pcmk-1 pcmk-2 ]

ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1
WebSite (ocf::heartbeat:apache): Started pcmk-1
Master/Slave Set: WebDataClone
        Masters: [ pcmk-1 ]
        Slaves: [ pcmk-2 ]
WebFS (ocf::heartbeat:Filesystem): Started pcmk-1
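When building up changes in a shadow CIB like this, it is also possible to let the shell check your work before committing. Treat the following as a sketch: both subcommands exist in reasonably recent versions of the crm shell, but their availability and output vary between releases.

crm(fs)# configure verify
crm(fs)# cib diff
crm(fs)# cib commit fs

configure verify runs the same sanity checks the cluster itself would apply, and cib diff shows what will change relative to the live configuration, making it easy to spot an accidental edit before it takes effect.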
Testing Migration

We could shut down the active node again, but another way to safely simulate recovery is to put the node into what is called "standby mode". Nodes in this state tell the cluster that they are not allowed to run resources. Any resources found active there will be moved elsewhere. This feature can be particularly useful when updating the resources' packages.

Put the local node into standby mode and observe the cluster move all the resources to the other node. Note also that the node's status will change to indicate that it can no longer host resources.

[root@pcmk-1 ~]# crm node standby
[root@pcmk-1 ~]# crm_mon
============
Last updated: Tue Sep  1 10:09:57 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============

Node pcmk-1: standby
Online: [ pcmk-2 ]

ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
WebSite (ocf::heartbeat:apache):        Started pcmk-2
Master/Slave Set: WebDataClone
        Masters: [ pcmk-2 ]
        Stopped: [ WebData:1 ]
WebFS   (ocf::heartbeat:Filesystem):    Started pcmk-2

Once we've done everything we needed to on pcmk-1 (in this case nothing, we just wanted to see the resources move), we can allow the node to be a full cluster member again.

[root@pcmk-1 ~]# crm node online
[root@pcmk-1 ~]# crm_mon
============
Last updated: Tue Sep  1 10:13:25 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============

Online: [ pcmk-1 pcmk-2 ]

ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
WebSite (ocf::heartbeat:apache):        Started pcmk-2
Master/Slave Set: WebDataClone
        Masters: [ pcmk-2 ]
        Slaves: [ pcmk-1 ]
WebFS   (ocf::heartbeat:Filesystem):    Started pcmk-2

Notice that our resource stickiness settings prevent the services from migrating back to pcmk-1.
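If you do want the services back on pcmk-1 without lowering the stickiness default, the same standby trick works in reverse; both commands accept a node name, so this can be driven entirely from pcmk-1. This sequence is an optional extra, not part of the original walk-through:

[root@pcmk-1 ~]# crm node standby pcmk-2
[root@pcmk-1 ~]# crm_mon    # wait until everything reports Started on pcmk-1, then exit
[root@pcmk-1 ~]# crm node online pcmk-2

Because stickiness now favours pcmk-1, bringing pcmk-2 back online does not move anything a second time.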
diff --git a/extra/resources/SysInfo b/extra/resources/SysInfo index 8f9870bd1a..a2fd4ac126 100644 --- a/extra/resources/SysInfo +++ b/extra/resources/SysInfo @@ -1,388 +1,330 @@ #!/bin/sh # # # SysInfo OCF Resource Agent # It records (in the CIB) various attributes of a node # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 This is a SysInfo Resource Agent. It records (in the CIB) various attributes of a node Sample Linux output: arch: i686 os: Linux-2.4.26-gentoo-r14 free_swap: 1999 cpu_info: Intel(R) Celeron(R) CPU 2.40GHz cpu_speed: 4771.02 cpu_cores: 1 cpu_load: 0.00 ram_total: 513 ram_free: 117 root_free: 2.4 Sample Darwin output: arch: i386 os: Darwin-8.6.2 cpu_info: Intel Core Duo cpu_speed: 2.16 cpu_cores: 2 cpu_load: 0.18 ram_total: 2016 ram_free: 787 root_free: 13 Units: free_swap: Mb ram_*: Mb cpu_speed (Linux): bogomips cpu_speed (Darwin): Ghz *_free: GB (or user-defined: disk_unit) SysInfo resource agent PID file PID file Interval to allow values to stabilize Dampening Delay Filesystems or Paths to be queried for free disk space as a SPACE separated list - e.g "/dev/sda1 /tmp". Results will be written to an attribute with leading slashes removed, and other slashes replaced with underscore, and the word 'free' appended - e.g /dev/sda1 -> dev_sda1_free Note: The root filesystem '/' is always queried to an attribute named 'root_free' List of Filesytems/Paths to query for free disk space content type="string" /> Unit to report disk free space in. 
Can be one of: B, K, M, G, T, P (case-insensitive) Unit to report disk free space in content type="string" default="G"/> END } ####################################################################### UpdateStat() { name=$1; shift value="$*" - echo -e "$name:\t$value" + printf "%s:\t%s\n" "$name" "$value" ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -v "$value" } SysInfoStats() { UpdateStat arch "`uname -m`" UpdateStat os "`uname -s`-`uname -r`" case `uname -s` in "Darwin") mem=`top -l 1 | grep Mem: | awk '{print $10}'` mem_used=`top -l 1 | grep Mem: | awk '{print $8}'` mem=`SysInfo_mem_units $mem` mem_used=`SysInfo_mem_units $mem_used` mem_total=`expr $mem_used + $mem` - cpu_type=`system_profiler SPHardwareDataType | grep "CPU Type:"` - cpu_type=${cpu_type/*: /} - cpu_speed=`system_profiler SPHardwareDataType | grep "CPU Speed:" | awk '{print $3}'` - cpu_cores=`system_profiler SPHardwareDataType | grep "Number Of"` - cpu_cores=${cpu_cores/*: /} + cpu_type=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Type/ {print $2; exit}'` + cpu_speed=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Speed/ {print $2; exit}'` + cpu_cores=`system_profiler SPHardwareDataType | awk -F': ' '/^Number Of/ {print $2; exit}'` ;; "Linux") if [ -f /proc/cpuinfo ]; then - cpu_type=`grep "model name" /proc/cpuinfo | head -n 1` - cpu_type=${cpu_type/*: /} - cpu_speed=`grep "bogomips" /proc/cpuinfo | head -n 1` - cpu_speed=${cpu_speed/*: /} + cpu_type=`awk -F': ' '/model name/ {print $2; exit}' /proc/cpuinfo` + cpu_speed=`awk -F': ' '/bogomips/ {print $2; exit}' /proc/cpuinfo` cpu_cores=`grep "^processor" /proc/cpuinfo | wc -l` fi if [ -f /proc/meminfo ]; then # meminfo results are in kB mem=`grep "SwapFree" /proc/meminfo | awk '{print $2"k"}'` if [ ! -z $mem ]; then UpdateStat free_swap `SysInfo_mem_units $mem` fi mem=`grep "Inactive" /proc/meminfo | awk '{print $2"k"}'` mem_total=`grep "MemTotal" /proc/meminfo | awk '{print $2"k"}'` else mem=`top -n 1 | grep Mem: | awk '{print $7}'` fi ;; *) esac if [ x != x"$cpu_type" ]; then UpdateStat cpu_info "$cpu_type" fi if [ x != x"$cpu_speed" ]; then UpdateStat cpu_speed "$cpu_speed" fi if [ x != x"$cpu_cores" ]; then UpdateStat cpu_cores "$cpu_cores" fi loads=`uptime` load15=`echo ${loads} | awk '{print $10}'` UpdateStat cpu_load $load15 if [ ! -z "$mem" ]; then # Massage the memory values UpdateStat ram_total `SysInfo_mem_units $mem_total` UpdateStat ram_free `SysInfo_mem_units $mem` fi # Portability notes: # o tail: explicit "-n" not available in Solaris; instead simplify # 'tail -n ' to the equivalent 'tail -'. 
for disk in "/" ${OCF_RESKEY_disks}; do unset disk_free disk_label - disk_free=`df -k ${disk} | tail -1 | awk '{print $4}'` + disk_free=`df -h ${disk} | tail -1 | awk '{print $4}'` if [ x != x"$disk_free" ]; then disk_label=`echo $disk | sed -e 's#^/$#root#;s#^/*##;s#/#_#g'` UpdateStat ${disk_label}_free `SysInfo_hdd_units $disk_free` fi done } +SysInfo_megabytes() { + # Size in megabytes + echo $1 | awk '{ n = $0; + sub(/[0-9]+(.[0-9]+)?/, ""); + split(n, a, $0); + n=a[1]; + if ($0 == "G" || $0 == "") { n *= 1024 }; + if (/^kB?/) { n /= 1024 }; + printf "%d\n", n }' # Intentionally round to an integer +} + SysInfo_mem_units() { mem=$1 if [ -z $1 ]; then return fi - memlen=`expr ${#mem} - 1` - memlen_alt=`expr ${#mem} - 2` - if [ ${mem:$memlen:1} = "G" ]; then - mem="${mem:0:$memlen}" - if [ $mem != ${mem/./} ]; then - mem_before=${mem/.*/} - mem_after=${mem/*./} - mem=$[mem_before*1024] - if [ ${#mem_after} = 0 ]; then - : - elif [ ${#mem_after} = 1 ]; then - mem=$[mem+100*$mem_after] - elif [ ${#mem_after} = 2 ]; then - mem=$[mem+10*$mem_after] - elif [ ${#mem_after} = 3 ]; then - mem=$[mem+$mem_after] - else - mem_after=${mem_after:0:3} - mem=$[mem+$mem_after] - fi - fi - elif [ ${mem:$memlen:1} = "M" ]; then - mem=${mem/.*/} - mem="${mem:0:$memlen}" - elif [ ${mem:$memlen:1} = "k" ]; then - mem="${mem:0:$memlen}" - mem=${mem/.*/} - mem=`expr $mem / 1024` - elif [ ${mem:$memlen_alt:2} = "kB" ]; then - mem="${mem:0:$memlen_alt}" - mem=${mem/.*/} - mem=`expr $mem / 1024` - elif [ ${mem:$memlen_alt:2} = "Mb" ]; then - mem="${mem:0:$memlen_alt}" - mem=${mem/.*/} - elif [ ${mem:$memlen_alt:2} = "MB" ]; then - mem="${mem:0:$memlen_alt}" - mem=${mem/.*/} - fi - + mem=$(SysInfo_megabytes "$1") # Round to the next multiple of 50 - memlen=`expr ${#mem} - 2` - mem_round="${mem:$memlen:2}" - if [ x$mem_round = x ]; then - : - elif [ $mem_round = "00" ]; then - : - elif [ $mem_round -lt "50" ]; then - mem=$[mem+50] - mem=$[mem-$mem_round] - - else - mem=$[mem+100] - mem=$[mem-$mem_round] + r=$(($mem % 50)) + if [ $r != 0 ]; then + mem=$(($mem + 50 - $r)) fi + echo $mem } SysInfo_hdd_units() { - disk_size=$1 - disk_unit=${OCF_RESKEY_disk_unit} - if [ -z $disk_unit ]; then - disk_unit="G" - fi - - case $disk_unit in - [Pp]) - disk_size=$((disk_size/1024/1024/1024/1024)) - ;; - [Tt]) - disk_size=$((disk_size/1024/1024/1024)) - ;; - [Gg]) - disk_size=$((disk_size/1024/1024)) - ;; - [Mm]) - disk_size=$((disk_size/1024)) - ;; - [Kk]) - #Already in KB - ;; - [Bb]) - disk_size=$((disk_size*1024)) - ;; - *) - ocf_log err "disk_unit set to invalid unit" - exit $OCF_ERR_ARGS + # Defaults to size in gigabytes + + case $OCF_RESKEY_disk_unit in + [Pp]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024 / 1024));; + [Tt]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024));; + [Gg]) echo $(($(SysInfo_megabytes "$1") / 1024));; + [Mm]) SysInfo_megabytes "$1";; + [Kk]) echo $(($(SysInfo_megabytes "$1") * 1024));; + [Bb]) echo $(($(SysInfo_megabytes "$1") * 1024 * 1024));; + *) + ocf_log err "Invalid value for disk_unit: $OCF_RESKEY_disk_unit" + echo $(($(SysInfo_megabytes "$1") / 1024));; esac - - echo $disk_size } SysInfo_usage() { cat < $OCF_RESKEY_pidfile SysInfoStats exit $OCF_SUCCESS } SysInfo_stop() { rm $OCF_RESKEY_pidfile exit $OCF_SUCCESS } SysInfo_monitor() { if [ -f $OCF_RESKEY_pidfile ]; then clone=`cat $OCF_RESKEY_pidfile` fi if [ x$clone = x ]; then rm $OCF_RESKEY_pidfile exit $OCF_NOT_RUNNING elif [ $clone = $OCF_RESKEY_clone ]; then SysInfoStats exit $OCF_SUCCESS elif [
x$OCF_RESKEY_CRM_meta_globally_unique = xtrue -o x$OCF_RESKEY_CRM_meta_globally_unique = xTrue -o x$OCF_RESKEY_CRM_meta_globally_unique = xyes -o x$OCF_RESKEY_CRM_meta_globally_unique = xYes ]; then SysInfoStats exit $OCF_SUCCESS fi exit $OCF_NOT_RUNNING } SysInfo_validate() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then SysInfo_usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_pidfile:="$HA_VARRUN/SysInfo-${OCF_RESOURCE_INSTANCE}"} +: ${OCF_RESKEY_disk_unit:="G"} : ${OCF_RESKEY_clone:="0"} if [ x != x${OCF_RESKEY_delay} ]; then OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) SysInfo_start ;; stop) SysInfo_stop ;; monitor) SysInfo_monitor ;; validate-all) SysInfo_validate ;; usage|help) SysInfo_usage exit $OCF_SUCCESS ;; *) SysInfo_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/lib/common/ais.c b/lib/common/ais.c index 15db59bc75..90559dc921 100644 --- a/lib/common/ais.c +++ b/lib/common/ais.c @@ -1,1225 +1,1229 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include "stack.h" #ifdef SUPPORT_COROSYNC # include #endif #ifdef SUPPORT_CMAN # include cman_handle_t pcmk_cman_handle = NULL; #endif #ifdef SUPPORT_CS_QUORUM # include # include # include # include # include quorum_handle_t pcmk_quorum_handle = 0; cpg_handle_t pcmk_cpg_handle = 0; struct cpg_name pcmk_cpg_group = { .length = 0, .value[0] = 0, }; #endif static char *pcmk_uname = NULL; static int pcmk_uname_len = 0; static uint32_t pcmk_nodeid = 0; #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } \ } while(rc == CS_ERR_TRY_AGAIN && counter < max) enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if(safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if(safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if(safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if(safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if(safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if(safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if(safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if(safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if(safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; } else if(safe_str_eq(text, "stonith-ng")) { type = crm_msg_stonith_ng; } else if(safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. 
Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if(scan_rc != 1) { /* Ensure its sane */ type = crm_msg_none; } } return type; } char *get_ais_data(const AIS_Message *msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if(msg->is_compressed == FALSE) { crm_debug_2("Returning uncompressed message data"); uncompressed = strdup(msg->data); } else { crm_debug_2("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, (char*)msg->data, msg->compressed_size, 1, 0); CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); } return uncompressed; } #if SUPPORT_COROSYNC int ais_fd_sync = -1; int ais_fd_async = -1; /* never send messages via this channel */ void *ais_ipc_ctx = NULL; hdb_handle_t ais_ipc_handle = 0; GFDSource *ais_source = NULL; GFDSource *ais_source_sync = NULL; GFDSource *cman_source = NULL; GFDSource *cpg_source = NULL; GFDSource *quorumd_source = NULL; static char *ais_cluster_name = NULL; gboolean get_ais_nodeid(uint32_t *id, char **uname) { struct iovec iov; int retries = 0; int rc = CS_OK; coroipc_response_header_t header; struct crm_ais_nodeid_resp_s answer; header.error = CS_OK; header.id = crm_class_nodeid; header.size = sizeof(coroipc_response_header_t); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(uname != NULL, return FALSE); iov.iov_base = &header; iov.iov_len = header.size; retry: errno = 0; rc = coroipcc_msg_send_reply_receive( ais_ipc_handle, &iov, 1, &answer, sizeof (answer)); if(rc == CS_OK) { CRM_CHECK(answer.header.size == sizeof (struct crm_ais_nodeid_resp_s), crm_err("Odd message: id=%d, size=%d, error=%d", answer.header.id, answer.header.size, answer.header.error)); CRM_CHECK(answer.header.id == crm_class_nodeid, crm_err("Bad response id: %d", answer.header.id)); } if(rc == CS_ERR_TRY_AGAIN && retries < 20) { retries++; crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ goto retry; } if(rc != CS_OK) { crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } else if(answer.header.error != CS_OK) { crm_err("Bad response from peer: (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname); *id = answer.id; *uname = crm_strdup(answer.uname); ais_cluster_name = crm_strdup(answer.cname); return TRUE; } gboolean crm_get_cluster_name(char **cname) { CRM_CHECK(cname != NULL, return FALSE); if(ais_cluster_name) { *cname = crm_strdup(ais_cluster_name); return TRUE; } return FALSE; } gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; enum cluster_type_e cluster_type = get_cluster_type(); int retries = 0; int rc = CS_OK; int buf_len = sizeof(coroipc_response_header_t); char *buf = NULL; struct iovec iov; const char *transport = "pcmk"; coroipc_response_header_t *header = NULL; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); /* There are only 6 handlers registered to crm_lib_service in plugin.c */ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE); if(data == NULL) { data = ""; } if(local_pid == 0) { local_pid = getpid(); } if(sender == crm_msg_none) { sender = local_pid; } crm_malloc0(ais_msg, sizeof(AIS_Message)); ais_msg->id = msg_id++; 
ais_msg->header.id = class; ais_msg->header.error = CS_OK; ais_msg->host.type = dest; ais_msg->host.local = local; if(node) { ais_msg->host.size = strlen(node); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node, ais_msg->host.size); ais_msg->host.id = 0; } else { ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; } ais_msg->sender.id = 0; ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = pcmk_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); ais_msg->size = 1 + strlen(data); if(ais_msg->size < CRM_BZ2_THRESHOLD) { failback: crm_realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = crm_strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_debug_5("Compressing message payload"); crm_malloc(compressed, len); rc = BZ2_bzBuffToBuffCompress( compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); crm_free(uncompressed); if(rc != BZ_OK) { crm_err("Compression failed: %d", rc); crm_free(compressed); goto failback; } crm_realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); ais_msg->data[len] = 0; crm_free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_debug_2("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_debug_3("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed?" compressed":"", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); iov.iov_base = ais_msg; iov.iov_len = ais_msg->header.size; crm_realloc(buf, buf_len); do { if(rc == CS_ERR_TRY_AGAIN) { retries++; crm_info("Peer overloaded or membership in flux:" " Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ } errno = 0; switch(cluster_type) { case pcmk_cluster_classic_ais: rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len); header = (coroipc_response_header_t *)buf; if(rc == CS_OK) { CRM_CHECK(header->size == sizeof (coroipc_response_header_t), crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", header->id, header->size, class, header->error)); CRM_ASSERT(buf_len >= header->size); CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK, crm_err("Bad response id (%d) for request (%d)", header->id, ais_msg->header.id)); CRM_CHECK(header->error == CS_OK, rc = header->error); } break; case pcmk_cluster_corosync: case pcmk_cluster_cman: transport = "cpg"; CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail); rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); if(rc == CS_ERR_TRY_AGAIN) { cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; int rc2 = cpg_flow_control_state_get (pcmk_cpg_handle, &fc_state); if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { crm_warn("Connection overloaded, cannot send messages"); goto bail; } else if (rc2 != CS_OK) { crm_warn("Could not determin the connection state: %s (%d)", ais_error2text(rc2), rc2); goto bail; } } break; case pcmk_cluster_unknown: case pcmk_cluster_invalid: case pcmk_cluster_heartbeat: CRM_ASSERT(is_openais_cluster()); break; } } while (rc == CS_ERR_TRY_AGAIN && retries < 20); bail: if(rc != CS_OK) { 
crm_perror(LOG_ERR,"Sending message %d via %s: FAILED (rc=%d): %s", ais_msg->id, transport, rc, ais_error2text(rc)); } else { crm_debug_4("Message %d: sent", ais_msg->id); } crm_free(buf); crm_free(ais_msg); return (rc == CS_OK); } gboolean send_ais_message(xmlNode *msg, gboolean local, const char *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; if(is_classic_ais_cluster()) { if(ais_fd_async < 0 || ais_source == NULL) { crm_err("Not connected to AIS: %d %p", ais_fd_async, ais_source); return FALSE; } } data = dump_xml_unformatted(msg); rc = send_ais_text(0, data, local, node, dest); crm_free(data); return rc; } void terminate_ais_connection(void) { crm_notice("Disconnecting from AIS"); /* G_main_del_fd(ais_source); */ /* G_main_del_fd(ais_source_sync); */ -#ifdef SUPPORT_CMAN - if(is_cman_cluster()) { - cman_stop_notification(pcmk_cman_handle); - cman_finish(pcmk_cman_handle); + if(is_classic_ais_cluster() == FALSE) { + coroipcc_service_disconnect(ais_ipc_handle); + + } else { + cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); } -#endif if(is_corosync_cluster()) { quorum_finalize(pcmk_quorum_handle); } - if(is_classic_ais_cluster() == FALSE) { - coroipcc_service_disconnect(ais_ipc_handle); - - } else { - cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); +#ifdef SUPPORT_CMAN + if(is_cman_cluster()) { + cman_stop_notification(pcmk_cman_handle); + cman_finish(pcmk_cman_handle); } +#endif } int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; static gboolean ais_dispatch_message( AIS_Message *msg, gboolean (*dispatch)(AIS_Message*,char*,int)) { char *data = NULL; char *uncompressed = NULL; xmlNode *xml = NULL; CRM_ASSERT(msg != NULL); crm_debug_3("Got new%s message (size=%d, %d, %d)", msg->is_compressed?" compressed":"", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if(msg->is_compressed && msg->size > 0) { int rc = BZ_OK; unsigned int new_size = msg->size + 1; if(check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_debug_5("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, data, msg->compressed_size, 1, 0); if(rc != BZ_OK) { crm_err("Decompression failed: %d", rc); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if(check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if(safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais); crm_free(pid_s); goto done; } if(msg->header.id != crm_class_members) { crm_update_peer(msg->sender.id, 0,0,0,0, msg->sender.uname, msg->sender.uname, NULL, NULL); } if(msg->header.id == crm_class_rmpeer) { uint32_t id = crm_int_helper(data, NULL); crm_info("Removing peer %s/%u", data, id); reap_crm_member(id); goto done; } else if(msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) { xml = string2xml(data); if(xml == NULL) { crm_err("Invalid membership update: %s", data); goto badmsg; } if(is_classic_ais_cluster() == FALSE) { xmlNode *node = NULL; for(node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) { crm_update_cman_node(node, crm_peer_seq); } } else { xmlNode *node = NULL; const char *value = NULL; gboolean quorate = FALSE; value = crm_element_value(xml, "quorate"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); goto badmsg); if(crm_is_true(value)) { quorate = TRUE; } value = crm_element_value(xml, "id"); CRM_CHECK(value != 
NULL, crm_log_xml_err(xml, "No membership id"); goto badmsg); crm_peer_seq = crm_int_helper(value, NULL); if(quorate != crm_have_quorum) { crm_notice("Membership %s: quorum %s", value, quorate?"acquired":"lost"); crm_have_quorum = quorate; } else { crm_info("Membership %s: quorum %s", value, quorate?"retained":"still lost"); } for(node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) { crm_update_ais_node(node, crm_peer_seq); } } } if(dispatch != NULL) { dispatch(msg, data, 0); } done: crm_free(uncompressed); free_xml(xml); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; } gboolean ais_dispatch(int sender, gpointer user_data) { int rc = CS_OK; char *buffer = NULL; gboolean good = TRUE; gboolean (*dispatch)(AIS_Message*,char*,int) = user_data; - rc = coroipcc_dispatch_get (ais_ipc_handle, (void**)&buffer, 0); + do { + rc = coroipcc_dispatch_get (ais_ipc_handle, (void**)&buffer, 0); - if (rc == 0 || buffer == NULL) { - /* Zero is a legal "no message afterall" value */ - return TRUE; + if (rc == 0 || buffer == NULL) { + /* Zero is a legal "no message afterall" value */ + return TRUE; + + } else if (rc != CS_OK) { + crm_perror(LOG_ERR,"Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); + goto bail; + } - } else if (rc != CS_OK) { - crm_perror(LOG_ERR,"Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); - goto bail; - } + good = ais_dispatch_message((AIS_Message*)buffer, dispatch); + coroipcc_dispatch_put (ais_ipc_handle); - good = ais_dispatch_message((AIS_Message*)buffer, dispatch); - coroipcc_dispatch_put (ais_ipc_handle); + } while(good); + return good; bail: crm_err("AIS connection failed"); return FALSE; } static void ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } static gboolean pcmk_proc_dispatch(IPC_Channel *ch, gpointer user_data) { xmlNode *msg = NULL; gboolean stay_connected = TRUE; while(IPC_ISRCONN(ch)) { if(ch->ops->is_message_pending(ch) == 0) { break; } msg = xmlfromIPC(ch, MAX_IPC_DELAY); if(msg) { xmlNode *node = NULL; for(node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { int id = 0; int children = 0; const char *uname = crm_element_value(node, "uname"); crm_element_value_int(node, "processes", &children); crm_update_peer(id, 0, 0, 0, children, NULL, uname, NULL, NULL); } free_xml(msg); } if(ch->ch_status != IPC_CONNECT) { break; } } if (ch->ch_status != IPC_CONNECT) { stay_connected = FALSE; } return stay_connected; } #ifdef SUPPORT_CMAN static gboolean pcmk_cman_dispatch(int sender, gpointer user_data) { - int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ONE); + int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ALL); if(rc < 0) { crm_err("Connection to cman failed: %d", rc); return FALSE; } return TRUE; } #define MAX_NODES 256 static void cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg) { int rc = 0, lpc = 0, node_count = 0; cman_cluster_t cluster; static cman_node_t cman_nodes[MAX_NODES]; gboolean (*dispatch)(unsigned long long, gboolean) = privdata; switch (reason) { case CMAN_REASON_STATECHANGE: memset(&cluster, 0, sizeof(cluster)); rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { 
crm_err("Couldn't query cman cluster details: %d %d", rc, errno); return; } crm_peer_seq = cluster.ci_generation; if(arg != crm_have_quorum) { crm_notice("Membership %llu: quorum %s", crm_peer_seq, arg?"acquired":"lost"); crm_have_quorum = arg; } else { crm_info("Membership %llu: quorum %s", crm_peer_seq, arg?"retained":"still lost"); } rc = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &node_count, cman_nodes); if (rc < 0) { crm_err("Couldn't query cman node list: %d %d", rc, errno); return; } for (lpc = 0; lpc < node_count; lpc++) { if (cman_nodes[lpc].cn_nodeid == 0) { /* Never allow node ID 0 to be considered a member #315711 */ cman_nodes[lpc].cn_member = 0; } crm_update_peer(cman_nodes[lpc].cn_nodeid, cman_nodes[lpc].cn_incarnation, cman_nodes[lpc].cn_member?crm_peer_seq:0, 0, 0, cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_name, NULL, cman_nodes[lpc].cn_member?CRM_NODE_MEMBER:CRM_NODE_LOST); } if(dispatch) { dispatch(crm_peer_seq, crm_have_quorum); } break; case CMAN_REASON_TRY_SHUTDOWN: /* Always reply with a negative - pacemaker needs to be stopped first */ crm_info("CMAN wants to shut down: %s", arg?"forced":"optional"); cman_replyto_shutdown(pcmk_cman_handle, 0); break; case CMAN_REASON_CONFIG_UPDATE: /* Ignore */ break; } } #endif gboolean init_cman_connection( gboolean (*dispatch)(unsigned long long, gboolean), void (*destroy)(gpointer)) { #ifdef SUPPORT_CMAN int rc = -1, fd = -1; cman_cluster_t cluster; crm_info("Configuring Pacemaker to obtain quorum from cman"); memset(&cluster, 0, sizeof(cluster)); pcmk_cman_handle = cman_init(dispatch); if(pcmk_cman_handle == NULL || cman_is_active(pcmk_cman_handle) == FALSE) { crm_err("Couldn't connect to cman"); goto cman_bail; } rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { crm_err("Couldn't query cman cluster details: %d %d", rc, errno); goto cman_bail; } ais_cluster_name = crm_strdup(cluster.ci_name); rc = cman_start_notification(pcmk_cman_handle, cman_event_callback); if (rc < 0) { crm_err("Couldn't register for cman notifications: %d %d", rc, errno); goto cman_bail; } /* Get the current membership state */ cman_event_callback(pcmk_cman_handle, dispatch, CMAN_REASON_STATECHANGE, cman_is_quorate(pcmk_cman_handle)); fd = cman_get_fd(pcmk_cman_handle); crm_debug("Adding fd=%d to mainloop", fd); cman_source = G_main_add_fd( G_PRIORITY_HIGH, fd, FALSE, pcmk_cman_dispatch, dispatch, destroy); cman_bail: if (rc < 0) { cman_finish(pcmk_cman_handle); return FALSE; } #else crm_err("cman qorum is not supported in this build"); exit(100); #endif return TRUE; } #ifdef SUPPORT_CS_QUORUM gboolean (*pcmk_cpg_dispatch_fn)(AIS_Message*,char*,int) = NULL; static gboolean pcmk_cpg_dispatch(int sender, gpointer user_data) { int rc = 0; pcmk_cpg_dispatch_fn = user_data; rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL); if(rc != CS_OK) { crm_err("Connection to the CPG API failed: %d", rc); return FALSE; } return TRUE; } static void pcmk_cpg_deliver ( cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { AIS_Message *ais_msg = (AIS_Message*)msg; if(ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); return; } else if(ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { /* Not for us */ return; } ais_msg->sender.id = nodeid; if(ais_msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if(peer == NULL) { crm_err("Peer with nodeid=%u is 
unknown", nodeid); } else if(peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); ais_msg->sender.size = strlen(peer->uname); memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size); } } ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn); } static void pcmk_cpg_membership( cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* Don't care about CPG membership */ } static gboolean pcmk_quorum_dispatch(int sender, gpointer user_data) { int rc = 0; rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if(rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); return FALSE; } return TRUE; } static void pcmk_quorum_notification( quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t *view_list) { int i; if(quorate != crm_have_quorum) { crm_notice("Membership "U64T": quorum %s (%lu)", ring_id, quorate?"acquired":"lost", (long unsigned int)view_list_entries); crm_have_quorum = quorate; } else { crm_info("Membership "U64T": quorum %s (%lu)", ring_id, quorate?"retained":"still lost", (long unsigned int)view_list_entries); } for (i=0; ihost.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if(sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if(sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if(sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if(sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if(sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if(ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for(lpc = (str_size - 10); lpc < msg->size; lpc++) { if(lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if(sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if(repaired) { crm_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_debug_3("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } #endif diff --git a/lib/common/utils.c b/lib/common/utils.c index 2f6332fdd7..cc8d3da248 100644 --- a/lib/common/utils.c +++ b/lib/common/utils.c @@ -1,2654 +1,2657 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * 
modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HAVE_HB_CONFIG_H #include /* for HB_COREDIR */ #endif #if HAVE_GLUE_CONFIG_H #include /* for HB_COREDIR */ #endif #ifndef MAXLINE # define MAXLINE 512 #endif #ifdef HAVE_GETOPT_H # include #endif CRM_TRACE_INIT_DATA(common); static uint ref_counter = 0; unsigned int crm_log_level = LOG_INFO; gboolean crm_config_error = FALSE; gboolean crm_config_warning = FALSE; const char *crm_system_name = "unknown"; int node_score_red = 0; int node_score_green = 0; int node_score_yellow = 0; int node_score_infinity = INFINITY; void crm_set_env_options(void); gboolean check_time(const char *value) { if(crm_get_msec(value) < 5000) { return FALSE; } return TRUE; } gboolean check_timer(const char *value) { if(crm_get_msec(value) < 0) { return FALSE; } return TRUE; } gboolean check_boolean(const char *value) { int tmp = FALSE; if(crm_str_to_boolean(value, &tmp) != 1) { return FALSE; } return TRUE; } gboolean check_number(const char *value) { errno = 0; if(value == NULL) { return FALSE; } else if(safe_str_eq(value, MINUS_INFINITY_S)) { } else if(safe_str_eq(value, INFINITY_S)) { } else { crm_int_helper(value, NULL); } if(errno != 0) { return FALSE; } return TRUE; } int char2score(const char *score) { int score_f = 0; if(score == NULL) { } else if(safe_str_eq(score, MINUS_INFINITY_S)) { score_f = -node_score_infinity; } else if(safe_str_eq(score, INFINITY_S)) { score_f = node_score_infinity; } else if(safe_str_eq(score, "+"INFINITY_S)) { score_f = node_score_infinity; } else if(safe_str_eq(score, "red")) { score_f = node_score_red; } else if(safe_str_eq(score, "yellow")) { score_f = node_score_yellow; } else if(safe_str_eq(score, "green")) { score_f = node_score_green; } else { score_f = crm_parse_int(score, NULL); if(score_f > 0 && score_f > node_score_infinity) { score_f = node_score_infinity; } else if(score_f < 0 && score_f < -node_score_infinity) { score_f = -node_score_infinity; } } return score_f; } char * score2char(int score) { if(score >= node_score_infinity) { return crm_strdup(INFINITY_S); } else if(score <= -node_score_infinity) { return crm_strdup("-"INFINITY_S); } return crm_itoa(score); } const char * cluster_option(GHashTable* options, gboolean(*validate)(const char*), const char *name, const char *old_name, const char *def_value) { const char *value = NULL; CRM_ASSERT(name != NULL); if(options != NULL) { value = g_hash_table_lookup(options, name); } if(value == NULL && old_name && options != NULL) { value = g_hash_table_lookup(options, old_name); if(value != NULL) { crm_config_warn("Using deprecated name '%s' for" " cluster option '%s'", old_name, name); g_hash_table_insert( options, crm_strdup(name), 
crm_strdup(value)); value = g_hash_table_lookup(options, old_name); } } if(value == NULL) { crm_debug_2("Using default value '%s' for cluster option '%s'", def_value, name); if(options == NULL) { return def_value; } g_hash_table_insert( options, crm_strdup(name), crm_strdup(def_value)); value = g_hash_table_lookup(options, name); } if(validate && validate(value) == FALSE) { crm_config_err("Value '%s' for cluster option '%s' is invalid." " Defaulting to %s", value, name, def_value); g_hash_table_replace(options, crm_strdup(name), crm_strdup(def_value)); value = g_hash_table_lookup(options, name); } return value; } const char * get_cluster_pref(GHashTable *options, pe_cluster_option *option_list, int len, const char *name) { int lpc = 0; const char *value = NULL; gboolean found = FALSE; for(lpc = 0; lpc < len; lpc++) { if(safe_str_eq(name, option_list[lpc].name)) { found = TRUE; value = cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } CRM_CHECK(found, crm_err("No option named: %s", name)); CRM_ASSERT(value != NULL); return value; } void config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option *option_list, int len) { int lpc = 0; fprintf(stdout, "" "\n" "\n" " %s\n" " %s\n" " %s\n" " \n", name, version, desc_long, desc_short); for(lpc = 0; lpc < len; lpc++) { if(option_list[lpc].description_long == NULL && option_list[lpc].description_short == NULL) { continue; } fprintf(stdout, " \n" " %s\n" " \n" " %s%s%s\n" " \n", option_list[lpc].name, option_list[lpc].description_short, option_list[lpc].type, option_list[lpc].default_value, option_list[lpc].description_long?option_list[lpc].description_long:option_list[lpc].description_short, option_list[lpc].values?" Allowed values: ":"", option_list[lpc].values?option_list[lpc].values:""); } fprintf(stdout, " \n\n"); } void verify_all_options(GHashTable *options, pe_cluster_option *option_list, int len) { int lpc = 0; for(lpc = 0; lpc < len; lpc++) { cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } char * generateReference(const char *custom1, const char *custom2) { const char *local_cust1 = custom1; const char *local_cust2 = custom2; int reference_len = 4; char *since_epoch = NULL; reference_len += 20; /* too big */ reference_len += 40; /* too big */ if(local_cust1 == NULL) { local_cust1 = "_empty_"; } reference_len += strlen(local_cust1); if(local_cust2 == NULL) { local_cust2 = "_empty_"; } reference_len += strlen(local_cust2); crm_malloc0(since_epoch, reference_len); if(since_epoch != NULL) { sprintf(since_epoch, "%s-%s-%ld-%u", local_cust1, local_cust2, (unsigned long)time(NULL), ref_counter++); } return since_epoch; } gboolean decodeNVpair(const char *srcstring, char separator, char **name, char **value) { int lpc = 0; int len = 0; const char *temp = NULL; CRM_ASSERT(name != NULL && value != NULL); *name = NULL; *value = NULL; crm_debug_4("Attempting to decode: [%s]", srcstring); if (srcstring != NULL) { len = strlen(srcstring); while(lpc <= len) { if (srcstring[lpc] == separator) { crm_malloc0(*name, lpc+1); if(*name == NULL) { break; /* and return FALSE */ } strncpy(*name, srcstring, lpc); (*name)[lpc] = '\0'; /* this sucks but as the strtok manpage says.. 
* it *is* a bug */ len = len-lpc; len--; if(len <= 0) { *value = NULL; } else { crm_malloc0(*value, len+1); if(*value == NULL) { crm_free(*name); break; /* and return FALSE */ } temp = srcstring+lpc+1; strncpy(*value, temp, len); (*value)[len] = '\0'; } return TRUE; } lpc++; } } if(*name != NULL) { crm_free(*name); } *name = NULL; *value = NULL; return FALSE; } char * crm_concat(const char *prefix, const char *suffix, char join) { int len = 0; char *new_str = NULL; CRM_ASSERT(prefix != NULL); CRM_ASSERT(suffix != NULL); len = strlen(prefix) + strlen(suffix) + 2; crm_malloc0(new_str, (len)); sprintf(new_str, "%s%c%s", prefix, join, suffix); new_str[len-1] = 0; return new_str; } char * generate_hash_key(const char *crm_msg_reference, const char *sys) { char *hash_key = crm_concat(sys?sys:"none", crm_msg_reference, '_'); crm_debug_3("created hash key: (%s)", hash_key); return hash_key; } char * generate_hash_value(const char *src_node, const char *src_subsys) { char *hash_value = NULL; if (src_node == NULL || src_subsys == NULL) { return NULL; } if (strcasecmp(CRM_SYSTEM_DC, src_subsys) == 0) { hash_value = crm_strdup(src_subsys); CRM_ASSERT(hash_value); return hash_value; } hash_value = crm_concat(src_node, src_subsys, '_'); crm_info("created hash value: (%s)", hash_value); return hash_value; } char * crm_itoa(int an_int) { int len = 32; char *buffer = NULL; crm_malloc0(buffer, (len+1)); if(buffer != NULL) { snprintf(buffer, len, "%d", an_int); } return buffer; } extern int LogToLoggingDaemon(int priority, const char * buf, int bstrlen, gboolean use_pri_str); #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER GLogFunc glib_log_default; static void crm_glib_handler(const gchar *log_domain, GLogLevelFlags flags, const gchar *message, gpointer user_data) { int log_level = LOG_WARNING; GLogLevelFlags msg_level = (flags & G_LOG_LEVEL_MASK); switch(msg_level) { case G_LOG_LEVEL_CRITICAL: /* log and record how we got here */ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, message, TRUE, TRUE); return; case G_LOG_LEVEL_ERROR: log_level = LOG_ERR; break; case G_LOG_LEVEL_MESSAGE: log_level = LOG_NOTICE; break; case G_LOG_LEVEL_INFO: log_level = LOG_INFO; break; case G_LOG_LEVEL_DEBUG: log_level = LOG_DEBUG; break; case G_LOG_LEVEL_WARNING: case G_LOG_FLAG_RECURSION: case G_LOG_FLAG_FATAL: case G_LOG_LEVEL_MASK: log_level = LOG_WARNING; break; } do_crm_log(log_level, "%s: %s", log_domain, message); } #endif void crm_log_deinit(void) { #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER g_log_set_default_handler(glib_log_default, NULL); #endif } gboolean crm_log_init( const char *entity, int level, gboolean coredir, gboolean to_stderr, int argc, char **argv) { return crm_log_init_worker(entity, level, coredir, to_stderr, argc, argv, FALSE); } gboolean crm_log_init_quiet( const char *entity, int level, gboolean coredir, gboolean to_stderr, int argc, char **argv) { return crm_log_init_worker(entity, level, coredir, to_stderr, argc, argv, TRUE); } #if SUPPORT_TRACING static int update_trace_data(struct _pcmk_ddebug_query *query, struct _pcmk_ddebug *start, struct _pcmk_ddebug *stop) { int lpc = 0; unsigned nfound = 0; struct _pcmk_ddebug *dp; const char *match = "unknown"; CRM_ASSERT(stop != NULL); CRM_ASSERT(start != NULL); for (dp = start; dp != stop; dp++) { gboolean bump = FALSE; lpc++; /* fprintf(stderr, "checking: %-12s %20s:%u fmt:%s\n", */ /* dp->function, dp->filename, dp->lineno, dp->format); */ if (query->functions && strstr(query->functions, dp->function) != NULL) { match = "function"; bump = TRUE; } if(query->files) { 
char token[500]; const char *offset = NULL; const char *next = query->files; do { offset = next; next = strchrnul(offset, ','); snprintf(token, 499, "%.*s", (int)(next-offset), offset); if (query->files && strstr(dp->filename, token) != NULL) { match = "file"; bump = TRUE; } else if(next[0] != 0) { next++; } } while(bump == FALSE && next != NULL && next[0] != 0); } if (query->formats && strstr(query->formats, dp->format) != NULL) { match = "format"; bump = TRUE; } if(bump) { nfound++; dp->bump = LOG_NOTICE; do_crm_log_always(LOG_INFO, "Detected '%s' match: %-12s %20s:%u fmt:%s", match, dp->function, dp->filename, dp->lineno, dp->format); } } query->total += lpc; query->matches += nfound; return nfound; } #define _GNU_SOURCE #include #include #include static int ddebug_callback(struct dl_phdr_info *info, size_t size, void *data) { if(strlen(info->dlpi_name) > 0) { struct _pcmk_ddebug_query *query = data; void *handle; void *start; void *stop; char *error; handle = dlopen (info->dlpi_name, RTLD_LAZY); error = dlerror(); if (!handle || error) { crm_err("%s", error); if(handle) { dlclose(handle); } return 0; } start = dlsym(handle, "__start___verbose"); error = dlerror(); if (error) { goto done; } stop = dlsym(handle, "__stop___verbose"); error = dlerror(); if (error) { goto done; } else { unsigned long int len = (unsigned long int)stop - (unsigned long int)start; crm_info("Checking for query matches in %lu trace symbols from: %s (offset: %p)", len/sizeof(struct _pcmk_ddebug), info->dlpi_name, start); update_trace_data(query, start, stop); } done: dlclose(handle); } return 0; } #endif void update_all_trace_data(void) { #if SUPPORT_TRACING gboolean search = FALSE; const char *env_value = NULL; struct _pcmk_ddebug_query query; memset(&query, 0, sizeof(struct _pcmk_ddebug_query)); env_value = getenv("PCMK_trace_files"); if(env_value) { search = TRUE; query.files = env_value; } env_value = getenv("PCMK_trace_formats"); if(env_value) { search = TRUE; query.formats = env_value; } env_value = getenv("PCMK_trace_functions"); if(env_value) { search = TRUE; query.functions = env_value; } if(search) { update_trace_data(&query, __start___verbose, __stop___verbose); dl_iterate_phdr(ddebug_callback, &query); if(query.matches == 0) { do_crm_log_always(LOG_DEBUG, "no matches for query: {fn='%s', file='%s', fmt='%s'} in %llu entries", crm_str(query.functions), crm_str(query.files), crm_str(query.formats), query.total); } else { do_crm_log_always(LOG_INFO, "%llu matches for query: {fn='%s', file='%s', fmt='%s'} in %llu entries", query.matches, crm_str(query.functions), crm_str(query.files), crm_str(query.formats), query.total); } } /* return query.matches; */ #endif } gboolean crm_log_init_worker( const char *entity, int level, gboolean coredir, gboolean to_stderr, int argc, char **argv, gboolean quiet) { /* Redirect messages from glib functions to our handler */ /* cl_malloc_forced_for_glib(); */ #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER glib_log_default = g_log_set_default_handler(crm_glib_handler, NULL); #endif /* and for good measure... - this enum is a bit field (!) 
*/ g_log_set_always_fatal((GLogLevelFlags)0); /*value out of range*/ if(entity) { crm_system_name = entity; } else if(argc > 0 && argv != NULL) { crm_system_name = basename(argv[0]); if(strstr(crm_system_name, "lt-") == crm_system_name) { crm_system_name += 3; } } else if(crm_system_name == NULL) { crm_system_name = "Unknown"; } setenv("PCMK_service", crm_system_name, 1); cl_log_set_entity(crm_system_name); set_crm_log_level(level); crm_set_env_options(); if(quiet) { /* Nuke any syslog activity */ unsetenv("HA_logfacility"); } else { cl_log_args(argc, argv); if(getenv("HA_logfacility") == NULL) { /* Set a default */ cl_log_set_facility(HA_LOG_FACILITY); } /* else: picked up by crm_set_env_options() */ } cl_log_enable_stderr(to_stderr); if(coredir) { const char *user = getenv("USER"); if(user != NULL && safe_str_neq(user, "root") && safe_str_neq(user, CRM_DAEMON_USER)) { crm_info("Not switching to corefile directory for %s", user); coredir = FALSE; } } if(coredir) { int user = getuid(); const char *base = HA_COREDIR; struct passwd *pwent = getpwuid(user); if (pwent == NULL) { crm_perror(LOG_ERR, "Cannot get name for uid: %d", user); } else if(safe_str_neq(pwent->pw_name, "root") && safe_str_neq(pwent->pw_name, "nobody") && safe_str_neq(pwent->pw_name, CRM_DAEMON_USER)) { crm_debug("Don't change active directory for regular user: %s", pwent->pw_name); } else if (chdir(base) < 0) { crm_perror(LOG_ERR, "Cannot change active directory to %s", base); } else if (chdir(pwent->pw_name) < 0) { crm_perror(LOG_ERR, "Cannot change active directory to %s/%s", base, pwent->pw_name); } else { crm_info("Changed active directory to %s/%s", base, pwent->pw_name); #if 0 { char path[512]; snprintf(path, 512, "%s-%d", crm_system_name, getpid()); mkdir(path, 0750); chdir(path); crm_info("Changed active directory to %s/%s/%s", base, pwent->pw_name, path); } #endif } } update_all_trace_data(); crm_signal(DEBUG_INC, alter_debug); crm_signal(DEBUG_DEC, alter_debug); return TRUE; } /* returns the old value */ unsigned int set_crm_log_level(unsigned int level) { unsigned int old = crm_log_level; crm_log_level = level; return old; } unsigned int get_crm_log_level(void) { return crm_log_level; } static int crm_version_helper(const char *text, char **end_text) { int atoi_result = -1; CRM_ASSERT(end_text != NULL); errno = 0; if(text != NULL && text[0] != 0) { atoi_result = (int)strtol(text, end_text, 10); if(errno == EINVAL) { crm_err("Conversion of '%s' %c failed", text, text[0]); atoi_result = -1; } } return atoi_result; } /* * version1 < version2 : -1 * version1 = version2 : 0 * version1 > version2 : 1 */ int compare_version(const char *version1, const char *version2) { int rc = 0; int lpc = 0; char *ver1_copy = NULL, *ver2_copy = NULL; char *rest1 = NULL, *rest2 = NULL; if(version1 == NULL && version2 == NULL) { return 0; } else if(version1 == NULL) { return -1; } else if(version2 == NULL) { return 1; } ver1_copy = crm_strdup(version1); ver2_copy = crm_strdup(version2); rest1 = ver1_copy; rest2 = ver2_copy; while(1) { int digit1 = 0; int digit2 = 0; lpc++; if(rest1 == rest2) { break; } if(rest1 != NULL) { digit1 = crm_version_helper(rest1, &rest1); } if(rest2 != NULL) { digit2 = crm_version_helper(rest2, &rest2); } if(digit1 < digit2){ rc = -1; crm_debug_5("%d < %d", digit1, digit2); break; } else if (digit1 > digit2){ rc = 1; crm_debug_5("%d > %d", digit1, digit2); break; } if(rest1 != NULL && rest1[0] == '.') { rest1++; } if(rest1 != NULL && rest1[0] == 0) { rest1 = NULL; } if(rest2 != NULL && rest2[0] == '.') { 
rest2++; } if(rest2 != NULL && rest2[0] == 0) { rest2 = NULL; } } crm_free(ver1_copy); crm_free(ver2_copy); if(rc == 0) { crm_debug_3("%s == %s (%d)", version1, version2, lpc); } else if(rc < 0) { crm_debug_3("%s < %s (%d)", version1, version2, lpc); } else if(rc > 0) { crm_debug_3("%s > %s (%d)", version1, version2, lpc); } return rc; } gboolean do_stderr = FALSE; void alter_debug(int nsig) { crm_signal(DEBUG_INC, alter_debug); crm_signal(DEBUG_DEC, alter_debug); switch(nsig) { case DEBUG_INC: if (crm_log_level < 100) { crm_log_level++; } break; case DEBUG_DEC: if (crm_log_level > 0) { crm_log_level--; } break; default: fprintf(stderr, "Unknown signal %d\n", nsig); cl_log(LOG_ERR, "Unknown signal %d", nsig); break; } } void g_hash_destroy_str(gpointer data) { crm_free(data); } #include /* #include */ /* #include */ long long crm_int_helper(const char *text, char **end_text) { long long result = -1; char *local_end_text = NULL; int saved_errno = 0; errno = 0; if(text != NULL) { #ifdef ANSI_ONLY if(end_text != NULL) { result = strtol(text, end_text, 10); } else { result = strtol(text, &local_end_text, 10); } #else if(end_text != NULL) { result = strtoll(text, end_text, 10); } else { result = strtoll(text, &local_end_text, 10); } #endif saved_errno = errno; /* CRM_CHECK(errno != EINVAL); */ if(errno == EINVAL) { crm_err("Conversion of %s failed", text); result = -1; } else if(errno == ERANGE) { crm_err("Conversion of %s was clipped: %lld", text, result); } else if(errno != 0) { crm_perror(LOG_ERR,"Conversion of %s failed:", text); } if(local_end_text != NULL && local_end_text[0] != '\0') { crm_err("Characters left over after parsing '%s': '%s'", text, local_end_text); } errno = saved_errno; } return result; } int crm_parse_int(const char *text, const char *default_text) { int atoi_result = -1; if(text != NULL) { atoi_result = crm_int_helper(text, NULL); if(errno == 0) { return atoi_result; } } if(default_text != NULL) { atoi_result = crm_int_helper(default_text, NULL); if(errno == 0) { return atoi_result; } } else { crm_err("No default conversion value supplied"); } return -1; } gboolean safe_str_neq(const char *a, const char *b) { if(a == b) { return FALSE; } else if(a==NULL || b==NULL) { return TRUE; } else if(strcasecmp(a, b) == 0) { return FALSE; } return TRUE; } char * crm_strdup_fn(const char *src, const char *file, const char *fn, int line) { char *dup = NULL; CRM_CHECK(src != NULL, crm_err("Could not perform copy at %s:%d (%s)", file, line, fn); return NULL); crm_malloc0(dup, strlen(src) + 1); return strcpy(dup, src); } #define ENV_PREFIX "HA_" void crm_set_env_options(void) { cl_inherit_logging_environment(500); cl_log_set_logd_channel_source(NULL, NULL); if(debug_level > 0 && (debug_level+LOG_INFO) > (int)crm_log_level) { set_crm_log_level(LOG_INFO + debug_level); } } gboolean crm_is_true(const char * s) { gboolean ret = FALSE; if(s != NULL) { crm_str_to_boolean(s, &ret); } return ret; } int crm_str_to_boolean(const char * s, int * ret) { if(s == NULL) { return -1; } else if (strcasecmp(s, "true") == 0 || strcasecmp(s, "on") == 0 || strcasecmp(s, "yes") == 0 || strcasecmp(s, "y") == 0 || strcasecmp(s, "1") == 0){ *ret = TRUE; return 1; } else if (strcasecmp(s, "false") == 0 || strcasecmp(s, "off") == 0 || strcasecmp(s, "no") == 0 || strcasecmp(s, "n") == 0 || strcasecmp(s, "0") == 0){ *ret = FALSE; return 1; } return -1; } #ifndef NUMCHARS # define NUMCHARS "0123456789." 
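/* A minimal usage sketch for the parsing helpers above; the values and the
 * PCMK_example_port variable are illustrative only, not part of the API:
 */
#if 0
{
    /* falls back to the second argument when the first cannot be parsed */
    int port = crm_parse_int(getenv("PCMK_example_port"), "3121");
    /* accepts true/on/yes/y/1 and false/off/no/n/0, case-insensitively */
    gboolean flag = crm_is_true("Yes"); /* TRUE */
}
#endif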
#endif #ifndef WHITESPACE # define WHITESPACE " \t\n\r\f" #endif unsigned long long crm_get_interval(const char * input) { ha_time_t *interval = NULL; char *input_copy = NULL; char *input_copy_mutable = NULL; unsigned long long msec = 0; if(input == NULL) { return 0; } else if(input[0] != 'P') { return crm_get_msec(input); } /* only duplicate once we know input is non-NULL and an ISO 8601 duration */ input_copy = crm_strdup(input); input_copy_mutable = input_copy; interval = parse_time_duration(&input_copy_mutable); msec = date_in_seconds(interval); free_ha_date(interval); crm_free(input_copy); return msec * 1000; } long long crm_get_msec(const char * input) { const char *cp = input; const char *units; long long multiplier = 1000; long long divisor = 1; long long msec = -1; char *end_text = NULL; /* double dret; */ if(input == NULL) { return msec; } cp += strspn(cp, WHITESPACE); units = cp + strspn(cp, NUMCHARS); units += strspn(units, WHITESPACE); if (strchr(NUMCHARS, *cp) == NULL) { return msec; } if (strncasecmp(units, "ms", 2) == 0 || strncasecmp(units, "msec", 4) == 0) { multiplier = 1; divisor = 1; } else if (strncasecmp(units, "us", 2) == 0 || strncasecmp(units, "usec", 4) == 0) { multiplier = 1; divisor = 1000; } else if (strncasecmp(units, "s", 1) == 0 || strncasecmp(units, "sec", 3) == 0) { multiplier = 1000; divisor = 1; } else if (strncasecmp(units, "m", 1) == 0 || strncasecmp(units, "min", 3) == 0) { multiplier = 60*1000; divisor = 1; } else if (strncasecmp(units, "h", 1) == 0 || strncasecmp(units, "hr", 2) == 0) { multiplier = 60*60*1000; divisor = 1; } else if (*units != EOS && *units != '\n' && *units != '\r') { return msec; } msec = crm_int_helper(cp, &end_text); msec *= multiplier; msec /= divisor; /* dret += 0.5; */ /* msec = (long long)dret; */ return msec; } const char * op_status2text(op_status_t status) { switch(status) { case LRM_OP_PENDING: return "pending"; break; case LRM_OP_DONE: return "complete"; break; case LRM_OP_ERROR: return "Error"; break; case LRM_OP_TIMEOUT: return "Timed Out"; break; case LRM_OP_NOTSUPPORTED: return "NOT SUPPORTED"; break; case LRM_OP_CANCELLED: return "Cancelled"; break; } crm_err("Unknown status: %d", status); return "UNKNOWN!"; } char * generate_op_key(const char *rsc_id, const char *op_type, int interval) { int len = 35; char *op_id = NULL; CRM_CHECK(rsc_id != NULL, return NULL); CRM_CHECK(op_type != NULL, return NULL); len += strlen(op_type); len += strlen(rsc_id); crm_malloc0(op_id, len); CRM_CHECK(op_id != NULL, return NULL); sprintf(op_id, "%s_%s_%d", rsc_id, op_type, interval); return op_id; } gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval) { char *notify = NULL; char *mutable_key = NULL; char *mutable_key_ptr = NULL; int len = 0, offset = 0, ch = 0; CRM_CHECK(key != NULL, return FALSE); *interval = 0; len = strlen(key); offset = len-1; crm_debug_3("Source: %s", key); while(offset > 0 && isdigit(key[offset])) { int digits = len-offset; ch = key[offset] - '0'; CRM_CHECK(ch < 10, return FALSE); CRM_CHECK(ch >= 0, return FALSE); while(digits > 1) { digits--; ch = ch * 10; } *interval += ch; offset--; } crm_debug_3(" Interval: %d", *interval); CRM_CHECK(key[offset] == '_', return FALSE); mutable_key = crm_strdup(key); mutable_key[offset] = 0; offset--; while(offset > 0 && key[offset] != '_') { offset--; } CRM_CHECK(key[offset] == '_', crm_free(mutable_key); return FALSE); mutable_key_ptr = mutable_key+offset+1; crm_debug_3(" Action: %s", mutable_key_ptr); *op_type = crm_strdup(mutable_key_ptr); mutable_key[offset] = 0; offset--; CRM_CHECK(mutable_key !=
mutable_key_ptr, crm_free(mutable_key); return FALSE); notify = strstr(mutable_key, "_post_notify"); if(safe_str_eq(notify, "_post_notify")) { notify[0] = 0; } notify = strstr(mutable_key, "_pre_notify"); if(safe_str_eq(notify, "_pre_notify")) { notify[0] = 0; } crm_debug_3(" Resource: %s", mutable_key); *rsc_id = mutable_key; return TRUE; } char * generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type) { int len = 12; char *op_id = NULL; CRM_CHECK(rsc_id != NULL, return NULL); CRM_CHECK(op_type != NULL, return NULL); CRM_CHECK(notify_type != NULL, return NULL); len += strlen(op_type); len += strlen(rsc_id); len += strlen(notify_type); crm_malloc0(op_id, len); if(op_id != NULL) { sprintf(op_id, "%s_%s_notify_%s_0", rsc_id, notify_type, op_type); } return op_id; } char * generate_transition_magic_v202(const char *transition_key, int op_status) { int len = 80; char *fail_state = NULL; CRM_CHECK(transition_key != NULL, return NULL); len += strlen(transition_key); crm_malloc0(fail_state, len); if(fail_state != NULL) { snprintf(fail_state, len, "%d:%s", op_status,transition_key); } return fail_state; } char * generate_transition_magic(const char *transition_key, int op_status, int op_rc) { int len = 80; char *fail_state = NULL; CRM_CHECK(transition_key != NULL, return NULL); len += strlen(transition_key); crm_malloc0(fail_state, len); if(fail_state != NULL) { snprintf(fail_state, len, "%d:%d;%s", op_status, op_rc, transition_key); } return fail_state; } gboolean decode_transition_magic( const char *magic, char **uuid, int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc) { int res = 0; char *key = NULL; gboolean result = TRUE; CRM_CHECK(magic != NULL, return FALSE); CRM_CHECK(op_rc != NULL, return FALSE); CRM_CHECK(op_status != NULL, return FALSE); crm_malloc0(key, strlen(magic)+1); res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key); if(res != 3) { crm_crit("Only found %d items in: %s", res, magic); result = FALSE; goto bail; } CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id, target_rc), result = FALSE; goto bail; ); bail: crm_free(key); return result; } char * generate_transition_key(int transition_id, int action_id, int target_rc, const char *node) { int len = 40; char *fail_state = NULL; CRM_CHECK(node != NULL, return NULL); len += strlen(node); crm_malloc0(fail_state, len); if(fail_state != NULL) { snprintf(fail_state, len, "%d:%d:%d:%s", action_id, transition_id, target_rc, node); } return fail_state; } gboolean decode_transition_key( const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc) { int res = 0; gboolean done = FALSE; CRM_CHECK(uuid != NULL, return FALSE); CRM_CHECK(target_rc != NULL, return FALSE); CRM_CHECK(action_id != NULL, return FALSE); CRM_CHECK(transition_id != NULL, return FALSE); crm_malloc0(*uuid, strlen(key)+1); res = sscanf(key, "%d:%d:%d:%s", action_id, transition_id, target_rc, *uuid); switch(res) { case 4: /* Post Pacemaker 0.6 */ done = TRUE; break; case 3: case 2: /* this can be tricky - the UUID might start with an integer */ /* Until Pacemaker 0.6 */ done = TRUE; *target_rc = -1; res = sscanf(key, "%d:%d:%s", action_id, transition_id, *uuid); if(res == 2) { *action_id = -1; res = sscanf(key, "%d:%s", transition_id, *uuid); CRM_CHECK(res == 2, done = FALSE); } else if(res != 3) { CRM_CHECK(res == 3, done = FALSE); } break; case 1: /* Prior to Heartbeat 2.0.8 */ done = TRUE; *action_id = -1; *target_rc = -1; res = sscanf(key, "%d:%s", transition_id, *uuid); 
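/* e.g. a hypothetical pre-2.0.8 key "3:0df23931-0c00-4c04-b3a6-75e0e6a60cf4"
 * carries only transition_id:uuid, whereas generate_transition_key() above
 * emits all four fields, "4:3:0:<node>", i.e. action_id:transition_id:target_rc:node */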
CRM_CHECK(res == 2, done = FALSE); break; default: crm_crit("Unhandled sscanf result (%d) for %s", res, key); } if(strlen(*uuid) != 36) { crm_warn("Bad UUID (%s) in sscanf result (%d) for %s", *uuid, res, key); } if(done == FALSE) { crm_err("Cannot decode '%s' rc=%d", key, res); crm_free(*uuid); *uuid = NULL; *target_rc = -1; *action_id = -1; *transition_id = -1; } return done; } void filter_action_parameters(xmlNode *param_set, const char *version) { char *key = NULL; char *timeout = NULL; char *interval = NULL; const char *attr_filter[] = { XML_ATTR_ID, XML_ATTR_CRM_VERSION, XML_LRM_ATTR_OP_DIGEST, }; gboolean do_delete = FALSE; int lpc = 0; static int meta_len = 0; if(meta_len == 0) { meta_len = strlen(CRM_META); } if(param_set == NULL) { return; } for(lpc = 0; lpc < DIMOF(attr_filter); lpc++) { xml_remove_prop(param_set, attr_filter[lpc]); } key = crm_meta_name(XML_LRM_ATTR_INTERVAL); interval = crm_element_value_copy(param_set, key); crm_free(key); key = crm_meta_name(XML_ATTR_TIMEOUT); timeout = crm_element_value_copy(param_set, key); xml_prop_name_iter(param_set, prop_name, do_delete = FALSE; if(strncasecmp(prop_name, CRM_META, meta_len) == 0) { do_delete = TRUE; } if(do_delete) { xml_remove_prop(param_set, prop_name); } ); if(crm_get_msec(interval) > 0 && compare_version(version, "1.0.8") > 0) { /* Re-instate the operation's timeout value */ if(timeout != NULL) { crm_xml_add(param_set, key, timeout); } } crm_free(interval); crm_free(timeout); crm_free(key); } void filter_reload_parameters(xmlNode *param_set, const char *restart_string) { int len = 0; char *name = NULL; char *match = NULL; if(param_set == NULL) { return; } xml_prop_name_iter(param_set, prop_name, name = NULL; len = strlen(prop_name) + 3; crm_malloc0(name, len); sprintf(name, " %s ", prop_name); name[len-1] = 0; match = strstr(restart_string, name); if(match == NULL) { crm_debug_3("%s not found in %s", prop_name, restart_string); xml_remove_prop(param_set, prop_name); } crm_free(name); ); } void crm_abort(const char *file, const char *function, int line, const char *assert_condition, gboolean do_core, gboolean do_fork) { int rc = 0; int pid = 0; int status = 0; if(do_core == FALSE) { do_crm_log(LOG_ERR, "%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition); return; } else if(do_fork) { pid=fork(); } else { do_crm_log(LOG_ERR, "%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition); } switch(pid) { case -1: do_crm_log(LOG_CRIT, "%s: Cannot create core for non-fatal assert at %s:%d : %s", function, file, line, assert_condition); return; default: /* Parent */ do_crm_log(LOG_ERR, "%s: Forked child %d to record non-fatal assert at %s:%d : %s", function, pid, file, line, assert_condition); do { rc = waitpid(pid, &status, 0); if(rc < 0 && errno != EINTR) { crm_perror(LOG_ERR,"%s: Cannot wait on forked child %d", function, pid); } } while(rc < 0 && errno == EINTR); return; case 0: /* Child */ abort(); break; } } char * generate_series_filename( const char *directory, const char *series, int sequence, gboolean bzip) { int len = 40; char *filename = NULL; const char *ext = "raw"; CRM_CHECK(directory != NULL, return NULL); CRM_CHECK(series != NULL, return NULL); len += strlen(directory); len += strlen(series); crm_malloc0(filename, len); CRM_CHECK(filename != NULL, return NULL); if(bzip) { ext = "bz2"; } sprintf(filename, "%s/%s-%d.%s", directory, series, sequence, ext); return filename; } int get_last_sequence(const char *directory, const char *series) { FILE *file_strm = 
NULL; int start = 0, length = 0, read_len = 0; char *series_file = NULL; char *buffer = NULL; int seq = 0; int len = 36; CRM_CHECK(directory != NULL, return 0); CRM_CHECK(series != NULL, return 0); len += strlen(directory); len += strlen(series); crm_malloc0(series_file, len); CRM_CHECK(series_file != NULL, return 0); sprintf(series_file, "%s/%s.last", directory, series); file_strm = fopen(series_file, "r"); if(file_strm == NULL) { crm_debug("Series file %s does not exist", series_file); crm_free(series_file); return 0; } /* see how big the file is */ start = ftell(file_strm); fseek(file_strm, 0L, SEEK_END); length = ftell(file_strm); fseek(file_strm, 0L, SEEK_SET); CRM_ASSERT(length >= 0); CRM_ASSERT(start == ftell(file_strm)); crm_debug_3("Reading %d bytes from file", length); crm_malloc0(buffer, (length+1)); read_len = fread(buffer, 1, length, file_strm); if(read_len != length) { crm_err("Calculated and read bytes differ: %d vs. %d", length, read_len); crm_free(buffer); buffer = NULL; } else if(length <= 0) { crm_info("%s was not valid", series_file); crm_free(buffer); buffer = NULL; } crm_free(series_file); seq = crm_parse_int(buffer, "0"); crm_free(buffer); fclose(file_strm); return seq; } void write_last_sequence( const char *directory, const char *series, int sequence, int max) { int rc = 0; int len = 36; FILE *file_strm = NULL; char *series_file = NULL; CRM_CHECK(directory != NULL, return); CRM_CHECK(series != NULL, return); if(max == 0) { return; } while(max > 0 && sequence > max) { sequence -= max; } len += strlen(directory); len += strlen(series); crm_malloc0(series_file, len); sprintf(series_file, "%s/%s.last", directory, series); file_strm = fopen(series_file, "w"); if(file_strm == NULL) { crm_err("Cannot open series file %s for writing", series_file); goto bail; } rc = fprintf(file_strm, "%d", sequence); if(rc < 0) { crm_perror(LOG_ERR,"Cannot write to series file %s", series_file); } bail: if(file_strm != NULL) { fflush(file_strm); fclose(file_strm); } crm_free(series_file); } #define LOCKSTRLEN 11 int crm_pid_active(long pid) { int rc = 0; int running = 0; char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX]; if(pid <= 0) { return -1; } else if (kill(pid, 0) < 0 && errno == ESRCH) { return 0; } #ifndef HAVE_PROC_PID return 1; #endif /* check to make sure pid hasn't been reused by another process */ snprintf(proc_path, sizeof(proc_path), "/proc/%ld/exe", pid); rc = readlink(proc_path, exe_path, PATH_MAX-1); if(rc < 0) { crm_perror(LOG_ERR, "Could not read from %s", proc_path); goto bail; } exe_path[rc] = 0; snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid()); rc = readlink(proc_path, myexe_path, PATH_MAX-1); if(rc < 0) { crm_perror(LOG_ERR, "Could not read from %s", proc_path); goto bail; } myexe_path[rc] = 0; if(strcmp(exe_path, myexe_path) == 0) { running = 1; } bail: return running; } int crm_read_pidfile(const char *filename) { int fd; int rc = 0; long pid = -1; char buf[LOCKSTRLEN+1]; if ((fd = open(filename, O_RDONLY)) < 0) { goto bail; } rc = read(fd, buf, sizeof(buf)-1); if (rc < 1) { goto bail; } buf[rc] = 0; /* NUL-terminate before parsing */ if (sscanf(buf, "%ld", &pid) > 0) { if (pid <= 0){ pid = -LSB_STATUS_STOPPED; } } bail: if(fd >= 0) { close(fd); } return pid; } int crm_lock_pidfile(const char *filename) { struct stat sbuf; int fd = 0, rc = 0; long pid = 0, mypid = 0; char lf_name[256], tf_name[256], buf[LOCKSTRLEN+1]; mypid = (unsigned long) getpid(); snprintf(lf_name, sizeof(lf_name), "%s",filename); snprintf(tf_name, sizeof(tf_name), "%s.%lu", filename, mypid); if ((fd =
open(lf_name, O_RDONLY)) >= 0) { if (fstat(fd, &sbuf) >= 0 && sbuf.st_size < LOCKSTRLEN) { sleep(1); /* if someone was about to create one, * give'm a sec to do so * Though if they follow our protocol, * this won't happen. They should really * put the pid in, then link, not the * other way around. */ } if (read(fd, buf, sizeof(buf)) > 0) { if (sscanf(buf, "%lu", &pid) > 0) { if (pid > 1 && pid != getpid() && crm_pid_active(pid)) { /* locked by existing process - give up */ close(fd); return -1; } } } unlink(lf_name); close(fd); } if ((fd = open(tf_name, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) { /* Hmmh, why did we fail? Anyway, nothing we can do about it */ return -3; } /* Slight overkill with the %*d format ;-) */ snprintf(buf, sizeof(buf), "%*lu\n", LOCKSTRLEN-1, mypid); if (write(fd, buf, LOCKSTRLEN) != LOCKSTRLEN) { /* Again, nothing we can do about this */ rc = -3; close(fd); goto out; } close(fd); switch (link(tf_name, lf_name)) { case 0: if (stat(tf_name, &sbuf) < 0) { /* something weird happened */ rc = -3; } else if (sbuf.st_nlink < 2) { /* somehow, it didn't get through - NFS trouble? */ rc = -2; } else { rc = 0; } break; case EEXIST: rc = -1; break; default: rc = -3; } out: unlink(tf_name); return rc; } void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile) { long pid; const char *devnull = "/dev/null"; if(daemonize == FALSE) { return; } pid = fork(); if (pid < 0) { fprintf(stderr, "%s: could not start daemon\n", name); crm_perror(LOG_ERR,"fork"); exit(LSB_EXIT_GENERIC); } else if (pid > 0) { exit(LSB_EXIT_OK); } if (crm_lock_pidfile(pidfile) < 0 ) { pid = crm_read_pidfile(pidfile); if(crm_pid_active(pid) > 0) { crm_warn("%s: already running [pid %ld] (%s).\n", name, pid, pidfile); exit(LSB_EXIT_OK); } } umask(022); close(STDIN_FILENO); (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ close(STDOUT_FILENO); (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ close(STDERR_FILENO); (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ } gboolean crm_is_writable(const char *dir, const char *file, const char *user, const char *group, gboolean need_both) { int s_res = -1; struct stat buf; char *full_file = NULL; const char *target = NULL; gboolean pass = TRUE; gboolean readwritable = FALSE; CRM_ASSERT(dir != NULL); if(file != NULL) { full_file = crm_concat(dir, file, '/'); target = full_file; s_res = stat(full_file, &buf); if( s_res == 0 && S_ISREG(buf.st_mode) == FALSE ) { crm_err("%s must be a regular file", target); pass = FALSE; goto out; } } if (s_res != 0) { target = dir; s_res = stat(dir, &buf); if(s_res != 0) { crm_err("%s must exist and be a directory", dir); pass = FALSE; goto out; } else if( S_ISDIR(buf.st_mode) == FALSE ) { crm_err("%s must be a directory", dir); pass = FALSE; } } if(user) { struct passwd *sys_user = NULL; sys_user = getpwnam(user); readwritable = (sys_user != NULL && buf.st_uid == sys_user->pw_uid && (buf.st_mode & (S_IRUSR|S_IWUSR))); if(readwritable == FALSE) { crm_err("%s must be owned and r/w by user %s", target, user); if(need_both) { pass = FALSE; } } } if(group) { struct group *sys_grp = getgrnam(group); readwritable = ( sys_grp != NULL && buf.st_gid == sys_grp->gr_gid && (buf.st_mode & (S_IRGRP|S_IWGRP))); if(readwritable == FALSE) { if(need_both || user == NULL) { pass = FALSE; crm_err("%s must be owned and r/w by group %s", target, group); } else { crm_warn("%s should be owned and r/w by group %s", target, group); } } } out: crm_free(full_file); return pass; } static unsigned long long crm_bit_filter = 0; /* 0x00000002ULL; */ 
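/* A minimal startup sketch (hypothetical daemon name and pidfile path)
 * showing how the helpers above typically combine; crm_make_daemon()
 * itself performs the fork, pidfile locking and fd redirection:
 */
#if 0
int main(int argc, char **argv)
{
    crm_log_init_worker("example-daemon", LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
    crm_make_daemon("example-daemon", TRUE, "/var/run/example-daemon.pid");
    /* ... daemon main loop ... */
    return 0;
}
#endif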
static unsigned int bit_log_level = LOG_DEBUG_5; long long crm_clear_bit(const char *function, long long word, long long bit) { unsigned int level = bit_log_level; if(bit & crm_bit_filter) { level = LOG_ERR; } do_crm_log_unlikely(level, "Bit 0x%.16llx cleared by %s", bit, function); word &= ~bit; return word; } long long crm_set_bit(const char *function, long long word, long long bit) { unsigned int level = bit_log_level; if(bit & crm_bit_filter) { level = LOG_ERR; } do_crm_log_unlikely(level, "Bit 0x%.16llx set by %s", bit, function); word |= bit; return word; } const char * name_for_cluster_type(enum cluster_type_e type) { switch(type) { case pcmk_cluster_classic_ais: return "classic openais (with plugin)"; case pcmk_cluster_cman: return "cman"; case pcmk_cluster_corosync: return "corosync"; case pcmk_cluster_heartbeat: return "heartbeat"; case pcmk_cluster_unknown: return "unknown"; case pcmk_cluster_invalid: return "invalid"; } crm_err("Invalid cluster type: %d", type); return "invalid"; } /* Do not expose these two */ int set_cluster_type(enum cluster_type_e type); static enum cluster_type_e cluster_type = pcmk_cluster_unknown; int set_cluster_type(enum cluster_type_e type) { if(cluster_type == pcmk_cluster_unknown) { cluster_type = type; crm_info("Cluster type set to: %s", name_for_cluster_type(cluster_type)); return 0; } else if(cluster_type == type) { return 0; } else if(pcmk_cluster_unknown == type) { cluster_type = type; return 0; } crm_err("Cluster type already set to %s", name_for_cluster_type(cluster_type)); return -1; } enum cluster_type_e get_cluster_type(void) { if(cluster_type == pcmk_cluster_unknown) { const char *cluster = getenv("HA_cluster_type"); cluster_type = pcmk_cluster_invalid; if(cluster) { crm_info("Cluster type is: '%s'.", cluster); + } else { + cluster = "heartbeat"; } - if(cluster == NULL || safe_str_eq(cluster, "heartbeat")) { + + if(safe_str_eq(cluster, "heartbeat")) { #if SUPPORT_HEARTBEAT cluster_type = pcmk_cluster_heartbeat; #else crm_crit("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.", cluster); exit(100); #endif } else if(safe_str_eq(cluster, "openais")) { #if SUPPORT_COROSYNC cluster_type = pcmk_cluster_classic_ais; #else crm_crit("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.", cluster); exit(100); #endif } else if(safe_str_eq(cluster, "corosync")) { #if SUPPORT_COROSYNC cluster_type = pcmk_cluster_corosync; #else crm_crit("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.", cluster); exit(100); #endif } else if(safe_str_eq(cluster, "cman")) { #if SUPPORT_CMAN cluster_type = pcmk_cluster_cman; #else crm_crit("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.", cluster); exit(100); #endif } else { crm_crit("Unknown cluster type: '%s'.
Terminating.", cluster); exit(100); } } return cluster_type; } gboolean is_cman_cluster(void) { return get_cluster_type() == pcmk_cluster_cman; } gboolean is_corosync_cluster(void) { return get_cluster_type() == pcmk_cluster_corosync; } gboolean is_classic_ais_cluster(void) { return get_cluster_type() == pcmk_cluster_classic_ais; } gboolean is_openais_cluster(void) { enum cluster_type_e type = get_cluster_type(); if(type == pcmk_cluster_classic_ais) { return TRUE; } else if(type == pcmk_cluster_corosync) { return TRUE; } else if(type == pcmk_cluster_cman) { return TRUE; } return FALSE; } gboolean is_heartbeat_cluster(void) { return get_cluster_type() == pcmk_cluster_heartbeat; } gboolean crm_str_eq(const char *a, const char *b, gboolean use_case) { if(a == b) { return TRUE; } else if(a == NULL || b == NULL) { /* shouldn't be comparing NULLs */ return FALSE; } else if(use_case && a[0] != b[0]) { return FALSE; } else if(strcasecmp(a, b) == 0) { return TRUE; } return FALSE; } char *crm_meta_name(const char *field) { int lpc = 0; int max = 0; char *crm_name = NULL; CRM_CHECK(field != NULL, return NULL); crm_name = crm_concat(CRM_META, field, '_'); /* Massage the names so they can be used as shell variables */ max = strlen(crm_name); for(; lpc < max; lpc++) { switch(crm_name[lpc]) { case '-': crm_name[lpc] = '_'; break; } } return crm_name; } const char *crm_meta_value(GHashTable *hash, const char *field) { char *key = NULL; const char *value = NULL; key = crm_meta_name(field); if(key) { value = g_hash_table_lookup(hash, key); crm_free(key); } return value; } static struct crm_option *crm_long_options = NULL; static const char *crm_app_description = NULL; static const char *crm_short_options = NULL; static const char *crm_app_usage = NULL; static struct option *crm_create_long_opts(struct crm_option *long_options) { struct option *long_opts = NULL; #ifdef HAVE_GETOPT_H int index = 0, lpc = 0; /* * A previous, possibly poor, choice of '?' as the short form of --help * means that getopt_long() returns '?' 
for both --help and for "unknown option" * * This dummy entry allows us to differentiate between the two in crm_get_option() * and exit with the correct error code */ crm_realloc(long_opts, (index+1) * sizeof(struct option)); long_opts[index].name = "__dummmy__"; long_opts[index].has_arg = 0; long_opts[index].flag = 0; long_opts[index].val = '_'; index++; for(lpc = 0; long_options[lpc].name != NULL; lpc++) { if(long_options[lpc].name[0] == '-') { continue; } crm_realloc(long_opts, (index+1) * sizeof(struct option)); /*fprintf(stderr, "Creating %d %s = %c\n", index, * long_options[lpc].name, long_options[lpc].val); */ long_opts[index].name = long_options[lpc].name; long_opts[index].has_arg = long_options[lpc].has_arg; long_opts[index].flag = long_options[lpc].flag; long_opts[index].val = long_options[lpc].val; index++; } /* Now create the list terminator */ crm_realloc(long_opts, (index+1) * sizeof(struct option)); long_opts[index].name = NULL; long_opts[index].has_arg = 0; long_opts[index].flag = 0; long_opts[index].val = 0; #endif return long_opts; } void crm_set_options(const char *short_options, const char *app_usage, struct crm_option *long_options, const char *app_desc) { if(short_options) { crm_short_options = short_options; } if(long_options) { crm_long_options = long_options; } if(app_desc) { crm_app_description = app_desc; } if(app_usage) { crm_app_usage = app_usage; } } int crm_get_option(int argc, char **argv, int *index) { #ifdef HAVE_GETOPT_H static struct option *long_opts = NULL; if(long_opts == NULL && crm_long_options) { long_opts = crm_create_long_opts(crm_long_options); } if(long_opts) { int flag = getopt_long(argc, argv, crm_short_options, long_opts, index); switch(flag) { case 0: return long_opts[*index].val; case -1: /* End of option processing */ break; case ':': crm_debug_2("Missing argument"); crm_help('?', 1); break; case '?': crm_help('?', *index?0:1); break; } return flag; } #endif if(crm_short_options) { return getopt(argc, argv, crm_short_options); } return -1; } void crm_help(char cmd, int exit_code) { int i = 0; FILE *stream = (exit_code ? 
stderr : stdout); if(cmd == 'v' || cmd == '$') { fprintf(stream, "Pacemaker %s\n", VERSION); fprintf(stream, "Written by Andrew Beekhof\n"); goto out; } if(cmd == '!') { fprintf(stream, "Pacemaker %s (Build: %s): %s\n", VERSION, BUILD_VERSION, CRM_FEATURES); goto out; } fprintf(stream, "%s - %s\n", crm_system_name, crm_app_description); if(crm_app_usage) { fprintf(stream, "Usage: %s %s\n", crm_system_name, crm_app_usage); } if(crm_long_options) { fprintf(stream, "Options:\n"); for(i = 0; crm_long_options[i].name != NULL; i++) { if(crm_long_options[i].flags & pcmk_option_hidden) { } else if(crm_long_options[i].flags & pcmk_option_paragraph) { fprintf(stream, "%s\n\n", crm_long_options[i].desc); } else if(crm_long_options[i].flags & pcmk_option_example) { fprintf(stream, "\t#%s\n\n", crm_long_options[i].desc); } else if(crm_long_options[i].val == '-' && crm_long_options[i].desc) { fprintf(stream, "%s\n", crm_long_options[i].desc); } else { fprintf(stream, " -%c, --%s%c%s\t%s\n", crm_long_options[i].val, crm_long_options[i].name, crm_long_options[i].has_arg?'=':' ',crm_long_options[i].has_arg?"value":"", crm_long_options[i].desc?crm_long_options[i].desc:""); } } } else if(crm_short_options) { fprintf(stream, "Usage: %s - %s\n", crm_system_name, crm_app_description); for(i = 0; crm_short_options[i] != 0; i++) { int has_arg = FALSE; if(crm_short_options[i+1] == ':') { has_arg = TRUE; } fprintf(stream, " -%c %s\n", crm_short_options[i], has_arg?"{value}":""); if(has_arg) { i++; } } } fprintf(stream, "\nReport bugs to %s\n", PACKAGE_BUGREPORT); out: if(exit_code >= 0) { exit(exit_code); } } #include <../../tools/attrd.h> gboolean attrd_update_delegate(IPC_Channel *cluster, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen, const char *user_name) { gboolean success = FALSE; const char *reason = "Cluster connection failed"; /* remap common aliases */ if(safe_str_eq(section, "reboot")) { section = XML_CIB_TAG_STATUS; } else if(safe_str_eq(section, "forever")) { section = XML_CIB_TAG_NODES; } if(cluster == NULL) { reason = "No connection to the cluster"; } else { xmlNode *update = create_xml_node(NULL, __FUNCTION__); crm_xml_add(update, F_TYPE, T_ATTRD); crm_xml_add(update, F_ORIG, crm_system_name); if(name == NULL && command == 'U') { command = 'R'; } switch(command) { case 'D': case 'U': case 'v': crm_xml_add(update, F_ATTRD_TASK, "update"); crm_xml_add(update, F_ATTRD_ATTRIBUTE, name); break; case 'R': crm_xml_add(update, F_ATTRD_TASK, "refresh"); break; case 'q': crm_xml_add(update, F_ATTRD_TASK, "query"); break; } crm_xml_add(update, F_ATTRD_VALUE, value); crm_xml_add(update, F_ATTRD_DAMPEN, dampen); crm_xml_add(update, F_ATTRD_SECTION, section); crm_xml_add(update, F_ATTRD_HOST, host); crm_xml_add(update, F_ATTRD_SET, set); #if ENABLE_ACL if (user_name) { crm_xml_add(update, F_ATTRD_USER, user_name); } #endif success = send_ipc_message(cluster, update); free_xml(update); } if(success) { crm_debug("Sent update: %s=%s for %s", name, value, host?host:"localhost"); return TRUE; } crm_info("Could not send update: %s=%s for %s", name, value, host?host:"localhost"); return FALSE; } gboolean attrd_lazy_update(char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) { int max = 5; gboolean updated = FALSE; static IPC_Channel *cluster = NULL; while(updated == 0 && max > 0) { if(cluster == NULL) { crm_info("Connecting to cluster... 
%d retries remaining", max); cluster = init_client_ipc_comms_nodispatch(T_ATTRD); } if(cluster != NULL) { updated = attrd_update(cluster, command, host, name, value, section, set, dampen); } if(updated == 0) { cluster = NULL; sleep(2); max--; } } return updated; } gboolean attrd_update_no_mainloop(int *connection, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) { int max = 5; gboolean updated = FALSE; static IPC_Channel *cluster = NULL; if(connection && *connection == 0 && cluster) { crm_info("Forcing a new connection to the cluster"); cluster = NULL; } while(updated == 0 && max > 0) { if(cluster == NULL) { crm_info("Connecting to cluster... %d retries remaining", max); cluster = init_client_ipc_comms_nodispatch(T_ATTRD); } if(connection) { if(cluster != NULL) { *connection = cluster->ops->get_recv_select_fd(cluster); } else { *connection = 0; } } if(cluster != NULL) { updated = attrd_update(cluster, command, host, name, value, section, set, dampen); } if(updated == 0) { cluster = NULL; sleep(2); max--; } } return updated; } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" static void append_digest(lrm_op_t *op, xmlNode *update, const char *version, const char *magic, int level) { /* this will enable us to later determine that the * resource's parameters have changed and we should force * a restart */ char *digest = NULL; xmlNode *args_xml = NULL; if(op->params == NULL) { return; } args_xml = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); filter_action_parameters(args_xml, version); digest = calculate_operation_digest(args_xml, version); #if 0 if(level < crm_log_level && op->interval == 0 && crm_str_eq(op->op_type, CRMD_ACTION_START, TRUE)) { char *digest_source = dump_xml_unformatted(args_xml); do_crm_log(level, "Calculated digest %s for %s (%s). 
Source: %s\n", digest, ID(update), magic, digest_source); crm_free(digest_source); } #endif crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest); free_xml(args_xml); crm_free(digest); } xmlNode * create_operation_update( xmlNode *parent, lrm_op_t *op, const char *caller_version, int target_rc, const char *origin, int level) { char *magic = NULL; const char *task = NULL; xmlNode *xml_op = NULL; char *op_id = NULL; char *local_user_data = NULL; gboolean dc_munges_migrate_ops = (compare_version(caller_version, "3.0.3") < 0); CRM_CHECK(op != NULL, return NULL); do_crm_log(level, "%s: Updating resource %s after %s %s op (interval=%d)", origin, op->rsc_id, op_status2text(op->op_status), op->op_type, op->interval); if(op->op_status == LRM_OP_CANCELLED) { crm_debug_3("Ignoring cancelled op"); return NULL; } crm_debug_3("DC version: %s", caller_version); task = op->op_type; /* remap the task name under various scenarios * this makes life easier for the PE when it is trying to determine the current state */ if(crm_str_eq(task, "reload", TRUE)) { if(op->op_status == LRM_OP_DONE) { task = CRMD_ACTION_START; } else { task = CRMD_ACTION_STATUS; } } else if(dc_munges_migrate_ops && crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) { /* if the migrate_from fails it will have enough info to do the right thing */ if(op->op_status == LRM_OP_DONE) { task = CRMD_ACTION_STOP; } else { task = CRMD_ACTION_STATUS; } } else if(dc_munges_migrate_ops && op->op_status == LRM_OP_DONE && crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { task = CRMD_ACTION_START; } else if(crm_str_eq(task, CRMD_ACTION_NOTIFY, TRUE)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); /* these are not yet allowed to fail */ op->op_status = LRM_OP_DONE; op->rc = 0; } if (op_id == NULL) { op_id = generate_op_key(op->rsc_id, task, op->interval); } xml_op = find_entity(parent, XML_LRM_TAG_RSC_OP, op_id); if(xml_op != NULL) { crm_log_xml(LOG_DEBUG, "Replacing existing entry", xml_op); } else { xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP); } if(op->user_data == NULL) { crm_debug("Generating fake transition key for:" " %s_%s_%d %d from %s", op->rsc_id, op->op_type, op->interval, op->call_id, op->app_name); local_user_data = generate_transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } magic = generate_transition_magic(op->user_data, op->op_status, op->rc); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval); if(compare_version("2.1", caller_version) <= 0) { if(op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { crm_debug_2("Timing data (%s_%s_%d): last=%lu change=%lu exec=%lu queue=%lu", op->rsc_id, op->op_type, op->interval, op->t_run, op->t_rcchange, op->exec_time, op->queue_time); if(op->interval == 0) { crm_xml_add_int(xml_op, "last-run", op->t_run); } crm_xml_add_int(xml_op, "last-rc-change", op->t_rcchange);
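/* exec-time and queue-time (below) are recorded for every action in this
 * block; last-run above is only meaningful for non-recurring (interval == 0)
 * operations */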
crm_xml_add_int(xml_op, "exec-time", op->exec_time); crm_xml_add_int(xml_op, "queue-time", op->queue_time); } } if(crm_str_eq(op->op_type, CRMD_ACTION_MIGRATE, TRUE) || crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) { /* * Record migrate_source and migrate_target always for migrate ops. */ const char *name = XML_LRM_ATTR_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = XML_LRM_ATTR_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } append_digest(op, xml_op, caller_version, magic, LOG_DEBUG); if(local_user_data) { crm_free(local_user_data); op->user_data = NULL; } crm_free(magic); crm_free(op_id); return xml_op; } void free_lrm_op(lrm_op_t *op) { g_hash_table_destroy(op->params); crm_free(op->user_data); crm_free(op->output); crm_free(op->rsc_id); crm_free(op->op_type); crm_free(op->app_name); crm_free(op); } #if ENABLE_ACL void determine_request_user(char **user, IPC_Channel *channel, xmlNode *request, const char *field) { /* Get our internal validation out of the way first */ CRM_CHECK(user != NULL && channel != NULL && field != NULL, return); if(*user == NULL) { /* Figure out who our peer is and cache it... */ struct passwd *pwent = getpwuid(channel->farside_uid); if(pwent == NULL) { crm_perror(LOG_ERR, "Cannot get password entry of uid: %d", channel->farside_uid); } else { *user = crm_strdup(pwent->pw_name); } } /* If our peer is a privileged user, we might be doing something on behalf of someone else */ if(is_privileged(*user) == FALSE) { /* We're not a privileged user, set or overwrite any existing value for $field */ crm_xml_replace(request, field, *user); } else if(crm_element_value(request, field) == NULL) { /* Even if we're privileged, make sure there is always a value set */ crm_xml_replace(request, field, *user); /* } else { Legal delegation */ } crm_debug_2("Processing msg for user '%s'", crm_element_value(request, field)); } #endif diff --git a/pengine/pengine.c b/pengine/pengine.c index 0d9224b218..45c37b4b6c 100644 --- a/pengine/pengine.c +++ b/pengine/pengine.c @@ -1,297 +1,297 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include xmlNode * do_calculations( pe_working_set_t *data_set, xmlNode *xml_input, ha_time_t *now); gboolean show_scores = FALSE; int scores_log_level = LOG_DEBUG_2; gboolean show_utilization = FALSE; int utilization_log_level = LOG_DEBUG_2; extern int transition_id; #define get_series() was_processing_error?1:was_processing_warning?2:3 typedef struct series_s { int id; const char *name; const char *param; int wrap; } series_t; series_t series[] = { { 0, "pe-unknown", "_dont_match_anything_", -1 }, { 0, "pe-error", "pe-error-series-max", -1 }, { 0, "pe-warn", "pe-warn-series-max", 200 }, { 0, "pe-input", "pe-input-series-max", 400 }, }; gboolean process_pe_message(xmlNode *msg, xmlNode *xml_data, IPC_Channel *sender) { gboolean send_via_disk = FALSE; const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *op = crm_element_value(msg, F_CRM_TASK); const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE); crm_debug_3("Processing %s op (ref=%s)...", op, ref); if(op == NULL){ /* error */ } else if(strcasecmp(op, CRM_OP_HELLO) == 0) { /* ignore */ } else if(safe_str_eq(crm_element_value(msg, F_CRM_MSG_TYPE), XML_ATTR_RESPONSE)) { /* ignore */ } else if(sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_PENGINE) != 0) { crm_debug_3("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(strcasecmp(op, CRM_OP_PECALC) == 0) { int seq = -1; int series_id = 0; int series_wrap = 0; char *filename = NULL; char *graph_file = NULL; const char *value = NULL; pe_working_set_t data_set; xmlNode *converted = NULL; xmlNode *reply = NULL; gboolean process = TRUE; #if HAVE_BZLIB_H gboolean compress = TRUE; #else gboolean compress = FALSE; #endif crm_config_error = FALSE; crm_config_warning = FALSE; was_processing_error = FALSE; was_processing_warning = FALSE; graph_file = crm_strdup(CRM_STATE_DIR"/graph.XXXXXX"); graph_file = mktemp(graph_file); set_working_set_defaults(&data_set); converted = copy_xml(xml_data); if(cli_config_update(&converted, NULL, TRUE) == FALSE) { data_set.graph = create_xml_node(NULL, XML_TAG_GRAPH); crm_xml_add_int(data_set.graph, "transition_id", 0); crm_xml_add_int(data_set.graph, "cluster-delay", 0); process = FALSE; } if(process) { do_calculations(&data_set, converted, NULL); } series_id = get_series(); series_wrap = series[series_id].wrap; value = pe_pref(data_set.config_hash, series[series_id].param); if(value != NULL) { series_wrap = crm_int_helper(value, NULL); if(errno != 0) { series_wrap = series[series_id].wrap; } } else { crm_config_warn("No value specified for cluster" " preference: %s", series[series_id].param); } seq = get_last_sequence(PE_STATE_DIR, series[series_id].name); data_set.input = NULL; reply = create_reply(msg, data_set.graph); CRM_ASSERT(reply != NULL); filename = generate_series_filename( PE_STATE_DIR, series[series_id].name, seq, compress); crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename); crm_xml_add_int(reply, "graph-errors", was_processing_error); crm_xml_add_int(reply, "graph-warnings", was_processing_warning); crm_xml_add_int(reply, "config-errors", crm_config_error); crm_xml_add_int(reply, "config-warnings", crm_config_warning); if(send_ipc_message(sender, reply) == FALSE) { if(sender && 
sender->ops->get_chan_status(sender) == IPC_CONNECT) { send_via_disk = TRUE; crm_err("Answer could not be sent via IPC, send via the disk instead"); - crm_info("Writing the TE graph to %s", graph_file); + crm_notice("Writing the TE graph to %s", graph_file); if(write_xml_file(data_set.graph, graph_file, FALSE) < 0) { - crm_err("TE graph could not be written to disk"); + crm_err("TE graph could not be written to disk"); } } else { crm_info("Peer disconnected, discarding transition graph"); } } free_xml(reply); cleanup_alloc_calculations(&data_set); if(series_wrap != 0) { write_xml_file(xml_data, filename, compress); write_last_sequence(PE_STATE_DIR, series[series_id].name, seq+1, series_wrap); } if(was_processing_error) { crm_err("Transition %d:" " ERRORs found during PE processing." " PEngine Input stored in: %s", transition_id, filename); } else if(was_processing_warning) { crm_warn("Transition %d:" " WARNINGs found during PE processing." " PEngine Input stored in: %s", transition_id, filename); } else { - crm_info("Transition %d: PEngine Input stored in: %s", - transition_id, filename); + crm_notice("Transition %d: PEngine Input stored in: %s", + transition_id, filename); } if(crm_config_error) { - crm_info("Configuration ERRORs found during PE processing." - " Please run \"crm_verify -L\" to identify issues."); + crm_notice("Configuration ERRORs found during PE processing." + " Please run \"crm_verify -L\" to identify issues."); } else if(crm_config_warning) { - crm_info("Configuration WARNINGs found during PE processing." - " Please run \"crm_verify -L\" to identify issues."); + crm_notice("Configuration WARNINGs found during PE processing." + " Please run \"crm_verify -L\" to identify issues."); } if(send_via_disk) { reply = create_reply(msg, NULL); crm_xml_add(reply, F_CRM_TGRAPH, graph_file); crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename); CRM_ASSERT(reply != NULL); if(send_ipc_message(sender, reply) == FALSE) { crm_err("Answer could not be sent"); } free_xml(reply); } free_xml(converted); crm_free(graph_file); crm_free(filename); } else if(strcasecmp(op, CRM_OP_QUIT) == 0) { crm_warn("Received quit message, terminating"); exit(0); } return TRUE; } xmlNode * do_calculations(pe_working_set_t *data_set, xmlNode *xml_input, ha_time_t *now) { GListPtr gIter = NULL; - int rsc_log_level = LOG_NOTICE; + int rsc_log_level = LOG_INFO; /* pe_debug_on(); */ CRM_ASSERT(xml_input || is_set(data_set->flags, pe_flag_have_status)); if(is_set(data_set->flags, pe_flag_have_status) == FALSE) { set_working_set_defaults(data_set); data_set->input = xml_input; data_set->now = now; if(data_set->now == NULL) { data_set->now = new_ha_date(TRUE); } } else { crm_trace("Already have status - reusing"); } crm_debug_5("Calculate cluster status"); stage0(data_set); gIter = data_set->resources; for(; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t*)gIter->data; if(is_set(rsc->flags, pe_rsc_orphan) && rsc->role == RSC_ROLE_STOPPED) { continue; } rsc->fns->print(rsc, NULL, pe_print_log, &rsc_log_level); } crm_trace("Applying placement constraints"); stage2(data_set); crm_trace("Create internal constraints"); stage3(data_set); crm_trace("Check actions"); stage4(data_set); crm_trace("Allocate resources"); stage5(data_set); crm_trace("Processing fencing and shutdown cases"); stage6(data_set); crm_trace("Applying ordering constraints"); stage7(data_set); crm_trace("Create transition graph"); stage8(data_set); crm_trace("=#=#=#=#= Summary =#=#=#=#="); crm_trace("\t========= Set %d (Un-runnable) 
=========", -1); if(crm_log_level > LOG_DEBUG) { gIter = data_set->actions; for(; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t*)gIter->data; if(is_set(action->flags, pe_action_optional) == FALSE && is_set(action->flags, pe_action_runnable) == FALSE && is_set(action->flags, pe_action_pseudo) == FALSE) { log_action(LOG_DEBUG_2, "\t", action, TRUE); } } } return data_set->graph; } diff --git a/tools/crm_report.in b/tools/crm_report.in index cd6e63fd1d..15df3ec7ba 100755 --- a/tools/crm_report.in +++ b/tools/crm_report.in @@ -1,400 +1,429 @@ #!/bin/sh # Copyright (C) 2010 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Note the quotes around `$TEMP': they are essential! TEMP=`getopt \ - -o hv?xl:f:t:n:T:Lpc:dSACHu:MV \ - --long help,cts:,node:,nodes:,from:,to:logfile:,as-directory,single-node,cluster:,user:,version,features \ - -n 'pcmk_report' -- "$@"` + -o hv?xl:f:t:n:T:Lpc:dSACHu:MVs \ + --long help,cts:,cts-log:,node:,nodes:,from:,to:logfile:,as-directory,single-node,cluster:,user:,version,features \ + -n 'crm_report' -- "$@"` eval set -- "$TEMP" times="" tests="" nodes="" compress=1 cluster="any" ssh_user="root" search_logs=1 report_data=`dirname $0` extra_logs="" -sanitize_patterns="passw.*" +sanitize_patterns="" log_patterns="CRIT: ERROR:" usage() { cat< "$l_base/$HALOG_F" fi cat<$l_base/.env LABEL="$label" REPORT_HOME="$r_base" REPORT_MASTER="$host" LOG_START=$start LOG_END=$end REMOVE=1 SANITIZE="$sanitize_patterns" CLUSTER=$cluster LOG_PATTERNS="$log_patterns" EXTRA_LOGS="$extra_logs" SEARCH_LOGS=$search_logs verbose=$verbose EOF for node in $nodes; do if [ `uname -n` = $node ]; then cat $l_base/.env $report_data/report.common $report_data/report.collector > $r_base/collector bash $r_base/collector else cat $l_base/.env $report_data/report.common $report_data/report.collector \ | ssh -l $ssh_user -T $node -- "mkdir -p $r_base; cat > $r_base/collector; bash $r_base/collector" | (cd $l_base && tar xf -) fi done analyze $l_base > $l_base/$ANALYSIS_F if [ -f $l_base/$HALOG_F ]; then node_events $l_base/$HALOG_F > $l_base/$EVENTS_F fi for node in $nodes; do cat $l_base/$node/$ANALYSIS_F >> $l_base/$ANALYSIS_F if [ -s $l_base/$node/$EVENTS_F ]; then cat $l_base/$node/$EVENTS_F >> $l_base/$EVENTS_F elif [ -s $l_base/$HALOG_F ]; then awk "\$4==\"$nodes\"" $l_base/$EVENTS_F >> $l_base/$n/$EVENTS_F fi done log " " if [ $compress = 1 ]; then fname=`shrink $l_base` rm -rf $l_base log "Collected results are available in $fname" log " " log "Please create a bug entry at" log " http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker" log "Include a description of your problem and attach this tarball" log " " log "Thank you for taking time to create this report." 
else log "Collected results are available in $l_base" fi log " " } # # check if files have same content in the cluster # cibdiff() { d1=`dirname $1` d2=`dirname $2` if [ -f $d1/RUNNING -a -f $d2/RUNNING ] || [ -f $d1/STOPPED -a -f $d2/STOPPED ]; then if which crm_diff > /dev/null 2>&1; then crm_diff -c -n $1 -o $2 else info "crm_diff(8) not found, cannot diff CIBs" fi else echo "can't compare cibs from running and stopped systems" fi } diffcheck() { [ -f "$1" ] || { echo "$1 does not exist" return 1 } [ -f "$2" ] || { echo "$2 does not exist" return 1 } case `basename $1` in $CIB_F) cibdiff $1 $2;; $B_CONF) diff -u $1 $2;; # confdiff? *) diff -u $1 $2;; esac } # # remove duplicates if files are same, make links instead # consolidate() { for n in $NODES; do if [ -f $1/$2 ]; then rm $1/$n/$2 else mv $1/$n/$2 $1 fi ln -s ../$2 $1/$n done } analyze_one() { rc=0 node0="" for n in $NODES; do if [ "$node0" ]; then diffcheck $1/$node0/$2 $1/$n/$2 rc=$(($rc+$?)) else node0=$n fi done return $rc } analyze() { flist="$HOSTCACHE $MEMBERSHIP_F $CIB_F $CRM_MON_F $B_CONF logd.cf $SYSINFO_F" for f in $flist; do printf "Diff $f... " ls $1/*/$f >/dev/null 2>&1 || { echo "no $1/*/$f :/" continue } if analyze_one $1 $f; then echo "OK" [ "$f" != $CIB_F ] && consolidate $1 $f else echo "" fi done } do_cts() { - if [ x$ctslog = x ]; then - ctslog=`findmsg 1 "CTS: Stack:"` - fi - if [ x$ctslog = x ]; then - fatal "No CTS control file detected" - fi - - if [ -z "$nodes" ]; then - debug "Using CTS control file: $ctslog" - nodes=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '` - fi - test_sets=`echo $tests | tr ',' ' '` for test_set in $test_sets; do + + start_time=0 start_test=`echo $test_set | tr '-' ' ' | awk '{print $1}'` + + end_time=0 end_test=`echo $test_set | tr '-' ' ' | awk '{print $2}'` if [ x$end_test = x ]; then msg="Extracting test $start_test" label="CTS-`date +"%a-%d-%b-%Y"`-$start_test" end_test=`expr $start_test + 1` else - msg="Extracting set $start_test to $end_test..." + msg="Extracting set $start_test to $end_test" label="CTS-`date +"%a-%d-%b-%Y"`-$start_test-$end_test" end_test=`expr $end_test + 1` fi if [ $start_test = 0 ]; then start_pat="BEGINNING [0-9].* TESTS" else start_pat="Running test.*\[ *$start_test\]" fi - ctslog=`findmsg 1 "$start_pat"` + + if [ x$ctslog = x ]; then + ctslog=`findmsg 1 "$start_pat"` + fi + if [ x$ctslog = x ]; then + fatal "No CTS control file detected" + fi + line=`grep -n "$start_pat" $ctslog | tail -1 | sed 's/:.*//'` - start_time=`linetime $ctslog $line` + if [ ! -z "$line" ]; then + start_time=`linetime $ctslog $line` + fi - ctslog=`findmsg 1 "Running test.*\[ *$end_test\]"` line=`grep -n "Running test.*\[ *$end_test\]" $ctslog | tail -1 | sed 's/:.*//'` - end_time=`linetime $ctslog $line` + if [ ! 
-z "$line" ]; then + end_time=`linetime $ctslog $line` + fi + if [ -z "$nodes" ]; then + debug "Using CTS control file: $ctslog" + nodes=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '` + fi + if [ $end_time -lt $start_time ]; then debug "Test didn't complete, grabbing everything up to now" end_time=`date +%s` fi - log "$msg (`time2str $start_time` to `time2str $end_time`)" - collect_data $label $start_time $end_time $ctslog + if [ $start_time != 0 ];then + log "$msg (`time2str $start_time` to `time2str $end_time`)" + collect_data $label $start_time $end_time $ctslog + else + fatal "$msg failed: not found" + fi done } getnodes() { if [ -z $1 ]; then cluster=`get_cluster_type` else cluster=$1 fi cluster_cf=`find_cluster_cf $cluster` # 1. Live if ps -ef | egrep -qs [c]rmd then debug "Querying CRM for nodes" cibadmin -Ql -o nodes | awk ' /type="normal"/ { for( i=1; i<=NF; i++ ) if( $i~/^uname=/ ) { sub("uname=.","",$i); sub("\".*","",$i); print $i; next; } } ' # 2. hostcache elif [ -f $HA_STATE_DIR/hostcache ]; then debug "Reading nodes from $HA_STATE_DIR/hostcache" awk '{print $1}' $HA_STATE_DIR/hostcache # 3. ha.cf elif [ "x$cluster" = "xheartbeat" ]; then debug "Reading nodes from $cluster_cf" getcfvar $cluster node $cluster_cf else # Look in the logs... logfile=`findmsg 1 "crm_update_peer"` debug "Reading nodes from $logfile" if [ ! -z "$logfile" ]; then grep crm_update_peer: $logfile | sed s/.*crm_update_peer// | sed s/://g | awk '{print $2}' | grep -v "(null)" | sort -u | tr '\n' ' ' fi fi } if [ "x$tests" != "x" ]; then do_cts elif [ "x$start_time" != "x" ]; then masterlog="" + + if [ -z "$sanitize_patterns" ]; then + log "WARNING: The tarball produced by this program may contain" + log " sensitive information such as passwords." + log "" + log "We will attempt to remove such information if you use the" + log "-p option. For example: -p \"pass.*\" -p \"user.*\"" + log "" + log "However, doing this may reduce the ability for the recipients" + log "to diagnose issues and generally provide assistance." + log "" + log "IT IS YOUR RESPONSIBILITY TO PROTECT SENSITIVE DATA FROM EXPOSURE" + log "" + fi + if [ -z "$nodes" ]; then nodes=`getnodes $cluster` log "Calculated node list: $nodes" fi if [ -z "$nodes" ]; then fatal "Cannot determine node list, please specify manually with --nodes" fi if echo $nodes | grep -qs $host then debug "We are a cluster node" else debug "We are a log master" masterlog=`findmsg 1 "crmd\\|CTS"` fi if [ -z $end_time ]; then end_time=`perl -e 'print time()'` fi label="pcmk-`date +"%a-%d-%b-%Y"`" log "Collecting data from $nodes (`time2str $start_time` to `time2str $end_time`)" collect_data $label $start_time $end_time $masterlog else - fatal "Not sure what to do, no tests or times to extract" + fatal "Not sure what to do, no tests or time ranges to extract" fi diff --git a/tools/hb2openais.sh.in b/tools/hb2openais.sh.in index 2acd9816bc..6723c3d789 100755 --- a/tools/hb2openais.sh.in +++ b/tools/hb2openais.sh.in @@ -1,804 +1,804 @@ #!/bin/bash # Copyright (C) 2008,2009 Dejan Muhamedagic # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # . @sysconfdir@/ha.d/shellfuncs # utillib.sh moved (sigh!) # cluster-glue doesn't make its shared data dir available # we guess, and guess that that's safe, that the datadir is the same testdirs="@datadir@/cluster-glue $HA_NOARCHBIN/utillib.sh" for d in $testdirs; do if [ -f $d/utillib.sh ]; then NOARCH_DIR=$d break fi done test -f $NOARCH_DIR/utillib.sh || { echo "sorry, could not find utillib.sh in $testdirs" exit 1 } . $NOARCH_DIR/utillib.sh . $NOARCH_DIR/ha_cf_support.sh PROG=`basename $0` PROGDIR=`dirname $0` # the default syslog facility is not (yet) exported by heartbeat # to shell scripts # DEFAULT_HA_LOGFACILITY="daemon" export DEFAULT_HA_LOGFACILITY : ${SSH_OPTS="-T"} usage() { cat</dev/null else ssh -T -o Batchmode=yes $1 true 2>/dev/null fi } findsshuser() { for u in "" $TRY_SSH; do rc=0 for n in `getnodes`; do [ "$node" = "$WE" ] && continue testsshuser $n $u || { rc=1 break } done if [ $rc -eq 0 ]; then echo $u return 0 fi done return 1 } important() { echo "IMPORTANT: $*" >&2 } newportinfo() { important "the multicast port number on $1 is set to $2" important "please update your firewall rules (if any)" } changemediainfo() { important "$PRODUCT uses multicast for communication" important "please make sure that your network infrastructure supports it" } multicastinfo() { info "multicast for $PRODUCT ring $1 set to $2:$3" } netaddrinfo() { info "network address for $PRODUCT ring $1 set to $2" } backup_files() { [ "$TEST_DIR" ] && return info "backing up $BACKUP_FILES to $BACKUPDIR" $DRY mkdir $BACKUPDIR || { echo sorry, could not create $BACKUPDIR directory echo please cleanup exit 1 } if [ -z "$DRY" ]; then tar cf - $BACKUP_FILES | gzip > $BACKUPDIR/$WE.tar.gz || { echo sorry, could not create $BACKUPDIR/$WE.tar.gz exit 1 } else $DRY "tar cf - $BACKUP_FILES | gzip > $BACKUPDIR/$WE.tar.gz" fi } revert() { [ "$TEST_DIR" ] && return test -d $BACKUPDIR || { echo sorry, there is no $BACKUPDIR directory echo cannot revert exit 1 } info "restoring $BACKUP_FILES from $BACKUPDIR/$WE.tar.gz" gzip -dc $BACKUPDIR/$WE.tar.gz | (cd / && tar xf -) || { echo sorry, could not unpack $BACKUPDIR/$WE.tar.gz exit 1 } } pls_press_enter() { [ "$TEST_DIR" ] && return cat</dev/null | prochbmedia 2>/dev/null | sort -u | wc -l` if [ $mediacnt -ge 2 ]; then setvalue rrp_mode $RRP_MODE fi changemediainfo endstanza # the logging stanza getlogvars # enforce some syslog facility [ "$COROSYNC" ] && TO_FILE=to_logfile || TO_FILE=to_file debugsetting=`setdebug` newstanza logging setvalue debug $debugsetting setvalue fileline off setvalue to_stderr no setvalue timestamp off if [ "$HA_LOGFILE" ]; then setvalue $TO_FILE yes setvalue logfile $HA_LOGFILE else setvalue $TO_FILE no fi if [ "$HA_LOGFACILITY" ]; then setvalue to_syslog yes setvalue syslog_facility $HA_LOGFACILITY else setvalue to_syslog no fi newstanza logger_subsys setvalue subsys AMF setvalue debug $debugsetting endstanza endstanza newstanza amf setvalue mode disabled endstanza } if [ -z "$DRY" ]; then openaisconf > $AIS_CONF || fatal "cannot create $AIS_CONF" grep -wqs interface $AIS_CONF || fatal "no media found in $HA_CF" else openaisconf fi [ "$AIS_KEYF" ] && { info "Generating a key for OpenAIS authentication ..." 
if [ "$TEST_DIR" ]; then echo would run: $DRY $KEYGEN_PROG else $DRY $KEYGEN_PROG || fatal "cannot generate the key using $KEYGEN_PROG" fi } # remove various files which could get in a way if [ -z "$TEST_DIR" ]; then $DRY rm -f $RM_FILES fi fixcibperms() { [ "$TEST_DIR" ] && return uid=`ls -ldn $CRM_VARLIB | awk '{print $3}'` gid=`ls -ldn $CRM_VARLIB | awk '{print $4}'` $DRY $MYSUDO chown $uid:$gid $CIB } upgrade_cib() { $DRY $MYSUDO cibadmin --upgrade --force $DRY $MYSUDO crm_verify -V -x $CIB_file } py_proc_cib() { tmpfile=`maketempfile` $MYSUDO sh -c "python $PY_HELPER $* <$CIB >$tmpfile" || fatal "cannot process cib: $PY_HELPER $*" $DRY $MYSUDO mv $tmpfile $CIB } set_property() { py_proc_cib set_property $* } # remove the nodes section from the CIB py_proc_cib set_node_ids info "Edited the nodes' ids in the CIB" numnodes=`getnodes | wc -w` [ $numnodes -eq 2 ] && set_property no-quorum-policy ignore set_property expected-nodes $numnodes overwrite info "Done converting ha.cf to $AIS_CONF_BASE" important "Please check the resulting $AIS_CONF" important "and in particular interface stanzas and logging." important "If you find problems, please edit $AIS_CONF now!" # # first part done (openais), on to the CIB analyze_cib() { info "Analyzing the CIB..." $MYSUDO sh -c "python $PY_HELPER analyze_cib <$CIB" } check_respawns() { rc=1 for p in $SUPPORTED_RESPAWNS; do grep -qs "^respawn.*$p" $HA_CF && { info "a $p resource has to be created" rc=0 } done return $rc } part2() { intro_part2 || return 0 opts="-c $HA_CF" [ "$TEST_DIR" ] && opts="-T $opts" py_proc_cib $opts convert_cib info "Processed the CIB successfully" } # make the user believe that something's happening :) some_dots_idle() { [ "$TEST_DIR" ] && return cnt=0 printf "$2 ." while [ $cnt -lt $1 ]; do sleep 1 printf "." ctn=$((cnt+1)) done echo } print_dc() { crm_mon -1 | awk '/Current DC/{print $3}' } dcidle() { dc=`$MYSUDO print_dc` if [ "$dc" = "$WE" ]; then maxcnt=60 cnt=0 while [ $cnt -lt $maxcnt ]; do stat=`$MYSUDO crmadmin -S $dc` echo $stat | grep -qs S_IDLE && break [ "$1" = "-v" ] && echo $stat sleep 1 printf "." cnt=$((cnt+1)) done echo $stat | grep -qs S_IDLE else some_dots_idle 10 #just wait for 10 seconds fi } wait_crm() { [ "$TEST_DIR" ] && return cnt=10 dc="" while [ -z "$dc" -a $cnt -gt 0 ]; do dc=`$MYSUDO print_dc` cnt=$((cnt-1)) done if [ x = x"$dc" ]; then echo "sorry, no dc found/elected" exit 1 fi dcidle } manage_cluster() { if [ "$TEST_DIR" ]; then echo would run: /etc/init.d/openais $1 else $DRY /etc/init.d/openais $1 fi } tune_ocfs2() { cat< $MAN_TARF) fi diff --git a/tools/report.collector b/tools/report.collector index c708723372..930047e433 100644 --- a/tools/report.collector +++ b/tools/report.collector @@ -1,651 +1,654 @@ # Copyright (C) 2007 Dejan Muhamedagic # Almost everything as part of hb_report # Copyright (C) 2010 Andrew Beekhof # Cleanups, refactoring, extensions # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
diff --git a/tools/report.collector b/tools/report.collector
index c708723372..930047e433 100644
--- a/tools/report.collector
+++ b/tools/report.collector
@@ -1,651 +1,654 @@
# Copyright (C) 2007 Dejan Muhamedagic
# Almost everything as part of hb_report
# Copyright (C) 2010 Andrew Beekhof
# Cleanups, refactoring, extensions
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

if
    echo $REPORT_HOME | grep -qs '^/'
then
-    debug "Using full path to working directory"
+    debug "Using full path to working directory: $REPORT_HOME"
else
-    debug "Canonicalizing working directory path"
    REPORT_HOME="$HOME/$REPORT_HOME"
+    debug "Canonicalizing working directory path: $REPORT_HOME"
fi

findlogdcf() {
    for f in \
        `test -x $CRM_DAEMON_DIR/ha_logd &&
            which strings > /dev/null 2>&1 &&
            strings $CRM_DAEMON_DIR/ha_logd | grep 'logd\.cf'` \
        `for d; do echo $d/logd.cf $d/ha_logd.cf; done`
    do
        if [ -f "$f" ]; then
            echo $f
            debug "Located logd.cf at: $f"
            return 0
        fi
    done
    debug "Could not determine logd.cf location"
    return 1
}

#
# find files newer than a and older than b
#
isnumber() {
    echo "$*" | grep -qs '^[0-9][0-9]*$'
}
touchfile() {
    t=`mktemp` &&
        perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' &&
        echo $t
}
find_files_clean() {
    [ -z "$from_stamp" ] || rm -f "$from_stamp"
    [ -z "$to_stamp" ] || rm -f "$to_stamp"
    from_stamp=""
    to_stamp=""
}
find_files() {
    dirs=$1
    from_time=$2
    to_time=$3

    isnumber "$from_time" && [ "$from_time" -gt 0 ] || {
        warning "sorry, can't find files in [ $1 ] based on time if you don't supply time"
        return
    }

    trap find_files_clean 0
    if ! from_stamp=`touchfile $from_time`; then
        warning "sorry, can't create temporary file for find_files"
        return
    fi
    findexp="-newer $from_stamp"

    if isnumber "$to_time" && [ "$to_time" -gt 0 ]; then
        if ! to_stamp=`touchfile $to_time`; then
            warning "sorry, can't create temporary file for find_files"
            find_files_clean
            return
        fi
        findexp="$findexp ! -newer $to_stamp"
    fi
    find $dirs -type f $findexp

    find_files_clean
    trap "" 0
}
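# Illustration only: find(1) has no portable "newer than time T" test, so
# find_files above materialises the window as two stamp files and compares
# mtimes with -newer. The same trick, assuming GNU touch -d (touchfile above
# uses perl's utime precisely to avoid that assumption):
files_in_window() {
    dir=$1 from=$2 to=$3   # from/to are epoch seconds
    fs=`mktemp` && ts=`mktemp` || return 1
    touch -d "@$from" "$fs"
    touch -d "@$to" "$ts"
    find "$dir" -type f -newer "$fs" ! -newer "$ts"
    rm -f "$fs" "$ts"
}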
#
# check permissions of files/dirs
#
pl_checkperms() {
    perl -e '
        # check permissions and ownership
        # uid and gid are numeric
        # everything must match exactly
        # no error checking! (file should exist, etc)
        ($filename, $perms, $in_uid, $in_gid) = @ARGV;
        ($mode,$uid,$gid) = (stat($filename))[2,4,5];
        $p=sprintf("%04o", $mode & 07777);
        $p ne $perms and exit(1);
        $uid ne $in_uid and exit(1);
        $gid ne $in_gid and exit(1);
    ' $*
}
num_id() {
    getent $1 $2 | awk -F: '{print $3}'
}
chk_id() {
    [ "$2" ] && return 0
    echo "$1: id not found"
    return 1
}
check_perms() {
    while read type f p uid gid; do
        [ -$type $f ] || {
            echo "$f wrong type or doesn't exist"
            continue
        }
        n_uid=`num_id passwd $uid`
        chk_id "$uid" "$n_uid" || continue
        n_gid=`num_id group $gid`
        chk_id "$gid" "$n_gid" || continue
        pl_checkperms $f $p $n_uid $n_gid || {
            echo "wrong permissions or ownership for $f:"
            ls -ld $f
        }
    done
}

#
# coredumps
#
findbinary() {
    random_binary=`which cat 2>/dev/null`  # suppose we are lucky
    binary=`gdb $random_binary $1 < /dev/null 2>/dev/null |
        grep 'Core was generated' | awk '{print $5}' |
        sed "s/^.//;s/[.':]*$//"`
    if [ x = x"$binary" ]; then
        debug "Could not detect the program name for core $1 from the gdb output; will try with file(1)"
        binary=$(file $1 | awk '/from/{
            for( i=1; i<=NF; i++ )
                if( $i == "from" ) {
                    print $(i+1)
                    break
                }
            }')
        binary=`echo $binary | tr -d "'"`
        binary=$(echo $binary | tr -d '`')
        if [ "$binary" ]; then
            binary=`which $binary 2>/dev/null`
        fi
    fi
    if [ x = x"$binary" ]; then
        warning "Could not find the program path for core $1"
        return
    fi
    fullpath=`which $binary 2>/dev/null`
    if [ x = x"$fullpath" ]; then
        if [ -x $CRM_DAEMON_DIR/$binary ]; then
            echo $CRM_DAEMON_DIR/$binary
            debug "Found the program at $CRM_DAEMON_DIR/$binary for core $1"
        else
            warning "Could not find the program path for core $1"
        fi
    else
        echo $fullpath
        debug "Found the program at $fullpath for core $1"
    fi
}
getbt() {
    which gdb > /dev/null 2>&1 || {
        warning "Please install gdb to get backtraces"
        return
    }
    for corefile; do
        absbinpath=`findbinary $corefile`
        [ x = x"$absbinpath" ] && continue
        echo "====================== start backtrace ======================"
        ls -l $corefile
        gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \
            $absbinpath $corefile 2>/dev/null
        echo "======================= end backtrace ======================="
    done
}
getconfig() {
    target=$1; shift;
    for cf in $*; do
-        if [ -f "$cf" ]; then
-            cp -p "$cf" $target/
+        if [ -e "$cf" ]; then
+            cp -a "$cf" $target/
        fi
    done

    crm_uuid -r > $target/$HB_UUID_F 2>&1

    if
        ps -ef | egrep -qs [c]rmd
    then
        crm_mon -1 2>&1 | grep -v '^Last upd' > $target/$CRM_MON_F
-        cibadmin -Ql > $target/${CIB_F}.live
+        cibadmin -Ql 2>/dev/null > $target/${CIB_F}.live
        crm_node -p > $target/$MEMBERSHIP_F 2>&1
        echo "$host" > $target/RUNNING
    else
        echo "$host" > $target/STOPPED
    fi

    if [ -f "$target/$CIB_F" ]; then
        crm_verify -V -x $target/$CIB_F >$target/$CRM_VERIFY_F 2>&1
        CIB_file=$target/$CIB_F crm configure show >$target/$CIB_TXT_F 2>&1
    fi
}

#
# remove values of sensitive attributes
#
# this is not proper xml parsing, but it will work under the
# circumstances
sanitize_xml_attrs() {
    sed $(
        for patt in $SANITIZE; do
            echo "-e /name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/"
        done
    )
}
sanitize_hacf() {
    awk '
        $1=="stonith_host"{ for( i=5; i<=NF; i++ ) $i="****"; }
        {print}
    '
}
sanitize_one_clean() {
    [ -z "$tmp" ] || rm -f "$tmp"
    tmp=""
    [ -z "$ref" ] || rm -f "$ref"
    ref=""
}
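# Illustration of what sanitize_xml_attrs builds: each pattern in $SANITIZE
# becomes one -e expression, so with SANITIZE="passw.*" the generated sed
# call behaves like this (the sample nvpair is invented):
demo_sanitize_xml_attrs() {
    echo '<nvpair name="password" value="hunter2"/>' |
        sed -e '/name="passw.*"/s/value="[^"]*"/value="****"/'
    # prints: <nvpair name="password" value="****"/>
}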
sanitize() {
    file=$1
    compress=""
    if [ -z "$SANITIZE" ]; then
        return
    fi
    echo $file | grep -qs 'gz$' && compress=gzip
    echo $file | grep -qs 'bz2$' && compress=bzip2
    if [ "$compress" ]; then
        decompress="$compress -dc"
    else
        compress=cat
        decompress=cat
    fi

    trap sanitize_one_clean 0
    tmp=`mktemp`
    ref=`mktemp`
    if [ -z "$tmp" -o -z "$ref" ]; then
        sanitize_one_clean
        fatal "cannot create temporary files"
    fi
    touch -r $file $ref  # save the mtime
    if [ "`basename $file`" = ha.cf ]; then
        sanitize_hacf
    else
        $decompress | sanitize_xml_attrs | $compress
    fi < $file > $tmp
    mv $tmp $file
    # note: cleaning $tmp up is still needed even after it's renamed
    # because its temp directory is still there.
    touch -r $ref $file
    sanitize_one_clean
    trap "" 0
}

pickfirst() {
    for x; do
        which $x >/dev/null 2>&1 && {
            echo $x
            return 0
        }
    done
    return 1
}

#
# get some system info
#
distro() {
    if
        which lsb_release >/dev/null 2>&1
    then
        lsb_release -d
        debug "Using lsb_release for distribution info"
        return
    fi

    relf=`ls /etc/debian_version 2>/dev/null` ||
        relf=`ls /etc/slackware-version 2>/dev/null` ||
        relf=`ls -d /etc/*-release 2>/dev/null` && {
            for f in $relf; do
                test -f $f && {
                    echo "`ls $f` `cat $f`"
                    debug "Found `echo $relf | tr '\n' ' '` distribution release file(s)"
                    return
                }
            done
        }
    warning "No lsb_release, no /etc/*-release, no /etc/debian_version: no distro information"
}

pkg_ver() {
    if which dpkg >/dev/null 2>&1 ; then
        pkg_mgr="deb"
    elif which rpm >/dev/null 2>&1 ; then
        pkg_mgr="rpm"
    elif which pkg_info >/dev/null 2>&1 ; then
        pkg_mgr="pkg_info"
    elif which pkginfo >/dev/null 2>&1 ; then
        pkg_mgr="pkginfo"
    else
        warning "Unknown package manager"
        return
    fi
    debug "The package manager is: $pkg_mgr"
    echo "The package manager is: $pkg_mgr"

    # for Linux .deb based systems
    case $pkg_mgr in
        deb)
            dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W
            for pkg in $*; do
                if dpkg-query -W $pkg 2>/dev/null ; then
                    debug "Verifying installation of: $pkg"
                    echo "Verifying installation of: $pkg"
                    debsums -s $pkg 2>/dev/null
                fi
            done
            ;;
        rpm)
            rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n'
            for pkg in $*; do
                if rpm -q $pkg >/dev/null 2>&1 ; then
                    debug "Verifying installation of: $pkg"
                    echo "Verifying installation of: $pkg"
                    rpm --verify $pkg
                fi
            done
            ;;
        pkg_info)
            pkg_info
            ;;
        pkginfo)
            pkginfo | awk '{print $3}'  # format?
            ;;
    esac
}

getbacktraces() {
    debug "Looking for backtraces: $*"
    flist=$(
        for f in `find_files "$CRM_CORE_DIRS" $1 $2`; do
            bf=`basename $f`
            test `expr match $bf core` -gt 0 &&
                echo $f
        done)
    if [ "$flist" ]; then
        log "Found core files: `echo $flist | tr '\n' ' '`"
        getbt "$flist"
    fi
}

getpeinputs() {
    flist=$(
        find_files $PE_STATE_DIR $1 $2 | sed "s,`dirname $PE_STATE_DIR`/,,g"
    )
    if [ "$flist" ]; then
        (cd `dirname $PE_STATE_DIR` && tar cf - $flist) | (cd $3 && tar xf -)
        debug "found `echo $flist | wc -w` pengine input files in $PE_STATE_DIR"
    fi
}
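# Aside: the pipeline in getpeinputs is the portable tar-pipe idiom for
# copying a file list while preserving relative paths (cp --parents would
# be GNU-only). Generic shape, with example arguments:
copy_with_paths() {
    srcroot=$1 dest=$2; shift 2
    (cd "$srcroot" && tar cf - "$@") | (cd "$dest" && tar xf -)
}
# e.g.: copy_with_paths /var/lib /tmp/report pengine/pe-input-0.bz2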
#
# some basic system info and stats
#
sys_info() {
    cluster=$1; shift
    echo "Platform: `uname`"
    echo "Kernel release: `uname -r`"
    echo "Architecture: `uname -m`"
    if [ `uname` = Linux ]; then
        echo "Distribution: `distro`"
    fi

    cibadmin --version 2>&1
    cibadmin -! 2>&1
    case $cluster in
        openais)
            : echo "openais version: how?"
            ;;
        corosync)
            /usr/sbin/corosync -v 2>&1
            ;;
        heartbeat)
            echo "heartbeat version: `$CRM_DAEMON_DIR/heartbeat -V 2>&1`"
            ;;
    esac

-    # TODO: Get cluster-glue build version
+    # Cluster glue version hash (if available)
+    stonith -V
+
+    # Resource agents version hash
    echo "resource-agents: `grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs`"

    pkg_ver $*
}

sys_stats() {
    set -x
    uname -n
    uptime
    ps axf
    ps auxw
    top -b -n 1
    ifconfig -a
    ip addr list
    netstat -i
    arp -an
    test -d /proc && {
        cat /proc/cpuinfo
    }
    lsscsi
    lspci
    mount
    df
    set +x
}

dlm_dump() {
    if which dlm_tool >/dev/null 2>&1 ; then
        echo NOTICE - Lockspace overview:
        dlm_tool ls
        dlm_tool ls | grep name |
        while read X N ; do
            echo NOTICE - Lockspace $N:
            dlm_tool lockdump $N
        done
        echo NOTICE - Lockspace history:
        dlm_tool dump
    fi
}

iscfvarset() {
    test "`getcfvar $1 $2`"
}

iscfvartrue() {
    getcfvar $1 $2 $3 | egrep -qsi "^(true|y|yes|on|1)"
}

uselogd() {
    cf_file=$2
    case $1 in
        heartbeat)
            iscfvartrue $1 use_logd $cf_file && return 0  # if use_logd true
            iscfvarset $1 logfacility $cf_file ||
                iscfvarset $1 logfile $cf_file ||
                iscfvarset $1 debugfile $cf_file ||
                return 0  # or none of the log options set
            false
            ;;
        *)
            iscfvartrue $1 use_logd $cf_file
            ;;
    esac
}

get_logfile() {
    cf_type=$1
    cf_file="$2"
    cf_logd="$3"
    facility_var="logfacility"

    if [ -f "$cf_logd" ]; then
        if uselogd; then
            cf_file="$cf_logd"
            cf_type="logd"
        fi
    fi

    debug "Reading $cf_type log settings"
    case $cf_type in
        openais|corosync)
            debug "Reading log settings from $cf_file"
            if iscfvartrue $cf_type to_syslog $cf_file; then
                facility_var=syslog_facility
            elif iscfvartrue $cf_type to_file $cf_file; then
                logfile=`getcfvar $cf_type logfile $cf_file`
            fi
            ;;
        heartbeat|logd)
            debug "Reading log settings from $cf_file"
            if
                iscfvartrue $cf_type debug $cf_file
            then
                logfile=`getcfvar $cf_type debugfile $cf_file`
            else
                logfile=`getcfvar $cf_type logfile $cf_file`
            fi
            ;;
        *)
            debug "Unknown cluster type: $cf_type"
            echo "/var/log/messages"
            ;;
    esac

    if [ "x$logfile" != "x" -a -f "$logfile" ]; then
        echo $logfile
    else
        facility=`getcfvar $cf_type $facility_var $cf_file`
        [ "" = "$facility" ] && facility="daemon"
        if [ "none" = "$facility" ]; then
            fatal "No logging is configured"
        fi
        msg="Mark:pcmk:`perl -e 'print time()'`"
        logger -p $facility.info $msg >/dev/null 2>&1
        findmsg 1 "$msg"
    fi
}

essential_files() { cat< $SYSINFO_F essential_files $cluster | check_perms > $PERMISSIONS_F 2>&1

-getconfig "$REPORT_HOME/$host" "$cluster_cf" "$logd_cf" "$HA_STATE_DIR/crm/$CIB_F" "$HA_STATE_DIR/hostcache"
-
-dlm_dump > $DLM_DUMP_F 2>&1
-sys_stats > $SYSSTATS_F 2>&1
+getconfig "$REPORT_HOME/$host" "$cluster_cf" "$logd_cf" "$HA_STATE_DIR/crm/$CIB_F" "$HA_STATE_DIR/hostcache" "/etc/drbd.conf" "/etc/drbd.d"

getpeinputs $LOG_START $LOG_END $REPORT_HOME/$host
getbacktraces $LOG_START $LOG_END > $REPORT_HOME/$host/$BT_F

dc=`crm_mon -1 2>/dev/null | awk '/Current DC/ {print $3}'`
if [ "$host" = "$dc" ]; then
    echo "$host" > DC
fi

+dlm_dump > $DLM_DUMP_F 2>&1
+sys_stats > $SYSSTATS_F 2>&1
+
debug "Sanitizing files"
#
# replace sensitive info with '****'
#
for f in `basename $cluster_cf` $CIB_F $CIB_TXT_F $CIB_F.live pengine/*; do
    if [ -f "$f" ]; then
        sanitize $f
    fi
done

# Grab logs
#debug "Gathering logs: $logfile $EXTRA_LOGS"
trap '[ -z "$pattfile" ] || rm -f "$pattfile"' 0
pattfile=`mktemp` ||
    fatal "cannot create temporary files"
for p in $LOG_PATTERNS; do
    echo "$p"
done > $pattfile
-f "$l" ]; then # Not a file continue elif [ -f "$b" ]; then # We already have it continue fi dumplogset "$l" $LOG_START $LOG_END > "$b" echo "Log patterns $host:" > $ANALYSIS_F cat $b | grep -f $pattfile >> $ANALYSIS_F done rm -f $pattfile trap "" 0 # Purge files containing no information for f in `ls -1`; do if [ -d "$f" ]; then continue elif [ ! -s "$f" ]; then debug "Removing empty file: $f" rm -f $f fi done # Parse for events for l in $logfile $EXTRA_LOGS; do node_events `basename $logfile` > $EVENTS_F # Link the first logfile to a standard name if it doesn't yet exist if [ ! -e $HALOG_F ]; then ln -s `basename $l` $HALOG_F fi done if [ "$REPORT_MASTER" != "$host" ]; then debug "Streaming report back to $REPORT_MASTER" (cd $REPORT_HOME && tar cf - $host) if [ "$REMOVE" = "1" ]; then cd rm -rf $REPORT_HOME fi fi diff --git a/tools/report.common b/tools/report.common index b700a46df0..991c165110 100644 --- a/tools/report.common +++ b/tools/report.common @@ -1,629 +1,633 @@ # Copyright (C) 2007 Dejan Muhamedagic # Almost everything as part of hb_report # Copyright (C) 2010 Andrew Beekhof # Cleanups, refactoring, extensions # # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # host=`uname -n` shorthost=`echo $host | sed s:\\\\..*::` if [ -z $verbose ]; then verbose=0 fi # Target Files EVENTS_F=events.txt ANALYSIS_F=analysis.txt DESCRIPTION_F=description.txt HALOG_F=cluster-log.txt BT_F=backtraces.txt SYSINFO_F=sysinfo.txt SYSSTATS_F=sysstats.txt DLM_DUMP_F=dlm_dump.txt CRM_MON_F=crm_mon.txt MEMBERSHIP_F=members.txt HB_UUID_F=hb_uuid.txt HOSTCACHE=hostcache CRM_VERIFY_F=crm_verify.txt PERMISSIONS_F=permissions.txt CIB_F=cib.xml CIB_TXT_F=cib.txt EVENT_PATTERNS=" state do_state_transition membership pcmk_peer_update.*(lost|memb): -quorum crmd.*ais.disp.*quorum.(lost|ac?quir) +quorum crmd.*crm_update_quorum|crmd.*ais.disp.*quorum.(lost|ac?quir) pause Process.pause.detected resources lrmd.*rsc:(start|stop) -stonith stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=) -start_stop Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete +stonith te_fence_node|stonith-ng.*log_oper.*report|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=) +start_stop Starting.heartbeat|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete " PACKAGES="pacemaker pacemaker-libs libpacemaker3 pacemaker-pygui pacemaker-pymgmt pymgmt-client openais libopenais2 libopenais3 corosync libcorosync4 resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord heartbeat heartbeat-common heartbeat-resources libheartbeat2 ocfs2-tools ocfs2-tools-o2cb ocfs2console ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace drbd-heartbeat 
PACKAGES="pacemaker pacemaker-libs libpacemaker3 pacemaker-pygui pacemaker-pymgmt pymgmt-client
openais libopenais2 libopenais3 corosync libcorosync4
resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord
heartbeat heartbeat-common heartbeat-resources libheartbeat2
ocfs2-tools ocfs2-tools-o2cb ocfs2console
ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace
drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace
drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen
lvm2 lvm2-clvm cmirrord
libdlm libdlm2 libdlm3
hawk ruby lighttpd
kernel-default kernel-pae kernel-xen
glibc
"

#
# keep the user posted
#
log() {
    printf "%-10s $*\n" "$shorthost:" 1>&2
}
debug() {
    if [ $verbose -gt 0 ]; then
        log "Debug: $*"
    fi
}
info() {
    log "$*"
}
warning() {
    log "WARN: $*"
}
fatal() {
    log "ERROR: $*"
    exit 1
}

detect_host() {
    local_state_dir=/var

    if [ -d $local_state_dir/run/crm ]; then
        CRM_STATE_DIR=$local_state_dir/run/crm
    else
        for d in `find / -type d -name run`; do
            if [ -d $d/crm ]; then
                CRM_STATE_DIR=$d/crm
                local_state_dir=`dirname $d`
            fi
        done
    fi
    if [ ! -d $CRM_STATE_DIR ]; then
        fatal "Non-standard Pacemaker installation: State directory not found"
    fi
    debug "Machine state directory: $local_state_dir"
    debug "State files located in: $CRM_STATE_DIR"

    if [ -d $local_state_dir/lib/pengine ]; then
        PE_STATE_DIR=$local_state_dir/lib/pengine
    else
        for d in `find / -type d -name pengine`; do
            PE_STATE_DIR=$d
            break
        done
    fi
    if [ -z $PE_STATE_DIR ]; then
        fatal "Non-standard Pacemaker installation: Policy Engine directory not found"
    fi
    debug "PE files located in: $PE_STATE_DIR"

    HA_STATE_DIR=$local_state_dir/lib/heartbeat
    if [ ! -d $HA_STATE_DIR ]; then
        # TODO: Go looking
        fatal "Non-standard Heartbeat installation: Heartbeat state directory not found"
    fi
    debug "Heartbeat state files located in: $HA_STATE_DIR"

    CRM_CORE_DIRS=""
    for d in $HA_STATE_DIR/cores $local_state_dir/lib/corosync $local_state_dir/lib/openais; do
        if [ -d $d ]; then
            CRM_CORE_DIRS="$CRM_CORE_DIRS $d"
        fi
    done
    debug "Core files located under: $CRM_CORE_DIRS"

    for d in /usr/lib/heartbeat /usr/lib64/heartbeat; do
        if [ -f $d/crmd ]; then
            CRM_DAEMON_DIR=$d
            break
        fi
    done

    if [ ! -d $CRM_DAEMON_DIR ]; then
        for d in `find / -type d -name heartbeat`; do
            if [ -f $d/crmd ]; then
                CRM_DAEMON_DIR=$d
                break
            fi
        done
    fi

    if [ ! -d $CRM_DAEMON_DIR ]; then
        for f in `find / -type f -name crmd`; do
            if [ -f $f ]; then
                CRM_DAEMON_DIR=`dirname $f`
            fi
        done
    fi
    if [ ! -d $CRM_DAEMON_DIR ]; then
        fatal "Non-standard Pacemaker installation: daemons not found"
    fi
    debug "Pacemaker daemons located under: $CRM_DAEMON_DIR"
}

time2str() {
    perl -e "use POSIX; print strftime('%x %X',localtime($1));"
}

get_time() {
    perl -e "\$time='$*';" -e '
        eval "use Date::Parse";
        if (!$@) {
            print str2time($time);
        } else {
            eval "use Date::Manip";
            if (!$@) {
                print UnixDate(ParseDateString($time), "%s");
            }
        }
    '
}

get_time_() {
    warning "No time format specified for: $*"
}

get_time_syslog() {
    awk '{print $1,$2,$3}'
}

get_time_legacy() {
    awk '{print $2}' | sed 's/_/ /'
}

get_time_format() {
    t=0
    l=""
    func=""
    trycnt=10
    while [ $trycnt -gt 0 ] && read l; do
        t=$(get_time `echo $l | get_time_syslog`)
        if [ "$t" ]; then
            func="syslog"
            break
        fi
        t=$(get_time `echo $l | get_time_legacy`)
        if [ "$t" ]; then
            func="legacy"
            break
        fi
        trycnt=$(($trycnt-1))
    done
    #debug "Logfile uses the $func time format"
    echo $func
}

linetime() {
    format=`get_time_format < $1`
    l=`tail -n +$2 $1 | grep ":[0-5][0-9]:" | head -1 | get_time_$format`
    get_time "$l"
}

# Find pattern in a logfile somewhere
# Return $max ordered results by age (newest first)
findmsg() {
    max=$1
    pattern=$2
    logfiles=""
    syslogdirs="/var/log /var/logs /var/syslog /var/adm /var/log/ha /var/log/cluster"

    for d in $syslogdirs; do
        if [ -d $d ]; then
            logfiles=`grep -l -e "$pattern" $d/*` && break
        fi
    done 2>/dev/null

    if [ "x$logfiles" != "x" ]; then
        list=`ls -t $logfiles | head -n $max | tr '\n' ' '`
        echo $list
        debug "Pattern \'$pattern\' found in: [ $list ]"
    else
        debug "Pattern \'$pattern\' not found anywhere"
    fi
}

node_events() {
    Epatt=`echo "$EVENT_PATTERNS" |
        while read title p; do [ -n "$p" ] && echo -n "|$p"; done |
        sed 's/.//'
    `
    grep -E "$Epatt" $1
}

pickfirst() {
    for x; do
        which $x >/dev/null 2>&1 && {
            echo $x
            return 0
        }
    done
    return 1
}

shrink() {
    src=$*
    target=$1.tar
    tar_options=cf
    variant=`pickfirst bzip2 gzip false`
    case $variant in
        bz*)
            tar_options="jcf"
            target="$target.bz2"
            ;;
        gz*)
            tar_options="zcf"
            target="$target.gz"
            ;;
        *)
            warning "Could not find a compression program, the resulting tarball may be huge"
            ;;
    esac
    tar $tar_options $target $src >/dev/null 2>&1
    echo $target
}
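# The next helper, findln_by_time, binary-searches a log for the line at or
# around a given epoch time: linetime dates line N, and when a probe line has
# no parsable timestamp the whole [first,last] window is slid back one line
# (up to 10 attempts) so the probe lands on a datable line.
# Typical use (hypothetical log path):
#     t=`get_time "Mar 08 10:00:00"`
#     ln=`findln_by_time /var/log/ha-log $t`
#     dumplog /var/log/ha-log $ln $(($ln+20))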
findln_by_time() {
    local logf=$1
    local tm=$2
    local first=1
    local last=`wc -l < $logf`

    while [ $first -le $last ]; do
        mid=$((($last+$first)/2))
        trycnt=10
        while [ $trycnt -gt 0 ]; do
            tmid=`linetime $logf $mid`
            [ "$tmid" ] && break
            warning "cannot extract time: $logf:$mid; will try the next one"
            trycnt=$(($trycnt-1))
            # shift the whole first-last segment
            first=$(($first-1))
            last=$(($last-1))
            mid=$((($last+$first)/2))
        done
        if [ -z "$tmid" ]; then
            warning "giving up on log..."
            return
        fi
        if [ $tmid -gt $tm ]; then
            last=$(($mid-1))
        elif [ $tmid -lt $tm ]; then
            first=$(($mid+1))
        else
            break
        fi
    done
    echo $mid
}

dumplog() {
    local logf=$1
    local from_line=$2
    local to_line=$3

    [ "$from_line" ] || return
    tail -n +$from_line $logf |
        if [ "$to_line" ]; then
            head -$(($to_line-$from_line+1))
        else
            cat
        fi
}

#
# find log/set of logs which are interesting for us
#

#
# find log slices
#
find_decompressor() {
    if echo $1 | grep -qs 'bz2$'; then
        echo "bzip2 -dc"
    elif echo $1 | grep -qs 'gz$'; then
        echo "gzip -dc"
    else
        echo "cat"
    fi
}

#
# check if the log contains a piece of our segment
#
is_our_log() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    local cat=`find_decompressor $logf`
    local format=`$cat $logf | get_time_format`
    local first_time=$(get_time "`$cat $logf | head -1 | get_time_$format`")
    local last_time=$(get_time "`$cat $logf | tail -1 | get_time_$format`")

    if [ x = "x$first_time" -o x = "x$last_time" ]; then
        return 0  # skip (empty log?)
    fi
    if [ $from_time -gt $last_time ]; then
        # we shouldn't get here anyway if the logs are in order
        return 2  # we're past good logs; exit
    fi
    if [ $from_time -ge $first_time ]; then
        return 3  # this is the last good log
    fi
    # have to go further back
    if [ x = "x$to_time" -o $to_time -ge $first_time ]; then
        return 1  # include this log
    else
        return 0  # don't include this log
    fi
}

#
# go through archived logs (timewise backwards) and see if there
# are lines belonging to us
# (we rely on untouched log files, i.e. that modify time
# hasn't been changed)
#
arch_logs() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    # look for files such as: ha-log-20090308 or
    # ha-log-20090308.gz (.bz2) or ha-log.0, etc
    ls -t $logf $logf*[0-9z] 2>/dev/null |
    while read next_log; do
        is_our_log $next_log $from_time $to_time
        case $? in
            0) ;;  # noop, continue
            1) echo $next_log  # include log and continue
               debug "Found log $next_log"
               ;;
            2) break;;  # don't go through older logs!
            3) echo $next_log  # include log and continue
               debug "Found log $next_log"
               break
               ;;  # don't go through older logs!
        esac
    done
}

#
# print part of the log
#
drop_tmp_file() {
    [ -z "$tmp" ] || rm -f "$tmp"
}
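# Return-code legend for is_our_log, as consumed by arch_logs above
# (arch_logs walks `ls -t` output, i.e. newest file first):
#   0 - no usable overlap (empty log, or log entirely newer than the window):
#       skip it and keep scanning
#   1 - the window starts before this log begins: include it and keep
#       scanning older files
#   2 - the log ends before the window starts: stop, older files only get older
#   3 - the window starts inside this log: include it and stop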
print_logseg() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    # uncompress to a temp file (if necessary)
    local cat=`find_decompressor $logf`
    if [ "$cat" != "cat" ]; then
        tmp=`mktemp`
        $cat $logf > $tmp
        trap drop_tmp_file 0
        sourcef=$tmp
    else
        sourcef=$logf
        tmp=""
    fi

    if [ "$from_time" = 0 ]; then
        FROM_LINE=1
    else
        FROM_LINE=`findln_by_time $sourcef $from_time`
    fi
    if [ -z "$FROM_LINE" ]; then
        warning "couldn't find line for time $from_time; corrupt log file?"
        return
    fi

    TO_LINE=""
    if [ "$to_time" != 0 ]; then
        TO_LINE=`findln_by_time $sourcef $to_time`
        if [ -z "$TO_LINE" ]; then
            warning "couldn't find line for time $to_time; corrupt log file?"
            return
        fi
    fi
-    dumplog $sourcef $FROM_LINE $TO_LINE
-    log "Including segment [$FROM_LINE-$TO_LINE] from $logf"
+    if [ -z "$TO_LINE" -o $FROM_LINE -le $TO_LINE ]; then
+        dumplog $sourcef $FROM_LINE $TO_LINE
+        log "Including segment [$FROM_LINE-$TO_LINE] from $logf"
+    else
+        log "Segment from $logf finished before it started, line: $FROM_LINE to $TO_LINE"
+    fi
    drop_tmp_file
    trap "" 0
}

#
# find log/set of logs which are interesting for us
#
dumplogset() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    local logf_set=`arch_logs $logf $from_time $to_time`
    if [ x = "x$logf_set" ]; then
        return
    fi

    local num_logs=`echo "$logf_set" | wc -l`
    local oldest=`echo $logf_set | awk '{print $NF}'`
    local newest=`echo $logf_set | awk '{print $1}'`
    local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'`

    # the first logfile: from $from_time to $to_time (or end)
    # logfiles in the middle: all
    # the last logfile: from beginning to $to_time (or end)
    case $num_logs in
        1) print_logseg $newest $from_time $to_time;;
        *)
            print_logseg $oldest $from_time 0
            for f in $mid_logfiles; do
                `find_decompressor $f` $f
                debug "including complete $f logfile"
            done
            print_logseg $newest 0 $to_time
            ;;
    esac
}

# cut out a stanza
getstanza() {
    awk -v name="$1" '
        !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ {  # stanza start
            if ($1 == name)
                in_stanza = 1
        }
        in_stanza { print }
        in_stanza && NF==1 && $1 == "}" { exit }
    '
}

# supply stanza in $1 and variable name in $2
# (stanza is optional)
getcfvar() {
    cf_type=$1; shift;
    cf_var=$1; shift;
    cf_file=$*

    [ -f "$cf_file" ] || return
    case $cf_type in
        corosync|openais)
            sed 's/#.*//' < $cf_file |
                if [ $# -eq 2 ]; then
                    getstanza "$cf_var"
                    shift 1
                else
                    cat
                fi |
                awk -v varname="$cf_var" '
                    NF==2 && match($1,varname":$")==1 { print $2; exit; }
                '
            ;;
        heartbeat)
            sed 's/#.*//' < $cf_file |
                grep -w "^$cf_var" |
                sed 's/^[^[:space:]]*[[:space:]]*//'
            ;;
        logd)
            sed 's/#.*//' < $cf_file |
                grep -w "^$cf_var" |
                sed 's/^[^[:space:]]*[[:space:]]*//'
            ;;
    esac
}

#
# figure out the cluster type, depending on the process list
# and existence of configuration files
#
get_cluster_type() {
    if
        ps -ef | egrep -qs '[c]orosync'
    then
        stack="corosync"
    elif
        ps -ef | egrep -qs '[a]isexec'
    then
        stack="openais"
    elif
-        ps -ef | egrep -qs '[h]eartbeat'
+        ps -ef | grep -v -e grep -e "eartbeat/[clasp]" | egrep -qs '[h]eartbeat'
    then
        stack="heartbeat"

    # Now we're guessing...
    # TODO: Technically these could be anywhere :-/
    elif [ -f /etc/corosync/corosync.conf ]; then
        stack="corosync"
    elif [ -f /etc/ais/openais.conf ]; then
        stack="openais"
    else
        stack="heartbeat"
    fi

-    debug "Detected the $stack cluster stack"
+    debug "Detected the '$stack' cluster stack"
    echo $stack
}

find_cluster_cf() {
    case $1 in
        corosync)
            best_size=0
            best_file=""
            # TODO: Technically these could be anywhere :-/
            for cf in /etc/ais/openais.conf /etc/corosync/corosync.conf; do
                if [ -f $cf ]; then
                    size=`wc -l $cf | awk '{print $1}'`
                    if [ $size -gt $best_size ]; then
                        best_size=$size
                        best_file=$cf
                    fi
                fi
            done
            echo "$best_file"
            ;;
        openais)
            # TODO: Technically it could be anywhere :-/
            cf="/etc/ais/openais.conf"
            if [ -f $cf ]; then
                echo "$cf"
            fi
            ;;
        heartbeat)
            cf="/etc/ha.d/ha.cf"
            if [ -f $cf ]; then
                echo "$cf"
            fi
            ;;
        *)
            warning "Unknown cluster type: $1"
            ;;
    esac
}

#
# check for the major prereq for a) parameter parsing and b)
# parsing logs
#
t=`get_time "12:00"`
if [ "$t" = "" ]; then
    fatal "please install the perl Date::Parse module"
fi

detect_host
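# Appendix-style illustration: pulling one value out of a corosync-style
# config with getcfvar. The sample file content is invented; the helper
# itself is exactly the one defined above.
demo_getcfvar() {
    cf=`mktemp` || return 1
    cat > $cf <<'EOF'
totem {
    version: 2
    token: 5000
}
EOF
    getcfvar corosync token $cf   # prints: 5000
    rm -f $cf
}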