diff --git a/crm/cib/main.c b/crm/cib/main.c index 0d41590e67..774ed77140 100644 --- a/crm/cib/main.c +++ b/crm/cib/main.c @@ -1,621 +1,621 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* #include */ #include /* #include */ #include #include #include #include #include #include #include #include #include #if HAVE_LIBXML2 # include #endif #ifdef HAVE_GETOPT_H # include #endif extern int init_remote_listener(int port); extern gboolean ccm_connect(void); gboolean cib_shutdown_flag = FALSE; gboolean stand_alone = FALSE; gboolean per_action_cib = FALSE; enum cib_errors cib_status = cib_ok; extern char *ccm_transition_id; extern void oc_ev_special(const oc_ev_t *, oc_ev_class_t , int ); GMainLoop* mainloop = NULL; const char* crm_system_name = CRM_SYSTEM_CIB; const char* cib_root = WORKING_DIR; char *cib_our_uname = NULL; oc_ev_t *cib_ev_token; gboolean preserve_status = FALSE; gboolean cib_writes_enabled = TRUE; void usage(const char* cmd, int exit_status); int cib_init(void); gboolean cib_register_ha(ll_cluster_t *hb_cluster, const char *client_name); gboolean cib_shutdown(int nsig, gpointer unused); void cib_ha_connection_destroy(gpointer 
user_data); gboolean startCib(const char *filename); extern gboolean cib_msg_timeout(gpointer data); extern int write_cib_contents(gpointer p); GHashTable *client_list = NULL; GHashTable *ccm_membership = NULL; GHashTable *peer_hash = NULL; ll_cluster_t *hb_conn = NULL; GTRIGSource *cib_writer = NULL; char *channel1 = NULL; char *channel2 = NULL; char *channel3 = NULL; char *channel4 = NULL; char *channel5 = NULL; #define OPTARGS "aswr:V?" void cib_cleanup(void); static void cib_diskwrite_complete(gpointer userdata, int status, int signo, int exitcode) { if(exitcode != LSB_EXIT_OK || signo != 0 || status != 0) { crm_err("Disk write failed: status=%d, signo=%d, exitcode=%d", status, signo, exitcode); if(cib_writes_enabled) { crm_err("Disabling disk writes after write failure"); cib_writes_enabled = FALSE; } } else { crm_debug_2("Disk write passed"); } } int main(int argc, char ** argv) { int flag; int rc = 0; int argerr = 0; #ifdef HAVE_GETOPT_H int option_index = 0; static struct option long_options[] = { {"per-action-cib", 0, 0, 'a'}, {"stand-alone", 0, 0, 's'}, {"disk-writes", 0, 0, 'w'}, {"cib-root", 1, 0, 'r'}, {"verbose", 0, 0, 'V'}, {"help", 0, 0, '?'}, {0, 0, 0, 0} }; #endif crm_log_init(crm_system_name, LOG_INFO, TRUE, FALSE, 0, NULL); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, cib_shutdown, NULL, NULL); cib_writer = G_main_add_tempproc_trigger( G_PRIORITY_LOW, write_cib_contents, "write_cib_contents", NULL, NULL, NULL, cib_diskwrite_complete); EnableProcLogging(); - set_sigchld_proctrack(G_PRIORITY_HIGH); + set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME); client_list = g_hash_table_new(g_str_hash, g_str_equal); ccm_membership = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, NULL); peer_hash = g_hash_table_new_full( g_str_hash, g_str_equal,g_hash_destroy_str, g_hash_destroy_str); while (1) { #ifdef HAVE_GETOPT_H flag = getopt_long(argc, argv, OPTARGS, long_options, &option_index); #else flag = getopt(argc, argv, 
OPTARGS); #endif if (flag == -1) break; switch(flag) { case 'V': alter_debug(DEBUG_INC); break; case 's': stand_alone = TRUE; preserve_status = TRUE; cib_writes_enabled = FALSE; cl_log_enable_stderr(1); break; case '?': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; case 'f': per_action_cib = TRUE; break; case 'w': cib_writes_enabled = TRUE; break; case 'r': cib_root = optarg; break; default: ++argerr; break; } } crm_info("Retrieval of a per-action CIB: %s", per_action_cib?"enabled":"disabled"); if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name,LSB_EXIT_GENERIC); } /* read local config file */ rc = cib_init(); CRM_CHECK(g_hash_table_size(client_list) == 0, crm_warn("Not all clients gone at exit")); cib_cleanup(); if(hb_conn) { hb_conn->llc_ops->delete(hb_conn); } crm_info("Done"); return rc; } void cib_cleanup(void) { g_hash_table_destroy(ccm_membership); g_hash_table_destroy(client_list); g_hash_table_destroy(peer_hash); crm_free(ccm_transition_id); crm_free(cib_our_uname); #if HAVE_LIBXML2 xmlCleanupParser(); #endif crm_free(channel1); crm_free(channel2); crm_free(channel3); crm_free(channel4); crm_free(channel5); } unsigned long cib_num_ops = 0; const char *cib_stat_interval = "10min"; unsigned long cib_num_local = 0, cib_num_updates = 0, cib_num_fail = 0; unsigned long cib_bad_connects = 0, cib_num_timeouts = 0; longclock_t cib_call_time = 0; gboolean cib_stats(gpointer data); gboolean cib_stats(gpointer data) { int local_log_level = LOG_DEBUG; static unsigned long last_stat = 0; unsigned int cib_calls_ms = 0; static unsigned long cib_stat_interval_ms = 0; if(cib_stat_interval_ms == 0) { cib_stat_interval_ms = crm_get_msec(cib_stat_interval); } cib_calls_ms = longclockto_ms(cib_call_time); if((cib_num_ops - last_stat) > 0) { unsigned long calls_diff = cib_num_ops - last_stat; double stat_1 = (1000*cib_calls_ms)/calls_diff; local_log_level = LOG_INFO; do_crm_log(local_log_level, "Processed %lu operations" " (%.2fus average, 
%lu%% utilization) in the last %s", calls_diff, stat_1, (100*cib_calls_ms)/cib_stat_interval_ms, cib_stat_interval); } do_crm_log(local_log_level+1, "\tDetail: %lu operations (%ums total)" " (%lu local, %lu updates, %lu failures," " %lu timeouts, %lu bad connects)", cib_num_ops, cib_calls_ms, cib_num_local, cib_num_updates, cib_num_fail, cib_bad_connects, cib_num_timeouts); last_stat = cib_num_ops; cib_call_time = 0; return TRUE; } static void ccm_connection_destroy(gpointer user_data) { crm_err("CCM connection failed... blocking while we reconnect"); CRM_ASSERT(ccm_connect()); return; } extern int current_instance; gboolean ccm_connect(void) { gboolean did_fail = TRUE; int num_ccm_fails = 0; int max_ccm_fails = 30; int ret; int cib_ev_fd; while(did_fail) { did_fail = FALSE; crm_info("Registering with CCM..."); ret = oc_ev_register(&cib_ev_token); if (ret != 0) { did_fail = TRUE; } if(did_fail == FALSE) { crm_debug_3("Setting up CCM callbacks"); ret = oc_ev_set_callback( cib_ev_token, OC_EV_MEMB_CLASS, cib_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if(did_fail == FALSE) { oc_ev_special(cib_ev_token, OC_EV_MEMB_CLASS, 0); crm_debug_3("Activating CCM token"); ret = oc_ev_activate(cib_ev_token, &cib_ev_fd); if (ret != 0){ crm_warn("CCM Activation failed"); did_fail = TRUE; } } if(did_fail) { num_ccm_fails++; oc_ev_unregister(cib_ev_token); if(num_ccm_fails < max_ccm_fails){ crm_warn("CCM Connection failed %d times (%d max)", num_ccm_fails, max_ccm_fails); sleep(1); } else { crm_err("CCM Activation failed %d (max) times", num_ccm_fails); return FALSE; } } } current_instance = 0; crm_debug("CCM Activation passed... all set to go!"); G_main_add_fd(G_PRIORITY_HIGH, cib_ev_fd, FALSE, cib_ccm_dispatch, cib_ev_token, ccm_connection_destroy); return TRUE; } int cib_init(void) { gboolean was_error = FALSE; if(startCib("cib.xml") == FALSE){ crm_crit("Cannot start CIB... 
terminating"); exit(1); } if(stand_alone == FALSE) { hb_conn = ll_cluster_new("heartbeat"); if(cib_register_ha(hb_conn, CRM_SYSTEM_CIB) == FALSE) { crm_crit("Cannot sign in to heartbeat... terminating"); exit(1); } } else { cib_our_uname = crm_strdup("localhost"); } channel1 = crm_strdup(cib_channel_callback); was_error = init_server_ipc_comms( channel1, cib_client_connect_null, default_ipc_connection_destroy); channel2 = crm_strdup(cib_channel_ro); was_error = was_error || init_server_ipc_comms( channel2, cib_client_connect_rw_ro, default_ipc_connection_destroy); channel3 = crm_strdup(cib_channel_rw); was_error = was_error || init_server_ipc_comms( channel3, cib_client_connect_rw_ro, default_ipc_connection_destroy); channel4 = crm_strdup(cib_channel_rw_synchronous); was_error = was_error || init_server_ipc_comms( channel4, cib_client_connect_rw_synch, default_ipc_connection_destroy); channel5 = crm_strdup(cib_channel_ro_synchronous); was_error = was_error || init_server_ipc_comms( channel5, cib_client_connect_ro_synch, default_ipc_connection_destroy); if(stand_alone) { if(was_error) { crm_err("Couldnt start"); return 1; } cib_is_master = TRUE; /* Create the mainloop and run it... 
*/ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); /* Gmain_timeout_add(crm_get_msec("10s"), cib_msg_timeout, NULL); */ /* Gmain_timeout_add( */ /* crm_get_msec(cib_stat_interval), cib_stats, NULL); */ g_main_run(mainloop); return_to_orig_privs(); return 0; } if(was_error == FALSE) { crm_debug_3("Be informed of CRM Client Status changes"); if (HA_OK != hb_conn->llc_ops->set_cstatus_callback( hb_conn, cib_client_status_callback, hb_conn)) { crm_err("Cannot set cstatus callback: %s", hb_conn->llc_ops->errmsg(hb_conn)); was_error = TRUE; } else { crm_debug_3("Client Status callback set"); } } if(was_error == FALSE) { was_error = (ccm_connect() == FALSE); } if(was_error == FALSE) { /* Async get client status information in the cluster */ crm_debug_3("Requesting an initial dump of CIB client_status"); hb_conn->llc_ops->client_status( hb_conn, NULL, CRM_SYSTEM_CIB, -1); /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); Gmain_timeout_add(crm_get_msec("10s"), cib_msg_timeout, NULL); Gmain_timeout_add( crm_get_msec(cib_stat_interval), cib_stats, NULL); g_main_run(mainloop); return_to_orig_privs(); } else { crm_err("Couldnt start all communication channels, exiting."); } return 0; } void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-%s]\n", cmd, OPTARGS); fprintf(stream, "\t--%s (-%c)\t\tTurn on debug info." 
" Additional instances increase verbosity\n", "verbose", 'V'); fprintf(stream, "\t--%s (-%c)\t\tThis help message\n", "help", '?'); fprintf(stream, "\t--%s (-%c)\tAdvanced use only\n", "per-action-cib", 'a'); fprintf(stream, "\t--%s (-%c)\tAdvanced use only\n", "stand-alone", 's'); fprintf(stream, "\t--%s (-%c)\tAdvanced use only\n", "disk-writes", 'w'); fprintf(stream, "\t--%s (-%c)\t\tAdvanced use only\n", "cib-root", 'r'); fflush(stream); exit(exit_status); } gboolean cib_register_ha(ll_cluster_t *hb_cluster, const char *client_name) { const char *uname = NULL; crm_info("Signing in with Heartbeat"); if (hb_cluster->llc_ops->signon(hb_cluster, client_name)!= HA_OK) { crm_err("Cannot sign on with heartbeat: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of CIB messages"); if (HA_OK != hb_cluster->llc_ops->set_msg_callback( hb_cluster, T_CIB, cib_peer_callback, hb_cluster)){ crm_err("Cannot set msg callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Finding our node name"); if ((uname = hb_cluster->llc_ops->get_mynodeid(hb_cluster)) == NULL) { crm_err("get_mynodeid() failed"); return FALSE; } cib_our_uname = crm_strdup(uname); crm_info("FSA Hostname: %s", cib_our_uname); crm_debug_3("Adding channel to mainloop"); G_main_add_IPC_Channel( G_PRIORITY_DEFAULT, hb_cluster->llc_ops->ipcchan(hb_cluster), FALSE, cib_ha_dispatch, hb_cluster /* userdata */, cib_ha_connection_destroy); return TRUE; } void cib_ha_connection_destroy(gpointer user_data) { if(cib_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); } else { crm_err("Heartbeat connection lost! Exiting."); } uninitializeCib(); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(LSB_EXIT_OK); } } static void disconnect_cib_client(gpointer key, gpointer value, gpointer user_data) { cib_client_t *a_client = value; crm_debug_2("Processing client %s/%s... 
send=%d, recv=%d", crm_str(a_client->name), crm_str(a_client->channel_name), (int)a_client->channel->send_queue->current_qlen, (int)a_client->channel->recv_queue->current_qlen); if(a_client->channel->ch_status == IPC_CONNECT) { a_client->channel->ops->resume_io(a_client->channel); if(a_client->channel->send_queue->current_qlen != 0 || a_client->channel->recv_queue->current_qlen != 0) { crm_info("Flushed messages to/from %s/%s... send=%d, recv=%d", crm_str(a_client->name), crm_str(a_client->channel_name), (int)a_client->channel->send_queue->current_qlen, (int)a_client->channel->recv_queue->current_qlen); } } if(a_client->channel->ch_status == IPC_CONNECT) { crm_warn("Disconnecting %s/%s...", crm_str(a_client->name), crm_str(a_client->channel_name)); a_client->channel->ops->disconnect(a_client->channel); } } extern gboolean cib_process_disconnect( IPC_Channel *channel, cib_client_t *cib_client); gboolean cib_shutdown(int nsig, gpointer unused) { if(cib_shutdown_flag == FALSE) { cib_shutdown_flag = TRUE; crm_debug("Disconnecting %d clients", g_hash_table_size(client_list)); g_hash_table_foreach(client_list, disconnect_cib_client, NULL); crm_info("Disconnected %d clients", g_hash_table_size(client_list)); cib_process_disconnect(NULL, NULL); } else { crm_info("Waiting for %d clients to disconnect...", g_hash_table_size(client_list)); } return TRUE; } gboolean startCib(const char *filename) { gboolean active = FALSE; crm_data_t *cib = readCibXmlFile(cib_root, filename, !preserve_status); CRM_ASSERT(cib != NULL); if(activateCibXml(cib, TRUE) == 0) { int port = 0; active = TRUE; ha_msg_value_int(cib, "remote_access_port", &port); init_remote_listener(port); crm_info("CIB Initialization completed successfully"); if(per_action_cib) { uninitializeCib(); } } return active; } diff --git a/crm/crmd/control.c b/crm/crmd/control.c index d314765a77..ebe3e633a8 100644 --- a/crm/crmd/control.c +++ b/crm/crmd/control.c @@ -1,907 +1,907 @@ /* * Copyright (C) 2004 Andrew Beekhof * * 
This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *ipc_server = NULL; extern void crmd_ha_connection_destroy(gpointer user_data); gboolean crm_shutdown(int nsig, gpointer unused); gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name); void populate_cib_nodes(ll_cluster_t *hb_cluster, gboolean with_client_status); GHashTable *ipc_clients = NULL; GTRIGSource *fsa_source = NULL; /* A_HA_CONNECT */ enum crmd_fsa_input do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean registered = FALSE; if(action & A_HA_DISCONNECT) { if(fsa_cluster_conn != NULL) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); fsa_cluster_conn->llc_ops->signoff( fsa_cluster_conn, FALSE); } crm_info("Disconnected from Heartbeat"); } if(action & A_HA_CONNECT) { if(fsa_cluster_conn == NULL) { fsa_cluster_conn = ll_cluster_new("heartbeat"); } /* make sure we are disconnected first */ fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE); registered = register_with_ha( fsa_cluster_conn, crm_system_name); if(registered == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); 
return I_NULL; } clear_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to Heartbeat"); } if(action & ~(A_HA_CONNECT|A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } /* A_SHUTDOWN */ enum crmd_fsa_input do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int lpc = 0; gboolean continue_shutdown = TRUE; struct crm_subsystem_s *subsystems[] = { pe_subsystem, te_subsystem }; /* just in case */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); for(lpc = 0; lpc < DIMOF(subsystems); lpc++) { struct crm_subsystem_s *a_subsystem = subsystems[lpc]; if(is_set(fsa_input_register, a_subsystem->flag_connected)) { crm_info("Terminating the %s", a_subsystem->name); if(stop_subsystem(a_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", a_subsystem->name); clear_bit_inplace(fsa_input_register, a_subsystem->flag_connected); } continue_shutdown = FALSE; } } if(continue_shutdown == FALSE) { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(NULL); } return I_NULL; } /* A_SHUTDOWN_REQ */ enum crmd_fsa_input do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { HA_Message *msg = NULL; crm_info("Sending shutdown request to DC: %s", crm_str(fsa_our_dc)); msg = create_request( CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); /* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ if(send_request(msg, NULL) == FALSE) { if(AM_I_DC) { crm_info("Processing shutdown locally"); } else { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } return I_NULL; } extern char *max_generation_from; extern crm_data_t *max_generation_xml; extern GHashTable *meta_hash; extern GHashTable *resources; extern GHashTable *voted; void log_connected_client(gpointer key, gpointer 
value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *client = value; crm_err("%s is still connected at exit", client->table_key); } static void free_mem(fsa_data_t *msg_data) { if(fsa_cluster_conn) { fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } slist_destroy(fsa_data_t, fsa_data, fsa_message_queue, crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); ); delete_fsa_input(msg_data); if(ipc_clients) { crm_debug("Number of connected clients: %d", g_hash_table_size(ipc_clients)); /* g_hash_table_foreach(ipc_clients, log_connected_client, NULL); */ g_hash_table_destroy(ipc_clients); } empty_uuid_cache(); free_ccm_cache(fsa_membership_copy); if(te_subsystem->client && te_subsystem->client->client_source) { crm_debug("Full destroy: TE"); G_main_del_IPC_Channel(te_subsystem->client->client_source); } else { crm_debug("Partial destroy: TE"); crmd_ipc_connection_destroy(te_subsystem->client); } crm_free(te_subsystem); if(pe_subsystem->client && pe_subsystem->client->client_source) { crm_debug("Full destroy: PE"); G_main_del_IPC_Channel(pe_subsystem->client->client_source); } else { crm_debug("Partial destroy: PE"); crmd_ipc_connection_destroy(pe_subsystem->client); } crm_free(pe_subsystem); crm_free(cib_subsystem); if(integrated_nodes) { g_hash_table_destroy(integrated_nodes); } if(finalized_nodes) { g_hash_table_destroy(finalized_nodes); } if(confirmed_nodes) { g_hash_table_destroy(confirmed_nodes); } if(crmd_peer_state) { g_hash_table_destroy(crmd_peer_state); } if(meta_hash) { g_hash_table_destroy(meta_hash); } if(resources) { g_hash_table_destroy(resources); } if(voted) { g_hash_table_destroy(voted); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->delete(fsa_lrm_conn); } 
crm_free(integration_timer); crm_free(finalization_timer); crm_free(election_trigger); crm_free(election_timeout); crm_free(shutdown_escalation_timer); crm_free(wait_timer); crm_free(recheck_timer); crm_free(fsa_our_dc_version); crm_free(fsa_our_uuid); crm_free(fsa_our_dc); crm_free(ipc_server); crm_free(max_generation_from); free_xml(max_generation_xml); } /* A_EXIT_0, A_EXIT_1 */ enum crmd_fsa_input do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int exit_code = 0; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if(action & A_EXIT_1) { exit_code = 1; log_level = LOG_ERR; exit_type = "forcefully"; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); if(is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = 2; } if(is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn by Heartbeat"); exit_code = 100; } free_mem(msg_data); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); cl_flush_logs(); exit(exit_code); return I_NULL; } /* A_STARTUP */ enum crmd_fsa_input do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_debug("Registering Signal Handlers"); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, crm_shutdown, NULL, NULL); fsa_source = G_main_add_TriggerHandler( G_PRIORITY_HIGH, crm_fsa_trigger, NULL, NULL); ipc_clients = g_hash_table_new(g_str_hash, g_str_equal); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM); crm_debug("Init server comms"); if(ipc_server == NULL) { ipc_server = crm_strdup(CRM_SYSTEM_CRMD); } was_error = init_server_ipc_comms(ipc_server, 
crmd_client_connect, default_ipc_connection_destroy); /* set up the timers */ crm_malloc0(integration_timer, sizeof(fsa_timer_t)); crm_malloc0(finalization_timer, sizeof(fsa_timer_t)); crm_malloc0(election_trigger, sizeof(fsa_timer_t)); crm_malloc0(election_timeout, sizeof(fsa_timer_t)); crm_malloc0(shutdown_escalation_timer, sizeof(fsa_timer_t)); crm_malloc0(wait_timer, sizeof(fsa_timer_t)); crm_malloc0(recheck_timer, sizeof(fsa_timer_t)); interval = interval * 1000; if(election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if(election_timeout != NULL) { election_timeout->source_id = 0; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if(integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if(finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... 
then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if(shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if(wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 500; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if(recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ crm_malloc0(cib_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(te_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(pe_subsystem, sizeof(struct crm_subsystem_s)); if(cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->path = BIN_DIR; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->command = BIN_DIR"/"CRM_SYSTEM_CIB; cib_subsystem->args = "-VVc"; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if(te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->path = BIN_DIR; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->command = BIN_DIR"/"CRM_SYSTEM_TENGINE; te_subsystem->args = NULL; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if(pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = BIN_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = BIN_DIR"/"CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; 
pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if(was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); crmd_peer_state = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); - set_sigchld_proctrack(G_PRIORITY_HIGH); + set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME); return I_NULL; } /* A_STOP */ enum crmd_fsa_input do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); return I_NULL; } /* A_STARTED */ enum crmd_fsa_input do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(is_set(fsa_input_register, R_CCM_DATA) == FALSE) { crm_info("Delaying start, CCM (%.16llx) not connected", R_CCM_DATA); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM (%.16llx) not connected", R_LRM_CONNECTED); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB (%.16llx) not connected", R_CIB_CONNECTED); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_PEER_DATA) == FALSE) { HA_Message * msg = 
NULL; /* try reading from HA */ crm_info("Delaying start, Peer data (%.16llx) not recieved", R_PEER_DATA); crm_debug_3("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if(msg != NULL) { crm_debug_3("There was a HA message"); crm_msg_del(msg); } crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } crm_info("The local CRM is operational"); clear_bit_inplace(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); return I_NULL; } /* A_RECOVER */ enum crmd_fsa_input do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { set_bit_inplace(fsa_input_register, R_IN_RECOVERY); crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); return I_NULL; } pe_cluster_option crmd_opts[] = { /* name, old-name, validate, default, description */ { XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "10s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed and load of your network." }, { XML_CONFIG_ATTR_RECHECK, NULL, "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "0", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time. To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, { XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." 
}, { XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, }; void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable *options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable *options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", cib_error2string(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if(rc == cib_bad_permissions || rc == cib_bad_digest || rc == cib_bad_config) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit_inplace(fsa_input_register, R_STAYDOWN); } return; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); unpack_instance_attributes( output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, NULL); value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); if(value == NULL) { /* apparently we're not allowed to free the result of 
getenv */ char *param_val = getenv(ENV_PREFIX "" KEY_INITDEAD); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); if(param_val != NULL) { int from_env = crm_get_msec(param_val) / 2; int from_defaults = crm_get_msec(value); if(from_env > from_defaults) { g_hash_table_replace( config_hash, crm_strdup(XML_CONFIG_ATTR_DC_DEADTIME), crm_strdup(param_val)); } } } verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); set_bit_inplace(fsa_input_register, R_READ_CONFIG); crm_debug_3("Triggering FSA: %s", __FUNCTION__); G_main_set_trigger(fsa_source); g_hash_table_destroy(config_hash); } /* A_READCONFIG */ enum crmd_fsa_input do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); add_cib_op_callback(call_id, FALSE, NULL, config_query_callback); crm_debug_2("Querying the CIB... 
call %d", call_id); return I_NULL; } gboolean crm_shutdown(int nsig, gpointer unused) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { crm_info("Requesting shutdown"); set_bit_inplace(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN,I_SHUTDOWN,NULL); if(shutdown_escalation_timer->period_ms < 1) { GHashTable *config_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); const char *value = crmd_pref( config_hash, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_info("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; g_hash_table_destroy(config_hash); } /* cant rely on this... */ crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); exit(LSB_EXIT_OK); } return TRUE; } static void default_cib_update_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("CIB Update failed: %s", cib_error2string(rc)); crm_log_xml_warn(output, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void populate_cib_nodes(ll_cluster_t *hb_cluster, gboolean with_client_status) { int call_id = 0; const char *ha_node = NULL; crm_data_t *cib_node_list = NULL; /* Async get client status information in the cluster */ crm_debug_2("Invoked"); if(with_client_status) { crm_debug_3("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status( fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } crm_info("Requesting the list of configured nodes"); fsa_cluster_conn->llc_ops->init_nodewalk(fsa_cluster_conn); cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); do { const char *ha_node_type = NULL; const char *ha_node_uuid = NULL; crm_data_t *cib_new_node = 
NULL; ha_node = fsa_cluster_conn->llc_ops->nextnode(fsa_cluster_conn); if(ha_node == NULL) { continue; } ha_node_type = fsa_cluster_conn->llc_ops->node_type( fsa_cluster_conn, ha_node); if(safe_str_neq(NORMALNODE, ha_node_type)) { crm_debug("Node %s: skipping '%s'", ha_node, ha_node_type); continue; } ha_node_uuid = get_uuid(fsa_cluster_conn, ha_node); if(ha_node_uuid == NULL) { crm_warn("Node %s: no uuid found", ha_node); continue; } crm_notice("Node: %s (uuid: %s)", ha_node, ha_node_uuid); cib_new_node = create_xml_node(cib_node_list, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, ha_node_uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, ha_node); crm_xml_add(cib_new_node, XML_ATTR_TYPE, ha_node_type); } while(ha_node != NULL); fsa_cluster_conn->llc_ops->end_nodewalk(fsa_cluster_conn); /* Now update the CIB with the list of nodes */ fsa_cib_update( XML_CIB_TAG_NODES, cib_node_list, cib_scope_local|cib_quorum_override|cib_inhibit_bcast, call_id); add_cib_op_callback(call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_debug_2("Complete"); } gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name) { const char *const_uuid = NULL; crm_debug("Signing in with Heartbeat"); if (hb_cluster->llc_ops->signon(hb_cluster, client_name)!= HA_OK) { crm_err("Cannot sign on with heartbeat: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of CRM messages"); if (HA_OK != hb_cluster->llc_ops->set_msg_callback( hb_cluster, T_CRM, crmd_ha_msg_callback, hb_cluster)){ crm_err("Cannot set msg callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of Node Status changes"); if (HA_OK != hb_cluster->llc_ops->set_nstatus_callback( hb_cluster, crmd_ha_status_callback, hb_cluster)){ crm_err("Cannot set nstatus callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of CRM Client Status changes"); if (HA_OK != 
hb_cluster->llc_ops->set_cstatus_callback( hb_cluster, crmd_client_status_callback, hb_cluster)) { crm_err("Cannot set cstatus callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Adding channel to mainloop"); G_main_add_ll_cluster( G_PRIORITY_HIGH, hb_cluster, FALSE, crmd_ha_msg_dispatch, hb_cluster /* userdata */, crmd_ha_connection_destroy); crm_debug_3("Finding our node name"); if ((fsa_our_uname = hb_cluster->llc_ops->get_mynodeid(hb_cluster)) == NULL) { crm_err("get_mynodeid() failed"); return FALSE; } crm_info("Hostname: %s", fsa_our_uname); crm_debug_3("Finding our node uuid"); const_uuid = get_uuid(fsa_cluster_conn, fsa_our_uname); if(const_uuid == NULL) { crm_err("get_uuid_by_name() failed"); return FALSE; } /* copy it so that unget_uuid() doesn't trash the value on us */ fsa_our_uuid = crm_strdup(const_uuid); crm_info("UUID: %s", fsa_our_uuid); populate_cib_nodes(hb_cluster, TRUE); return TRUE; } diff --git a/cts/CIB.py.in b/cts/CIB.py.in index bf030119a6..2a62794f05 100644 --- a/cts/CIB.py.in +++ b/cts/CIB.py.in @@ -1,248 +1,248 @@ #!@PYTHON@ '''CTS: Cluster Testing System: CIB generator ''' __copyright__=''' Author: Jia Ming Pan Copyright (C) 2006 International Business Machines ''' from UserDict import UserDict import sys, time, types, syslog, os, struct, string, signal, traceback from CTS import ClusterManager from CM_hb import HeartbeatCM class CIB: cib_option_template = ''' ''' ipaddr_template = ''' ''' hb_ipaddr_template = ''' ''' lsb_resource = ''' - + ''' dummy_resource_template = ''' ''' clustermon_resource_template = ''' ''' clustermon_location_constraint = ''' ''' master_slave_resource = ''' ''' resource_group_template = '''%s %s %s''' per_node_constraint_template = ''' ''' stonith_resource_template = """ """ cib_template =''' %s %s %s ''' def NextIP(self): fields = string.split(self.CM.Env["IPBase"], '.') fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') self.CM.Env["IPBase"]=ip return ip def 
__init__(self, CM): self.CM = CM #make up crm config cib_options = self.cib_option_template % CM.Env["DoFencing"] #create resources and their constraints resources = "" constraints = "" if self.CM.Env["DoBSC"] == 1: cib_options = cib_options + ''' ''' if self.CM.Env["CIBResource"] != 1: # generate cib self.cts_cib = self.cib_template % (cib_options, resources, constraints) return if self.CM.cluster_monitor == 1: resources += self.clustermon_resource_template constraints += self.clustermon_location_constraint ip1=self.NextIP() ip2=self.NextIP() ip3=self.NextIP() ip1_rsc = self.ipaddr_template % (ip1, ip1, ip1, ip1, ip1) ip2_rsc = self.hb_ipaddr_template % (ip2, ip2, ip2, ip2, ip2) ip3_rsc = self.ipaddr_template % (ip3, ip3, ip3, ip3, ip3) resources += self.resource_group_template % (ip1_rsc, ip2_rsc, ip3_rsc) # lsb resource resources += self.lsb_resource # Mirgator resources += self.dummy_resource_template % \ ("migrator", "migrator", "migrator", "migrator") # per node resource fields = string.split(self.CM.Env["IPBase"], '.') for node in self.CM.Env["nodes"]: ip = self.NextIP() per_node_resources = self.ipaddr_template % \ ("rsc_"+node, "rsc_"+node, "rsc_"+node, "rsc_"+node, ip) per_node_constraint = self.per_node_constraint_template % \ ("rsc_"+node, "rsc_"+node, "rsc_"+node, "rsc_"+node, node) resources += per_node_resources constraints += per_node_constraint # fencing resource nodelist = "" len = 0 for node in self.CM.Env["nodes"]: nodelist += node + " " len = len + 1 stonith_resource = self.stonith_resource_template % \ (self.CM.Env["reset"].stonithtype, self.CM.Env["reset"].configName, self.CM.Env["reset"].configValue) resources += stonith_resource #master slave resource resources += self.master_slave_resource % (2*len, 2, len, 1) # generate cib self.cts_cib = self.cib_template % (cib_options, resources, constraints) def cib(self): return self.cts_cib diff --git a/lib/plugins/lrm/raexecstonith.c b/lib/plugins/lrm/raexecstonith.c index 5ea99fb90f..d5414f42e5 
100644 --- a/lib/plugins/lrm/raexecstonith.c +++ b/lib/plugins/lrm/raexecstonith.c @@ -1,389 +1,390 @@ /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * File: raexecocf.c * Author: Sun Jiang Dong * Copyright (c) 2004 International Business Machines * * This code implements the Resource Agent Plugin Module for LSB style. * It's a part of Local Resource Manager. Currently it's used by lrmd only. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Add it for compiling on OSX */ #include #include #include #include # define PIL_PLUGINTYPE RA_EXEC_TYPE # define PIL_PLUGINTYPE_S "RAExec" # define PIL_PLUGINLICENSE LICENSE_PUBDOM # define PIL_PLUGINLICENSEURL URL_PUBDOM # define PIL_PLUGIN stonith # define PIL_PLUGIN_S "stonith" static PIL_rc close_stonithRA(PILInterface*, void* ud_interface); /* static const char * RA_PATH = STONITH_RA_DIR; */ /* Temporarily use it */ static const char * RA_PATH = HA_LIBHBDIR "/stonith/plugins/stonith/"; /* The begin of exported function list */ static int execra(const char * rsc_id, const char * rsc_type, const char * provider, const char * op_type, const int timeout, GHashTable * params); static uniform_ret_execra_t map_ra_retvalue(int ret_execra , const char * op_type, const char * std_output); static int get_resource_list(GList ** rsc_info); static char* get_resource_meta(const char* rsc_type, const char* provider); static int get_provider_list(const char* op_type, GList ** providers); /* The end of exported function list */ /* The begin of internal used function & data list */ static int get_providers(const char* class_path, const char* op_type, GList ** providers); static void stonithRA_ops_callback(stonithRA_ops_t * op, void * private_data); static int exit_value; /* The end of internal function & data list */ /* Rource agent execution plugin operations */ static struct RAExecOps raops = { execra, map_ra_retvalue, get_resource_list, get_provider_list, get_resource_meta }; static const char META_TEMPLATE[] = "\n" "\n" "\n" "1.0\n" "\n" "%s\n" "\n" "%s\n" "%s\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "2.0\n" "\n" "\n"; static const char * no_parameter_info = ""; #define CHECKMETANULL(ret, which) \ if (ret == NULL) { \ cl_log(LOG_WARNING, "stonithRA plugin: cannot get %s " \ "segment of %s's metadata.", which, rsc_type); \ ret 
= no_parameter_info; \ } #define xmlize(p) \ ( p ? (char *)xmlEncodeEntitiesReentrant(NULL, \ (const unsigned char *)p) \ : NULL ) #define zapxml(p) do { \ if( p ) { \ xmlFree(p); \ } \ } while(0) PIL_PLUGIN_BOILERPLATE2("1.0", Debug); static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static void* OurImports; static void* interfprivate; /* * Our plugin initialization and registration function * It gets called when the plugin gets loaded. */ PIL_rc PIL_PLUGIN_INIT(PILPlugin * us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin * us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interfaces */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &raops, close_stonithRA, &OurInterface, &OurImports, interfprivate); } static PIL_rc close_stonithRA(PILInterface* pif, void* ud_interface) { return PIL_OK; } /* * Most of the oprations will be sent to sotnithd directly, such as 'start', * 'stop', 'monitor'. And others like 'meta-data' will be handled by itself * locally. * Some of important parameters' name: * config_file * config_string */ static int execra(const char * rsc_id, const char * rsc_type, const char * provider, const char * op_type,const int timeout, GHashTable * params) { stonithRA_ops_t * op; int call_id = -1; char buffer_tmp[32]; /* Handling "meta-data" operation in a special way. * Now handle "meta-data" operation locally. * Should be changed in the future? 
*/ if ( 0 == STRNCMP_CONST(op_type, "meta-data")) { char * tmp; tmp = get_resource_meta(rsc_type, provider); printf("%s", tmp); g_free(tmp); exit(0); } g_snprintf(buffer_tmp, sizeof(buffer_tmp), "%s_%d" , "STONITH_RA_EXEC", getpid()); if (ST_OK != stonithd_signon(buffer_tmp)) { cl_log(LOG_ERR, "%s:%d: Cannot sign on the stonithd." , __FUNCTION__, __LINE__); exit(EXECRA_UNKNOWN_ERROR); } stonithd_set_stonithRA_ops_callback(stonithRA_ops_callback, &call_id); /* Temporarily donnot use it, but how to deal with the global OCF * variables. This is a important thing to think about and do. */ /* send the RA operation to stonithd to simulate a RA's actions */ if ( 0==STRNCMP_CONST(op_type, "start") || 0==STRNCMP_CONST(op_type, "stop") ) { cl_log(LOG_INFO , "Try to %s STONITH resource : Device=%s" , op_type, rsc_id, rsc_type); } op = g_new(stonithRA_ops_t, 1); op->ra_name = g_strdup(rsc_type); op->op_type = g_strdup(op_type); op->params = params; op->rsc_id = g_strdup(rsc_id); if (ST_OK != stonithd_virtual_stonithRA_ops(op, &call_id)) { cl_log(LOG_ERR, "sending stonithRA op to stonithd failed."); /* Need to improve the granularity for error return code */ stonithd_signoff(); exit(EXECRA_EXEC_UNKNOWN_ERROR); } /* May be redundant */ /* while (stonithd_op_result_ready() != TRUE) { ; } */ /* cl_log(LOG_DEBUG, "Will call stonithd_receive_ops_result."); */ if (ST_OK != stonithd_receive_ops_result(TRUE)) { cl_log(LOG_ERR, "stonithd_receive_ops_result failed."); /* Need to improve the granularity for error return code */ stonithd_signoff(); exit(EXECRA_EXEC_UNKNOWN_ERROR); } /* exit_value will be setted by the callback function */ g_free(op->ra_name); g_free(op->op_type); g_free(op->rsc_id); g_free(op); stonithd_signoff(); /* cl_log(LOG_DEBUG, "stonithRA orignal exit code=%d", exit_value); */ exit(map_ra_retvalue(exit_value, op_type, NULL)); } static void stonithRA_ops_callback(stonithRA_ops_t * op, void * private_data) { /* cl_log(LOG_DEBUG, "setting exit code=%d", exit_value); */ 
exit_value = op->op_result; } static uniform_ret_execra_t map_ra_retvalue(int ret_execra, const char * op_type, const char * std_output) { /* Because the UNIFORM_RET_EXECRA is compatible with OCF standard, no * actual mapping except validating, which ensure the return code * will be in the range 0 to 7. Too strict? */ - if (ret_execra < 0 || ret_execra > 7) { + if (ret_execra < EXECRA_EXEC_UNKNOWN_ERROR || + ret_execra > EXECRA_STATUS_UNKNOWN) { cl_log(LOG_WARNING, "mapped the invalid return code %d." , ret_execra); ret_execra = EXECRA_UNKNOWN_ERROR; } return ret_execra; } static int get_resource_list(GList ** rsc_info) { int rc; int needprivs = !cl_have_full_privs(); if ( rsc_info == NULL ) { cl_log(LOG_ERR, "Parameter error: get_resource_list"); return -2; } if ( *rsc_info != NULL ) { cl_log(LOG_ERR, "Parameter error: get_resource_list."\ "will cause memory leak."); *rsc_info = NULL; } if (needprivs) { return_to_orig_privs(); } if (ST_OK != stonithd_signon("STONITH_RA")) { cl_log(LOG_ERR, "%s:%d: Can not signon to the stonithd." 
, __FUNCTION__, __LINE__); rc = -1; } else { rc = stonithd_list_stonith_types(rsc_info); stonithd_signoff(); } if (needprivs) { return_to_dropped_privs(); } return rc; } static int get_provider_list(const char* op_type, GList ** providers) { int ret; ret = get_providers(RA_PATH, op_type, providers); if (0>ret) { cl_log(LOG_ERR, "scandir failed in stonith RA plugin"); } return ret; } static char * get_resource_meta(const char* rsc_type, const char* provider) { char * buffer; int bufferlen = 0; const char * meta_param = NULL; const char * meta_longdesc = NULL; const char * meta_shortdesc = NULL; char *xml_meta_longdesc = NULL; char *xml_meta_shortdesc = NULL; Stonith * stonith_obj = NULL; if ( provider != NULL ) { cl_log(LOG_DEBUG, "stonithRA plugin: provider attribute " "is not needed and will be ignored."); } stonith_obj = stonith_new(rsc_type); meta_longdesc = stonith_get_info(stonith_obj, ST_DEVICEDESCR); CHECKMETANULL(meta_longdesc, "longdesc") xml_meta_longdesc = xmlize(meta_longdesc); meta_shortdesc = stonith_get_info(stonith_obj, ST_DEVICENAME); CHECKMETANULL(meta_shortdesc, "shortdesc") xml_meta_shortdesc = xmlize(meta_shortdesc); meta_param = stonith_get_info(stonith_obj, ST_CONF_XML); CHECKMETANULL(meta_param, "parameters") bufferlen = STRLEN_CONST(META_TEMPLATE) + strlen(rsc_type) + strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc) + strlen(meta_param) + 1; buffer = g_new(char, bufferlen); buffer[bufferlen-1] = '\0'; snprintf(buffer, bufferlen-1, META_TEMPLATE, rsc_type , xml_meta_longdesc, xml_meta_shortdesc, meta_param); stonith_delete(stonith_obj); zapxml(xml_meta_longdesc); zapxml(xml_meta_shortdesc); return buffer; } /* * Currently should return *providers = NULL, but remain the old code for * possible unsing in the future */ static int get_providers(const char* class_path, const char* op_type, GList ** providers) { if ( providers == NULL ) { cl_log(LOG_ERR, "%s:%d: Parameter error: providers==NULL" , __FUNCTION__, __LINE__); return -2; } if ( 
*providers != NULL ) { cl_log(LOG_ERR, "%s:%d: Parameter error: *providers==NULL." "This will cause memory leak." , __FUNCTION__, __LINE__); } /* Now temporarily make it fixed */ *providers = g_list_append(*providers, g_strdup("heartbeat")); return g_list_length(*providers); } diff --git a/tools/Makefile.am b/tools/Makefile.am index af745d46ea..5351c68b09 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,89 +1,91 @@ # # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in ccdv INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl -EXTRA_DIST = ccdv.c attrd.h ocf-tester +EXTRA_DIST = ccdv.c attrd.h $(hanoarch_DATA) $(sbin_SCRIPTS) apigid = @HA_APIGID@ habindir = @bindir@ halibdir = $(libdir)/@HB_PKG@ +hanoarchdir = @HA_NOARCHDATAHBDIR@ gliblib = @GLIBLIB@ habin_PROGRAMS = cl_status cl_respawn halib_SCRIPTS = haresources2cib.py -sbin_SCRIPTS = ocf-tester +hanoarch_DATA = utillib.sh README.hb_report +sbin_SCRIPTS = ocf-tester hb_report if CRM_BUILD halib_PROGRAMS = attrd pingd sbin_PROGRAMS = attrd_updater endif ## SOURCES ccdv: $(top_srcdir)/tools/ccdv.c gcc $(AM_CFLAGS) $(CFLAGS) -o ccdv $(top_srcdir)/tools/ccdv.c cl_status_SOURCES = cl_status.c # A little trick. Now ccdv can be auto-built but not auto-cleaned. cl_status_DEPENDENCIES = ccdv cl_status_LDADD = $(top_builddir)/lib/hbclient/libhbclient.la \ $(top_builddir)/lib/clplumbing/libplumb.la \ $(gliblib) \ $(top_builddir)/replace/libreplace.la cl_respawn_SOURCES = cl_respawn.c cl_respawn_LDADD = $(top_builddir)/lib/clplumbing/libplumb.la \ $(top_builddir)/lib/apphb/libapphb.la \ $(gliblib) \ $(top_builddir)/replace/libreplace.la attrd_SOURCES = attrd.c attrd_LDADD = \ $(top_builddir)/lib/clplumbing/libplumb.la \ $(top_builddir)/lib/crm/common/libcrmcommon.la \ $(top_builddir)/lib/hbclient/libhbclient.la \ $(top_builddir)/lib/crm/cib/libcib.la \ $(GLIBLIB) \ $(LIBRT) pingd_SOURCES = pingd.c pingd_LDADD = \ $(top_builddir)/lib/clplumbing/libplumb.la \ $(top_builddir)/lib/crm/common/libcrmcommon.la \ $(top_builddir)/lib/hbclient/libhbclient.la \ $(GLIBLIB) \ $(LIBRT) attrd_updater_SOURCES = attrd_updater.c attrd_updater_LDADD = \ $(top_builddir)/lib/clplumbing/libplumb.la \ $(top_builddir)/lib/crm/common/libcrmcommon.la \ $(GLIBLIB) \ $(LIBRT) install-data-hook: # install-exec-hook doesn't work (!) 
-chgrp $(apigid) $(DESTDIR)/$(habindir)/cl_status -chmod g+s,a-w $(DESTDIR)/$(habindir)/cl_status .PHONY: install-exec-hook diff --git a/tools/README.hb_report b/tools/README.hb_report new file mode 100644 index 0000000000..043898184c --- /dev/null +++ b/tools/README.hb_report @@ -0,0 +1,297 @@ +Heartbeat reporting +=================== +Dejan Muhamedagic +v1.0 + +`hb_report` is a utility to collect all information relevant to +Heartbeat over the given period of time. + +Quick start +----------- + +Run `hb_report` on one of the nodes or on the host which serves as +a central log server. Run `hb_report` without parameters to see usage. + +A few examples: + +1. Last night during the backup there were several warnings +encountered (logserver is the log host): ++ + logserver# hb_report -f 3:00 -t 4:00 /tmp/report ++ +collects everything from all nodes from 3am to 4am last night. +The files are stored in /tmp/report and compressed to a tarball +/tmp/report.tar.gz. + +2. Just found a problem during testing: + + node1# date : note the current time + node1# /etc/init.d/heartbeat start + node1# nasty_command_that_breaks_things + node1# sleep 120 : wait for the cluster to settle + node1# hb_report -f time /tmp/hb1 + +Introduction +------------ + +Managing clusters is cumbersome. Heartbeat v2 with its numerous +configuration files and multi-node clusters just adds to the +complexity. No wonder then that most problem reports were less +than optimal. This is an attempt to rectify that situation and +make life easier for both the users and the developers. + +On security +----------- + +`hb_report` is a fairly complex program. As some of you are +probably going to run it as root let us state a few important +things you should keep in mind: + +1. Don't run `hb_report` as root! It is fairly simple to setup +things in such a way that root access is not needed. 
I won't go +into details, just to stress that all information collected +should be readable by accounts belonging to the haclient group. + +2. If you still have to run this as root. Well, don't use the +`-C` option. + +3. Of course, every possible precaution has been taken not to +disturb processes, or touch or remove files out of the given +destination directory. If you (by mistake) specify an existing +directory, `hb_report` will bail out soon. If you specify a +relative path, it won't work either. The final product of +`hb_report` is a tarball. However, the destination directory is +not removed on any node, unless the user specifies `-C`. If you're +too lazy to cleanup the previous run, do yourself a favour and +just supply a new destination directory. You've been warned. If +you worry about the space used, just put all your directories +under /tmp and setup a cronjob to remove those directories once a +week: +.......... + for d in /tmp/*; do + test -d $d || + continue + test -f $d/description.txt || test -f $d/.env || + continue + grep -qs 'By: hb_report' $d/description.txt || + grep -qs '^UNIQUE_MSG=Mark' $d/.env || + continue + rm -r $d + done +.......... + +Mode of operation +----------------- + +Cluster data collection is straightforward: just run the same +procedure on all nodes and collect the reports. There is, +apart from many small ones, one large complication: central +syslog destination. So, in order to allow this to be fully +automated, we should sometimes run the procedure on the log host +too. Actually, if there is a log host, then the best way is to +run `hb_report` there. + +We use ssh for the remote program invocation. Even though it is +possible to run `hb_report` without ssh by doing a more menial job, +the overall user experience is much better if ssh works. Anyway, +how else do you manage your cluster? 
+ +Another ssh related point: In case your security policy +proscribes loghost-to-cluster-over-ssh communications, then +you'll have to copy the log file to one of the nodes and point +`hb_report` to it. + +Prerequisites +------------- + +1. ssh ++ +This is not strictly required, but you won't regret having a +password-less ssh. It is not too difficult to setup and will save +you a lot of time. If you can't have it, for example because your +security policy does not allow such a thing, or you just prefer +menial work, then you will have to resort to the semi-manual +semi-automated report generation. See below for instructions. + +2. Times ++ +In order to find files and messages in the given period and to +parse the `-f` and `-t` options, `hb_report` uses perl and one of the +`Date::Parse` or `Date::Manip` perl modules. Note that you need +only one of these. ++ +On rpm based distributions, you can find `Date::Parse` in +`perl-TimeDate` and on Debian and its derivatives in +`libtimedate-perl`. + +3. Core dumps ++ +To backtrace core dumps gdb is needed and the Heartbeat packages +with the debugging info. The debug info packages may be installed +at the time the report is created. Let's hope that you will need +this really seldom. + +What is in the report +--------------------- + +1. Heartbeat related +- heartbeat version/release information +- heartbeat configuration (CIB, ha.cf, logd.cf) +- heartbeat status (output from crm_mon, crm_verify, ccm_tool) +- pengine transition graphs (if any) +- backtraces of core dumps (if any) +- heartbeat logs (if any) +2. System related +- general platform information (`uname`, `arch`, `distribution`) +- system statistics (`uptime`, `top`, `ps`) +3. User created :) +- problem description (template to be edited) +4. Generated +- problem analysis (generated) + +It is preferred that the Heartbeat is running at the time of the +report, but not absolutely required. `hb_report` will also do a +quick analysis of the collected information. 
+ +Times +----- + +Specifying times can at times be a nuisance. That is why we have +chosen to use one of the perl modules--they do allow certain +freedom when talking dates. You can either read the instructions +at the +http://search.cpan.org/dist/TimeDate/lib/Date/Parse.pm#EXAMPLE_DATES[Date::Parse +examples page]. + +or just rely on common sense and try stuff like: + + 3:00 (today at 3am) + 15:00 (today at 3pm) + 2007/9/1 2pm (September 1st at 2pm) + +`hb_report` will (probably) complain if it can't figure out what +you mean. + +Try to delimit the event as close as possible in order to reduce +the size of the report, but still leaving a minute or two around +for good measure. + +Note that `-f` is not an optional option. And don't forget to quote +dates when they contain spaces. + +Should I send all this to the rest of Internet? +----------------------------------------------- + +We make an effort to remove sensitive data from the Heartbeat +configuration (CIB, ha.cf, and transition graphs). However, you +_have_ to tell us what is sensitive! Use the `-p` option to specify +additional regular expressions to match variable names which may +contain information you don't want to leak. For example: + + # hb_report -f 18:00 -p "user.*" -p "secret.*" /var/tmp/report + +We look by default for variable names matching "pass.*" and the +stonith_host ha.cf directive. + +Logs and other files are not filtered. Please filter them +yourself if necessary. + +Logs +---- + +It may be tricky to find syslog logs. The scheme used is to log a +unique message on all nodes and then look it up in the usual +syslog locations. This procedure is not foolproof, in particular +if the syslog files are in a non-standard directory. We look in +/var/log /var/logs /var/syslog /var/adm /var/log/ha +/var/log/cluster. 
In case we can't find the logs, please supply +their location: + + # hb_report -f 5pm -l /var/log/cluster1/ha-log -S /tmp/report_node1 + +If you have different log locations on different nodes, well, +perhaps you'd like to make them the same. Or read about the +manual report collection. + +The log files are collected from all hosts where found. In case +your syslog is configured to log to both the log server and local +files and `hb_report` is run on the log server you will end up with +multiple logs with same content. + +Files starting with "ha-" are preferred. In case syslog sends +messages to more than one file, if one of them is named ha-log or +ha-debug those will be favoured to syslog or messages. + +If there is no separate log for Heartbeat, possibly unrelated +messages from other programs are included. We don't filter logs, +just pick a segment for the period you specified. + +NB: Don't have a central log host? Read the CTS README and setup +one. + +Manual report collection +------------------------ + +So, your ssh doesn't work. In that case, you will have to run +this procedure on all nodes. Use `-S` so that we don't bother with +ssh: + + # hb_report -f 5:20pm -t 5:30pm -S /tmp/report_node1 + +If you also have a log host which is not in the cluster, then +you'll have to copy the log to one of the nodes and tell us where +it is: + + # hb_report -f 5:20pm -t 5:30pm -l /var/tmp/ha-log -S /tmp/report_node1 + +Furthermore, to prevent `hb_report` from asking you to edit the +report to describe the problem on every node use `-D` on all but +one: + + # hb_report -f 5:20pm -t 5:30pm -DS /tmp/report_node1 + +If you reconsider and want the ssh setup, take a look at the CTS +README file for instructions. + +Analysis +-------- + +The point of analysis is to get out the most important +information from probably several thousand lines worth of text. 
+Perhaps this should be more properly named as report review as it +is rather simple, but let's pretend that we are doing something +utterly sophisticated. + +The analysis consists of the following: + +- compare files coming from different nodes; if they are equal, + make one copy in the top level directory, remove duplicates, + and create soft links instead +- print errors, warnings, and lines matching `-L` patterns from logs +- report if there were coredumps and by whom +- report crm_verify results + +The goods +--------- + +1. Common ++ +- ha-log (if found on the log host) +- description.txt (template and user report) +- analysis.txt + +2. Per node ++ +- ha.cf +- logd.cf +- ha-log (if found) +- cib.xml (`cibadmin -Ql` or `cp` if Heartbeat is not running) +- ccm_tool.txt (`ccm_tool -p`) +- crm_mon.txt (`crm_mon -1`) +- crm_verify.txt (`crm_verify -V`) +- pengine/ (only on DC, directory with pengine transitions) +- sysinfo.txt (static info) +- sysstats.txt (dynamic info) +- backtraces.txt (if coredumps found) +- DC (well...) + diff --git a/tools/hb_report.in b/tools/hb_report.in new file mode 100755 index 0000000000..c02a3df378 --- /dev/null +++ b/tools/hb_report.in @@ -0,0 +1,608 @@ +#!/bin/sh + + # Copyright (C) 2007 Dejan Muhamedagic + # + # This program is free software; you can redistribute it and/or + # modify it under the terms of the GNU General Public + # License as published by the Free Software Foundation; either + # version 2.1 of the License, or (at your option) any later version. + # + # This software is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + # General Public License for more details. + # + # You should have received a copy of the GNU General Public + # License along with this library; if not, write to the Free Software + # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + # + +. 
@sysconfdir@/ha.d/shellfuncs +. $HA_NOARCHBIN/utillib.sh + +PROG=`basename $0` +# FIXME: once this is part of the package! +PROGDIR=`dirname $0` +echo "$PROGDIR" | grep -qs '^/' || { + test -f @sbindir@/$PROG && + PROGDIR=@sbindir@ + test -f $HA_NOARCHBIN/$PROG && + PROGDIR=$HA_NOARCHBIN +} + +LOGD_CF=`findlogdcf @sysconfdir@ $HA_DIR` +export LOGD_CF + +: ${SSH_OPTS="-T -o Batchmode=yes"} +LOG_PATTERNS="CRIT: ERROR:" + +# +# the instance where user runs hb_report is the master +# the others are slaves +# +if [ x"$1" = x__slave ]; then + SLAVE=1 +fi + +# +# if this is the master, allow ha.cf and logd.cf in the current dir +# (because often the master is the log host) +# +if [ "$SLAVE" = "" ]; then + [ -f ha.cf ] && HA_CF=ha.cf + [ -f logd.cf ] && LOGD_CF=logd.cf +fi + +usage() { + cat< $DESTDIR/.env" + done +} +start_remote_collectors() { + for node in `getnodes`; do + [ "$node" = "$WE" ] && continue + ssh $SSH_OPTS $SSH_USER@$node "$PROGDIR/hb_report __slave $DESTDIR" | + (cd $DESTDIR && tar xf -) & + SLAVEPIDS="$SLAVEPIDS $!" + done +} + +# +# does ssh work? 
+# +findsshuser() { + for n in `getnodes`; do + [ "$node" = "$WE" ] && continue + trysshusers $n $TRY_SSH && break + done +} +checkssh() { + for n in `getnodes`; do + [ "$node" = "$WE" ] && continue + checksshuser $n $SSH_USER || return 1 + done + return 0 +} + +# +# the usual stuff +# +getbacktraces() { + flist=`find_files $HA_VARLIB/cores $1 $2` + [ "$flist" ] && + getbt $flist > $3 +} +getpeinputs() { + n=`basename $3` + flist=$( + if [ -f $3/ha-log ]; then + grep " $n peng.*PEngine Input stored" $3/ha-log | awk '{print $NF}' + else + find_files $HA_VARLIB/pengine $1 $2 + fi | sed "s,$HA_VARLIB/,,g" + ) + [ "$flist" ] && + (cd $HA_VARLIB && tar cf - $flist) | (cd $3 && tar xf -) +} +touch_DC_if_dc() { + dc=`crmadmin -D 2>/dev/null | awk '{print $NF}'` + if [ "$WE" = "$dc" ]; then + touch $1/DC + fi +} + +# +# some basic system info and stats +# +sys_info() { + echo "Heartbeat version: `hb_ver`" + crm_info + echo "Platform: `uname`" + echo "Kernel release: `uname -r`" + echo "Architecture: `arch`" + [ `uname` = Linux ] && + echo "Distribution: `distro`" +} +sys_stats() { + set -x + uptime + ps axf + ps auxw + top -b -n 1 + netstat -i + set +x +} + +# +# replace sensitive info with '****' +# +sanitize() { + for f in $1/ha.cf $1/cib.xml $1/pengine/*; do + [ -f "$f" ] && sanitize_one $f + done +} + +# +# remove duplicates if files are same, make links instead +# +consolidate() { + for n in `getnodes`; do + if [ -f $1/$2 ]; then + rm $1/$n/$2 + else + mv $1/$n/$2 $1 + fi + ln -s ../$2 $1/$n + done +} + +# +# some basic analysis of the report +# +checkcrmvfy() { + for n in `getnodes`; do + if [ -s $1/$n/crm_verify.txt ]; then + echo "WARN: crm_verify reported warnings at $n:" + cat $1/$n/crm_verify.txt + fi + done +} +checkbacktraces() { + for n in `getnodes`; do + [ -s $1/$n/backtraces.txt ] && { + echo "WARN: coredumps found at $n:" + egrep 'Core was generated|Program terminated' \ + $1/$n/backtraces.txt | + sed 's/^/ /' + } + done +} +checklogs() { + logs=`find $1 
-name ha-log` + [ "$logs" ] || return + pattfile=`maketempfile` || + fatal "cannot create temporary files" + for p in $LOG_PATTERNS; do + echo "$p" + done > $pattfile + echo "" + echo "Log patterns:" + for n in `getnodes`; do + cat $logs | grep -f $pattfile + done + rm -f $pattfile +} + +# +# check if files have same content in the cluster +# +cibdiff() { + crm_diff -c -n $1 -o $2 +} +txtdiff() { + diff $1 $2 +} +diffcheck() { + case `basename $1` in + ccm_tool.txt) + txtdiff $1 $2;; # worddiff? + cib.xml) + cibdiff $1 $2;; + ha.cf) + txtdiff $1 $2;; # confdiff? + crm_mon.txt|sysinfo.txt) + txtdiff $1 $2;; + esac +} +analyze_one() { + rc=0 + node0="" + for n in `getnodes`; do + if [ "$node0" ]; then + diffcheck $1/$node0/$2 $1/$n/$2 + rc=$((rc+$?)) + else + node0=$n + fi + done + return $rc +} +analyze() { + flist="ccm_tool.txt cib.xml crm_mon.txt ha.cf sysinfo.txt" + for f in $flist; do + perl -e "printf \"Diff $f... \"" + ls $1/*/$f >/dev/null 2>&1 || continue + if analyze_one $1 $f; then + echo "OK" + consolidate $1 $f + else + echo "varies" + fi + done + checkcrmvfy $1 + checkbacktraces $1 + checklogs $1 +} + +# +# description template, editing, and other notes +# +mktemplate() { + cat<=100{exit 1}' || + cat < $DESTDIR/$WE/ha-log + else + cat > $DESTDIR/ha-log # we are log server, probably + fi +else + warning "could not find the log file on $WE" +fi + +# +# part 6: get all other info (config, stats, etc) +# +if [ "$THIS_IS_NODE" ]; then + getconfig $DESTDIR/$WE + getpeinputs $FROM_TIME $TO_TIME $DESTDIR/$WE + getbacktraces $FROM_TIME $TO_TIME $DESTDIR/$WE/backtraces.txt + touch_DC_if_dc $DESTDIR/$WE + sanitize $DESTDIR/$WE + sys_info > $DESTDIR/$WE/sysinfo.txt + sys_stats > $DESTDIR/$WE/sysstats.txt 2>&1 +fi + +# +# part 7: endgame: +# slaves tar their results to stdout, the master waits +# for them, analyses results, asks the user to edit the +# problem description template, and prints final notes +# +if [ "$SLAVE" ]; then + (cd $DESTDIR && tar cf - $WE) 
+else + wait $SLAVEPIDS + analyze $DESTDIR > $DESTDIR/analysis.txt + mktemplate > $DESTDIR/description.txt + [ "$NO_DESCRIPTION" ] || { + echo press enter to edit the problem description... + read junk + edittemplate $DESTDIR/description.txt + } + cd $DESTDIR/.. + tar czf $DESTDIR.tar.gz $DESTDIR/ + finalword + checksize +fi + +[ "$REMOVE_DEST" ] && + rm -r $DESTDIR diff --git a/tools/utillib.sh b/tools/utillib.sh new file mode 100644 index 0000000000..05e259120a --- /dev/null +++ b/tools/utillib.sh @@ -0,0 +1,384 @@ + # Copyright (C) 2007 Dejan Muhamedagic + # + # This program is free software; you can redistribute it and/or + # modify it under the terms of the GNU General Public + # License as published by the Free Software Foundation; either + # version 2.1 of the License, or (at your option) any later version. + # + # This software is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + # General Public License for more details. 
+ # + # You should have received a copy of the GNU General Public + # License along with this library; if not, write to the Free Software + # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + # + +# +# ha.cf/logd.cf parsing +# +getcfvar() { + [ -f $HA_CF ] || return + sed 's/#.*//' < $HA_CF | + grep -w "^$1" | + sed 's/^[^[:space:]]*[[:space:]]*//' +} +iscfvarset() { + test "`getcfvar \"$1\"`" +} +iscfvartrue() { + getcfvar "$1" | + egrep -qsi "^(true|y|yes|on|1)" +} +getnodes() { + getcfvar node +} + +# +# ssh +# +checksshuser() { + ssh -o Batchmode=yes $2@$1 true 2>/dev/null +} +trysshusers() { + n=$1 + shift 1 + for u; do + if checksshuser $n $u; then + echo $u + break + fi + done +} + +# +# logging +# +syslogmsg() { + severity=$1 + shift 1 + logtag="" + [ "$HA_LOGTAG" ] && logtag="-t $HA_LOGTAG" + logger -p ${HA_LOGFACILITY:-"daemon"}.$severity $logtag $* +} + +# +# find log destination +# +uselogd() { + iscfvartrue use_logd && + return 0 # if use_logd true + iscfvarset logfacility || + iscfvarset logfile || + iscfvarset debugfile || + return 0 # or none of the log options set + false +} +findlogdcf() { + for f in \ + `which strings > /dev/null 2>&1 && + strings $HA_BIN/ha_logd | grep 'logd\.cf'` \ + `for d; do echo $d/logd.cf $d/ha_logd.cf; done` + do + if [ -f "$f" ]; then + echo $f + return 0 + fi + done + return 1 +} +getlogvars() { + savecf=$HA_CF + if uselogd; then + [ -f "$LOGD_CF" ] || + fatal "could not find logd.cf or ha_logd.cf" + HA_CF=$LOGD_CF + fi + HA_LOGFACILITY=`getcfvar logfacility` + HA_LOGFILE=`getcfvar logfile` + HA_DEBUGFILE=`getcfvar debugfile` + HA_SYSLOGMSGFMT="" + iscfvartrue syslogmsgfmt && + HA_SYSLOGMSGFMT=1 + HA_CF=$savecf +} +findmsg() { + # this is tricky, we try a few directories + syslogdir="/var/log /var/logs /var/syslog /var/adm /var/log/ha /var/log/cluster" + favourites="ha-*" + mark=$1 + log="" + for d in $syslogdir; do + [ -d $d ] || continue + log=`fgrep -l "$mark" $d/$favourites` && break + 
log=`fgrep -l "$mark" $d/*` && break + done 2>/dev/null + echo $log +} + +# +# print a segment of a log file +# +str2time() { + perl -e "\$time='$*';" -e ' + eval "use Date::Parse"; + if (!$@) { + print str2time($time); + } else { + eval "use Date::Manip"; + if (!$@) { + print UnixDate(ParseDateString($time), "%s"); + } + } + ' +} +getstamp() { + if [ "$HA_SYSLOGMSGFMT" -o "$HA_LOGFACILITY" ]; then + awk '{print $1,$2,$3}' + else + awk '{print $2}' | sed 's/_/ /' + fi +} +linetime() { + l=`tail -n +$2 $1 | head -1 | getstamp` + str2time "$l" +} +findln_by_time() { + logf=$1 + tm=$2 + first=1 + last=`wc -l < $logf` + while [ $first -le $last ]; do + mid=$(((last+first)/2)) + tmid=`linetime $logf $mid` + if [ -z "$tmid" ]; then + warning "cannot extract time: $logf:$mid" + return + fi + if [ $tmid -gt $tm ]; then + last=$((mid-1)) + elif [ $tmid -lt $tm ]; then + first=$((mid+1)) + else + break + fi + done + echo $mid +} +dumplog() { + logf=$1 + from_time=$2 + to_time=$3 + from_line=`findln_by_time $logf $from_time` + if [ -z "$from_line" ]; then + warning "couldn't find line for time $from_time; corrupt log file?" + return + fi + tail -n +$from_line $logf | + if [ "$to_time" != 0 ]; then + to_line=`findln_by_time $logf $to_time` + if [ -z "$to_line" ]; then + warning "couldn't find line for time $to_time; corrupt log file?" + return + fi + head -$((to_line-from_line+1)) + else + cat + fi +} + +# +# find files newer than a and older than b +# +touchfile() { + t=`maketempfile` && + perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' && + echo $t +} +find_files() { + dir=$1 + from_time=$2 + to_time=$3 + from_stamp=`touchfile $from_time` + findexp="-newer $from_stamp" + if [ "$to_time" -a "$to_time" -gt 0 ]; then + to_stamp=`touchfile $to_time` + findexp="$findexp ! 
-newer $to_stamp" + fi + find $dir -type f $findexp + rm -f $from_stamp $to_stamp +} + +# +# coredumps +# +findbinary() { + random_binary=`which cat 2>/dev/null` # suppose we are lucky + binary=`gdb $random_binary $1 < /dev/null 2>/dev/null | + grep 'Core was generated' | awk '{print $5}' | + sed "s/^.//;s/[.']*$//"` + [ x = x"$binary" ] && return + fullpath=`which $binary 2>/dev/null` + if [ x = x"$fullpath" ]; then + [ -x $HA_BIN/$binary ] && echo $HA_BIN/$binary + else + echo $fullpath + fi +} +getbt() { + which gdb > /dev/null 2>&1 || { + warning "please install gdb to get backtraces" + return + } + for corefile; do + absbinpath=`findbinary $corefile` + [ x = x"$absbinpath" ] && return 1 + echo "====================== start backtrace ======================" + ls -l $corefile + gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \ + $absbinpath $corefile 2>/dev/null + echo "======================= end backtrace =======================" + done +} + +# +# heartbeat configuration/status +# +iscrmrunning() { + crmadmin -D >/dev/null 2>&1 +} +dumpstate() { + crm_mon -1 | grep -v '^Last upd' > $1/crm_mon.txt + cibadmin -Ql > $1/cib.xml + ccm_tool -p > $1/ccm_tool.txt 2>&1 +} +getconfig() { + cp -p $HA_CF $1/ + [ -f $LOGD_CF ] && + cp -p $LOGD_CF $1/ + if iscrmrunning; then + dumpstate $1 + else + cp -p $HA_VARLIB/crm/cib.xml $1/ 2>/dev/null + fi + [ -f "$1/cib.xml" ] && + crm_verify -V -x $1/cib.xml >$1/crm_verify.txt 2>&1 +} + +# +# remove values of sensitive attributes +# +# this is not proper xml parsing, but it will work under the +# circumstances +sanitize_xml_attrs() { + sed $( + for patt in $SANITIZE; do + echo "-e /name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/" + done + ) +} +sanitize_hacf() { + awk ' + $1=="stonith_host"{ for( i=5; i<=NF; i++ ) $i="****"; } + {print} + ' +} +sanitize_one() { + file=$1 + compress="" + echo $file | grep -qs 'gz$' && compress=gzip + echo $file | grep -qs 'bz2$' && compress=bzip2 + if [ "$compress" ]; 
then + decompress="$compress -dc" + else + compress=cat + decompress=cat + fi + tmp=`maketempfile` && ref=`maketempfile` || + fatal "cannot create temporary files" + touch -r $file $ref # save the mtime + if [ "`basename $file`" = ha.cf ]; then + sanitize_hacf + else + $decompress | sanitize_xml_attrs | $compress + fi < $file > $tmp + mv $tmp $file + touch -r $ref $file + rm -f $ref +} + +# +# keep the user posted +# +fatal() { + echo "ERROR: $*" >&2 + exit 1 +} +warning() { + echo "WARN: $*" >&2 +} +info() { + echo "INFO: $*" >&2 +} +pickfirst() { + for x; do + which $x >/dev/null 2>&1 && { + echo $x + return 0 + } + done + return 1 +} + +# +# run a command everywhere +# +forall() { + c="$*" + for n in `getnodes`; do + if [ "$n" = "`uname -n`" ]; then + $c + else + if [ "$SSH_USER" ]; then + echo $c | ssh $SSH_OPTS $SSH_USER@$n + fi + fi + done +} + +# +# get some system info +# +distro() { + which lsb_release >/dev/null 2>&1 && { + lsb_release -d + return + } + relf=`ls /etc/debian_version 2>/dev/null` || + relf=`ls /etc/slackware-version 2>/dev/null` || + relf=`ls -d /etc/*-release 2>/dev/null` && { + for f in $relf; do + test -f $f && { + echo "`ls $f` `cat $f`" + return + } + done + } + warning "no lsb_release no /etc/*-release no /etc/debian_version" +} +hb_ver() { + which dpkg > /dev/null 2>&1 && { + dpkg-query -f '${Version}' -W heartbeat 2>/dev/null || + dpkg-query -f '${Version}' -W heartbeat-2 + return + } + which rpm > /dev/null 2>&1 && { + rpm -q --qf '%{version}' heartbeat + return + } + # more packagers? +} +crm_info() { + $HA_BIN/crmd version 2>&1 +}