diff --git a/crmd/control.c b/crmd/control.c
index cee8a70588..242eaa2c57 100644
--- a/crmd/control.c
+++ b/crmd/control.c
@@ -1,916 +1,919 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/common/cluster.h>
+#include "../lib/common/stack.h"
#include <crmd.h>
#include <crmd_fsa.h>
#include <fsa_proto.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
#include <tengine.h>
#include <sys/types.h>
#include <sys/stat.h>
char *ipc_server = NULL;
extern gboolean crm_connect_corosync(void);
extern void crmd_ha_connection_destroy(gpointer user_data);
void crm_shutdown(int nsig);
gboolean crm_read_options(gpointer user_data);
gboolean fsa_has_quorum = FALSE;
GHashTable *ipc_clients = NULL;
crm_trigger_t *fsa_source = NULL;
crm_trigger_t *config_read = NULL;
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
gboolean registered = FALSE;
if(action & A_HA_DISCONNECT) {
if(is_openais_cluster()) {
crm_peer_destroy();
+ terminate_ais_connection();
crm_info("Disconnected from OpenAIS");
+
#if SUPPORT_HEARTBEAT
} else if(fsa_cluster_conn != NULL) {
set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED);
fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE);
crm_info("Disconnected from Heartbeat");
#endif
}
}
if(action & A_HA_CONNECT) {
crm_set_status_callback(&ais_status_callback);
if(is_openais_cluster()) {
#if SUPPORT_COROSYNC
registered = crm_connect_corosync();
#endif
} else if(is_heartbeat_cluster()) {
#if SUPPORT_HEARTBEAT
registered = crm_cluster_connect(
&fsa_our_uname, &fsa_our_uuid, crmd_ha_msg_callback, crmd_ha_connection_destroy,
&fsa_cluster_conn);
#endif
}
#if SUPPORT_HEARTBEAT
if(is_heartbeat_cluster()) {
crm_debug_3("Be informed of Node Status changes");
if (registered &&
fsa_cluster_conn->llc_ops->set_nstatus_callback(
fsa_cluster_conn, crmd_ha_status_callback,
fsa_cluster_conn) != HA_OK){
crm_err("Cannot set nstatus callback: %s",
fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn));
registered = FALSE;
}
crm_debug_3("Be informed of CRM Client Status changes");
if (registered &&
fsa_cluster_conn->llc_ops->set_cstatus_callback(
fsa_cluster_conn, crmd_client_status_callback,
fsa_cluster_conn) != HA_OK) {
crm_err("Cannot set cstatus callback: %s",
fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn));
registered = FALSE;
}
if(registered) {
crm_debug_3("Requesting an initial dump of CRMD client_status");
fsa_cluster_conn->llc_ops->client_status(
fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1);
}
}
#endif
if(registered == FALSE) {
set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
clear_bit_inplace(fsa_input_register, R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if(action & ~(A_HA_CONNECT|A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s",
fsa_action2string(action), __FUNCTION__);
}
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
/* just in case */
set_bit_inplace(fsa_input_register, R_SHUTDOWN);
if(is_heartbeat_cluster()) {
if(is_set(fsa_input_register, pe_subsystem->flag_connected)) {
crm_info("Terminating the %s", pe_subsystem->name);
if(stop_subsystem(pe_subsystem, TRUE) == FALSE) {
/* it's gone... */
crm_err("Faking %s exit", pe_subsystem->name);
clear_bit_inplace(fsa_input_register,
pe_subsystem->flag_connected);
} else {
crm_info("Waiting for subsystems to exit");
crmd_fsa_stall(NULL);
}
}
crm_info("All subsystems stopped, continuing");
}
if(stonith_api) {
/* Prevent it from coming up again */
clear_bit_inplace(fsa_input_register, R_ST_REQUIRED);
crm_info("Disconnecting STONITH...");
stonith_api->cmds->disconnect(stonith_api);
}
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
xmlNode *msg = NULL;
crm_info("Sending shutdown request to DC: %s", crm_str(fsa_our_dc));
msg = create_request(
CRM_OP_SHUTDOWN_REQ, NULL, NULL,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
/* set_bit_inplace(fsa_input_register, R_STAYDOWN); */
if(send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resources;
extern GHashTable *voted;
extern GHashTable *reload_hash;
void log_connected_client(gpointer key, gpointer value, gpointer user_data);
void
log_connected_client(gpointer key, gpointer value, gpointer user_data)
{
crmd_client_t *client = value;
crm_err("%s is still connected at exit", client->table_key);
}
static void free_mem(fsa_data_t *msg_data)
{
g_main_loop_quit(crmd_mainloop);
g_main_loop_unref(crmd_mainloop);
#if SUPPORT_HEARTBEAT
if(fsa_cluster_conn) {
fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn);
fsa_cluster_conn = NULL;
}
#endif
slist_destroy(fsa_data_t, fsa_data, fsa_message_queue,
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause),
fsa_data->origin);
delete_fsa_input(fsa_data);
);
delete_fsa_input(msg_data);
if(ipc_clients) {
crm_debug("Number of connected clients: %d",
g_hash_table_size(ipc_clients));
/* g_hash_table_foreach(ipc_clients, log_connected_client, NULL); */
g_hash_table_destroy(ipc_clients);
}
empty_uuid_cache();
crm_peer_destroy();
clear_bit_inplace(fsa_input_register, R_CCM_DATA);
if(te_subsystem->client && te_subsystem->client->client_source) {
crm_debug("Full destroy: TE");
G_main_del_IPC_Channel(te_subsystem->client->client_source);
} else {
crm_debug("Partial destroy: TE");
crmd_ipc_connection_destroy(te_subsystem->client);
}
crm_free(te_subsystem);
if(pe_subsystem->client && pe_subsystem->client->client_source) {
crm_debug("Full destroy: PE");
G_main_del_IPC_Channel(pe_subsystem->client->client_source);
} else {
crm_debug("Partial destroy: PE");
crmd_ipc_connection_destroy(pe_subsystem->client);
}
crm_free(pe_subsystem);
crm_free(cib_subsystem);
if(integrated_nodes) {
g_hash_table_destroy(integrated_nodes);
}
if(finalized_nodes) {
g_hash_table_destroy(finalized_nodes);
}
if(confirmed_nodes) {
g_hash_table_destroy(confirmed_nodes);
}
if(reload_hash) {
g_hash_table_destroy(reload_hash);
}
if(resources) {
g_hash_table_destroy(resources);
}
if(voted) {
g_hash_table_destroy(voted);
}
cib_delete(fsa_cib_conn);
fsa_cib_conn = NULL;
if(fsa_lrm_conn) {
fsa_lrm_conn->lrm_ops->delete(fsa_lrm_conn);
}
crm_free(integration_timer);
crm_free(finalization_timer);
crm_free(election_trigger);
crm_free(election_timeout);
crm_free(shutdown_escalation_timer);
crm_free(wait_timer);
crm_free(recheck_timer);
crm_free(fsa_our_dc_version);
crm_free(fsa_our_uname);
crm_free(fsa_our_uuid);
crm_free(fsa_our_dc);
crm_free(ipc_server);
crm_free(max_generation_from);
free_xml(max_generation_xml);
crm_xml_cleanup();
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
int exit_code = 0;
int log_level = LOG_INFO;
const char *exit_type = "gracefully";
if(action & A_EXIT_1) {
exit_code = 1;
log_level = LOG_ERR;
exit_type = "forcefully";
}
verify_stopped(cur_state, LOG_ERR);
do_crm_log(log_level, "Performing %s - %s exiting the CRMd",
fsa_action2string(action), exit_type);
if(is_set(fsa_input_register, R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
exit_code = 2;
}
if(is_set(fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn by Heartbeat");
exit_code = 100;
}
free_mem(msg_data);
crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
cl_flush_logs();
exit(exit_code);
}
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
int was_error = 0;
int interval = 1; /* seconds between DC heartbeats */
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
ipc_clients = g_hash_table_new(g_str_hash, g_str_equal);
crm_debug("Creating CIB and LRM objects");
fsa_cib_conn = cib_new();
fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM);
/* set up the timers */
crm_malloc0(integration_timer, sizeof(fsa_timer_t));
crm_malloc0(finalization_timer, sizeof(fsa_timer_t));
crm_malloc0(election_trigger, sizeof(fsa_timer_t));
crm_malloc0(election_timeout, sizeof(fsa_timer_t));
crm_malloc0(shutdown_escalation_timer, sizeof(fsa_timer_t));
crm_malloc0(wait_timer, sizeof(fsa_timer_t));
crm_malloc0(recheck_timer, sizeof(fsa_timer_t));
interval = interval * 1000;
if(election_trigger != NULL) {
election_trigger->source_id = 0;
election_trigger->period_ms = -1;
election_trigger->fsa_input = I_DC_TIMEOUT;
election_trigger->callback = crm_timer_popped;
election_trigger->repeat = FALSE;
} else {
was_error = TRUE;
}
if(election_timeout != NULL) {
election_timeout->source_id = 0;
election_timeout->period_ms = -1;
election_timeout->fsa_input = I_ELECTION_DC;
election_timeout->callback = crm_timer_popped;
election_timeout->repeat = FALSE;
} else {
was_error = TRUE;
}
if(integration_timer != NULL) {
integration_timer->source_id = 0;
integration_timer->period_ms = -1;
integration_timer->fsa_input = I_INTEGRATED;
integration_timer->callback = crm_timer_popped;
integration_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if(finalization_timer != NULL) {
finalization_timer->source_id = 0;
finalization_timer->period_ms = -1;
finalization_timer->fsa_input = I_FINALIZED;
finalization_timer->callback = crm_timer_popped;
finalization_timer->repeat = FALSE;
/* for possible enabling... a bug in the join protocol left
* a slave in S_PENDING while we think it's in S_NOT_DC
*
* raising I_FINALIZED put us into a transition loop which is
* never resolved.
* in this loop we continually send probes which the node
* NACKs because it's in S_PENDING
*
* if we have nodes where heartbeat is active but the
* CRM is not... then this will be handled in the
* integration phase
*/
finalization_timer->fsa_input = I_ELECTION;
} else {
was_error = TRUE;
}
if(shutdown_escalation_timer != NULL) {
shutdown_escalation_timer->source_id = 0;
shutdown_escalation_timer->period_ms = -1;
shutdown_escalation_timer->fsa_input = I_STOP;
shutdown_escalation_timer->callback = crm_timer_popped;
shutdown_escalation_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if(wait_timer != NULL) {
wait_timer->source_id = 0;
wait_timer->period_ms = 2000;
wait_timer->fsa_input = I_NULL;
wait_timer->callback = crm_timer_popped;
wait_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if(recheck_timer != NULL) {
recheck_timer->source_id = 0;
recheck_timer->period_ms = -1;
recheck_timer->fsa_input = I_PE_CALC;
recheck_timer->callback = crm_timer_popped;
recheck_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
/* set up the sub systems */
crm_malloc0(cib_subsystem, sizeof(struct crm_subsystem_s));
crm_malloc0(te_subsystem, sizeof(struct crm_subsystem_s));
crm_malloc0(pe_subsystem, sizeof(struct crm_subsystem_s));
if(cib_subsystem != NULL) {
cib_subsystem->pid = -1;
cib_subsystem->path = CRM_DAEMON_DIR;
cib_subsystem->name = CRM_SYSTEM_CIB;
cib_subsystem->command = CRM_DAEMON_DIR"/"CRM_SYSTEM_CIB;
cib_subsystem->args = "-VVc";
cib_subsystem->flag_connected = R_CIB_CONNECTED;
cib_subsystem->flag_required = R_CIB_REQUIRED;
} else {
was_error = TRUE;
}
if(te_subsystem != NULL) {
te_subsystem->pid = -1;
te_subsystem->path = CRM_DAEMON_DIR;
te_subsystem->name = CRM_SYSTEM_TENGINE;
te_subsystem->command = CRM_DAEMON_DIR"/"CRM_SYSTEM_TENGINE;
te_subsystem->args = NULL;
te_subsystem->flag_connected = R_TE_CONNECTED;
te_subsystem->flag_required = R_TE_REQUIRED;
} else {
was_error = TRUE;
}
if(pe_subsystem != NULL) {
pe_subsystem->pid = -1;
pe_subsystem->path = CRM_DAEMON_DIR;
pe_subsystem->name = CRM_SYSTEM_PENGINE;
pe_subsystem->command = CRM_DAEMON_DIR"/"CRM_SYSTEM_PENGINE;
pe_subsystem->args = NULL;
pe_subsystem->flag_connected = R_PE_CONNECTED;
pe_subsystem->flag_required = R_PE_REQUIRED;
} else {
was_error = TRUE;
}
if(was_error) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
welcomed_nodes = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
integrated_nodes = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
finalized_nodes = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
confirmed_nodes = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
if(cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if(is_set(fsa_input_register, R_CCM_DATA) == FALSE) {
crm_info("Delaying start, no membership data (%.16llx)", R_CCM_DATA);
crmd_fsa_stall(NULL);
return;
} else if(is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) {
crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(NULL);
return;
} else if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(NULL);
return;
} else if(is_set(fsa_input_register, R_READ_CONFIG) == FALSE) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(NULL);
return;
} else if(is_set(fsa_input_register, R_PEER_DATA) == FALSE) {
HA_Message *msg = NULL;
/* try reading from HA */
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crm_debug_3("Looking for a HA message");
#if SUPPORT_HEARTBEAT
if(is_heartbeat_cluster()) {
msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0);
}
#endif
if(msg != NULL) {
crm_debug_3("There was a HA message");
crm_msg_del(msg);
}
crmd_fsa_stall(NULL);
return;
}
crm_debug("Init server comms");
if(ipc_server == NULL) {
ipc_server = crm_strdup(CRM_SYSTEM_CRMD);
}
if(init_server_ipc_comms(ipc_server, crmd_client_connect,
default_ipc_connection_destroy)) {
crm_err("Couldn't start IPC server");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
if(stonith_reconnect == NULL) {
int dummy;
stonith_reconnect = mainloop_add_trigger(
G_PRIORITY_LOW, te_connect_stonith, &dummy);
}
set_bit_inplace(fsa_input_register, R_ST_REQUIRED);
mainloop_set_trigger(stonith_reconnect);
crm_info("The local CRM is operational");
clear_bit_inplace(fsa_input_register, R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
set_bit_inplace(fsa_input_register, R_IN_RECOVERY);
crm_err("Action %s (%.16llx) not supported",
fsa_action2string(action), action);
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
pe_cluster_option crmd_opts[] = {
/* name, old name, value type, allowed values, default, validator, short description, long description */
{ "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact Mercurial changeset it was built from. Used for diagnostic purposes." },
{ "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." },
{ XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "60s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." },
{ XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time",
"Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified, e.g. 5min)", "15min", &check_timer,
"Polling interval for time-based changes to options, resource parameters and constraints.",
"The cluster is primarily event-driven, however the configuration can have elements that change based on time."
" To ensure these changes take effect, we can optionally poll the cluster's status for changes." },
{ XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." },
{ XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." },
{ "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." },
{ "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." },
{ XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster.", "Used to calculate quorum in openais-based clusters." },
};
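/* Illustrative only, not part of this patch: these options are read from
 * the CIB's crm_config section as nvpairs (see config_query_callback()
 * below). Assuming the usual attribute names, and with made-up nvpair
 * ids, a configuration might look like:
 *
 *   <crm_config>
 *     <cluster_property_set id="cib-bootstrap-options">
 *       <nvpair id="opt-recheck" name="cluster-recheck-interval" value="15min"/>
 *       <nvpair id="opt-deadtime" name="dc-deadtime" value="60s"/>
 *     </cluster_property_set>
 *   </crm_config>
 */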
void
crmd_metadata(void)
{
config_metadata("CRM Daemon", "1.0",
"CRM Daemon Options",
"This is a fake resource that details the options that can be configured for the CRM Daemon.",
crmd_opts, DIMOF(crmd_opts));
}
static void
verify_crmd_options(GHashTable *options)
{
verify_all_options(options, crmd_opts, DIMOF(crmd_opts));
}
static const char *
crmd_pref(GHashTable *options, const char *name)
{
return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name);
}
static void
config_query_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
ha_time_t *now = new_ha_date(TRUE);
if(rc != cib_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s",
cib_error2string(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if(rc == cib_bad_permissions
|| rc == cib_dtd_validation
|| rc == cib_bad_digest
|| rc == cib_bad_config) {
crm_err("The cluster is mis-configured - shutting down and staying down");
set_bit_inplace(fsa_input_register, R_STAYDOWN);
}
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = g_hash_table_new_full(
g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str);
unpack_instance_attributes(
output, output, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, now);
verify_crmd_options(config_hash);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
election_trigger->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_get_msec(value);
crm_info("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
election_timeout->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
recheck_timer->period_ms = crm_get_msec(value);
crm_info("Checking for expired actions every %dms", recheck_timer->period_ms);
value = crmd_pref(config_hash, "crmd-integration-timeout");
integration_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "crmd-finalization-timeout");
finalization_timer->period_ms = crm_get_msec(value);
#if SUPPORT_COROSYNC
if(is_classic_ais_cluster()) {
value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES);
crm_info("Sending expected-votes=%s to corosync", value);
send_ais_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais);
}
#endif
set_bit_inplace(fsa_input_register, R_READ_CONFIG);
crm_debug_3("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source);
g_hash_table_destroy(config_hash);
bail:
free_ha_date(now);
}
gboolean
crm_read_options(gpointer user_data)
{
int call_id = fsa_cib_conn->cmds->query(
fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local);
add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, config_query_callback);
crm_debug_2("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
mainloop_set_trigger(config_read);
}
void
crm_shutdown(int nsig)
{
if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) {
if(is_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating the shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
} else {
crm_info("Requesting shutdown");
set_bit_inplace(fsa_input_register, R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN,I_SHUTDOWN,NULL);
if(shutdown_escalation_timer->period_ms < 1) {
const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
int msec = crm_get_msec(value);
crm_info("Using default shutdown escalation: %dms", msec);
shutdown_escalation_timer->period_ms = msec;
}
/* can't rely on this... */
crm_notice("Forcing shutdown in: %dms", shutdown_escalation_timer->period_ms);
crm_timer_start(shutdown_escalation_timer);
}
} else {
crm_info("exit from shutdown");
exit(LSB_EXIT_OK);
}
}
static void
default_cib_update_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data)
{
if(rc != cib_ok) {
fsa_data_t *msg_data = NULL;
crm_err("CIB Update failed: %s", cib_error2string(rc));
crm_log_xml_warn(output, "update:failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
#if SUPPORT_HEARTBEAT
static void
populate_cib_nodes_ha(gboolean with_client_status)
{
int call_id = 0;
const char *ha_node = NULL;
xmlNode *cib_node_list = NULL;
if(fsa_cluster_conn == NULL) {
crm_debug("Not connected");
return;
}
/* Async get client status information in the cluster */
crm_info("Requesting the list of configured nodes");
fsa_cluster_conn->llc_ops->init_nodewalk(fsa_cluster_conn);
cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);
do {
const char *ha_node_type = NULL;
const char *ha_node_uuid = NULL;
xmlNode *cib_new_node = NULL;
ha_node = fsa_cluster_conn->llc_ops->nextnode(fsa_cluster_conn);
if(ha_node == NULL) {
continue;
}
ha_node_type = fsa_cluster_conn->llc_ops->node_type(
fsa_cluster_conn, ha_node);
if(safe_str_neq(NORMALNODE, ha_node_type)) {
crm_debug("Node %s: skipping '%s'",
ha_node, ha_node_type);
continue;
}
ha_node_uuid = get_uuid(ha_node);
if(ha_node_uuid == NULL) {
crm_warn("Node %s: no uuid found", ha_node);
continue;
}
crm_debug("Node: %s (uuid: %s)", ha_node, ha_node_uuid);
cib_new_node = create_xml_node(cib_node_list, XML_CIB_TAG_NODE);
crm_xml_add(cib_new_node, XML_ATTR_ID, ha_node_uuid);
crm_xml_add(cib_new_node, XML_ATTR_UNAME, ha_node);
crm_xml_add(cib_new_node, XML_ATTR_TYPE, ha_node_type);
} while(ha_node != NULL);
fsa_cluster_conn->llc_ops->end_nodewalk(fsa_cluster_conn);
/* Now update the CIB with the list of nodes */
fsa_cib_update(
XML_CIB_TAG_NODES, cib_node_list,
cib_scope_local|cib_quorum_override, call_id, NULL);
add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, default_cib_update_callback);
free_xml(cib_node_list);
crm_debug_2("Complete");
}
#endif
static void create_cib_node_definition(
gpointer key, gpointer value, gpointer user_data)
{
crm_node_t *node = value;
xmlNode *cib_nodes = user_data;
xmlNode *cib_new_node = NULL;
cib_new_node = create_xml_node(cib_nodes, XML_CIB_TAG_NODE);
crm_xml_add(cib_new_node, XML_ATTR_ID, node->uuid);
crm_xml_add(cib_new_node, XML_ATTR_UNAME, node->uname);
crm_xml_add(cib_new_node, XML_ATTR_TYPE, NORMALNODE);
}
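/* Illustrative only: assuming the usual attribute names, each entry in
 * crm_peer_cache becomes a child element like (id/uname are examples)
 *
 *   <node id="6e9c9a8e-..." uname="pcmk-1" type="normal"/>
 *
 * which populate_cib_nodes() below gathers under a single <nodes> list
 * before pushing it to the CIB.
 */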
void
populate_cib_nodes(gboolean with_client_status)
{
int call_id = 0;
xmlNode *cib_node_list = NULL;
#if SUPPORT_HEARTBEAT
if(is_heartbeat_cluster()) {
populate_cib_nodes_ha(with_client_status);
return;
}
#endif
cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);
g_hash_table_foreach(
crm_peer_cache, create_cib_node_definition, cib_node_list);
fsa_cib_update(
XML_CIB_TAG_NODES, cib_node_list, cib_scope_local|cib_quorum_override, call_id, NULL);
add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, default_cib_update_callback);
free_xml(cib_node_list);
crm_debug_2("Complete");
}
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
index 06cbc3f0d3..fb78a5376b 100644
--- a/crmd/te_utils.c
+++ b/crmd/te_utils.c
@@ -1,393 +1,390 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/msg.h>
#include <crm/common/xml.h>
#include <tengine.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
GCHSource *stonith_src = NULL;
crm_trigger_t *stonith_reconnect = NULL;
static gboolean
fail_incompletable_stonith(crm_graph_t *graph)
{
GListPtr lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if(graph == NULL) {
return FALSE;
}
for(lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GListPtr lpc2 = NULL;
synapse_t *synapse = (synapse_t*)lpc->data;
if (synapse->confirmed) {
continue;
}
for(lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
crm_action_t *action = (crm_action_t*)lpc2->data;
if(action->type != action_type_crm || action->confirmed) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if(task && safe_str_eq(task, CRM_OP_FENCE)) {
action->failed = TRUE;
last_action = action->xml;
update_graph(graph, action);
crm_notice("Failing action %d (%s): STONITHd terminated",
action->id, ID(action->xml));
}
}
}
if(last_action != NULL) {
crm_warn("STONITHd failure resulted in un-runnable actions");
abort_transition(INFINITY, tg_restart, "Stonith failure", last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t *st, const char *event, xmlNode *msg)
{
if(is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencing daemon disconnected");
}
/* cbchan will be garbage at this point, arrange for it to be reset */
stonith_api->state = stonith_disconnected;
if(AM_I_DC) {
fail_incompletable_stonith(transition_graph);
trigger_graph();
}
}
/*
<notify t="st_notify" subt="st_fence" st_op="st_fence" st_rc="0" >
<st_calldata >
<st-reply st_origin="stonith_construct_reply" t="stonith-ng" st_rc="0" st_op="st_query" st_callid="0" st_clientid="09fcbd8b-156a-4727-ab37-4f8b2071847c" st_remote_op="1230801d-dba5-42ac-8e2c-bf444fb2a401" st_callopt="0" st_delegate="pcmk-4" >
<st_calldata >
<st-reply st_origin="stonith_construct_async_reply" t="stonith-ng" st_op="reboot" st_remote_op="1230801d-dba5-42ac-8e2c-bf444fb2a401" st_callid="0" st_callopt="0" st_rc="0" src="pcmk-4" seq="2" state="0" st_target="pcmk-1" />
*/
#ifdef SUPPORT_CMAN
# include <libfenced.h>
# include "../lib/common/stack.h"
#endif
static void
tengine_stonith_notify(stonith_t *st, const char *event, xmlNode *msg)
{
int rc = -99;
const char *origin = NULL;
const char *target = NULL;
const char *executioner = NULL;
xmlNode *action = get_xpath_object("//st-data", msg, LOG_ERR);
if(action == NULL) {
crm_log_xml(LOG_ERR, "Notify data not found", msg);
return;
}
crm_log_xml(LOG_DEBUG, "stonith_notify", msg);
crm_element_value_int(msg, F_STONITH_RC, &rc);
origin = crm_element_value(action, F_STONITH_ORIGIN);
target = crm_element_value(action, F_STONITH_TARGET);
executioner = crm_element_value(action, F_STONITH_DELEGATE);
-
- if(rc == stonith_ok) {
+
+ if(rc == stonith_ok && crm_str_eq(target, fsa_our_uname, TRUE)) {
+ crm_err("We were alegedly just fenced by %s for %s!", executioner, origin);
+ register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
+
+ } else if(rc == stonith_ok) {
crm_info("Peer %s was terminated (%s) by %s for %s (ref=%s): %s",
target,
crm_element_value(action, F_STONITH_OPERATION),
executioner, origin,
crm_element_value(action, F_STONITH_REMOTE),
stonith_error2string(rc));
} else {
crm_err("Peer %s could not be terminated (%s) by %s for %s (ref=%s): %s",
target,
crm_element_value(action, F_STONITH_OPERATION),
executioner?executioner:"<anyone>", origin,
crm_element_value(action, F_STONITH_REMOTE),
stonith_error2string(rc));
}
#ifdef SUPPORT_CMAN
if(rc == stonith_ok && is_cman_cluster()) {
int rc = 0;
char *target_copy = crm_strdup(target);
crm_info("Notifing CMAN that '%s' is now fenced", target);
- rc = fenced_join();
- if(rc != 0) {
- crm_notice("Could not connect to fenced: rc=%d", rc);
-
- } else {
- rc = fenced_external(target_copy);
- if(rc != 0) {
- crm_err("Could not notify fenced: rc=%d", rc);
- }
- fenced_leave();
- }
- crm_free(target_copy);
+ rc = fenced_external(target_copy);
+ if(rc != 0) {
+ crm_err("Could not notify fenced that '%s' is down: rc=%d", target, rc);
+ }
+ crm_free(target_copy);
}
#endif
if(rc == stonith_ok && safe_str_eq(target, origin)) {
if(fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, target)) {
const char *uuid = get_uuid(target);
crm_notice("Target was our leader %s/%s (recorded leader: %s)",
target, uuid, fsa_our_dc?fsa_our_dc:"<unset>");
/* There's no need for everyone to update the cib.
* Have the node that performed the op do the update too.
* In the unlikely event that both die, the DC would be
* shot a second time which is not ideal but safe.
*/
if(safe_str_eq(executioner, fsa_our_uname)) {
send_stonith_update(NULL, target, uuid);
}
}
}
}
gboolean
te_connect_stonith(gpointer user_data)
{
int lpc = 0;
int rc = stonith_ok;
if(stonith_api == NULL) {
stonith_api = stonith_api_new();
}
if(stonith_api->state != stonith_disconnected) {
crm_debug_2("Still connected");
return TRUE;
}
for(lpc = 0; lpc < 30; lpc++) {
crm_info("Attempting connection to fencing daemon...");
sleep(1);
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL, NULL);
if(rc == stonith_ok) {
break;
}
if(user_data != NULL) {
crm_err("Sign-in failed: triggered a retry");
mainloop_set_trigger(stonith_reconnect);
return TRUE;
}
crm_err("Sign-in failed: pausing and trying again in 2s...");
sleep(1);
}
CRM_CHECK(rc == stonith_ok, return TRUE); /* If not, we failed 30 times... just get out */
stonith_api->cmds->register_notification(
stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy);
stonith_api->cmds->register_notification(
stonith_api, STONITH_OP_FENCE, tengine_stonith_notify);
crm_info("Connected");
return TRUE;
}
gboolean
stop_te_timer(crm_action_timer_t *timer)
{
const char *timer_desc = "action timer";
if(timer == NULL) {
return FALSE;
}
if(timer->reason == timeout_abort) {
timer_desc = "global timer";
crm_debug_2("Stopping %s", timer_desc);
}
if(timer->source_id != 0) {
crm_debug_2("Stopping %s", timer_desc);
g_source_remove(timer->source_id);
timer->source_id = 0;
} else {
crm_debug_2("%s was already stopped", timer_desc);
return FALSE;
}
return TRUE;
}
gboolean
te_graph_trigger(gpointer user_data)
{
enum transition_status graph_rc = -1;
if(transition_graph == NULL) {
crm_debug("Nothing to do");
return TRUE;
}
crm_debug_2("Invoking graph %d in state %s",
transition_graph->id, fsa_state2string(fsa_state));
switch(fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
return TRUE;
break;
default:
break;
}
if(transition_graph->complete == FALSE) {
graph_rc = run_graph(transition_graph);
print_graph(LOG_DEBUG_3, transition_graph);
if(graph_rc == transition_active) {
crm_debug_3("Transition not yet complete");
return TRUE;
} else if(graph_rc == transition_pending) {
crm_debug_3("Transition not yet complete - no actions fired");
return TRUE;
}
if(graph_rc != transition_complete) {
crm_err("Transition failed: %s", transition_status(graph_rc));
print_graph(LOG_WARNING, transition_graph);
}
}
crm_info("Transition %d is now complete", transition_graph->id);
transition_graph->complete = TRUE;
notify_crmd(transition_graph);
return TRUE;
}
void
trigger_graph_processing(const char *fn, int line)
{
mainloop_set_trigger(transition_trigger);
crm_debug_2("%s:%d - Triggered graph processing", fn, line);
}
void
abort_transition_graph(
int abort_priority, enum transition_action abort_action,
const char *abort_text, xmlNode *reason, const char *fn, int line)
{
int log_level = LOG_INFO;
const char *magic = NULL;
CRM_CHECK(transition_graph != NULL, return);
if(reason) {
int diff_add_updates = 0;
int diff_add_epoch = 0;
int diff_add_admin_epoch = 0;
int diff_del_updates = 0;
int diff_del_epoch = 0;
int diff_del_admin_epoch = 0;
xmlNode *diff = get_xpath_object("//"F_CIB_UPDATE_RESULT"//diff", reason, LOG_DEBUG_2);
magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
if(diff) {
cib_diff_version_details(
diff,
&diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
&diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
do_crm_log(log_level,
"%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s",
fn, line, transition_graph->complete, TYPE(reason), ID(reason), magic?magic:"NA",
diff_add_admin_epoch,diff_add_epoch,diff_add_updates, abort_text);
} else {
do_crm_log(log_level,
"%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s) : %s",
fn, line, transition_graph->complete, TYPE(reason), ID(reason), magic?magic:"NA", abort_text);
}
} else {
do_crm_log(log_level,
"%s:%d - Triggered transition abort (complete=%d) : %s",
fn, line, transition_graph->complete, abort_text);
}
switch(fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
do_crm_log(log_level,
"Abort suppressed: state=%s (complete=%d)",
fsa_state2string(fsa_state), transition_graph->complete);
return;
default:
break;
}
if(magic == NULL && reason != NULL) {
crm_log_xml(log_level+1, "Cause", reason);
}
/* Make sure any queued calculations are discarded ASAP */
crm_free(fsa_pe_ref);
fsa_pe_ref = NULL;
if(transition_graph->complete) {
register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
return;
}
update_abort_priority(
transition_graph, abort_priority, abort_action, abort_text);
mainloop_set_trigger(transition_trigger);
}
diff --git a/cts/CM_ais.py b/cts/CM_ais.py
index ead4555753..b22c5b91a5 100644
--- a/cts/CM_ais.py
+++ b/cts/CM_ais.py
@@ -1,353 +1,351 @@
'''CTS: Cluster Testing System: AIS dependent modules...
'''
__copyright__='''
Copyright (C) 2007 Andrew Beekhof <andrew@suse.de>
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import os, sys, warnings
from cts.CTSvars import *
from cts.CM_lha import crm_lha
from cts.CTS import Process
#######################################################################
#
# LinuxHA v2 dependent modules
#
#######################################################################
class crm_ais(crm_lha):
'''
The crm version 3 cluster manager class.
It implements the things we need to talk to and manipulate
crm clusters running on top of openais
'''
def __init__(self, Environment, randseed=None):
crm_lha.__init__(self, Environment, randseed=randseed)
self.update({
"Name" : "crm-ais",
- "UUIDQueryCmd" : "crmadmin -N --openais",
"EpocheCmd" : "crm_node -e --openais",
"QuorumCmd" : "crm_node -q --openais",
"ParitionCmd" : "crm_node -p --openais",
"Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new",
"Pat:ChildExit" : "Child process .* exited",
# Bad news Regexes. Should never occur.
"BadRegexes" : (
r"ERROR:",
r"CRIT:",
r"TRACE:",
r"Shutting down\.",
r"Forcing shutdown\.",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r"input=I_INTEGRATED cause=C_TIMER_POPPED",
r"input=I_FINALIZED cause=C_TIMER_POPPED",
r"input=I_ERROR",
r", exiting\.",
r"WARN.*Ignoring HA message.*vote.*not in our membership list",
r"pengine.*Attempting recovery of resource",
r"is taking more than 2x its timeout",
r"Confirm not received from",
r"Welcome reply not received from",
r"Attempting to schedule .* after a stop",
r"Resource .* was active at shutdown",
r"duplicate entries for call_id",
r"Search terminated:",
r":global_timer_callback",
r"Faking parameter digest creation",
r"Parameters to .* action changed:",
r"Parameters to .* changed",
r"Child process .* terminated with signal 11",
r"Executing .* fencing operation",
# Not inherently bad, but worth tracking
r"No need to invoke the TE",
r"ping.*: DEBUG: Updated connected = 0",
r"Digest mis-match:",
),
})
def errorstoignore(self):
# At some point implement a more elegant solution that
# also produces a report at the end
'''Return the list of errors which are known to be very noisy and should be ignored'''
if 1:
return [
"crm_mon:",
"crmadmin:",
"update_trace_data",
"async_notify: strange, client not found",
"ERROR: Message hist queue is filling up"
]
return []
def NodeUUID(self, node):
return node
def ais_components(self):
fullcomplist = {}
self.complist = []
self.common_ignore = [
"Pending action:",
"ERROR: crm_log_message_adv:",
"ERROR: MSG: No message to dump",
"pending LRM operations at shutdown",
"Lost connection to the CIB service",
"Connection to the CIB terminated...",
"Sending message to CIB service FAILED",
"apply_xml_diff: Diff application failed!",
"crmd: .*Action A_RECOVER .* not supported",
"pingd: .*ERROR: send_update: Could not send update",
"send_ipc_message: IPC Channel to .* is not connected",
"unconfirmed_actions: Waiting on .* unconfirmed actions",
"cib_native_msgready: Message pending on command channel",
"crmd:.*do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd",
"verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.",
"ERROR: attrd_connection_destroy: Lost connection to attrd",
"nfo: te_fence_node: Executing .* fencing operation",
]
fullcomplist["cib"] = Process(self, "cib", pats = [
"State transition .* S_RECOVERY",
"Respawning .* crmd",
"Respawning .* attrd",
"Lost connection to the CIB service",
"Connection to the CIB terminated...",
"Child process crmd exited .* rc=2",
"Child process attrd exited .* rc=1",
"crmd: .*Input I_TERMINATE from do_recover",
"crmd: .*I_ERROR.*crmd_cib_connection_destroy",
"crmd:.*do_exit: Could not recover from internal error",
], badnews_ignore = self.common_ignore)
fullcomplist["lrmd"] = Process(self, "lrmd", pats = [
"State transition .* S_RECOVERY",
"LRM Connection failed",
"Respawning .* crmd",
"crmd: .*I_ERROR.*lrm_connection_destroy",
"Child process crmd exited .* rc=2",
"crmd: .*Input I_TERMINATE from do_recover",
"crmd:.*do_exit: Could not recover from internal error",
], badnews_ignore = self.common_ignore)
fullcomplist["crmd"] = Process(self, "crmd", pats = [
# "WARN: determine_online_status: Node .* is unclean",
# "Scheduling Node .* for STONITH",
# "Executing .* fencing operation",
# Only if the node wasn't the DC: "State transition S_IDLE",
"State transition .* -> S_IDLE",
], badnews_ignore = self.common_ignore)
fullcomplist["attrd"] = Process(self, "attrd", pats = [
"crmd: .*ERROR: attrd_connection_destroy: Lost connection to attrd"
], badnews_ignore = self.common_ignore)
fullcomplist["pengine"] = Process(self, "pengine", dc_pats = [
"State transition .* S_RECOVERY",
"Respawning .* crmd",
"Child process crmd exited .* rc=2",
"crmd: .*pe_connection_destroy: Connection to the Policy Engine failed",
"crmd: .*I_ERROR.*save_cib_contents",
"crmd: .*Input I_TERMINATE from do_recover",
"crmd:.*do_exit: Could not recover from internal error",
], badnews_ignore = self.common_ignore)
stonith_ignore = [
"update_failcount: Updating failcount for child_DoFencing",
"ERROR: te_connect_stonith: Sign-in failed: triggered a retry",
]
stonith_ignore.extend(self.common_ignore)
fullcomplist["stonith-ng"] = Process(self, "stonith-ng", process="stonithd", pats = [
"CRIT: stonith_dispatch: Lost connection to the STONITH service",
"tengine_stonith_connection_destroy: Fencing daemon connection failed",
"Attempting connection to fencing daemon",
"te_connect_stonith: Connected",
], badnews_ignore = stonith_ignore)
vgrind = self.Env["valgrind-procs"].split()
for key in fullcomplist.keys():
if self.Env["valgrind-tests"]:
if key in vgrind:
# Processes running under valgrind can't be shot with "killall -9 processname"
self.log("Filtering %s from the component list as it is being profiled by valgrind" % key)
continue
if key == "stonith-ng" and not self.Env["DoFencing"]:
continue
self.complist.append(fullcomplist[key])
#self.complist = [ fullcomplist["pengine"] ]
return self.complist
class crm_whitetank(crm_ais):
'''
The crm version 3 cluster manager class.
It implements the things we need to talk to and manipulate
crm clusters running on top of openais
'''
def __init__(self, Environment, randseed=None):
crm_ais.__init__(self, Environment, randseed=randseed)
self.update({
"Name" : "crm-whitetank",
"StartCmd" : CTSvars.INITDIR+"/openais start",
"StopCmd" : CTSvars.INITDIR+"/openais stop",
"Pat:We_stopped" : "%s.*openais.*pcmk_shutdown: Shutdown complete",
"Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new",
"Pat:They_dead" : "openais:.*Node %s is now: lost",
"Pat:ChildKilled" : "%s openais.*Child process %s terminated with signal 9",
"Pat:ChildRespawn" : "%s openais.*Respawning failed child process: %s",
"Pat:ChildExit" : "Child process .* exited",
})
def Components(self):
self.ais_components()
aisexec_ignore = [
"ERROR: ais_dispatch: Receiving message .* failed",
"crmd: .*I_ERROR.*crmd_cib_connection_destroy",
"cib: .*ERROR: cib_ais_destroy: AIS connection terminated",
#"crmd: .*ERROR: crm_ais_destroy: AIS connection terminated",
"crmd:.*do_exit: Could not recover from internal error",
"crmd: .*I_TERMINATE.*do_recover",
"attrd: .*CRIT: attrd_ais_destroy: Lost connection to OpenAIS service!",
"stonithd: .*ERROR: AIS connection terminated",
]
aisexec_ignore.extend(self.common_ignore)
self.complist.append(Process(self, "aisexec", pats = [
"ERROR: ais_dispatch: AIS connection failed",
"crmd: .*ERROR: do_exit: Could not recover from internal error",
"pengine: .*Scheduling Node .* for STONITH",
"stonithd: .*requests a STONITH operation RESET on node",
"stonithd: .*Succeeded to STONITH the node",
], badnews_ignore = aisexec_ignore))
class crm_flatiron(crm_ais):
'''
The crm version 3 cluster manager class.
It implements the things we need to talk to and manipulate
crm clusters running on top of openais
'''
def __init__(self, Environment, randseed=None):
crm_ais.__init__(self, Environment, randseed=randseed)
self.update({
"Name" : "crm-flatiron",
"StartCmd" : "service corosync start",
"StopCmd" : "service corosync stop",
# The next pattern is too early
# "Pat:We_stopped" : "%s.*Service engine unloaded: Pacemaker Cluster Manager",
# The next pattern would be preferred, but it doesn't always come out
# "Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting with status",
"Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service",
"Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new",
"Pat:They_dead" : "corosync:.*Node %s is now: lost",
"Pat:ChildKilled" : "%s corosync.*Child process %s terminated with signal 9",
"Pat:ChildRespawn" : "%s corosync.*Respawning failed child process: %s",
})
def Components(self):
self.ais_components()
corosync_ignore = [
"ERROR: ais_dispatch: Receiving message .* failed",
"crmd: .*I_ERROR.*crmd_cib_connection_destroy",
"cib: .*ERROR: cib_ais_destroy: AIS connection terminated",
#"crmd: .*ERROR: crm_ais_destroy: AIS connection terminated",
"crmd:.*do_exit: Could not recover from internal error",
"crmd: .*I_TERMINATE.*do_recover",
"attrd: .*CRIT: attrd_ais_destroy: Lost connection to Corosync service!",
"stonithd: .*ERROR: AIS connection terminated",
]
# corosync_ignore.extend(self.common_ignore)
# self.complist.append(Process(self, "corosync", pats = [
# "ERROR: ais_dispatch: AIS connection failed",
# "crmd: .*ERROR: do_exit: Could not recover from internal error",
# "pengine: .*Scheduling Node .* for STONITH",
# "stonithd: .*requests a STONITH operation RESET on node",
# "stonithd: .*Succeeded to STONITH the node",
# ], badnews_ignore = corosync_ignore))
return self.complist
class crm_mcp(crm_flatiron):
'''
The crm version 3 cluster manager class.
It implements the things we need to talk to and manipulate
crm clusters running on top of openais
'''
def __init__(self, Environment, randseed=None):
crm_flatiron.__init__(self, Environment, randseed=randseed)
self.update({
"Name" : "crm-mcp",
"StartCmd" : "service corosync start; service pacemaker start",
"StopCmd" : "service pacemaker stop; service corosync stop",
"Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service",
"Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new",
"Pat:They_dead" : "crmd:.*Node %s: .* state=lost .new",
"Pat:ChildKilled" : "%s pacemakerd.*Child process %s terminated with signal 9",
"Pat:ChildRespawn" : "%s pacemakerd.*Respawning failed child process: %s",
})
class crm_cman(crm_flatiron):
'''
The crm version 3 cluster manager class.
It implements the things we need to talk to and manipulate
crm clusters running on top of openais
'''
def __init__(self, Environment, randseed=None):
crm_flatiron.__init__(self, Environment, randseed=randseed)
self.update({
"Name" : "crm-cman",
- "StartCmd" : "service corosync start; service pacemaker start",
- "StopCmd" : "service pacemaker stop; cman_tool leave",
+ "StartCmd" : "service cman start; service pacemaker start",
+ "StopCmd" : "service pacemaker stop; service cman stop;",
- "UUIDQueryCmd" : "crmadmin -N --cman",
"EpocheCmd" : "crm_node -e --cman",
"QuorumCmd" : "crm_node -q --cman",
"ParitionCmd" : "crm_node -p --cman",
"Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service",
"Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new",
"Pat:They_dead" : "crmd:.*Node %s: .* state=lost .new",
"Pat:ChildKilled" : "%s pacemakerd.*Child process %s terminated with signal 9",
"Pat:ChildRespawn" : "%s pacemakerd.*Respawning failed child process: %s",
})
diff --git a/cts/CTSlab.py b/cts/CTSlab.py
index b8f71ff972..5493bf9bc8 100755
--- a/cts/CTSlab.py
+++ b/cts/CTSlab.py
@@ -1,461 +1,468 @@
#!/usr/bin/python
'''CTS: Cluster Testing System: Lab environment module
'''
__copyright__='''
Copyright (C) 2001,2005 Alan Robertson <alanr@unix.sh>
Licensed under the GNU GPL.
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from UserDict import UserDict
import sys, types, string, signal, os, socket
pdir=os.path.dirname(sys.path[0])
sys.path.insert(0, pdir) # So that things work from the source directory
try:
from cts.CTSvars import *
from cts.CM_ais import *
from cts.CM_lha import crm_lha
from cts.CTSaudits import AuditList
from cts.CTStests import TestList
from cts.CTSscenarios import *
except ImportError:
sys.stderr.write("abort: couldn't find cts libraries in [%s]\n" %
' '.join(sys.path))
sys.stderr.write("(check your install and PYTHONPATH)\n")
sys.exit(-1)
cm = None
Tests = []
Chosen = []
scenario = None
# Not really used, the handler in
def sig_handler(signum, frame) :
if cm: cm.log("Interrupted by signal %d"%signum)
if scenario: scenario.summarize()
if signum == 15 :
if scenario: scenario.TearDown()
sys.exit(1)
class LabEnvironment(CtsLab):
def __init__(self):
CtsLab.__init__(self)
# Get a random seed for the random number generator.
- self["DoStonith"] = 1
self["DoStandby"] = 1
self["DoFencing"] = 1
self["XmitLoss"] = "0.0"
self["RecvLoss"] = "0.0"
self["IPBase"] = "127.0.0.10"
self["ConnectivityHost"] = socket.gethostname()
self["ClobberCIB"] = 0
self["CIBfilename"] = None
self["CIBResource"] = 0
self["DoBSC"] = 0
self["use_logd"] = 0
self["oprofile"] = []
self["warn-inactive"] = 0
self["ListTests"] = 0
self["benchmark"] = 0
self["Schema"] = "pacemaker-1.0"
self["Stack"] = "openais"
self["stonith-type"] = "external/ssh"
self["stonith-params"] = "hostlist=all,livedangerously=yes"
self["at-boot"] = 1 # Does the cluster software start automatically when the node boots
self["logger"] = ([StdErrLog(self)])
self["loop-minutes"] = 60
self["valgrind-prefix"] = None
self["valgrind-procs"] = "cib crmd attrd pengine stonith-ng"
self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp"""
#self["valgrind-opts"] = """--trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp"""
self["experimental-tests"] = 0
self["valgrind-tests"] = 0
self["unsafe-tests"] = 1
self["loop-tests"] = 1
self["scenario"] = "random"
def usage(arg, status=1):
print "Illegal argument " + arg
print "usage: " + sys.argv[0] +" [options] number-of-iterations"
print "\nCommon options: "
print "\t [--at-boot (1|0)], does the cluster software start at boot time"
print "\t [--nodes 'node list'], list of cluster nodes separated by whitespace"
print "\t [--limit-nodes max], only use the first 'max' cluster nodes supplied with --nodes"
print "\t [--stack (heartbeat|ais)], which cluster stack is installed"
print "\t [--logfile path], where should the test software look for logs from cluster nodes"
print "\t [--outputfile path], optional location for the test software to write logs to"
print "\t [--syslog-facility name], which syslog facility should the test software log to"
print "\t [--choose testcase-name], run only the named test"
print "\t [--list-tests], list the valid tests"
print "\t [--benchmark], add the timing information"
print "\t "
print "Options for release testing: "
print "\t [--clobber-cib | -c ] Erase any existing configuration"
print "\t [--populate-resources | -r] Generate a sample configuration"
print "\t [--test-ip-base ip] Offset for generated IP address resources"
print "\t "
print "Additional (less common) options: "
print "\t [--trunc (truncate logfile before starting)]"
print "\t [--xmit-loss lost-rate(0.0-1.0)]"
print "\t [--recv-loss lost-rate(0.0-1.0)]"
print "\t [--standby (1 | 0 | yes | no)]"
print "\t [--fencing (1 | 0 | yes | no)]"
print "\t [--stonith (1 | 0 | yes | no)]"
print "\t [--stonith-type type]"
print "\t [--stonith-args name=value]"
print "\t [--bsc]"
print "\t [--once], run all valid tests once"
print "\t [--no-loop-tests], dont run looping/time-based tests"
print "\t [--no-unsafe-tests], dont run tests that are unsafe for use with ocfs2/drbd"
print "\t [--valgrind-tests], include tests using valgrind"
print "\t [--experimental-tests], include experimental tests"
print "\t [--oprofile 'node list'], list of cluster nodes to run oprofile on]"
print "\t [--qarsh] Use the QARSH backdoor to access nodes instead of SSH"
print "\t [--seed random_seed]"
print "\t [--set option=value]"
sys.exit(status)
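# Illustrative invocation only (node names are examples): run 500
# iterations against three openais nodes, erasing any existing CIB first:
#   ./CTSlab.py --nodes "pcmk-1 pcmk-2 pcmk-3" --stack ais --clobber-cib 500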
#
# A little test code...
#
if __name__ == '__main__':
Environment = LabEnvironment()
NumIter = 0
Version = 1
LimitNodes = 0
TruncateLog = 0
ListTests = 0
HaveSeed = 0
node_list = ''
# Set the signal handler
signal.signal(15, sig_handler)
signal.signal(10, sig_handler)
# Process arguments...
skipthis=None
args=sys.argv[1:]
for i in range(0, len(args)):
if skipthis:
skipthis=None
continue
elif args[i] == "-l" or args[i] == "--limit-nodes":
skipthis=1
LimitNodes = int(args[i+1])
elif args[i] == "-r" or args[i] == "--populate-resources":
Environment["CIBResource"] = 1
elif args[i] == "-L" or args[i] == "--logfile":
skipthis=1
Environment["LogFileName"] = args[i+1]
elif args[i] == "--outputfile":
skipthis=1
Environment["OutputFile"] = args[i+1]
elif args[i] == "--test-ip-base":
skipthis=1
Environment["IPBase"] = args[i+1]
elif args[i] == "--oprofile":
skipthis=1
Environment["oprofile"] = args[i+1].split(' ')
elif args[i] == "--trunc":
Environment["TruncateLog"]=1
elif args[i] == "--list-tests" or args[i] == "--list" :
Environment["ListTests"]=1
elif args[i] == "--benchmark":
Environment["benchmark"]=1
elif args[i] == "--bsc":
Environment["DoBSC"] = 1
Environment["scenario"] = "basic-sanity"
elif args[i] == "--qarsh":
Environment.rsh.enable_qarsh()
elif args[i] == "--fencing":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
Environment["DoFencing"] = 1
elif args[i+1] == "0" or args[i+1] == "no":
Environment["DoFencing"] = 0
else:
usage(args[i+1])
elif args[i] == "--stonith":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
- Environment["DoStonith"]=1
+ Environment["DoFencing"]=1
elif args[i+1] == "0" or args[i+1] == "no":
- Environment["DoStonith"]=0
+ Environment["DoFencing"]=0
+ elif args[i+1] == "rhcs":
+ Environment["DoStonith"]=1
+ Environment["stonith-type"] = "fence_xvm"
+ Environment["stonith-params"] = "pcmk_arg_map=domain:uname"
+ elif args[i+1] == "lha":
+ Environment["DoStonith"]=1
+ Environment["stonith-type"] = "external/ssh"
+ Environment["stonith-params"] = "hostlist=all,livedangerously=yes"
else:
usage(args[i+1])
elif args[i] == "--stonith-type":
Environment["stonith-type"] = args[i+1]
skipthis=1
elif args[i] == "--stonith-args":
Environment["stonith-params"] = args[i+1]
skipthis=1
elif args[i] == "--standby":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
Environment["DoStandby"] = 1
elif args[i+1] == "0" or args[i+1] == "no":
Environment["DoStandby"] = 0
else:
usage(args[i+1])
elif args[i] == "--clobber-cib" or args[i] == "-c":
Environment["ClobberCIB"] = 1
elif args[i] == "--cib-filename":
skipthis=1
Environment["CIBfilename"] = args[i+1]
elif args[i] == "--xmit-loss":
try:
float(args[i+1])
except ValueError:
print ("--xmit-loss parameter should be float")
usage(args[i+1])
skipthis=1
Environment["XmitLoss"] = args[i+1]
elif args[i] == "--recv-loss":
try:
float(args[i+1])
except ValueError:
print ("--recv-loss parameter should be float")
usage(args[i+1])
skipthis=1
Environment["RecvLoss"] = args[i+1]
elif args[i] == "--choose":
skipthis=1
Chosen.append(args[i+1])
Environment["scenario"] = "sequence"
elif args[i] == "--nodes":
skipthis=1
node_list = args[i+1].split(' ')
elif args[i] == "--syslog-facility" or args[i] == "--facility":
skipthis=1
Environment["SyslogFacility"] = args[i+1]
elif args[i] == "--seed":
skipthis=1
Environment.SeedRandom(args[i+1])
elif args[i] == "--warn-inactive":
Environment["warn-inactive"] = 1
elif args[i] == "--schema":
skipthis=1
Environment["Schema"] = args[i+1]
elif args[i] == "--ais":
Environment["Stack"] = "openais"
elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
Environment["at-boot"] = 1
elif args[i+1] == "0" or args[i+1] == "no":
Environment["at-boot"] = 0
else:
usage(args[i+1])
elif args[i] == "--heartbeat" or args[i] == "--lha":
Environment["Stack"] = "heartbeat"
elif args[i] == "--hae":
Environment["Stack"] = "openais"
Environment["Schema"] = "hae"
elif args[i] == "--stack":
Environment["Stack"] = args[i+1]
skipthis=1
elif args[i] == "--once":
Environment["scenario"] = "all-once"
elif args[i] == "--valgrind-tests":
Environment["valgrind-tests"] = 1
elif args[i] == "--no-loop-tests":
Environment["loop-tests"] = 0
elif args[i] == "--loop-minutes":
skipthis=1
try:
Environment["loop-minutes"]=int(args[i+1])
except ValueError:
usage(args[i])
elif args[i] == "--no-unsafe-tests":
Environment["unsafe-tests"] = 0
elif args[i] == "--experimental-tests":
Environment["experimental-tests"] = 1
elif args[i] == "--set":
skipthis=1
(name, value) = args[i+1].split('=')
Environment[name] = value
else:
try:
NumIter=int(args[i])
except ValueError:
usage(args[i])
if Environment["DoBSC"]:
NumIter = 2
LimitNodes = 1
Chosen.append("AddResource")
Environment["ClobberCIB"] = 1
Environment["CIBResource"] = 0
Environment["logger"].append(FileLog(Environment, Environment["LogFileName"]))
elif Environment["OutputFile"]:
Environment["logger"].append(FileLog(Environment, Environment["OutputFile"]))
elif Environment["SyslogFacility"]:
Environment["logger"].append(SysLog(Environment))
if Environment["Stack"] == "heartbeat" or Environment["Stack"] == "lha":
Environment["Stack"] = "heartbeat"
Environment['CMclass'] = crm_lha
elif Environment["Stack"] == "openais" or Environment["Stack"] == "ais" or Environment["Stack"] == "whitetank":
Environment["Stack"] = "openais (whitetank)"
Environment['CMclass'] = crm_whitetank
Environment["use_logd"] = 0
elif Environment["Stack"] == "corosync" or Environment["Stack"] == "cs" or Environment["Stack"] == "flatiron":
Environment["Stack"] = "corosync (flatiron)"
Environment['CMclass'] = crm_flatiron
Environment["use_logd"] = 0
elif Environment["Stack"] == "cman":
Environment["Stack"] = "corosync (cman)"
Environment['CMclass'] = crm_cman
Environment["use_logd"] = 0
elif Environment["Stack"] == "mcp":
Environment["Stack"] = "corosync (mcp)"
Environment['CMclass'] = crm_mcp
Environment["use_logd"] = 0
else:
print "Unknown stack: "+Environment["Stack"]
sys.exit(1)
if len(node_list) < 1:
print "No nodes specified!"
sys.exit(1)
if LimitNodes > 0:
if len(node_list) > LimitNodes:
print("Limiting the number of nodes configured=%d (max=%d)"
%(len(node_list), LimitNodes))
while len(node_list) > LimitNodes:
node_list.pop(len(node_list)-1)
Environment["nodes"] = node_list
# Create the Cluster Manager object
cm = Environment['CMclass'](Environment)
    if TruncateLog:
        Environment.log("Truncating %s" % Environment["LogFileName"])
        lf = open(Environment["LogFileName"], "w")
        if lf != None:
            lf.truncate(0)
        lf.close()
Audits = AuditList(cm)
if Environment["ListTests"] == 1 :
Tests = TestList(cm, Audits)
Environment.log("Total %d tests"%len(Tests))
for test in Tests :
            Environment.log(str(test.name))
sys.exit(0)
if len(Chosen) == 0:
Tests = TestList(cm, Audits)
else:
for TestCase in Chosen:
match = None
for test in TestList(cm, Audits):
if test.name == TestCase:
match = test
if not match:
usage("--choose: No applicable/valid tests chosen")
else:
Tests.append(match)
# Scenario selection
if Environment["scenario"] == "basic-sanity":
scenario = RandomTests(cm, [ BasicSanityCheck(Environment) ], Audits, Tests)
elif Environment["scenario"] == "all-once":
NumIter = len(Tests)
scenario = AllOnce(
cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests)
elif Environment["scenario"] == "sequence":
scenario = Sequence(
cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests)
else:
scenario = RandomTests(
cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests)
Environment.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ")
Environment.log("Stack: %s" % Environment["Stack"])
Environment.log("Schema: %s" % Environment["Schema"])
Environment.log("Scenario: %s" % scenario.__doc__)
Environment.log("Random Seed: %s" % Environment["RandSeed"])
Environment.log("System log files: %s" % Environment["LogFileName"])
Environment.dump()
rc = Environment.run(scenario, NumIter)
sys.exit(rc)
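#
# Example invocations (a sketch -- the script and node names are
# illustrative, but every option used here is parsed above): a
# 500-iteration run on three nodes with RHCS-style fencing, then a
# single pass of all tests with a freshly clobbered CIB:
#
#   CTSlab.py --nodes "node1 node2 node3" --stack mcp --stonith rhcs 500
#   CTSlab.py --nodes "node1 node2" --clobber-cib --once
#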
diff --git a/cts/CTStests.py b/cts/CTStests.py
index 2287b0efd9..6661e37fd7 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -1,2223 +1,2223 @@
'''CTS: Cluster Testing System: Tests module
There are a few things we want to do here:
'''
__copyright__='''
Copyright (C) 2000, 2001 Alan Robertson <alanr@unix.sh>
Licensed under the GNU GPL.
Add ResourceRecover testcase Zhao Kai <zhaokai@cn.ibm.com>
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# SPECIAL NOTE:
#
# Tests may NOT implement any cluster-manager-specific code in them.
# EXTEND the ClusterManager object to provide the base capabilities
# the test needs if you need to do something that the current CM classes
# do not. Otherwise you screw up the whole point of the object structure
# in CTS.
#
# Thank you.
#
import time, os, re, types, string, tempfile, sys
from stat import *
from cts import CTS
from cts.CTSaudits import *
AllTestClasses = [ ]
class CTSTest:
'''
A Cluster test.
We implement the basic set of properties and behaviors for a generic
cluster test.
Cluster tests track their own statistics.
We keep each of the kinds of counts we track as separate {name,value}
pairs.
'''
def __init__(self, cm):
#self.name="the unnamed test"
self.Stats = {"calls":0
, "success":0
, "failure":0
, "skipped":0
, "auditfail":0}
# if not issubclass(cm.__class__, ClusterManager):
# raise ValueError("Must be a ClusterManager object")
self.CM = cm
self.Audits = []
self.timeout=120
self.passed = 1
self.is_loop = 0
self.is_unsafe = 0
self.is_experimental = 0
self.is_valgrind = 0
self.benchmark = 0 # which tests to benchmark
self.timer = {} # timers
def has_key(self, key):
return self.Stats.has_key(key)
def __setitem__(self, key, value):
self.Stats[key] = value
def __getitem__(self, key):
return self.Stats[key]
def log_mark(self, msg):
self.CM.debug("MARK: test %s %s %d" % (self.name,msg,time.time()))
return
def get_timer(self,key = "test"):
try: return self.timer[key]
        except KeyError: return 0
def set_timer(self,key = "test"):
self.timer[key] = time.time()
return self.timer[key]
def log_timer(self,key = "test"):
elapsed = 0
if key in self.timer:
elapsed = time.time() - self.timer[key]
s = key == "test" and self.name or "%s:%s" %(self.name,key)
self.CM.debug("%s runtime: %.2f" % (s, elapsed))
del self.timer[key]
return elapsed
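    # A usage sketch for the timer helpers above, mirroring how tests
    # further down (e.g. StonithdTest) time an individual phase
    # ("fence" is just an example key):
    #   self.set_timer("fence")    # start the clock for the "fence" phase
    #   ...trigger the action and wait for it to complete...
    #   self.log_timer("fence")    # logs "<name>:fence runtime: N.NN"
    #                              # and discards the timer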
def incr(self, name):
'''Increment (or initialize) the value associated with the given name'''
if not self.Stats.has_key(name):
self.Stats[name]=0
self.Stats[name] = self.Stats[name]+1
# Reset the test passed boolean
if name == "calls":
self.passed = 1
def failure(self, reason="none"):
'''Increment the failure count'''
self.passed = 0
self.incr("failure")
self.CM.log(("Test %s" % self.name).ljust(35) +" FAILED: %s" % reason)
return None
def success(self):
'''Increment the success count'''
self.incr("success")
return 1
def skipped(self):
'''Increment the skipped count'''
self.incr("skipped")
return 1
def __call__(self, node):
'''Perform the given test'''
raise ValueError("Abstract Class member (__call__)")
self.incr("calls")
return self.failure()
def audit(self):
passed = 1
if len(self.Audits) > 0:
for audit in self.Audits:
if not audit():
self.CM.log("Internal %s Audit %s FAILED." % (self.name, audit.name()))
self.incr("auditfail")
passed = 0
return passed
def setup(self, node):
'''Setup the given test'''
return self.success()
def teardown(self, node):
'''Tear down the given test'''
return self.success()
def create_watch(self, patterns, timeout, name=None):
if not name:
name = self.name
return CTS.LogWatcher(self.CM.Env, self.CM["LogFileName"], patterns, name, timeout)
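    # The watch protocol the tests below follow, as a minimal sketch
    # ("patterns" stands in for a list of regular expressions):
    #   watch = self.create_watch(patterns, self.CM["DeadTime"])
    #   watch.setwatch()           # remember the current end of the logs
    #   ...perform the disruptive action under test...
    #   watch.lookforall()         # block until every pattern matches,
    #                              # or the timeout expires
    #   if watch.unmatched:        # patterns that never appeared
    #       self.CM.log("Patterns not found: " + repr(watch.unmatched))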
def local_badnews(self, prefix, watch, local_ignore=[]):
errcount = 0
if not prefix:
prefix = "LocalBadNews:"
ignorelist = []
ignorelist.append(" CTS: ")
ignorelist.append(prefix)
ignorelist.extend(local_ignore)
while errcount < 100:
match=watch.look(0)
if match:
add_err = 1
for ignore in ignorelist:
if add_err == 1 and re.search(ignore, match):
add_err = 0
if add_err == 1:
self.CM.log(prefix + " " + match)
errcount=errcount+1
else:
break
else:
self.CM.log("Too many errors!")
return errcount
def is_applicable(self):
return self.is_applicable_common()
def is_applicable_common(self):
'''Return TRUE if we are applicable in the current test configuration'''
#raise ValueError("Abstract Class member (is_applicable)")
if self.is_loop and not self.CM.Env["loop-tests"]:
return 0
elif self.is_unsafe and not self.CM.Env["unsafe-tests"]:
return 0
elif self.is_valgrind and not self.CM.Env["valgrind-tests"]:
return 0
elif self.is_experimental and not self.CM.Env["experimental-tests"]:
return 0
elif self.CM.Env["benchmark"] and self.benchmark == 0:
return 0
return 1
def find_ocfs2_resources(self, node):
self.r_o2cb = None
self.r_ocfs2 = []
(rc, lines) = self.CM.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rtype == "o2cb" and r.parent != "NA":
self.CM.debug("Found o2cb: %s" % self.r_o2cb)
self.r_o2cb = r.parent
if re.search("^Constraint", line):
c = AuditConstraint(self.CM, line)
if c.type == "rsc_colocation" and c.target == self.r_o2cb:
self.r_ocfs2.append(c.rsc)
self.CM.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2))
return len(self.r_ocfs2)
def canrunnow(self, node):
'''Return TRUE if we can meaningfully run right now'''
return 1
def errorstoignore(self):
'''Return list of errors which are 'normal' and should be ignored'''
return []
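# A minimal sketch of a concrete test built on the CTSTest base class
# above. "ExampleNoop" is a hypothetical name used only for illustration;
# it is deliberately not appended to AllTestClasses, so it is never run.
class ExampleNoop(CTSTest):
    '''Do nothing on the given node and report success'''
    def __init__(self, cm):
        CTSTest.__init__(self, cm)
        self.name = "ExampleNoop"
    def __call__(self, node):
        self.incr("calls")      # also resets self.passed for this run
        return self.success()   # bumps the "success" counter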
###################################################################
class StopTest(CTSTest):
###################################################################
'''Stop (deactivate) the cluster manager on a node'''
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name="Stop"
def __call__(self, node):
'''Perform the 'stop' test. '''
self.incr("calls")
if self.CM.ShouldBeStatus[node] != "up":
return self.skipped()
patterns = []
# Technically we should always be able to notice ourselves stopping
patterns.append(self.CM["Pat:We_stopped"] % node)
#if self.CM.Env["use_logd"]:
# patterns.append(self.CM["Pat:Logd_stopped"] % node)
# Any active node needs to notice this one left
        # NOTE: This won't work if we have multiple partitions
for other in self.CM.Env["nodes"]:
if self.CM.ShouldBeStatus[other] == "up" and other != node:
patterns.append(self.CM["Pat:They_stopped"] %(other, self.CM.key_for_node(node)))
#self.debug("Checking %s will notice %s left"%(other, node))
watch = self.create_watch(patterns, self.CM["DeadTime"])
watch.setwatch()
if node == self.CM.OurNode:
self.incr("us")
else:
if self.CM.upcount() <= 1:
self.incr("all")
else:
self.incr("them")
self.CM.StopaCM(node)
watch_result = watch.lookforall()
failreason=None
UnmatchedList = "||"
if watch.unmatched:
(rc, output) = self.CM.rsh(node, "/bin/ps axf", None)
for line in output:
self.CM.debug(line)
for regex in watch.unmatched:
self.CM.log ("ERROR: Shutdown pattern not found: %s" % (regex))
                UnmatchedList += regex + "||"
failreason="Missing shutdown pattern"
self.CM.cluster_stable(self.CM["DeadTime"])
if not watch.unmatched or self.CM.upcount() == 0:
return self.success()
if len(watch.unmatched) >= self.CM.upcount():
return self.failure("no match against (%s)" % UnmatchedList)
        if failreason is None:
return self.success()
else:
return self.failure(failreason)
#
# We don't register StopTest because it's better when called by
# another test...
#
###################################################################
class StartTest(CTSTest):
###################################################################
'''Start (activate) the cluster manager on a node'''
def __init__(self, cm, debug=None):
CTSTest.__init__(self,cm)
self.name="start"
self.debug = debug
def __call__(self, node):
'''Perform the 'start' test. '''
self.incr("calls")
if self.CM.upcount() == 0:
self.incr("us")
else:
self.incr("them")
if self.CM.ShouldBeStatus[node] != "down":
return self.skipped()
elif self.CM.StartaCM(node):
return self.success()
else:
return self.failure("Startup %s on node %s failed"
%(self.CM["Name"], node))
#
# We don't register StartTest because it's better when called by
# another test...
#
###################################################################
class FlipTest(CTSTest):
###################################################################
    '''If it's running, stop it. If it's stopped, start it.
Overthrow the status quo...
'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="Flip"
self.start = StartTest(cm)
self.stop = StopTest(cm)
def __call__(self, node):
'''Perform the 'Flip' test. '''
self.incr("calls")
if self.CM.ShouldBeStatus[node] == "up":
self.incr("stopped")
ret = self.stop(node)
type="up->down"
# Give the cluster time to recognize it's gone...
time.sleep(self.CM["StableTime"])
elif self.CM.ShouldBeStatus[node] == "down":
self.incr("started")
ret = self.start(node)
type="down->up"
else:
return self.skipped()
self.incr(type)
if ret:
return self.success()
else:
return self.failure("%s failure" % type)
# Register FlipTest as a good test to run
AllTestClasses.append(FlipTest)
###################################################################
class RestartTest(CTSTest):
###################################################################
'''Stop and restart a node'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="Restart"
self.start = StartTest(cm)
self.stop = StopTest(cm)
self.benchmark = 1
def __call__(self, node):
'''Perform the 'restart' test. '''
self.incr("calls")
self.incr("node:" + node)
ret1 = 1
if self.CM.StataCM(node):
self.incr("WasStopped")
if not self.start(node):
return self.failure("start (setup) failure: "+node)
self.set_timer()
if not self.stop(node):
return self.failure("stop failure: "+node)
if not self.start(node):
return self.failure("start failure: "+node)
return self.success()
# Register RestartTest as a good test to run
AllTestClasses.append(RestartTest)
###################################################################
class StonithdTest(CTSTest):
###################################################################
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name="Stonithd"
self.startall = SimulStartLite(cm)
self.benchmark = 1
def __call__(self, node):
self.incr("calls")
if len(self.CM.Env["nodes"]) < 2:
return self.skipped()
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
is_dc = self.CM.is_node_dc(node)
watchpats = []
watchpats.append("stonith-ng:.*Operation .* for host '%s' with device .* returned: 0" % node)
watchpats.append("tengine_stonith_notify: Peer %s was terminated .*: OK" % node)
if is_dc:
watchpats.append("tengine_stonith_notify: Target was our leader .*%s" % node)
else:
watchpats.append("tengine_stonith_callback: .*: OK ")
if self.CM.Env["LogWatcher"] != "remote" or not is_dc:
# Often remote logs aren't flushed to disk by the time the node is shot,
            # so we won't be able to find them
# Remote syslog doesn't suffer this problem because they're already on
# the loghost when the node is shot
watchpats.append("Node %s will be fenced because termination was requested" % node)
watchpats.append("Scheduling Node %s for STONITH" % node)
watchpats.append("Executing .* fencing operation")
if self.CM.Env["at-boot"] == 0:
self.CM.debug("Expecting %s to stay down" % node)
self.CM.ShouldBeStatus[node]="down"
else:
self.CM.debug("Expecting %s to come up again %d" % (node, self.CM.Env["at-boot"]))
watchpats.append("%s crmd: .* S_STARTING -> S_PENDING" % node)
watchpats.append("%s crmd: .* S_PENDING -> S_NOT_DC" % node)
watch = self.create_watch(watchpats, 30 + self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"])
watch.setwatch()
self.CM.rsh(node, "crm_attribute --node %s --type status --attr-name terminate --attr-value true" % node)
self.set_timer("fence")
matched = watch.lookforall()
self.log_timer("fence")
self.set_timer("reform")
if watch.unmatched:
self.CM.log("Patterns not found: " + repr(watch.unmatched))
self.CM.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
self.CM.debug("Waiting STONITHd node to come back up")
self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600)
self.CM.debug("Waiting for the cluster to re-stabilize with all nodes")
is_stable = self.CM.cluster_stable(self.CM["StartTime"])
if not matched:
return self.failure("Didn't find all expected patterns")
elif not is_stable:
return self.failure("Cluster did not become stable")
self.log_timer("reform")
return self.success()
def errorstoignore(self):
return [ "Executing .* fencing operation" ]
def is_applicable(self):
if not self.is_applicable_common():
return 0
- if self.CM.Env.has_key("DoStonith"):
- return self.CM.Env["DoStonith"]
+ if self.CM.Env.has_key("DoFencing"):
+ return self.CM.Env["DoFencing"]
return 1
AllTestClasses.append(StonithdTest)
###################################################################
class StartOnebyOne(CTSTest):
###################################################################
'''Start all the nodes ~ one by one'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="StartOnebyOne"
self.stopall = SimulStopLite(cm)
self.start = StartTest(cm)
self.ns=CTS.NodeStatus(cm.Env)
def __call__(self, dummy):
'''Perform the 'StartOnebyOne' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Shut down all the nodes...
ret = self.stopall(None)
if not ret:
return self.failure("Test setup failed")
failed=[]
self.set_timer()
for node in self.CM.Env["nodes"]:
if not self.start(node):
failed.append(node)
if len(failed) > 0:
return self.failure("Some node failed to start: " + repr(failed))
return self.success()
# Register StartOnebyOne as a good test to run
AllTestClasses.append(StartOnebyOne)
###################################################################
class SimulStart(CTSTest):
###################################################################
'''Start all the nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="SimulStart"
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
def __call__(self, dummy):
'''Perform the 'SimulStart' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Shut down all the nodes...
ret = self.stopall(None)
if not ret:
return self.failure("Setup failed")
self.CM.clear_all_caches()
if not self.startall(None):
return self.failure("Startall failed")
return self.success()
# Register SimulStart as a good test to run
AllTestClasses.append(SimulStart)
###################################################################
class SimulStop(CTSTest):
###################################################################
'''Stop all the nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="SimulStop"
self.startall = SimulStartLite(cm)
self.stopall = SimulStopLite(cm)
def __call__(self, dummy):
'''Perform the 'SimulStop' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Start up all the nodes...
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
if not self.stopall(None):
return self.failure("Stopall failed")
return self.success()
# Register SimulStop as a good test to run
AllTestClasses.append(SimulStop)
###################################################################
class StopOnebyOne(CTSTest):
###################################################################
'''Stop all the nodes in order'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="StopOnebyOne"
self.startall = SimulStartLite(cm)
self.stop = StopTest(cm)
def __call__(self, dummy):
'''Perform the 'StopOnebyOne' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Start up all the nodes...
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
failed=[]
self.set_timer()
for node in self.CM.Env["nodes"]:
if not self.stop(node):
failed.append(node)
if len(failed) > 0:
return self.failure("Some node failed to stop: " + repr(failed))
self.CM.clear_all_caches()
return self.success()
# Register StopOnebyOne as a good test to run
AllTestClasses.append(StopOnebyOne)
###################################################################
class RestartOnebyOne(CTSTest):
###################################################################
'''Restart all the nodes in order'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="RestartOnebyOne"
self.startall = SimulStartLite(cm)
def __call__(self, dummy):
'''Perform the 'RestartOnebyOne' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Start up all the nodes...
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
did_fail=[]
self.set_timer()
self.restart = RestartTest(self.CM)
for node in self.CM.Env["nodes"]:
if not self.restart(node):
did_fail.append(node)
if did_fail:
return self.failure("Could not restart %d nodes: %s"
%(len(did_fail), repr(did_fail)))
return self.success()
# Register RestartOnebyOne as a good test to run
AllTestClasses.append(RestartOnebyOne)
###################################################################
class PartialStart(CTSTest):
###################################################################
'''Start a node - but tell it to stop before it finishes starting up'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="PartialStart"
self.startall = SimulStartLite(cm)
self.stopall = SimulStopLite(cm)
#self.is_unsafe = 1
def __call__(self, node):
'''Perform the 'PartialStart' test. '''
self.incr("calls")
ret = self.stopall(None)
if not ret:
return self.failure("Setup failed")
# FIXME! This should use the CM class to get the pattern
# then it would be applicable in general
watchpats = []
watchpats.append("Starting crmd")
watch = self.create_watch(watchpats, self.CM["DeadTime"]+10)
watch.setwatch()
self.CM.StartaCMnoBlock(node)
ret = watch.lookforall()
if not ret:
self.CM.log("Patterns not found: " + repr(watch.unmatched))
return self.failure("Setup of %s failed" % node)
ret = self.stopall(None)
if not ret:
return self.failure("%s did not stop in time" % node)
return self.success()
# Register PartialStart as a good test to run
AllTestClasses.append(PartialStart)
#######################################################################
class StandbyTest(CTSTest):
#######################################################################
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="Standby"
self.benchmark = 1
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
# make sure the node is active
# set the node to standby mode
    # check resources, no resource should be running on the node
    # set the node to active mode
    # check resources, resources should have been migrated back (SHOULD THEY?)
def __call__(self, node):
self.incr("calls")
ret=self.startall(None)
if not ret:
return self.failure("Start all nodes failed")
self.CM.debug("Make sure node %s is active" % node)
if self.CM.StandbyStatus(node) != "off":
if not self.CM.SetStandbyMode(node, "off"):
return self.failure("can't set node %s to active mode" % node)
self.CM.cluster_stable()
status = self.CM.StandbyStatus(node)
if status != "off":
return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status))
self.CM.debug("Getting resources running on node %s" % node)
rsc_on_node = self.CM.active_resources(node)
self.CM.debug("Setting node %s to standby mode" % node)
if not self.CM.SetStandbyMode(node, "on"):
return self.failure("can't set node %s to standby mode" % node)
self.set_timer("on")
time.sleep(1) # Allow time for the update to be applied and cause something
self.CM.cluster_stable()
status = self.CM.StandbyStatus(node)
if status != "on":
return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status))
self.log_timer("on")
self.CM.debug("Checking resources")
bad_run = self.CM.active_resources(node)
if len(bad_run) > 0:
rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run)))
self.CM.debug("Setting node %s to active mode" % node)
self.CM.SetStandbyMode(node, "off")
return rc
self.CM.debug("Setting node %s to active mode" % node)
if not self.CM.SetStandbyMode(node, "off"):
return self.failure("can't set node %s to active mode" % node)
self.set_timer("off")
self.CM.cluster_stable()
status = self.CM.StandbyStatus(node)
if status != "off":
return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status))
self.log_timer("off")
return self.success()
AllTestClasses.append(StandbyTest)
#######################################################################
class ValgrindTest(CTSTest):
#######################################################################
'''Check for memory leaks'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="Valgrind"
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
self.is_valgrind = 1
self.is_loop = 1
def setup(self, node):
self.incr("calls")
ret=self.stopall(None)
if not ret:
return self.failure("Stop all nodes failed")
# Enable valgrind
self.logPat = "/tmp/%s-*.valgrind" % self.name
self.CM.Env["valgrind-prefix"] = self.name
self.CM.rsh(node, "rm -f %s" % self.logPat, None)
ret=self.startall(None)
if not ret:
return self.failure("Start all nodes failed")
for node in self.CM.Env["nodes"]:
(rc, output) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None)
for line in output:
self.CM.debug(line)
return self.success()
def teardown(self, node):
# Disable valgrind
self.CM.Env["valgrind-prefix"] = None
# Return all nodes to normal
ret=self.stopall(None)
if not ret:
return self.failure("Stop all nodes failed")
return self.success()
def find_leaks(self):
# Check for leaks
leaked = []
self.stop = StopTest(self.CM)
for node in self.CM.Env["nodes"]:
(rc, ps_out) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None)
rc = self.stop(node)
if not rc:
self.failure("Couldn't shut down %s" % node)
rc = self.CM.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e ERROR.*SUMMARY:.*[1-9].*errors %s" % self.logPat, 0)
if rc != 1:
leaked.append(node)
self.failure("Valgrind errors detected on %s" % node)
for line in ps_out:
self.CM.log(line)
(rc, output) = self.CM.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logPat, None)
for line in output:
self.CM.log(line)
(rc, output) = self.CM.rsh(node, "cat %s" % self.logPat, None)
for line in output:
self.CM.debug(line)
self.CM.rsh(node, "rm -f %s" % self.logPat, None)
return leaked
def __call__(self, node):
leaked = self.find_leaks()
if len(leaked) > 0:
return self.failure("Nodes %s leaked" % repr(leaked))
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [ """cib:.*readCibXmlFile:""", """HA_VALGRIND_ENABLED""" ]
#######################################################################
class StandbyLoopTest(ValgrindTest):
#######################################################################
'''Check for memory leaks by putting a node in and out of standby for an hour'''
def __init__(self, cm):
ValgrindTest.__init__(self,cm)
self.name="StandbyLoop"
def __call__(self, node):
lpc = 0
delay = 2
failed = 0
done=time.time() + self.CM.Env["loop-minutes"]*60
while time.time() <= done and not failed:
lpc = lpc + 1
time.sleep(delay)
if not self.CM.SetStandbyMode(node, "on"):
self.failure("can't set node %s to standby mode" % node)
failed = lpc
time.sleep(delay)
if not self.CM.SetStandbyMode(node, "off"):
self.failure("can't set node %s to active mode" % node)
failed = lpc
leaked = self.find_leaks()
if failed:
return self.failure("Iteration %d failed" % failed)
elif len(leaked) > 0:
return self.failure("Nodes %s leaked" % repr(leaked))
return self.success()
AllTestClasses.append(StandbyLoopTest)
##############################################################################
class BandwidthTest(CTSTest):
##############################################################################
# Tests should not be cluster-manager-specific
# If you need to find out cluster manager configuration to do this, then
# it should be added to the generic cluster manager API.
'''Test the bandwidth which heartbeat uses'''
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name = "Bandwidth"
self.start = StartTest(cm)
self.__setitem__("min",0)
self.__setitem__("max",0)
self.__setitem__("totalbandwidth",0)
self.tempfile = tempfile.mktemp(".cts")
self.startall = SimulStartLite(cm)
def __call__(self, node):
'''Perform the Bandwidth test'''
self.incr("calls")
if self.CM.upcount()<1:
return self.skipped()
Path = self.CM.InternalCommConfig()
if "ip" not in Path["mediatype"]:
return self.skipped()
port = Path["port"][0]
port = int(port)
ret = self.startall(None)
if not ret:
return self.failure("Test setup failed")
time.sleep(5) # We get extra messages right after startup.
fstmpfile = "/var/run/band_estimate"
dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \
% (port, fstmpfile)
rc = self.CM.rsh(node, dumpcmd)
if rc == 0:
farfile = "root@%s:%s" % (node, fstmpfile)
self.CM.rsh.cp(farfile, self.tempfile)
Bandwidth = self.countbandwidth(self.tempfile)
if not Bandwidth:
self.CM.log("Could not compute bandwidth.")
return self.success()
intband = int(Bandwidth + 0.5)
self.CM.log("...bandwidth: %d bits/sec" % intband)
self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth
if self.Stats["min"] == 0:
self.Stats["min"] = Bandwidth
if Bandwidth > self.Stats["max"]:
self.Stats["max"] = Bandwidth
if Bandwidth < self.Stats["min"]:
self.Stats["min"] = Bandwidth
self.CM.rsh(node, "rm -f %s" % fstmpfile)
os.unlink(self.tempfile)
return self.success()
else:
return self.failure("no response from tcpdump command [%d]!" % rc)
def countbandwidth(self, file):
fp = open(file, "r")
fp.seek(0)
count = 0
sum = 0
while 1:
line = fp.readline()
if not line:
return None
if re.search("udp",line) or re.search("UDP,", line):
count=count+1
linesplit = string.split(line," ")
for j in range(len(linesplit)-1):
if linesplit[j]=="udp": break
if linesplit[j]=="length:": break
try:
sum = sum + int(linesplit[j+1])
except ValueError:
self.CM.log("Invalid tcpdump line: %s" % line)
return None
T1 = linesplit[0]
timesplit = string.split(T1,":")
time2split = string.split(timesplit[2],".")
time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001
break
while count < 100:
line = fp.readline()
if not line:
return None
if re.search("udp",line) or re.search("UDP,", line):
count = count+1
linessplit = string.split(line," ")
for j in range(len(linessplit)-1):
if linessplit[j] =="udp": break
                    if linessplit[j]=="length:": break
try:
sum=int(linessplit[j+1])+sum
except ValueError:
self.CM.log("Invalid tcpdump line: %s" % line)
return None
T2 = linessplit[0]
timesplit = string.split(T2,":")
time2split = string.split(timesplit[2],".")
time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001
time = time2-time1
if (time <= 0):
return 0
return (sum*8)/time
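    # Worked example with hypothetical numbers: if the captured UDP packets
    # carry 12500 bytes of payload in total and the first and last packet
    # timestamps are 2.0 seconds apart, the estimate above is
    # (12500 * 8) / 2.0 = 50000 bits/sec.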
def is_applicable(self):
'''BandwidthTest never applicable'''
return 0
AllTestClasses.append(BandwidthTest)
###################################################################
class ResourceRecover(CTSTest):
###################################################################
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="ResourceRecover"
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.max=30
self.rid=None
#self.is_unsafe = 1
self.benchmark = 1
# these are the values used for the new LRM API call
self.action = "asyncmon"
self.interval = 0
def __call__(self, node):
'''Perform the 'ResourceRecover' test. '''
self.incr("calls")
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
resourcelist = self.CM.active_resources(node)
        # if there are no active resources, return directly
if len(resourcelist)==0:
self.CM.log("No active resources on %s" % node)
return self.skipped()
self.rid = self.CM.Env.RandomGen.choice(resourcelist)
rsc = None
(rc, lines) = self.CM.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
tmp = AuditResource(self.CM, line)
if tmp.id == self.rid:
rsc = tmp
# Handle anonymous clones that get renamed
self.rid = rsc.clone_id
break
if not rsc:
return self.failure("Could not find %s in the resource list" % self.rid)
self.CM.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id))
pats = []
pats.append("Updating failcount for %s on .* after .* %s"
% (self.rid, self.action))
if rsc.managed():
pats.append("crmd:.* Performing .* op=%s_stop_0" % self.rid)
if rsc.unique():
pats.append("crmd:.* Performing .* op=%s_start_0" % self.rid)
pats.append("crmd:.* LRM operation %s_start_0.*confirmed.*ok" % self.rid)
else:
# Anonymous clones may get restarted with a different clone number
pats.append("crmd:.* Performing .* op=.*_start_0")
pats.append("crmd:.* LRM operation .*_start_0.*confirmed.*ok")
watch = self.create_watch(pats, 60)
watch.setwatch()
self.CM.rsh(node, "crm_resource -F -r %s -H %s &>/dev/null" % (self.rid, node))
self.set_timer("recover")
watch.lookforall()
self.log_timer("recover")
self.CM.cluster_stable()
recovered=self.CM.ResourceLocation(self.rid)
if watch.unmatched:
return self.failure("Patterns not found: %s" % repr(watch.unmatched))
elif rsc.unique() and len(recovered) > 1:
return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered)))
elif len(recovered) > 0:
self.CM.debug("%s is running on: %s" %(self.rid, repr(recovered)))
elif rsc.managed():
return self.failure("%s was not recovered and is inactive" % self.rid)
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [ """Updating failcount for %s""" % self.rid,
"""Unknown operation: fail""",
"""ERROR: sending stonithRA op to stonithd failed.""",
"""ERROR: process_lrm_event: LRM operation %s_%s_%d""" % (self.rid, self.action, self.interval),
"""ERROR: process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval),
]
AllTestClasses.append(ResourceRecover)
###################################################################
class ComponentFail(CTSTest):
###################################################################
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="ComponentFail"
self.startall = SimulStartLite(cm)
self.complist = cm.Components()
self.patterns = []
self.okerrpatterns = []
self.is_unsafe = 1
def __call__(self, node):
'''Perform the 'ComponentFail' test. '''
self.incr("calls")
self.patterns = []
self.okerrpatterns = []
# start all nodes
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
if not self.CM.cluster_stable(self.CM["StableTime"]):
return self.failure("Setup failed - unstable")
node_is_dc = self.CM.is_node_dc(node, None)
# select a component to kill
chosen = self.CM.Env.RandomGen.choice(self.complist)
while chosen.dc_only == 1 and node_is_dc == 0:
chosen = self.CM.Env.RandomGen.choice(self.complist)
self.CM.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot))
self.incr(chosen.name)
if chosen.name != "aisexec":
if self.CM["Name"] != "crm-lha" or chosen.name != "pengine":
self.patterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name))
self.patterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name))
self.patterns.extend(chosen.pats)
if node_is_dc:
self.patterns.extend(chosen.dc_pats)
# In an ideal world, this next stuff should be in the "chosen" object as a member function
if self.CM["Name"] == "crm-lha" and chosen.triggersreboot:
# Make sure the node goes down and then comes back up if it should reboot...
for other in self.CM.Env["nodes"]:
if other != node:
self.patterns.append(self.CM["Pat:They_stopped"] %(other, self.CM.key_for_node(node)))
self.patterns.append(self.CM["Pat:Slave_started"] % node)
self.patterns.append(self.CM["Pat:Local_started"] % node)
if chosen.dc_only:
# Sometimes these will be in the log, and sometimes they won't...
self.okerrpatterns.append("%s crmd:.*Process %s:.* exited" %(node, chosen.name))
self.okerrpatterns.append("%s crmd:.*I_ERROR.*crmdManagedChildDied" %node)
self.okerrpatterns.append("%s crmd:.*The %s subsystem terminated unexpectedly" %(node, chosen.name))
self.okerrpatterns.append("ERROR: Client .* exited with return code")
else:
# Sometimes this won't be in the log...
self.okerrpatterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name))
self.okerrpatterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name))
self.okerrpatterns.append(self.CM["Pat:ChildExit"])
        # supply a copy so self.patterns doesn't end up empty
tmpPats = []
tmpPats.extend(self.patterns)
self.patterns.extend(chosen.badnews_ignore)
# Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status
stonithPats = []
stonithPats.append("stonith-ng:.*Operation .* for host '%s' with device .* returned: 0" % node)
stonith = self.create_watch(stonithPats, 0)
stonith.setwatch()
# set the watch for stable
watch = self.create_watch(
tmpPats, self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"])
watch.setwatch()
# kill the component
chosen.kill(node)
# check to see Heartbeat noticed
matched = watch.lookforall(allow_multiple_matches=1)
if watch.unmatched:
self.CM.log("Patterns not found: " + repr(watch.unmatched))
if self.CM.Env["at-boot"] == 0:
self.CM.debug("Checking if %s was shot" % node)
shot = stonith.look(60)
if shot:
self.CM.debug("Found: "+ repr(shot))
self.CM.ShouldBeStatus[node]="down"
self.CM.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
self.CM.debug("Waiting for any STONITHd node to come back up")
self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600)
self.CM.debug("Waiting for the cluster to re-stabilize with all nodes")
is_stable = self.CM.cluster_stable(self.CM["StartTime"])
if not matched:
return self.failure("Didn't find all expected patterns")
elif not is_stable:
return self.failure("Cluster did not become stable")
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
# Note that okerrpatterns refers to the last time we ran this test
# The good news is that this works fine for us...
self.okerrpatterns.extend(self.patterns)
return self.okerrpatterns
AllTestClasses.append(ComponentFail)
####################################################################
class SplitBrainTest(CTSTest):
####################################################################
    '''Test split-brain: when the path between the two nodes breaks,
    check whether both nodes take over the resource'''
def __init__(self,cm):
CTSTest.__init__(self,cm)
self.name = "SplitBrain"
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.is_experimental = 1
def isolate_partition(self, partition):
other_nodes = []
other_nodes.extend(self.CM.Env["nodes"])
for node in partition:
try:
other_nodes.remove(node)
except ValueError:
self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"]) + " from " +repr(partition))
if len(other_nodes) == 0:
return 1
self.CM.debug("Creating partition: " + repr(partition))
self.CM.debug("Everyone else: " + repr(other_nodes))
for node in partition:
if not self.CM.isolate_node(node, other_nodes):
self.CM.log("Could not isolate %s" % node)
return 0
return 1
def heal_partition(self, partition):
other_nodes = []
other_nodes.extend(self.CM.Env["nodes"])
for node in partition:
try:
other_nodes.remove(node)
except ValueError:
self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"]))
if len(other_nodes) == 0:
return 1
self.CM.debug("Healing partition: " + repr(partition))
self.CM.debug("Everyone else: " + repr(other_nodes))
for node in partition:
self.CM.unisolate_node(node, other_nodes)
def __call__(self, node):
'''Perform split-brain test'''
self.incr("calls")
self.passed = 1
partitions = {}
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
while 1:
# Retry until we get multiple partitions
partitions = {}
p_max = len(self.CM.Env["nodes"])
for node in self.CM.Env["nodes"]:
p = self.CM.Env.RandomGen.randint(1, p_max)
if not partitions.has_key(p):
partitions[p]= []
partitions[p].append(node)
p_max = len(partitions.keys())
if p_max > 1:
break
# else, try again
self.CM.debug("Created %d partitions" % p_max)
for key in partitions.keys():
self.CM.debug("Partition["+str(key)+"]:\t"+repr(partitions[key]))
# Disabling STONITH to reduce test complexity for now
self.CM.rsh(node, "crm_attribute -n stonith-enabled -v false")
for key in partitions.keys():
self.isolate_partition(partitions[key])
count = 30
while count > 0:
            if len(self.CM.find_partitions()) != p_max:
                time.sleep(10)
                count -= 1
else:
break
else:
self.failure("Expected partitions were not created")
# Target number of partitions formed - wait for stability
if not self.CM.cluster_stable():
self.failure("Partitioned cluster not stable")
# Now audit the cluster state
self.CM.partitions_expected = p_max
if not self.audit():
self.failure("Audits failed")
self.CM.partitions_expected = 1
# And heal them again
for key in partitions.keys():
self.heal_partition(partitions[key])
# Wait for a single partition to form
count = 30
while count > 0:
if len(self.CM.find_partitions()) != 1:
time.sleep(10)
count -= 1
else:
break
else:
self.failure("Cluster did not reform")
# Wait for it to have the right number of members
count = 30
while count > 0:
members = []
partitions = self.CM.find_partitions()
if len(partitions) > 0:
members = partitions[0].split()
if len(members) != len(self.CM.Env["nodes"]):
time.sleep(10)
count -= 1
else:
break
else:
self.failure("Cluster did not completely reform")
        # Wait up to 20 minutes - the delay is preferable to
        # trying to continue in a messed-up state
if not self.CM.cluster_stable(1200):
self.failure("Reformed cluster not stable")
answer = raw_input('Continue? [nY]')
if answer and answer == "n":
raise ValueError("Reformed cluster not stable")
# Turn fencing back on
if self.CM.Env["DoFencing"]:
self.CM.rsh(node, "crm_attribute -D -n stonith-enabled")
self.CM.cluster_stable()
if self.passed:
return self.success()
return self.failure("See previous errors")
def errorstoignore(self):
'''Return list of errors which are 'normal' and should be ignored'''
return [
"Another DC detected:",
"ERROR: attrd_cib_callback: .*Application of an update diff failed",
"crmd_ha_msg_callback:.*not in our membership list",
"CRIT:.*node.*returning after partition",
]
def is_applicable(self):
if not self.is_applicable_common():
return 0
return len(self.CM.Env["nodes"]) > 2
AllTestClasses.append(SplitBrainTest)
####################################################################
class Reattach(CTSTest):
####################################################################
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="Reattach"
self.startall = SimulStartLite(cm)
self.restart1 = RestartTest(cm)
self.stopall = SimulStopLite(cm)
self.is_unsafe = 0 # Handled by canrunnow()
def setup(self, node):
attempt=0
if not self.startall(None):
return None
# Make sure we are really _really_ stable and that all
# resources, including those that depend on transient node
# attributes, are started
while not self.CM.cluster_stable(double_check=True):
if attempt < 5:
attempt += 1
self.CM.debug("Not stable yet, re-testing")
else:
self.CM.log("Cluster is not stable")
return None
return 1
def teardown(self, node):
# Make sure 'node' is up
start = StartTest(self.CM)
start(node)
is_managed = self.CM.rsh(node, "crm_attribute -GQ -t crm_config -n is-managed-default -d true", 1)
is_managed = is_managed[:-1] # Strip off the newline
if is_managed != "true":
self.CM.log("Attempting to re-enable resource management on %s (%s)" % (node, is_managed))
managed = self.create_watch(["is-managed-default"], 60)
managed.setwatch()
self.CM.rsh(node, "crm_attribute -D -n is-managed-default")
if not managed.lookforall():
self.CM.log("Patterns not found: " + repr(managed.unmatched))
self.CM.log("Could not re-enable resource management")
return 0
return 1
def canrunnow(self, node):
'''Return TRUE if we can meaningfully run right now'''
if self.find_ocfs2_resources(node):
self.CM.log("Detach/Reattach scenarios are not possible with OCFS2 services present")
return 0
return 1
def __call__(self, node):
self.incr("calls")
pats = []
managed = self.create_watch(["is-managed-default"], 60)
managed.setwatch()
self.CM.debug("Disable resource management")
self.CM.rsh(node, "crm_attribute -n is-managed-default -v false")
if not managed.lookforall():
self.CM.log("Patterns not found: " + repr(managed.unmatched))
return self.failure("Resource management not disabled")
pats = []
pats.append("crmd:.*Performing.*_stop_0")
pats.append("crmd:.*Performing.*_start_0")
pats.append("crmd:.*Performing.*_promote_0")
pats.append("crmd:.*Performing.*_demote_0")
pats.append("crmd:.*Performing.*_migrate_.*_0")
watch = self.create_watch(pats, 60, "ShutdownActivity")
watch.setwatch()
self.CM.debug("Shutting down the cluster")
ret = self.stopall(None)
if not ret:
self.CM.debug("Re-enable resource management")
self.CM.rsh(node, "crm_attribute -D -n is-managed-default")
return self.failure("Couldn't shut down the cluster")
self.CM.debug("Bringing the cluster back up")
ret = self.startall(None)
time.sleep(5) # allow ping to update the CIB
if not ret:
self.CM.debug("Re-enable resource management")
self.CM.rsh(node, "crm_attribute -D -n is-managed-default")
return self.failure("Couldn't restart the cluster")
if self.local_badnews("ResourceActivity:", watch):
self.CM.debug("Re-enable resource management")
self.CM.rsh(node, "crm_attribute -D -n is-managed-default")
return self.failure("Resources stopped or started during cluster restart")
watch = self.create_watch(pats, 60, "StartupActivity")
watch.setwatch()
managed = self.create_watch(["is-managed-default"], 60)
managed.setwatch()
self.CM.debug("Re-enable resource management")
self.CM.rsh(node, "crm_attribute -D -n is-managed-default")
if not managed.lookforall():
self.CM.log("Patterns not found: " + repr(managed.unmatched))
return self.failure("Resource management not enabled")
self.CM.cluster_stable()
# Ignore actions for STONITH resources
ignore = []
(rc, lines) = self.CM.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rclass == "stonith":
self.CM.debug("Ignoring: crmd:.*Performing.*op=%s_.*_0" % r.id)
ignore.append("crmd:.*Performing.*op=%s_.*_0" % r.id)
if self.local_badnews("ResourceActivity:", watch, ignore):
return self.failure("Resources stopped or started after resource management was re-enabled")
        return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
"You may ignore this error if it is unmanaged.",
"pingd: .*ERROR: send_ipc_message:",
"pingd: .*ERROR: send_update:",
"lrmd: .*ERROR: notify_client:",
]
def is_applicable(self):
if self.CM["Name"] == "crm-lha":
return None
return 1
AllTestClasses.append(Reattach)
####################################################################
class SpecialTest1(CTSTest):
####################################################################
'''Set up a custom test to cause quorum failure issues for Andrew'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="SpecialTest1"
self.startall = SimulStartLite(cm)
self.restart1 = RestartTest(cm)
self.stopall = SimulStopLite(cm)
def __call__(self, node):
'''Perform the 'SpecialTest1' test for Andrew. '''
self.incr("calls")
# Shut down all the nodes...
ret = self.stopall(None)
if not ret:
return self.failure("Could not stop all nodes")
# Start the selected node
ret = self.restart1(node)
if not ret:
return self.failure("Could not start "+node)
# Start all remaining nodes
ret = self.startall(None)
if not ret:
return self.failure("Could not start the remaining nodes")
return self.success()
AllTestClasses.append(SpecialTest1)
####################################################################
class HAETest(CTSTest):
####################################################################
    '''Base class for tests that exercise the HAE (DLM/O2CB/OCFS2) resource stack'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="HAETest"
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
self.is_loop = 1
def setup(self, node):
# Start all remaining nodes
ret = self.startall(None)
if not ret:
return self.failure("Couldn't start all nodes")
return self.success()
def teardown(self, node):
# Stop everything
ret = self.stopall(None)
if not ret:
return self.failure("Couldn't stop all nodes")
return self.success()
def wait_on_state(self, node, resource, expected_clones, attempts=240):
while attempts > 0:
active=0
(rc, lines) = self.CM.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None)
# Hack until crm_resource does the right thing
if rc == 0 and lines:
active = len(lines)
if len(lines) == expected_clones:
return 1
elif rc == 1:
self.CM.debug("Resource %s is still inactive" % resource)
elif rc == 234:
self.CM.log("Unknown resource %s" % resource)
return 0
elif rc == 246:
self.CM.log("Cluster is inactive")
return 0
elif rc != 0:
self.CM.log("Call to crm_resource failed, rc=%d" % rc)
return 0
else:
self.CM.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones))
attempts -= 1
time.sleep(1)
return 0
def find_dlm(self, node):
self.r_dlm = None
(rc, lines) = self.CM.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rtype == "controld" and r.parent != "NA":
self.CM.debug("Found dlm: %s" % self.r_dlm)
self.r_dlm = r.parent
return 1
return 0
def find_hae_resources(self, node):
self.r_dlm = None
self.r_o2cb = None
self.r_ocfs2 = []
if self.find_dlm(node):
self.find_ocfs2_resources(node)
def is_applicable(self):
if not self.is_applicable_common():
return 0
if self.CM.Env["Schema"] == "hae":
return 1
return None
####################################################################
class HAERoleTest(HAETest):
####################################################################
def __init__(self, cm):
'''Lars' mount/unmount test for the HA extension. '''
HAETest.__init__(self,cm)
self.name="HAERoleTest"
def change_state(self, node, resource, target):
rc = self.CM.rsh(node, "crm_resource -r %s -p target-role -v %s --meta" % (resource, target))
return rc
def __call__(self, node):
self.incr("calls")
lpc = 0
failed = 0
delay = 2
done=time.time() + self.CM.Env["loop-minutes"]*60
self.find_hae_resources(node)
clone_max = len(self.CM.Env["nodes"])
while time.time() <= done and not failed:
lpc = lpc + 1
self.change_state(node, self.r_dlm, "Stopped")
if not self.wait_on_state(node, self.r_dlm, 0):
self.failure("%s did not go down correctly" % self.r_dlm)
failed = lpc
self.change_state(node, self.r_dlm, "Started")
if not self.wait_on_state(node, self.r_dlm, clone_max):
self.failure("%s did not come up correctly" % self.r_dlm)
failed = lpc
if not self.wait_on_state(node, self.r_o2cb, clone_max):
self.failure("%s did not come up correctly" % self.r_o2cb)
failed = lpc
for fs in self.r_ocfs2:
if not self.wait_on_state(node, fs, clone_max):
self.failure("%s did not come up correctly" % fs)
failed = lpc
if failed:
return self.failure("iteration %d failed" % failed)
return self.success()
AllTestClasses.append(HAERoleTest)
####################################################################
class HAEStandbyTest(HAETest):
####################################################################
    '''Put a node in and out of standby while HAE resources are running'''
def __init__(self, cm):
HAETest.__init__(self,cm)
self.name="HAEStandbyTest"
def change_state(self, node, resource, target):
rc = self.CM.rsh(node, "crm_standby -l reboot -v %s" % (target))
return rc
def __call__(self, node):
self.incr("calls")
lpc = 0
failed = 0
done=time.time() + self.CM.Env["loop-minutes"]*60
self.find_hae_resources(node)
clone_max = len(self.CM.Env["nodes"])
while time.time() <= done and not failed:
lpc = lpc + 1
self.change_state(node, self.r_dlm, "true")
if not self.wait_on_state(node, self.r_dlm, clone_max-1):
self.failure("%s did not go down correctly" % self.r_dlm)
failed = lpc
self.change_state(node, self.r_dlm, "false")
if not self.wait_on_state(node, self.r_dlm, clone_max):
self.failure("%s did not come up correctly" % self.r_dlm)
failed = lpc
if not self.wait_on_state(node, self.r_o2cb, clone_max):
self.failure("%s did not come up correctly" % self.r_o2cb)
failed = lpc
for fs in self.r_ocfs2:
if not self.wait_on_state(node, fs, clone_max):
self.failure("%s did not come up correctly" % fs)
failed = lpc
if failed:
return self.failure("iteration %d failed" % failed)
return self.success()
AllTestClasses.append(HAEStandbyTest)
###################################################################
class NearQuorumPointTest(CTSTest):
###################################################################
'''
This test brings larger clusters near the quorum point (50%).
In addition, it will test doing starts and stops at the same time.
Here is how I think it should work:
- loop over the nodes and decide randomly which will be up and which
      will be down. Use a 50% probability for each of up/down.
- figure out what to do to get into that state from the current state
- in parallel, bring up those going up and bring those going down.
'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="NearQuorumPoint"
def __call__(self, dummy):
'''Perform the 'NearQuorumPoint' test. '''
self.incr("calls")
startset = []
stopset = []
#decide what to do with each node
for node in self.CM.Env["nodes"]:
action = self.CM.Env.RandomGen.choice(["start","stop"])
#action = self.CM.Env.RandomGen.choice(["start","stop","no change"])
if action == "start" :
startset.append(node)
elif action == "stop" :
stopset.append(node)
self.CM.debug("start nodes:" + repr(startset))
self.CM.debug("stop nodes:" + repr(stopset))
#add search patterns
watchpats = [ ]
for node in stopset:
if self.CM.ShouldBeStatus[node] == "up":
watchpats.append(self.CM["Pat:We_stopped"] % node)
for node in startset:
if self.CM.ShouldBeStatus[node] == "down":
#watchpats.append(self.CM["Pat:Slave_started"] % node)
watchpats.append(self.CM["Pat:Local_started"] % node)
else:
for stopping in stopset:
if self.CM.ShouldBeStatus[stopping] == "up":
watchpats.append(self.CM["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping)))
if len(watchpats) == 0:
return self.skipped()
if len(startset) != 0:
watchpats.append(self.CM["Pat:DC_IDLE"])
watch = self.create_watch(watchpats, self.CM["DeadTime"]+10)
watch.setwatch()
#begin actions
for node in stopset:
if self.CM.ShouldBeStatus[node] == "up":
self.CM.StopaCMnoBlock(node)
for node in startset:
if self.CM.ShouldBeStatus[node] == "down":
self.CM.StartaCMnoBlock(node)
#get the result
if watch.lookforall():
self.CM.cluster_stable()
return self.success()
self.CM.log("Warn: Patterns not found: " + repr(watch.unmatched))
#get the "bad" nodes
upnodes = []
for node in stopset:
if self.CM.StataCM(node) == 1:
upnodes.append(node)
downnodes = []
for node in startset:
if self.CM.StataCM(node) == 0:
downnodes.append(node)
if upnodes == [] and downnodes == []:
self.CM.cluster_stable()
# Make sure they're completely down with no residue
for node in stopset:
self.CM.rsh(node, self.CM["StopCmd"])
return self.success()
if len(upnodes) > 0:
self.CM.log("Warn: Unstoppable nodes: " + repr(upnodes))
if len(downnodes) > 0:
self.CM.log("Warn: Unstartable nodes: " + repr(downnodes))
return self.failure()
AllTestClasses.append(NearQuorumPointTest)
###################################################################
class RollingUpgradeTest(CTSTest):
###################################################################
'''Perform a rolling upgrade of the cluster'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="RollingUpgrade"
self.start = StartTest(cm)
self.stop = StopTest(cm)
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
def setup(self, node):
# Start all remaining nodes
ret = self.stopall(None)
if not ret:
return self.failure("Couldn't stop all nodes")
for node in self.CM.Env["nodes"]:
if not self.downgrade(node, None):
return self.failure("Couldn't downgrade %s" % node)
ret = self.startall(None)
if not ret:
return self.failure("Couldn't start all nodes")
return self.success()
def teardown(self, node):
# Stop everything
ret = self.stopall(None)
if not ret:
return self.failure("Couldn't stop all nodes")
for node in self.CM.Env["nodes"]:
if not self.upgrade(node, None):
return self.failure("Couldn't upgrade %s" % node)
return self.success()
def install(self, node, version, start=1, flags="--force"):
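# Stop the cluster on the node, copy over the RPMs for the requested
# version, (re)install them with the given rpm flags and, if requested,
# start the cluster again afterwards.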
target_dir = "/tmp/rpm-%s" % version
src_dir = "%s/%s" % (self.CM.Env["rpm-dir"], version)
self.CM.log("Installing %s on %s with %s" % (version, node, flags))
if not self.stop(node):
return self.failure("stop failure: "+node)
rc = self.CM.rsh(node, "mkdir -p %s" % target_dir)
rc = self.CM.rsh(node, "rm -f %s/*.rpm" % target_dir)
(rc, lines) = self.CM.rsh(node, "ls -1 %s/*.rpm" % src_dir, None)
for line in lines:
line = line[:-1]
rc = self.CM.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir))
rc = self.CM.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir))
if start and not self.start(node):
return self.failure("start failure: "+node)
return self.success()
def upgrade(self, node, start=1):
return self.install(node, self.CM.Env["current-version"], start)
def downgrade(self, node, start=1):
return self.install(node, self.CM.Env["previous-version"], start, "--force --nodeps")
def __call__(self, node):
'''Perform the 'Rolling Upgrade' test. '''
self.incr("calls")
for node in self.CM.Env["nodes"]:
if not self.upgrade(node):
return self.failure("Couldn't upgrade %s" % node)
self.CM.cluster_stable()
return self.success()
def is_applicable(self):
if not self.is_applicable_common():
return None
if not self.CM.Env.has_key("rpm-dir"):
return None
if not self.CM.Env.has_key("current-version"):
return None
if not self.CM.Env.has_key("previous-version"):
return None
return 1
# Register RollingUpgradeTest as a good test to run
AllTestClasses.append(RollingUpgradeTest)
###################################################################
class BSC_AddResource(CTSTest):
###################################################################
'''Add a resource to the cluster'''
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name="AddResource"
self.resource_offset = 0
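# cibadmin -C creates the supplied inline XML object in the named CIB section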
self.cib_cmd="""cibadmin -C -o %s -X '%s' """
def __call__(self, node):
self.incr("calls")
self.resource_offset = self.resource_offset + 1
r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset)
start_pat = "crmd.*%s_start_0.*confirmed.*ok"
patterns = []
patterns.append(start_pat % r_id)
watch = self.create_watch(patterns, self.CM["DeadTime"])
watch.setwatch()
fields = string.split(self.CM.Env["IPBase"], '.')
fields[3] = str(int(fields[3])+1)
ip = string.join(fields, '.')
self.CM.Env["IPBase"] = ip
if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip):
return self.failure("Make resource %s failed" % r_id)
failed = 0
watch_result = watch.lookforall()
if watch.unmatched:
for regex in watch.unmatched:
self.CM.log ("Warn: Pattern not found: %s" % (regex))
failed = 1
if failed:
return self.failure("Resource pattern(s) not found")
if not self.CM.cluster_stable(self.CM["DeadTime"]):
return self.failure("Unstable cluster")
return self.success()
def make_ip_resource(self, node, id, rclass, type, ip):
self.CM.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node))
rsc_xml="""
<primitive id="%s" class="%s" type="%s" provider="heartbeat">
<instance_attributes id="%s"><attributes>
<nvpair id="%s" name="ip" value="%s"/>
</attributes></instance_attributes>
</primitive>""" % (id, rclass, type, id, id, ip)
node_constraint="""
<rsc_location id="run_%s" rsc="%s">
<rule id="pref_run_%s" score="100">
<expression id="%s_loc_expr" attribute="#uname" operation="eq" value="%s"/>
</rule>
</rsc_location>""" % (id, id, id, id, node)
rc = 0
(rc, lines) = self.CM.rsh(node, self.cib_cmd % ("constraints", node_constraint), None)
if rc != 0:
self.CM.log("Constraint creation failed: %d" % rc)
return None
(rc, lines) = self.CM.rsh(node, self.cib_cmd % ("resources", rsc_xml), None)
if rc != 0:
self.CM.log("Resource creation failed: %d" % rc)
return None
return 1
def is_applicable(self):
if self.CM.Env["DoBSC"]:
return 1
return None
AllTestClasses.append(BSC_AddResource)
class SimulStopLite(CTSTest):
###################################################################
'''Stop any active nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="SimulStopLite"
def __call__(self, dummy):
'''Perform the 'SimulStopLite' setup work. '''
self.incr("calls")
self.CM.debug("Setup: " + self.name)
# We ignore the "node" parameter...
watchpats = [ ]
for node in self.CM.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "up":
self.incr("WasStarted")
watchpats.append(self.CM["Pat:We_stopped"] % node)
#if self.CM.Env["use_logd"]:
# watchpats.append(self.CM["Pat:Logd_stopped"] % node)
if len(watchpats) == 0:
self.CM.clear_all_caches()
return self.success()
# Stop all the nodes - at about the same time...
watch = self.create_watch(watchpats, self.CM["DeadTime"]+10)
watch.setwatch()
self.set_timer()
for node in self.CM.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "up":
self.CM.StopaCMnoBlock(node)
if watch.lookforall():
self.CM.clear_all_caches()
# Make sure they're completely down with no residue
for node in self.CM.Env["nodes"]:
self.CM.rsh(node, self.CM["StopCmd"])
return self.success()
did_fail=0
up_nodes = []
for node in self.CM.Env["nodes"]:
if self.CM.StataCM(node) == 1:
did_fail=1
up_nodes.append(node)
if did_fail:
return self.failure("Active nodes exist: " + repr(up_nodes))
self.CM.log("Warn: All nodes stopped but CTS didnt detect: "
+ repr(watch.unmatched))
self.CM.clear_all_caches()
return self.failure("Missing log message: "+repr(watch.unmatched))
def is_applicable(self):
'''SimulStopLite is a setup test and never applicable'''
return 0
###################################################################
class SimulStartLite(CTSTest):
###################################################################
'''Start any stopped nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name="SimulStartLite"
def __call__(self, dummy):
'''Perform the 'SimulStartLite' setup work. '''
self.incr("calls")
self.CM.debug("Setup: " + self.name)
# We ignore the "node" parameter...
watchpats = [ ]
uppat = self.CM["Pat:Slave_started"]
if self.CM.upcount() == 0:
uppat = self.CM["Pat:Local_started"]
for node in self.CM.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "down":
self.incr("WasStopped")
watchpats.append(uppat % node)
if len(watchpats) == 0:
return self.success()
watchpats.append(self.CM["Pat:DC_IDLE"])
# Start all the nodes - at about the same time...
watch = self.create_watch(watchpats, self.CM["DeadTime"]+10)
watch.setwatch()
self.set_timer()
for node in self.CM.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "down":
self.CM.StartaCMnoBlock(node)
if watch.lookforall():
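# All start patterns were seen; poll for stability a few times, since the DC may need a moment to go idle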
for attempt in (1, 2, 3, 4, 5):
if self.CM.cluster_stable():
return self.success()
return self.failure("Cluster did not stabilize")
did_fail=0
unstable = []
for node in self.CM.Env["nodes"]:
if self.CM.StataCM(node) == 0:
did_fail=1
unstable.append(node)
if did_fail:
return self.failure("Unstarted nodes exist: " + repr(unstable))
unstable = []
for node in self.CM.Env["nodes"]:
if not self.CM.node_stable(node):
did_fail=1
unstable.append(node)
if did_fail:
return self.failure("Unstable cluster nodes exist: "
+ repr(unstable))
self.CM.log("ERROR: All nodes started but CTS didnt detect: "
+ repr(watch.unmatched))
return self.failure()
def is_applicable(self):
'''SimulStartLite is a setup test and never applicable'''
return 0
def TestList(cm, audits):
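'''Return an instance of each registered test class that is applicable to this environment, with the given audits attached.'''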
result = []
for testclass in AllTestClasses:
bound_test = testclass(cm)
if bound_test.is_applicable():
bound_test.Audits = audits
result.append(bound_test)
return result
# vim:ts=4:sw=4:et:
diff --git a/doc/Clusters_from_Scratch/en-US/Ap-Cman.xml b/doc/Clusters_from_Scratch/en-US/Ap-Cman.xml
index e70080312e..af27515cab 100644
--- a/doc/Clusters_from_Scratch/en-US/Ap-Cman.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ap-Cman.xml
@@ -1,131 +1,125 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<appendix id="ap-cman">
<title>Using CMAN for Cluster Membership and Quorum</title>
<section>
<title>Background</title>
<para>
<ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Enterprise_Linux/6/html-single/Cluster_Suite_Overview/index.html#s2-clumembership-overview-CSO">CMAN v3</ulink> is a Corosync plugin that monitors the names and number of active cluster nodes in order to deliver membership and quorum information to clients (such as the Pacemaker daemons).
</para>
<para>
In a traditional Corosync-Pacemaker cluster, a Pacemaker plugin is loaded to provide membership and quorum information.
The motivation for using CMAN instead is to ensure that all elements of the cluster stack make decisions based on the same membership and quorum data.
<footnote>
<para>
A failure to do this can lead to what is called <literal>internal split-brain</literal> - a situation where different parts of the stack disagree about whether some nodes are alive or dead - which quickly leads to unnecessary downtime and/or data corruption.
</para>
</footnote>
</para>
<para>
CMAN has been around longer than Pacemaker and is part of the Red Hat cluster stack, so it is available and supported by many distributions and other pieces of software (such as OCFS2 and GFS2).
For this reason it makes sense to support it.
</para>
</section>
<section>
<title>Adding CMAN Support</title>
<warning>
<para>
Be sure to disable the Pacemaker plugin before continuing with this section.
In most cases, this can be achieved by removing <filename>/etc/corosync/service.d/pcmk</filename> and stopping Corosync.
</para>
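<para>
For example, on most systems this amounts to something like the following (the path and init tooling may differ on your distribution):
</para>
<screen>
[root@pcmk-1 ~]# <userinput>rm -f /etc/corosync/service.d/pcmk</userinput>
[root@pcmk-1 ~]# <userinput>/etc/init.d/corosync stop</userinput>
</screen>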
</warning>
<section>
<title>Adding CMAN Support - cluster.conf</title>
<para>
The preferred approach for enabling CMAN is to configure <filename>cluster.conf</filename> and use the <filename>/etc/init.d/cman</filename> script to start Corosync.
It's far easier to maintain and automatically starts the necessary pieces for using GFS2.
</para>
<para>
You can find some documentation on
<ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Cluster_Administration/s1-creating-cluster-cli-CA.html">Installing CMAN and Creating a Basic Cluster Configuration File</ulink>
at the Red Hat website.
However, please ignore the parts about <literal>Fencing</literal>, <literal>Failover Domains</literal>, or <literal>HA Services</literal> and anything to do with <literal>rgmanager</literal> and <literal>fenced</literal>.
All these continue to be handled by Pacemaker in the normal manner.
</para>
<example>
<title>Sample cluster.conf for a two-node cluster</title>
<programlisting>
<![CDATA[
<?xml version="1.0"?>
<cluster config_version="1" name="beekhof">
- <fence_daemon clean_start="0" post_fail_delay="0" post_join_delay="3"/>
+ <logging debug="true"/>
<clusternodes>
- <clusternode name="pcmk-1" nodeid="1">
- <fence/>
- </clusternode>
- <clusternode name="pcmk-2" nodeid="2">
- <fence/>
- </clusternode>
+ <clusternode name="pcmk-1" nodeid="1"/>
+ <clusternode name="pcmk-2" nodeid="2"/>
</clusternodes>
- <cman/>
- <fencedevices/>
- <rm/>
+ <cman two_node="1" expected_votes="1"/>
</cluster>
]]>
</programlisting>
</example>
</section>
<section>
<title>Adding CMAN Support - corosync.conf</title>
<para>
The alternative is to add the necessary cman configuration elements to <filename>corosync.conf</filename>.
We recommend you place these directives in <filename>/etc/corosync/service.d/cman</filename> as they will differ between machines.
</para>
<para>
If you choose this approach, you would continue to start and stop Corosync with its init script as previously described in this document.
</para>
<example>
<title>Sample corosync.conf extensions for a two-node cluster</title>
<programlisting>
[root@pcmk-1 ~]# <userinput>cat &lt;&lt;-END &gt;&gt;/etc/corosync/service.d/cman</userinput>
cluster {
name: beekhof
clusternodes {
clusternode {
votes: 1
nodeid: 1
name: pcmk-1
}
clusternode {
votes: 1
nodeid: 2
name: pcmk-2
}
}
cman {
expected_votes: 2
cluster_id: 123
nodename: `uname -n`
two_node: 1
max_queued: 10
}
}
service {
name: corosync_cman
ver: 0
}
quorum {
provider: quorum_cman
}
END
</programlisting>
</example>
<warning>
<para>
Verify that <literal>nodename</literal> was set appropriately on each host.
</para>
</warning>
</section>
</section>
</appendix>
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Active-Active.xml b/doc/Clusters_from_Scratch/en-US/Ch-Active-Active.xml
index 329cfd7178..8f709287dd 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Active-Active.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Active-Active.xml
@@ -1,721 +1,721 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<chapter>
<title>Conversion to Active/Active</title>
<section>
<title>Requirements</title>
<para>
The primary requirement for an Active/Active cluster is that the data required for your services are available, simultaneously, on both machines.
Pacemaker makes no requirement on how this is achieved; you could use a SAN if you had one available, but since DRBD supports multiple Primaries, we can also use that.
</para>
<para>
The only hitch is that we need to use a cluster-aware filesystem (and the one we used earlier with DRBD, ext4, is not one of those).
Both OCFS2 and GFS2 are supported; here we will use GFS2, which comes with &DISTRO; &DISTRO_VERSION;.
</para>
</section>
<section>
<title>Install a Cluster Filesystem - GFS2</title>
<para>
- The first thing to do is install gfs2-utils on each machine.
+ The first thing to do is install gfs2-utils and gfs2-cluster on each machine.
</para>
<screen>
-[root@pcmk-1 ~]# <userinput>yum install -y gfs2-utils gfs-pcmk</userinput>
+[root@pcmk-1 ~]# <userinput>yum install -y gfs2-utils gfs2-cluster gfs-pcmk</userinput>
Setting up Install Process
Resolving Dependencies
--&gt; Running transaction check
---&gt; Package gfs-pcmk.x86_64 0:3.0.5-2.fc12 set to be updated
--&gt; Processing Dependency: libSaCkpt.so.3(OPENAIS_CKPT_B.01.01)(64bit) for package: gfs-pcmk-3.0.5-2.fc12.x86_64
--&gt; Processing Dependency: dlm-pcmk for package: gfs-pcmk-3.0.5-2.fc12.x86_64
--&gt; Processing Dependency: libccs.so.3()(64bit) for package: gfs-pcmk-3.0.5-2.fc12.x86_64
--&gt; Processing Dependency: libdlmcontrol.so.3()(64bit) for package: gfs-pcmk-3.0.5-2.fc12.x86_64
--&gt; Processing Dependency: liblogthread.so.3()(64bit) for package: gfs-pcmk-3.0.5-2.fc12.x86_64
--&gt; Processing Dependency: libSaCkpt.so.3()(64bit) for package: gfs-pcmk-3.0.5-2.fc12.x86_64
---&gt; Package gfs2-utils.x86_64 0:3.0.5-2.fc12 set to be updated
--&gt; Running transaction check
---&gt; Package clusterlib.x86_64 0:3.0.5-2.fc12 set to be updated
---&gt; Package dlm-pcmk.x86_64 0:3.0.5-2.fc12 set to be updated
---&gt; Package openaislib.x86_64 0:1.1.0-1.fc12 set to be updated
--&gt; Finished Dependency Resolution
Dependencies Resolved
===========================================================================================
 Package                Arch               Version                   Repository        Size
===========================================================================================
Installing:
 gfs-pcmk               x86_64             3.0.5-2.fc12              custom           101 k
 gfs2-utils             x86_64             3.0.5-2.fc12              custom           208 k
Installing for dependencies:
 clusterlib             x86_64             3.0.5-2.fc12              custom            65 k
 dlm-pcmk               x86_64             3.0.5-2.fc12              custom            93 k
 openaislib             x86_64             1.1.0-1.fc12              fedora            76 k
Transaction Summary
===========================================================================================
Install       5 Package(s)
Upgrade       0 Package(s)
Total download size: 541 k
Downloading Packages:
(1/5): clusterlib-3.0.5-2.fc12.x86_64.rpm                                |  65 kB     00:00
(2/5): dlm-pcmk-3.0.5-2.fc12.x86_64.rpm                                  |  93 kB     00:00
(3/5): gfs-pcmk-3.0.5-2.fc12.x86_64.rpm                                  | 101 kB     00:00
(4/5): gfs2-utils-3.0.5-2.fc12.x86_64.rpm                                | 208 kB     00:00
(5/5): openaislib-1.1.0-1.fc12.x86_64.rpm                                |  76 kB     00:00
-------------------------------------------------------------------------------------------
Total                                                           992 kB/s | 541 kB     00:00
Running rpm_check_debug
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
  Installing     : clusterlib-3.0.5-2.fc12.x86_64                                       1/5
  Installing     : openaislib-1.1.0-1.fc12.x86_64                                       2/5
  Installing     : dlm-pcmk-3.0.5-2.fc12.x86_64                                         3/5
  Installing     : gfs-pcmk-3.0.5-2.fc12.x86_64                                         4/5
  Installing     : gfs2-utils-3.0.5-2.fc12.x86_64                                       5/5
Installed:
  gfs-pcmk.x86_64 0:3.0.5-2.fc12                    gfs2-utils.x86_64 0:3.0.5-2.fc12
Dependency Installed:
  clusterlib.x86_64 0:3.0.5-2.fc12   dlm-pcmk.x86_64 0:3.0.5-2.fc12
  openaislib.x86_64 0:1.1.0-1.fc12  
Complete!
[root@pcmk-1 x86_64]#
</screen>
<warning>
<para>
If this step fails, it is likely that your version/distribution does not ship the "Pacemaker" versions of dlm_controld and/or gfs_controld.
Normally these files would be called <filename>dlm_controld.pcmk</filename> and <filename>gfs_controld.pcmk</filename> and live in the <filename>/usr/sbin</filename> directory.
</para>
<para>
If you cannot locate an installation source for these files, you will need to install a package called <literal>cman</literal> and reconfigure Corosync to use it as outlined in <xref linkend="ap-cman"/>.
</para>
<para>
- When using CMAN, you can skip <xref linkend="gfs-integration"/> where <literal>dlm-clone</literal> and <literal>gfs-clone</literal> are created, and proceed directly to <xref linkend="gfs-create-filesystem"/>.
+ When using CMAN, you can skip <xref linkend="gfs-integration"/> where <literal>dlm-clone</literal> and <literal>gfs-clone</literal> are created, and proceed directly to <xref linkend="gfs-create-filesystem"/> after ensuring that <literal>gfs2-utils</literal> and <literal>gfs2-cluster</literal> were installed.
</para>
</warning>
</section>
<section id="gfs-integration">
<title>Setup Pacemaker-GFS2 Integration</title>
<para>
GFS2 needs two services to be running, the first is the user-space interface to the kernel’s distributed lock manager (DLM). The DLM is used to co-ordinate which node(s) can access a given file (and when) and integrates with Pacemaker to obtain node membership <footnote>
<para>
The list of nodes the cluster considers to be available
</para>
</footnote> information and fencing capabilities.
</para>
<para>
The second service is GFS2’s own control daemon which also integrates with Pacemaker to obtain node membership data.
</para>
<section>
<title>Add the DLM service</title>
<para>
The DLM control daemon needs to run on all active cluster nodes, so we will use the shell’s interactive mode to create a cloned resource.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm</userinput>
crm(live)# <userinput>cib new stack-glue</userinput>
INFO: stack-glue shadow CIB created
crm(stack-glue)# <userinput>configure primitive dlm ocf:pacemaker:controld op monitor interval=120s</userinput>
crm(stack-glue)# <userinput>configure clone dlm-clone dlm meta interleave=true</userinput>
crm(stack-glue)# <userinput>configure show xml</userinput>
crm(stack-glue)# <userinput>configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4"
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
<emphasis>primitive dlm ocf:pacemaker:controld \</emphasis>
<emphasis> op monitor interval="120s"</emphasis>
ms WebDataClone WebData \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
<emphasis>clone dlm-clone dlm \</emphasis>
<emphasis> meta interleave="true"</emphasis>
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation WebSite-with-WebFS inf: WebSite WebFS
colocation fs_on_drbd inf: WebFS WebDataClone:Master
colocation website-with-ip inf: WebSite ClusterIP
order WebFS-after-WebData inf: WebDataClone:promote WebFS:start
order WebSite-after-WebFS inf: WebFS WebSite
order apache-after-ip inf: ClusterIP WebSite
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<note>
<para>
TODO: Explain the meaning of the interleave option
</para>
</note>
<para>
Review the configuration before uploading it to the cluster, quitting the shell and watching the cluster’s response
</para>
<screen>
crm(stack-glue)# <userinput>cib commit stack-glue</userinput>
INFO: commited 'stack-glue' shadow CIB to the cluster
crm(stack-glue)# <userinput>quit</userinput>
bye
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Thu Sep  3 20:49:54 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
5 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
WebSite (ocf::heartbeat:apache):        Started pcmk-2
Master/Slave Set: WebDataClone
        Masters: [ pcmk-1 ]
        Slaves: [ pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
<emphasis>Clone Set: dlm-clone</emphasis>
<emphasis> Started: [ pcmk-2 pcmk-1 ]</emphasis>
WebFS   (ocf::heartbeat:Filesystem):    Started pcmk-2
</screen>
</section>
<section>
<title>Add the GFS2 service</title>
<para>
Once the DLM is active, we can add the GFS2 control daemon.
</para>
<para>
Use the crm shell to create the gfs-control cluster resource:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm</userinput>
crm(live)# <userinput>cib new gfs-glue --force</userinput>
INFO: gfs-glue shadow CIB created
crm(gfs-glue)# <userinput>configure primitive gfs-control ocf:pacemaker:controld params daemon=gfs_controld.pcmk args="-g 0" op monitor interval=120s</userinput>
crm(gfs-glue)# <userinput>configure clone gfs-clone gfs-control meta interleave=true</userinput>
</screen>
<para>
Now ensure Pacemaker only starts the gfs-control service on nodes that also have a copy of the dlm service (created above) already running
</para>
<screen>
crm(gfs-glue)# <userinput>configure colocation gfs-with-dlm INFINITY: gfs-clone dlm-clone</userinput>
crm(gfs-glue)# <userinput>configure order start-gfs-after-dlm mandatory: dlm-clone gfs-clone</userinput>
</screen>
<para>
Review the configuration before uploading it to the cluster, quitting the shell and watching the cluster’s response
</para>
<screen>
crm(gfs-glue)# <userinput>configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4"
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
primitive dlm ocf:pacemaker:controld \
        op monitor interval="120s"
<emphasis>primitive gfs-control ocf:pacemaker:controld \</emphasis>
<emphasis> params daemon="gfs_controld.pcmk" args="-g 0" \</emphasis>
<emphasis> op monitor interval="120s"</emphasis>
ms WebDataClone WebData \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
clone dlm-clone dlm \
        meta interleave="true"
<emphasis>clone gfs-clone gfs-control \</emphasis>
<emphasis> meta interleave="true"</emphasis>
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation WebSite-with-WebFS inf: WebSite WebFS
colocation fs_on_drbd inf: WebFS WebDataClone:Master
<emphasis>colocation gfs-with-dlm inf: gfs-clone dlm-clone</emphasis>
colocation website-with-ip inf: WebSite ClusterIP
order WebFS-after-WebData inf: WebDataClone:promote WebFS:start
order WebSite-after-WebFS inf: WebFS WebSite
order apache-after-ip inf: ClusterIP WebSite
<emphasis>order start-gfs-after-dlm inf: dlm-clone gfs-clone</emphasis>
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
crm(gfs-glue)# <userinput>cib commit gfs-glue</userinput>
INFO: commited 'gfs-glue' shadow CIB to the cluster
crm(gfs-glue)# <userinput>quit</userinput>
bye
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Thu Sep  3 20:49:54 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
6 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
WebSite (ocf::heartbeat:apache):        Started pcmk-2
Master/Slave Set: WebDataClone
        Masters: [ pcmk-1 ]
        Slaves: [ pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
Clone Set: dlm-clone
        Started: [ pcmk-2 pcmk-1 ]
<emphasis>Clone Set: gfs-clone</emphasis>
<emphasis> Started: [ pcmk-2 pcmk-1 ]</emphasis>
WebFS   (ocf::heartbeat:Filesystem):    Started pcmk-1
</screen>
</section>
</section>
<section id="gfs-create-filesystem">
<title>Create a GFS2 Filesystem</title>
<section>
<title>Preparation</title>
<para>
Before we do anything to the existing partition, we need to make sure it is unmounted. We do this by telling the cluster to stop the WebFS resource. This will ensure that other resources (in our case, Apache) using WebFS are not only stopped, but stopped in the correct order.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm_resource --resource WebFS --set-parameter target-role --meta --parameter-value Stopped</userinput>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Thu Sep  3 15:18:06 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
6 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
Master/Slave Set: WebDataClone
        Masters: [ pcmk-1 ]
        Slaves: [ pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1
Clone Set: dlm-clone
        Started: [ pcmk-2 pcmk-1 ]
Clone Set: gfs-clone
        Started: [ pcmk-2 pcmk-1 ]
</screen>
<note>
<para>
Note that both Apache and WebFS have been stopped.
</para>
</note>
</section>
<section>
<title>Create and Populate a GFS2 Partition</title>
<para>
Now that the cluster stack and integration pieces are running smoothly, we can create a GFS2 partition.
</para>
<warning>
<para>
This will erase all previous content stored on the DRBD device. Ensure you have a copy of any important data.
</para>
</warning>
<para>
We need to specify a number of additional parameters when creating a GFS2 partition.
</para>
<para>
First we must use the -p option to specify that we want to use the kernel’s DLM. Next we use -j to indicate that it should reserve enough space for two journals (one per node accessing the filesystem).
</para>
<para>
Lastly, we use -t to specify the lock table name. The format for this field is clustername:fsname. For the fsname, we just need to pick something unique and descriptive, and since we haven’t specified a clustername yet, we will use the default (pcmk).
</para>
<para>
To specify an alternate name for the cluster, locate the service section containing “name: pacemaker” in corosync.conf and insert the following line anywhere inside the block:
</para>
<screen>clustername: myname</screen>
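<para>
The surrounding block would then look something like the following (shown with the stock Pacemaker service definition; your ver value may differ):
</para>
<screen>
service {
        name: pacemaker
        ver: 0
        clustername: myname
}
</screen>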
<para>
Do this on each node in the cluster and be sure to restart them before continuing.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>mkfs.gfs2 -p lock_dlm -j 2 -t pcmk:web /dev/drbd1</userinput>
This will destroy any data on /dev/drbd1.
It appears to contain: data
Are you sure you want to proceed? [y/n] y
Device:                    /dev/drbd1
Blocksize:                 4096
Device Size                1.00 GB (131072 blocks)
Filesystem Size:           1.00 GB (131070 blocks)
Journals:                  2
Resource Groups:           2
Locking Protocol:          "lock_dlm"
Lock Table:                "pcmk:web"
UUID:                      6B776F46-177B-BAF8-2C2B-292C0E078613
[root@pcmk-1 ~]#
</screen>
<para>
Then (re)populate the new filesystem with data (web pages). For now we’ll create another variation on our home page.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>mount /dev/drbd1 /mnt/</userinput>
[root@pcmk-1 ~]# <userinput>cat &lt;&lt;-END &gt;/mnt/index.html</userinput>
&lt;html&gt;
&lt;body&gt;My Test Site - GFS2&lt;/body&gt;
&lt;/html&gt;
END
[root@pcmk-1 ~]# <userinput>umount /dev/drbd1</userinput>
[root@pcmk-1 ~]# <userinput>drbdadm verify wwwdata</userinput>
[root@pcmk-1 ~]#
</screen>
</section>
</section>
<section>
<title>Reconfigure the Cluster for GFS2</title>
<screen>
[root@pcmk-1 ~]# <userinput>crm</userinput>
crm(live)# <userinput>cib new GFS2</userinput>
INFO: GFS2 shadow CIB created
crm(GFS2)# <userinput>configure delete WebFS</userinput>
crm(GFS2)# <userinput>configure primitive WebFS ocf:heartbeat:Filesystem params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"</userinput>
</screen>
<para>
Now that we’ve recreated the resource, we also need to recreate all the constraints that used it. This is because the shell will automatically remove any constraints that referenced WebFS.
</para>
<screen>
crm(GFS2)# <userinput>configure colocation WebSite-with-WebFS inf: WebSite WebFS</userinput>
crm(GFS2)# <userinput>configure colocation fs_on_drbd inf: WebFS WebDataClone:Master</userinput>
crm(GFS2)# <userinput>configure order WebFS-after-WebData inf: WebDataClone:promote WebFS:start</userinput>
crm(GFS2)# <userinput>configure order WebSite-after-WebFS inf: WebFS WebSite</userinput>
crm(GFS2)# <userinput>configure colocation WebFS-with-gfs-control INFINITY: WebFS gfs-clone</userinput>
crm(GFS2)# <userinput>configure order start-WebFS-after-gfs-control mandatory: gfs-clone WebFS</userinput>
crm(GFS2)# <userinput>configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
<emphasis>primitive WebFS ocf:heartbeat:Filesystem \</emphasis>
<emphasis> params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"</emphasis>
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
primitive dlm ocf:pacemaker:controld \
        op monitor interval="120s"
primitive gfs-control ocf:pacemaker:controld \
        params daemon="gfs_controld.pcmk" args="-g 0" \
        op monitor interval="120s"
ms WebDataClone WebData \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
clone dlm-clone dlm \
        meta interleave="true"
clone gfs-clone gfs-control \
        meta interleave="true"
colocation WebFS-with-gfs-control inf: WebFS gfs-clone
colocation WebSite-with-WebFS inf: WebSite WebFS
colocation fs_on_drbd inf: WebFS WebDataClone:Master
colocation gfs-with-dlm inf: gfs-clone dlm-clone
colocation website-with-ip inf: WebSite ClusterIP
order WebFS-after-WebData inf: WebDataClone:promote WebFS:start
order WebSite-after-WebFS inf: WebFS WebSite
order apache-after-ip inf: ClusterIP WebSite
order start-WebFS-after-gfs-control inf: gfs-clone WebFS
order start-gfs-after-dlm inf: dlm-clone gfs-clone
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
Review the configuration before uploading it to the cluster, quitting the shell and watching the cluster’s response
</para>
<screen>
crm(GFS2)# <userinput>cib commit GFS2</userinput>
INFO: commited 'GFS2' shadow CIB to the cluster
crm(GFS2)# <userinput>quit</userinput>
bye
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Thu Sep  3 20:49:54 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
6 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
WebSite (ocf::heartbeat:apache):        Started pcmk-2
Master/Slave Set: WebDataClone
        Masters: [ pcmk-1 ]
        Slaves: [ pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
Clone Set: dlm-clone
        Started: [ pcmk-2 pcmk-1 ]
Clone Set: gfs-clone
        Started: [ pcmk-2 pcmk-1 ]
<emphasis>WebFS (ocf::heartbeat:Filesystem): Started pcmk-1</emphasis>
</screen>
</section>
<section>
<title>Reconfigure Pacemaker for Active/Active</title>
<para>
Almost everything is in place.
Recent versions of DRBD are capable of operating in Primary/Primary mode and the filesystem we’re using is cluster-aware.
All we need to do now is reconfigure the cluster to take advantage of this.
</para>
<para>
This will involve a number of changes, so we’ll again use interactive mode.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm</userinput>
[root@pcmk-1 ~]# <userinput>cib new active</userinput>
</screen>
<para>
There’s no point making the services active at both locations if we can’t reach them, so let’s first clone the IP address.
Cloned IPaddr2 resources use an iptables rule to ensure that each request is processed by only one of the two clone instances.
The additional meta options tell the cluster how many instances of the clone we want (one “request bucket” for each node) and that if all other nodes fail, the remaining node should hold all of them.
Otherwise the requests would simply be discarded.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>configure clone WebIP ClusterIP  \</userinput>
<userinput>        meta globally-unique="true" clone-max="2" clone-node-max="2"</userinput>
</screen>
<para>
Now we must tell the ClusterIP how to decide which requests are processed by which hosts.
To do this we must specify the clusterip_hash parameter.
</para>
<para>
Open the ClusterIP resource
</para>
<screen>[root@pcmk-1 ~]# <userinput>configure edit  ClusterIP</userinput></screen>
<para>
And add the following to the params line
</para>
<screen>clusterip_hash="sourceip"</screen>
<para>
So that the complete definition looks like:
</para>
<screen>
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \
        op monitor interval="30s"
</screen>
<para>
Here is the full transcript
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm </userinput>
crm(live)# <userinput>cib new active</userinput>
INFO: active shadow CIB created
crm(active)# <userinput>configure clone WebIP ClusterIP  \</userinput>
        meta globally-unique="true" clone-max="2" clone-node-max="2"
crm(active)# <userinput>configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \
        op monitor interval="30s"
primitive dlm ocf:pacemaker:controld \
        op monitor interval="120s"
primitive gfs-control ocf:pacemaker:controld \
        params daemon="gfs_controld.pcmk" args="-g 0" \
        op monitor interval="120s"
ms WebDataClone WebData \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
<emphasis>clone WebIP ClusterIP \</emphasis>
<emphasis> meta globally-unique="true" clone-max="2" clone-node-max="2"</emphasis>
clone dlm-clone dlm \
        meta interleave="true"
clone gfs-clone gfs-control \
        meta interleave="true"
colocation WebFS-with-gfs-control inf: WebFS gfs-clone
colocation WebSite-with-WebFS inf: WebSite WebFS
colocation fs_on_drbd inf: WebFS WebDataClone:Master
colocation gfs-with-dlm inf: gfs-clone dlm-clone
<emphasis>colocation website-with-ip inf: WebSite WebIP</emphasis>
order WebFS-after-WebData inf: WebDataClone:promote WebFS:start
order WebSite-after-WebFS inf: WebFS WebSite
<emphasis>order apache-after-ip inf: WebIP WebSite</emphasis>
order start-WebFS-after-gfs-control inf: gfs-clone WebFS
order start-gfs-after-dlm inf: dlm-clone gfs-clone
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
Notice how any constraints that referenced ClusterIP have been updated to use WebIP instead.
This is an additional benefit of using the crm shell.
</para>
<para>
Next we need to convert the filesystem and Apache resources into clones.
Again, the shell will automatically update any relevant constraints.
</para>
<screen>
crm(active)# <userinput>configure clone WebFSClone WebFS</userinput>
crm(active)# <userinput>configure clone WebSiteClone WebSite</userinput>
</screen>
<para>
The last step is to tell the cluster that it is now allowed to promote both instances to be Primary (aka. Master).
</para>
<screen>
crm(active)# <userinput>configure edit WebDataClone</userinput>
</screen>
<para>
Change master-max to 2
</para>
<screen>
crm(active)# <userinput>configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \
        op monitor interval="30s"
primitive dlm ocf:pacemaker:controld \
        op monitor interval="120s"
primitive gfs-control ocf:pacemaker:controld \
        params daemon="gfs_controld.pcmk" args="-g 0" \
        op monitor interval="120s"
ms WebDataClone WebData \
        meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
<emphasis>clone WebFSClone WebFS</emphasis>
clone WebIP ClusterIP  \
        meta globally-unique="true" clone-max="2" clone-node-max="2"
<emphasis>clone WebSiteClone WebSite</emphasis>
clone dlm-clone dlm \
        meta interleave="true"
clone gfs-clone gfs-control \
        meta interleave="true"
<emphasis>colocation WebFS-with-gfs-control inf: WebFSClone gfs-clone</emphasis>
<emphasis>colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone</emphasis>
<emphasis>colocation fs_on_drbd inf: WebFSClone WebDataClone:Master</emphasis>
colocation gfs-with-dlm inf: gfs-clone dlm-clone
<emphasis>colocation website-with-ip inf: WebSiteClone WebIP</emphasis>
<emphasis>order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start</emphasis>
<emphasis>order WebSite-after-WebFS inf: WebFSClone WebSiteClone</emphasis>
<emphasis>order apache-after-ip inf: WebIP WebSiteClone</emphasis>
order start-WebFS-after-gfs-control inf: gfs-clone WebFSClone
order start-gfs-after-dlm inf: dlm-clone gfs-clone
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
Review the configuration before uploading it to the cluster, quitting the shell and watching the cluster’s response
</para>
<screen>
crm(active)# <userinput>cib commit active</userinput>
INFO: commited 'active' shadow CIB to the cluster
crm(active)# <userinput>quit</userinput>
bye
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Thu Sep  3 21:37:27 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
6 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
Master/Slave Set: WebDataClone
        Masters: [ pcmk-1 pcmk-2 ]
Clone Set: dlm-clone
        Started: [ pcmk-2 pcmk-1 ]
Clone Set: gfs-clone
        Started: [ pcmk-2 pcmk-1 ]
<emphasis>Clone Set: WebIP</emphasis>
<emphasis> Started: [ pcmk-1 pcmk-2 ]</emphasis>
<emphasis>Clone Set: WebFSClone</emphasis>
<emphasis> Started: [ pcmk-1 pcmk-2 ]</emphasis>
<emphasis>Clone Set: WebSiteClone</emphasis>
<emphasis> Started: [ pcmk-1 pcmk-2 ]</emphasis>
</screen>
<section>
<title>Testing Recovery</title>
<note>
<para>
TODO: Put one node into standby to demonstrate failover
</para>
</note>
</section>
</section>
</chapter>
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml b/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml
index 667c2c391d..8377f05620 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml
@@ -1,472 +1,490 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<chapter>
<title>Apache - Adding More Services</title>
<note>
<para>
Now that we have a basic but functional active/passive two-node cluster, we’re ready to add some real services. We’re going to start with Apache because it’s a feature of many clusters and relatively simple to configure.
</para>
</note>
<section>
<title>Installation</title>
<para>
Before continuing, we need to make sure Apache is installed on <emphasis>both</emphasis> hosts.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>yum install -y httpd</userinput>
Setting up Install Process
Resolving Dependencies
--&gt; Running transaction check
---&gt; Package httpd.x86_64 0:2.2.13-2.fc12 set to be updated
--&gt; Processing Dependency: httpd-tools = 2.2.13-2.fc12 for package: httpd-2.2.13-2.fc12.x86_64
--&gt; Processing Dependency: apr-util-ldap for package: httpd-2.2.13-2.fc12.x86_64
--&gt; Processing Dependency: /etc/mime.types for package: httpd-2.2.13-2.fc12.x86_64
--&gt; Processing Dependency: libaprutil-1.so.0()(64bit) for package: httpd-2.2.13-2.fc12.x86_64
--&gt; Processing Dependency: libapr-1.so.0()(64bit) for package: httpd-2.2.13-2.fc12.x86_64
--&gt; Running transaction check
---&gt; Package apr.x86_64 0:1.3.9-2.fc12 set to be updated
---&gt; Package apr-util.x86_64 0:1.3.9-2.fc12 set to be updated
---&gt; Package apr-util-ldap.x86_64 0:1.3.9-2.fc12 set to be updated
---&gt; Package httpd-tools.x86_64 0:2.2.13-2.fc12 set to be updated
---&gt; Package mailcap.noarch 0:2.1.30-1.fc12 set to be updated
--&gt; Finished Dependency Resolution
Dependencies Resolved
=======================================================================================
 Package               Arch             Version                Repository         Size
=======================================================================================
Installing:
 httpd               x86_64           2.2.13-2.fc12            rawhide           735 k
Installing for dependencies:
 apr                 x86_64           1.3.9-2.fc12             rawhide           117 k
 apr-util            x86_64           1.3.9-2.fc12             rawhide            84 k
 apr-util-ldap       x86_64           1.3.9-2.fc12             rawhide            15 k
 httpd-tools         x86_64           2.2.13-2.fc12            rawhide            63 k
 mailcap             noarch           2.1.30-1.fc12            rawhide            25 k
Transaction Summary
=======================================================================================
Install       6 Package(s)
Upgrade       0 Package(s)
Total download size: 1.0 M
Downloading Packages:
(1/6): apr-1.3.9-2.fc12.x86_64.rpm                                   | 117 kB     00:00    
(2/6): apr-util-1.3.9-2.fc12.x86_64.rpm                             |  84 kB     00:00    
(3/6): apr-util-ldap-1.3.9-2.fc12.x86_64.rpm                         |  15 kB     00:00    
(4/6): httpd-2.2.13-2.fc12.x86_64.rpm                               | 735 kB     00:00    
(5/6): httpd-tools-2.2.13-2.fc12.x86_64.rpm                         |  63 kB     00:00    
(6/6): mailcap-2.1.30-1.fc12.noarch.rpm                             |  25 kB     00:00    
----------------------------------------------------------------------------------------
Total                                                       875 kB/s | 1.0 MB     00:01    
Running rpm_check_debug
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
  Installing     : apr-1.3.9-2.fc12.x86_64                                         1/6
  Installing     : apr-util-1.3.9-2.fc12.x86_64                                     2/6
  Installing     : apr-util-ldap-1.3.9-2.fc12.x86_64                               3/6
  Installing     : httpd-tools-2.2.13-2.fc12.x86_64                                 4/6
  Installing     : mailcap-2.1.30-1.fc12.noarch                                     5/6
  Installing     : httpd-2.2.13-2.fc12.x86_64                                       6/6
Installed:
  httpd.x86_64 0:2.2.13-2.fc12                                                        
Dependency Installed:
  apr.x86_64 0:1.3.9-2.fc12            apr-util.x86_64 0:1.3.9-2.fc12
  apr-util-ldap.x86_64 0:1.3.9-2.fc12  httpd-tools.x86_64 0:2.2.13-2.fc12
  mailcap.noarch 0:2.1.30-1.fc12  
Complete!
[root@pcmk-1 ~]#
</screen>
<para>
Also, we need the wget tool in order for the cluster to be able to check the status of the Apache server.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>yum install -y wget</userinput>
Setting up Install Process
Resolving Dependencies
--&gt; Running transaction check
---&gt; Package wget.x86_64 0:1.11.4-5.fc12 set to be updated
--&gt; Finished Dependency Resolution
Dependencies Resolved
===========================================================================================
 Package        Arch             Version                      Repository               Size
===========================================================================================
Installing:
 wget         x86_64          1.11.4-5.fc12                   rawhide                393 k
Transaction Summary
===========================================================================================
Install       1 Package(s)
Upgrade       0 Package(s)
Total download size: 393 k
Downloading Packages:
wget-1.11.4-5.fc12.x86_64.rpm                                            | 393 kB     00:00    
Running rpm_check_debug
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
  Installing     : wget-1.11.4-5.fc12.x86_64                                            1/1
Installed:
  wget.x86_64 0:1.11.4-5.fc12
Complete!
[root@pcmk-1 ~]#
</screen>
</section>
<section>
<title>Preparation</title>
<para>
First we need to create a page for Apache to serve up. On Fedora the default Apache docroot is /var/www/html, so we’ll create an index file there.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>cat &lt;&lt;-END &gt;/var/www/html/index.html</userinput>
&lt;html&gt;
&lt;body&gt;My Test Site - pcmk-1&lt;/body&gt;
&lt;/html&gt;
END
[root@pcmk-1 ~]#
</screen>
<para>
For the moment, we will simplify things by serving up only a static site and manually sync the data between the two nodes. So run the command again on pcmk-2.
</para>
<screen>
[root@pcmk-2 ~]# <userinput>cat &lt;&lt;-END &gt;/var/www/html/index.html</userinput>
&lt;html&gt;
&lt;body&gt;My Test Site - pcmk-2&lt;/body&gt;
&lt;/html&gt;
END
[root@pcmk-2 ~]#
</screen>
</section>
<section>
<title>Enable the Apache status URL</title>
<para>
In order to monitor the health of your Apache instance, and recover it if it fails, the resource agent used by Pacemaker assumes the server-status URL is available.
Look for the following in /etc/httpd/conf/httpd.conf and make sure it is not disabled or commented out:
</para>
<screen>
&lt;Location /server-status>
SetHandler server-status
Order deny,allow
Deny from all
Allow from 127.0.0.1
&lt;/Location>
</screen>
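<para>
You can sanity-check the URL from the node itself before handing Apache over to the cluster; any local fetch of the page will do, for example:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>wget -O /dev/null http://localhost/server-status</userinput>
</screen>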
</section>
<section>
<title>Update the Configuration</title>
<para>
At this point, Apache is ready to go; all that needs to be done is to add it to the cluster. Let’s call the resource WebSite. We need to use an OCF script called apache in the heartbeat namespace <footnote>
<para>
Compare the key used here ocf:heartbeat:apache with the one we used earlier for the IP address: ocf:heartbeat:IPaddr2
</para>
</footnote>; the only required parameter is the path to the main Apache configuration file, and we’ll tell the cluster to check once a minute that Apache is still running.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure primitive WebSite ocf:heartbeat:apache params configfile=/etc/httpd/conf/httpd.conf op monitor interval=1min</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
<emphasis>primitive WebSite ocf:heartbeat:apache \</emphasis>
<emphasis> params configfile="/etc/httpd/conf/httpd.conf" \</emphasis>
<emphasis> op monitor interval="1min"</emphasis>
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
After a short delay, we should see the cluster start Apache.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 16:12:49 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
WebSite        (ocf::heartbeat:apache):        <emphasis>Started pcmk-1</emphasis>
</screen>
<para>
Wait a moment, the WebSite resource isn’t running on the same host as our IP address!
</para>
</section>
<section>
<title>Ensuring Resources Run on the Same Host</title>
<para>
- To reduce the load on any one machine, Pacemaker will generally try to spread the configured resources across the cluster nodes. However we can tell the cluster that two resources are related and need to run on the same host (or not at all). Here we instruct the cluster that WebSite can only run on the host that ClusterIP is active on. If ClusterIP is not active anywhere, WebSite will not be permitted to run anywhere.
+ To reduce the load on any one machine, Pacemaker will generally try to spread the configured resources across the cluster nodes.
+ However we can tell the cluster that two resources are related and need to run on the same host (or not at all).
+ Here we instruct the cluster that WebSite can only run on the host that ClusterIP is active on.
</para>
-
+ <para>
+ For the constraint, we need a name (choose something descriptive like website-with-ip), indicate that it's mandatory (so that if ClusterIP is not active anywhere, WebSite will not be permitted to run anywhere either) by specifying a score of INFINITY, and finally list the two resources.
+ </para>
+ <note>
+ <para>
+ If ClusterIP is not active anywhere, WebSite will not be permitted to run anywhere.
+ </para>
+ </note>
+ <important>
+ <para>
+ Colocation constraints are "directional", in that they imply certain things about the order in which the two resources will have a location chosen.
+ In this case we're saying <literal>WebSite</literal> needs to be placed on the same machine as <literal>ClusterIP</literal>; this implies that we must know the location of <literal>ClusterIP</literal> before choosing a location for <literal>WebSite</literal>.
+ </para>
+ </important>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure colocation website-with-ip INFINITY: WebSite ClusterIP</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
<emphasis>colocation website-with-ip inf: WebSite ClusterIP</emphasis>
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 16:14:34 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
WebSite        (ocf::heartbeat:apache):        Started pcmk-2
</screen>
</section>
<section>
- <title>Controlling Resource Start/Stop Ordering</title>
- <para>
- When Apache starts, it binds to the available IP addresses. It doesn’t know about any addresses we add afterwards, so not only do they need to run on the same node, but we need to make sure ClusterIP is already active before we start WebSite. We do this by adding an ordering constraint. We need to give it a name (chose something descriptive like apache-after-ip), indicate that its mandatory (so that any recovery for ClusterIP will also trigger recovery of WebSite) and list the two resources in the order we need them to start.
- </para>
+ <title>Controlling Resource Start/Stop Ordering</title>
+ <para>
+ When Apache starts, it binds to the available IP addresses.
+ It doesn’t know about any addresses we add afterwards, so not only do they need to run on the same node, but we need to make sure ClusterIP is already active before we start WebSite.
+ We do this by adding an ordering constraint.
+ We need to give it a name (choose something descriptive like apache-after-ip), indicate that it's mandatory (so that any recovery for ClusterIP will also trigger recovery of WebSite), and list the two resources in the order we need them to start.
+ </para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure order apache-after-ip mandatory: ClusterIP WebSite</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
colocation website-with-ip inf: WebSite ClusterIP
<emphasis>order apache-after-ip inf: ClusterIP WebSite</emphasis>
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
</section>
<section>
<title>Specifying a Preferred Location</title>
<para>
Pacemaker does not rely on any sort of hardware symmetry between nodes, so it may well be that one machine is more powerful than the other. In such cases it makes sense to host the resources there if it is available. To do this we create a location constraint. Again we give it a descriptive name (prefer-pcmk-1), specify the resource we want to run there (WebSite), how badly we’d like it to run there (we’ll use 50 for now, but in a two-node situation almost any value above 0 will do) and the host’s name.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure location prefer-pcmk-1 WebSite 50: pcmk-1</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
<emphasis>location prefer-pcmk-1 WebSite 50: pcmk-1</emphasis>
colocation website-with-ip inf: WebSite ClusterIP
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 16:17:35 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        <emphasis>Started pcmk-2</emphasis>
WebSite        (ocf::heartbeat:apache):        <emphasis>Started pcmk-2</emphasis>
</screen>
<para>
Wait a minute, the resources are still on pcmk-2!
</para>
<para>
Even though we now prefer pcmk-1 over pcmk-2, that preference is (intentionally) less than the resource stickiness (how much we preferred not to have unnecessary downtime): the location score of 50 for pcmk-1 cannot beat the stickiness score of 100 the resources earn by already running on pcmk-2.
</para>
<para>
To see the current placement scores, you can use a tool called ptest:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>ptest -sL</userinput>
</screen>
<note>
<para>
Include output
</para>
</note>
<para>
There is a way to force them to move though...
</para>
</section>
<section>
<title>Manually Moving Resources Around the Cluster</title>
<para>
There are always times when an administrator needs to override the cluster and force resources to move to a specific location. Underneath we use location constraints like the one we created above; happily, you don't need to care. Just provide the name of the resource and the intended location, and we'll do the rest.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm resource move WebSite pcmk-1</userinput>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 16:19:24 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1
WebSite        (ocf::heartbeat:apache):        Started pcmk-1
</screen>
<para>
Notice how the colocation rule we created has ensured that ClusterIP was also moved to pcmk-1.
</para>
<para>
For the curious, we can see the effect of this command by examining the configuration.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
<emphasis>location cli-prefer-WebSite WebSite \</emphasis>
<emphasis> rule $id="cli-prefer-rule-WebSite" inf: #uname eq pcmk-1</emphasis>
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
Highlighted is the automated constraint used to move the resources to pcmk-1.
</para>
<section>
<title>Giving Control Back to the Cluster</title>
<para>
Once we’ve finished whatever activity required us to move the resources to pcmk-1 (in our case nothing), we can allow the cluster to resume normal operation with the unmove command. Since we previously configured a default stickiness, the resources will remain on pcmk-1.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm resource unmove WebSite</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
Note that the automated constraint is now gone. If we check the cluster status, we can also see that as expected the resources are still active on pcmk-1.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 16:20:53 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        <emphasis>Started pcmk-1</emphasis>
WebSite        (ocf::heartbeat:apache):        <emphasis>Started pcmk-1</emphasis>
</screen>
</section>
</section>
</chapter>
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml b/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml
index 03d974b410..5fc6805de8 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml
@@ -1,528 +1,528 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<chapter>
<title>Replicated Storage with DRBD</title>
<para>
Even if you’re serving up static websites, having to manually synchronize the contents of that website to all the machines in the cluster is not ideal.
For dynamic websites, such as a wiki, it's not even an option.
Not everyone can afford network-attached storage, but somehow the data needs to be kept in sync.
Enter DRBD, which can be thought of as network-based RAID-1.
See <ulink url="http://www.drbd.org/">http://www.drbd.org/</ulink> for more details.
</para>
<section>
<title>Install the DRBD Packages</title>
<para>
Since its inclusion in the upstream 2.6.33 kernel, everything needed to use DRBD ships with &DISTRO; &DISTRO_VERSION;.
All you need to do is install it:
</para>
<screen>
-[root@pcmk-1 ~]# <userinput>yum install -y drbd-pacemaker</userinput>
+[root@pcmk-1 ~]# <userinput>yum install -y drbd-pacemaker drbd-udev</userinput>
Loaded plugins: presto, refresh-packagekit
Setting up Install Process
Resolving Dependencies
--> Running transaction check
---> Package drbd-pacemaker.x86_64 0:8.3.7-2.fc13 set to be updated
--> Processing Dependency: drbd-utils = 8.3.7-2.fc13 for package: drbd-pacemaker-8.3.7-2.fc13.x86_64
--> Running transaction check
---> Package drbd-utils.x86_64 0:8.3.7-2.fc13 set to be updated
--> Finished Dependency Resolution
Dependencies Resolved
=================================================================================
Package Arch Version Repository Size
=================================================================================
Installing:
drbd-pacemaker x86_64 8.3.7-2.fc13 fedora 19 k
Installing for dependencies:
drbd-utils x86_64 8.3.7-2.fc13 fedora 165 k
Transaction Summary
=================================================================================
Install 2 Package(s)
Upgrade 0 Package(s)
Total download size: 184 k
Installed size: 427 k
Downloading Packages:
Setting up and reading Presto delta metadata
fedora/prestodelta | 1.7 kB 00:00
Processing delta metadata
Package(s) data still to download: 184 k
(1/2): drbd-pacemaker-8.3.7-2.fc13.x86_64.rpm | 19 kB 00:01
(2/2): drbd-utils-8.3.7-2.fc13.x86_64.rpm | 165 kB 00:02
---------------------------------------------------------------------------------
Total 45 kB/s | 184 kB 00:04
Running rpm_check_debug
Running Transaction Test
Transaction Test Succeeded
Running Transaction
Installing : drbd-utils-8.3.7-2.fc13.x86_64 1/2
Installing : drbd-pacemaker-8.3.7-2.fc13.x86_64 2/2
Installed:
drbd-pacemaker.x86_64 0:8.3.7-2.fc13
Dependency Installed:
drbd-utils.x86_64 0:8.3.7-2.fc13
Complete!
[root@pcmk-1 ~]#
</screen>
</section>
<section>
<title>Configure DRBD</title>
<para>
Before we configure DRBD, we need to set aside some disk for it to use.
</para>
<section>
<title>Create A Partition for DRBD</title>
<para>
If you have more than 1GB free, feel free to use it.
For this guide, however, 1GB is plenty of space for a single html file and sufficient for later holding the GFS2 metadata.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>lvcreate -n drbd-demo -L 1G VolGroup</userinput>
  Logical volume "drbd-demo" created
[root@pcmk-1 ~]# <userinput>lvs</userinput>
  LV        VG       Attr   LSize   Origin Snap%  Move Log Copy%  Convert
  <emphasis>drbd-demo VolGroup -wi-a- 1.00G</emphasis>                                      
  lv_root   VolGroup -wi-ao   7.30G                                      
  lv_swap   VolGroup -wi-ao 500.00M
</screen>
<para>
Repeat this on the second node, making sure to use the same size partition.
</para>
<screen>
[root@pcmk-2 ~]# <userinput>lvs</userinput>
  LV      VG       Attr   LSize   Origin Snap%  Move Log Copy%  Convert
  lv_root VolGroup -wi-ao   7.30G                                      
  lv_swap <emphasis>VolGroup</emphasis> -wi-ao 500.00M                                      
[root@pcmk-2 ~]# <userinput>lvcreate -n drbd-demo -L 1G VolGroup</userinput>
 <emphasis> Logical volume "drbd-demo" created</emphasis>
[root@pcmk-2 ~]# <userinput>lvs</userinput>
  LV        VG       Attr   LSize   Origin Snap%  Move Log Copy%  Convert
  <emphasis>drbd-demo VolGroup -wi-a- 1.00G </emphasis>                                     
  lv_root   VolGroup -wi-ao   7.30G                                      
  lv_swap   VolGroup -wi-ao 500.00M
</screen>
</section>
<section>
<title>Write the DRBD Config</title>
<para>
There is no series of commands for building a DRBD configuration, so simply copy the configuration below to /etc/drbd.conf.
</para>
<para>
Detailed information on the directives used in this configuration (and other alternatives) is available from <ulink url="http://www.drbd.org/users-guide/ch-configure.html">http://www.drbd.org/users-guide/ch-configure.html</ulink>
</para>
<warning>
<para>
Be sure to use the names and addresses of <emphasis>your</emphasis> nodes if they differ from the ones used in this guide.
</para>
</warning>
<screen>
global {
  usage-count yes;
}
common {
  protocol C;
}
resource wwwdata {
  meta-disk internal;
  device    /dev/drbd1;
  syncer {
    verify-alg sha1;
  }
  net {
    allow-two-primaries;
  }
 <emphasis> on pcmk-1</emphasis> {
    disk      /dev/mapper/<emphasis>VolGroup</emphasis>-drbd--demo;
    address   192.168.122.101<emphasis>:7789;</emphasis>
  }
  <emphasis>on pcmk-2</emphasis> {
    disk      /dev/mapper/<emphasis>VolGroup</emphasis>-drbd--demo;
    address   192.168.122.102<emphasis>:7789;</emphasis>
  }
}
</screen>
<note>
<para>
TODO: Explain the reason for the allow-two-primaries option
</para>
</note>
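<para>
If you want to sanity check the file before continuing, drbdadm can parse the configuration and echo it back (an optional check; the resource does not need to be running yet):
</para>
<screen>
[root@pcmk-1 ~]# <userinput>drbdadm dump wwwdata</userinput>
</screen>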
</section>
<section>
<title>Initialize and Load DRBD</title>
<para>
With the configuration in place, we can now perform the DRBD initialization
</para>
<screen>
[root@pcmk-1 ~]# <userinput>drbdadm create-md wwwdata</userinput>
md_offset 12578816
al_offset 12546048
bm_offset 12541952
Found some data
 ==&gt; This might destroy existing data! &lt;==
Do you want to proceed?
[need to type 'yes' to confirm] <userinput>yes</userinput>
Writing meta data...
initializing activity log
NOT initialized bitmap
New drbd meta data block successfully created.
success
</screen>
<para>
Now load the DRBD kernel module and confirm that everything is sane
</para>
<screen>
[root@pcmk-1 ~]# <userinput>modprobe drbd</userinput>
[root@pcmk-1 ~]# <userinput>drbdadm up wwwdata</userinput>
[root@pcmk-1 ~]# <userinput>cat /proc/drbd</userinput>
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
<emphasis> 1: cs:WFConnection ro:Secondary/Unknown ds:Inconsistent/DUnknown C r--</emphasis>--
    ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:12248
[root@pcmk-1 ~]#
</screen>
<para>
Repeat on the second node:
</para>
<screen>
[root@pcmk-2 ~]# <userinput>drbdadm --force create-md wwwdata</userinput>
Writing meta data...
initializing activity log
NOT initialized bitmap
New drbd meta data block successfully created.
success
[root@pcmk-2 ~]# <userinput>modprobe drbd</userinput>
WARNING: Deprecated config file /etc/modprobe.conf, all config files belong into /etc/modprobe.d/.
[root@pcmk-2 ~]# <userinput>drbdadm up wwwdata</userinput>
[root@pcmk-2 ~]# <userinput>cat /proc/drbd</userinput>
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
<emphasis> 1: cs:Connected ro:Secondary/Secondary ds:Inconsistent/Inconsistent C r----</emphasis>
    ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:12248
</screen>
<para>
Now we need to tell DRBD which set of data to use.
Since both sides contain garbage, we can run the following on pcmk-1:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>drbdadm -- --overwrite-data-of-peer primary wwwdata</userinput>
[root@pcmk-1 ~]# <userinput>cat /proc/drbd</userinput>
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
 1: cs:SyncSource ro:Primary/Secondary ds:UpToDate/<emphasis>Inconsistent</emphasis> C r----
    ns:2184 nr:0 dw:0 dr:2472 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:10064
        [=====&gt;..............] sync'ed: 33.4% (10064/12248)K
        finish: 0:00:37 speed: 240 (240) K/sec
[root@pcmk-1 ~]# <userinput>cat /proc/drbd</userinput>
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
 1: <emphasis>cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate</emphasis> C r----
    ns:12248 nr:0 dw:0 dr:12536 al:0 bm:1 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
</screen>
<para>
pcmk-1 is now in the Primary state, which allows it to be written to.
That makes this a good point at which to create a filesystem and populate it with some data to serve up via our WebSite resource.
</para>
</section>
<section>
<title>Populate DRBD with Data</title>
<screen>
[root@pcmk-1 ~]# <userinput>mkfs.ext4 /dev/drbd1</userinput>
mke2fs 1.41.4 (27-Jan-2009)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
3072 inodes, 12248 blocks
612 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=12582912
2 block groups
8192 blocks per group, 8192 fragments per group
1536 inodes per group
Superblock backups stored on blocks:
        8193
Writing inode tables: done                            
Creating journal (1024 blocks): done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first.  Use tune2fs -c or -i to override.
</screen>
<para>
Now mount the newly created filesystem so we can create our index file.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>mount /dev/drbd1 /mnt/</userinput>
[root@pcmk-1 ~]# <userinput>cat &lt;&lt;-END &gt;/mnt/index.html</userinput>
&gt; &lt;html&gt;
&gt; &lt;body&gt;My Test Site - drbd&lt;/body&gt;
&gt; &lt;/html&gt;
&gt; END
[root@pcmk-1 ~]# <userinput>umount /dev/drbd1</userinput>
</screen>
</section>
</section>
<section>
<title>Configure the Cluster for DRBD</title>
<para>
One handy feature of the crm shell is that you can use it in interactive mode to make several changes atomically.
</para>
<para>
First we launch the shell. The prompt will change to indicate you’re in interactive mode.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm</userinput>
cib crm(live)#
</screen>
<para>
Next we must create a working copy of the current configuration.
This is where all our changes will go.
The cluster will not see any of them until we say it's OK.
Notice again how the prompt changes, this time to indicate that we’re no longer looking at the live cluster.
</para>
<screen>
cib crm(live)# <userinput>cib new drbd</userinput>
INFO: drbd shadow CIB created
crm(drbd)#
</screen>
<para>
Now we can create our DRBD clone and display the revised configuration.
</para>
<screen>
crm(drbd)# <userinput>configure primitive WebData ocf:linbit:drbd params drbd_resource=wwwdata \</userinput>
<userinput>        op monitor interval=60s</userinput>
crm(drbd)# <userinput>configure ms WebDataClone WebData meta master-max=1 master-node-max=1 \</userinput>
<userinput>        clone-max=2 clone-node-max=1 notify=true</userinput>
crm(drbd)# <userinput>configure show</userinput>
node pcmk-1
node pcmk-2
<emphasis>primitive WebData ocf:linbit:drbd \</emphasis>
<emphasis> params drbd_resource="wwwdata" \</emphasis>
<emphasis> op monitor interval="60s"</emphasis>
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
<emphasis>ms WebDataClone WebData \</emphasis>
<emphasis> meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"</emphasis>
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
order apache-after-ip inf: ClusterIP WebSite
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
Once we’re happy with the changes, we can tell the cluster to start using them and use crm_mon to check everything is functioning.
</para>
<screen>
crm(drbd)# <userinput>cib commit drbd</userinput>
INFO: commited 'drbd' shadow CIB to the cluster
crm(drbd)# <userinput>quit</userinput>
bye
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Tue Sep  1 09:37:13 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
3 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1
WebSite (ocf::heartbeat:apache):        Started pcmk-1
<emphasis>Master/Slave Set: WebDataClone</emphasis>
<emphasis> Masters: [ pcmk-2 ]</emphasis>
<emphasis> Slaves: [ pcmk-1 ]</emphasis>
</screen>
<note>
<para>
Include details on adding a second DRBD resource
</para>
</note>
<para>
Now that DRBD is functioning we can configure a Filesystem resource to use it.
In addition to the filesystem’s definition, we also need to tell the cluster where it can be located (only on the DRBD Primary) and when it is allowed to start (after the Primary was promoted).
</para>
<para>
Once again, we'll use the shell's interactive mode.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm</userinput>
crm(live)# <userinput>cib new fs</userinput>
INFO: fs shadow CIB created
crm(fs)# <userinput>configure primitive WebFS ocf:heartbeat:Filesystem \</userinput>
<userinput>        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4"</userinput>
crm(fs)# <userinput>configure colocation fs_on_drbd inf: WebFS WebDataClone:Master</userinput>
crm(fs)# <userinput>configure order WebFS-after-WebData inf: WebDataClone:promote WebFS:start</userinput>
</screen>
<para>
We also need to tell the cluster that Apache needs to run on the same machine as the filesystem and that it must be active before Apache can start.
</para>
<screen>
crm(fs)# <userinput>configure colocation WebSite-with-WebFS inf: WebSite WebFS</userinput>
crm(fs)# <userinput>configure order WebSite-after-WebFS inf: WebFS WebSite</userinput>
</screen>
<para>
Time to review the updated configuration:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4"
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
ms WebDataClone WebData \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation WebSite-with-WebFS inf: WebSite WebFS
colocation fs_on_drbd inf: WebFS WebDataClone:Master
colocation website-with-ip inf: WebSite ClusterIP
order WebFS-after-WebData inf: WebDataClone:promote WebFS:start
order WebSite-after-WebFS inf: WebFS WebSite
order apache-after-ip inf: ClusterIP WebSite
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
After reviewing the new configuration, we again upload it and watch the cluster put it into effect.
</para>
<screen>
crm(fs)# <userinput>cib commit fs</userinput>
INFO: commited 'fs' shadow CIB to the cluster
crm(fs)# <userinput>quit</userinput>
bye
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Tue Sep  1 10:08:44 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1
<emphasis>WebSite (ocf::heartbeat:apache): Started pcmk-1</emphasis>
Master/Slave Set: WebDataClone
        Masters: [ pcmk-1 ]
        Slaves: [ pcmk-2 ]
<emphasis>WebFS (ocf::heartbeat:Filesystem): Started pcmk-1</emphasis>
</screen>
<section>
<title>Testing Migration</title>
<para>
We could shut down the active node again, but another way to safely simulate recovery is to put the node into what is called “standby mode”.
Nodes in this state tell the cluster that they are not allowed to run resources.
Any resources found active there will be moved elsewhere.
This feature can be particularly useful when updating the resources’ packages.
</para>
<para>
Put the local node into standby mode and observe the cluster move all the resources to the other node.
Note also that the node’s status will change to indicate that it can no longer host resources.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm node standby</userinput>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Tue Sep  1 10:09:57 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============
<emphasis>Node pcmk-1: standby</emphasis>
Online: [ pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        <emphasis>Started pcmk-2</emphasis>
WebSite (ocf::heartbeat:apache):        <emphasis>Started pcmk-2</emphasis>
Master/Slave Set: WebDataClone
        <emphasis>Masters: [ pcmk-2 ]</emphasis>
        Stopped: [ WebData:1 ]
WebFS   (ocf::heartbeat:Filesystem):    <emphasis>Started pcmk-2</emphasis>
</screen>
<para>
Once we’ve done everything we needed to on pcmk-1 (in this case nothing, we just wanted to see the resources move), we can allow the node to be a full cluster member again.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm node online</userinput>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Tue Sep  1 10:13:25 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============
<emphasis>Online: [ pcmk-1 pcmk-2 ]</emphasis>
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
WebSite (ocf::heartbeat:apache):        Started pcmk-2
Master/Slave Set: WebDataClone
        Masters: [ pcmk-2 ]
        Slaves: [ pcmk-1 ]
WebFS   (ocf::heartbeat:Filesystem):    Started pcmk-2
</screen>
<para>
Notice that our resource stickiness settings prevent the services from migrating back to pcmk-1.
</para>
</section>
</section>
</chapter>
diff --git a/extra/resources/SysInfo b/extra/resources/SysInfo
index 8f9870bd1a..a2fd4ac126 100644
--- a/extra/resources/SysInfo
+++ b/extra/resources/SysInfo
@@ -1,388 +1,330 @@
#!/bin/sh
#
#
# SysInfo OCF Resource Agent
# It records (in the CIB) various attributes of a node
#
# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SysInfo">
<version>1.0</version>
<longdesc lang="en">
This is a SysInfo Resource Agent.
It records (in the CIB) various attributes of a node
Sample Linux output:
arch: i686
os: Linux-2.4.26-gentoo-r14
free_swap: 1999
cpu_info: Intel(R) Celeron(R) CPU 2.40GHz
cpu_speed: 4771.02
cpu_cores: 1
cpu_load: 0.00
ram_total: 513
ram_free: 117
root_free: 2.4
Sample Darwin output:
arch: i386
os: Darwin-8.6.2
cpu_info: Intel Core Duo
cpu_speed: 2.16
cpu_cores: 2
cpu_load: 0.18
ram_total: 2016
ram_free: 787
root_free: 13
Units:
free_swap: Mb
ram_*: Mb
cpu_speed (Linux): bogomips
cpu_speed (Darwin): Ghz
*_free: GB (or user-defined: disk_unit)
</longdesc>
<shortdesc lang="en">SysInfo resource agent</shortdesc>
<parameters>
<parameter name="pidfile" unique="0">
<longdesc lang="en">PID file</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="$OCF_RESKEY_pidfile" />
</parameter>
<parameter name="delay" unique="0">
<longdesc lang="en">Interval to allow values to stabilize</longdesc>
<shortdesc lang="en">Dampening Delay</shortdesc>
<content type="string" default="0s" />
</parameter>
<parameter name="disks" unique="1">
<longdesc lang="en">
Filesystems or Paths to be queried for free disk space as a SPACE separated list - e.g "/dev/sda1 /tmp".
Results will be written to an attribute with leading slashes removed, and other slashes replaced with underscore, and the word 'free' appended - e.g /dev/sda1 -> dev_sda1_free
Note: The root filesystem '/' is always queried to an attribute named 'root_free'
</longdesc>
<shortdesc lang="en">List of Filesytems/Paths to query for free disk space</shortdesc>
content type="string" />
</parameter>
<parameter name="disk_unit" unique="1">
<longdesc lang="en">
Unit to report disk free space in.
Can be one of: B, K, M, G, T, P (case-insensitive)
</longdesc>
<shortdesc lang="en">Unit to report disk free space in</shortdesc>
content type="string" default="G"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="100" />
<action name="monitor" timeout="20s" interval="60s"/>
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
UpdateStat() {
name=$1; shift
value="$*"
- echo -e "$name:\t$value"
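+ # printf is used because 'echo -e' is not portable across POSIX shells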
+ printf "%s:\t%s\n" "$name" "$value"
${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -v "$value"
}
SysInfoStats() {
UpdateStat arch "`uname -m`"
UpdateStat os "`uname -s`-`uname -r`"
case `uname -s` in
"Darwin")
mem=`top -l 1 | grep Mem: | awk '{print $10}'`
mem_used=`top -l 1 | grep Mem: | awk '{print $8}'`
mem=`SysInfo_mem_units $mem`
mem_used=`SysInfo_mem_units $mem_used`
mem_total=`expr $mem_used + $mem`
- cpu_type=`system_profiler SPHardwareDataType | grep "CPU Type:"`
- cpu_type=${cpu_type/*: /}
- cpu_speed=`system_profiler SPHardwareDataType | grep "CPU Speed:" | awk '{print $3}'`
- cpu_cores=`system_profiler SPHardwareDataType | grep "Number Of"`
- cpu_cores=${cpu_cores/*: /}
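+ # awk keeps this POSIX sh compatible; the ${var/*: /} substitutions removed above are bashisms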
+ cpu_type=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Type/ {print $2; exit}'`
+ cpu_speed=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Speed/ {print $2; exit}'`
+ cpu_cores=`system_profiler SPHardwareDataType | awk -F': ' '/^Number Of/ {print $2; exit}'`
;;
"Linux")
if [ -f /proc/cpuinfo ]; then
- cpu_type=`grep "model name" /proc/cpuinfo | head -n 1`
- cpu_type=${cpu_type/*: /}
- cpu_speed=`grep "bogomips" /proc/cpuinfo | head -n 1`
- cpu_speed=${cpu_speed/*: /}
+ cpu_type=`awk -F': ' '/model name/ {print $2; exit}' /proc/cpuinfo`
+ cpu_speed=`awk -F': ' '/bogomips/ {print $2; exit}' /proc/cpuinfo`
cpu_cores=`grep "^processor" /proc/cpuinfo | wc -l`
fi
if [ -f /proc/meminfo ]; then
# meminfo results are in kB
mem=`grep "SwapFree" /proc/meminfo | awk '{print $2"k"}'`
if [ ! -z $mem ]; then
UpdateStat free_swap `SysInfo_mem_units $mem`
fi
mem=`grep "Inactive" /proc/meminfo | awk '{print $2"k"}'`
mem_total=`grep "MemTotal" /proc/meminfo | awk '{print $2"k"}'`
else
mem=`top -n 1 | grep Mem: | awk '{print $7}'`
fi
;;
*)
esac
if [ x != x"$cpu_type" ]; then
UpdateStat cpu_info "$cpu_type"
fi
if [ x != x"$cpu_speed" ]; then
UpdateStat cpu_speed "$cpu_speed"
fi
if [ x != x"$cpu_cores" ]; then
UpdateStat cpu_cores "$cpu_cores"
fi
loads=`uptime`
load15=`echo ${loads} | awk '{print $10}'`
UpdateStat cpu_load $load15
if [ ! -z "$mem" ]; then
# Massage the memory values
UpdateStat ram_total `SysInfo_mem_units $mem_total`
UpdateStat ram_free `SysInfo_mem_units $mem`
fi
# Portability notes:
# o tail: explicit "-n" not available in Solaris; instead simplify
# 'tail -n <c>' to the equivalent 'tail -<c>'.
for disk in "/" ${OCF_RESKEY_disks}; do
unset disk_free disk_label
- disk_free=`df -k ${disk} | tail -1 | awk '{print $4}'`
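+ # df -h reports sizes with a unit suffix, which SysInfo_megabytes can parse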
+ disk_free=`df -h ${disk} | tail -1 | awk '{print $4}'`
if [ x != x"$disk_free" ]; then
disk_label=`echo $disk | sed -e 's#^/$#root#;s#^/*##;s#/#_#g'`
UpdateStat ${disk_label}_free `SysInfo_hdd_units $disk_free`
fi
done
}
+SysInfo_megabytes() {
+ # Size in megabytes
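+ # Accepts suffixed sizes as produced by df -h and /proc/meminfo,
+ # e.g. "1.5G" -> 1536, "500M" -> 500, "12248k" -> 11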
+ echo $1 | awk '{ n = $0;
+ sub(/[0-9]+(\.[0-9]+)?/, "");
+ split(n, a, $0);
+ n=a[1];
+ if ($0 == "G" || $0 == "") { n *= 1024 };
+ if (/^kB?/) { n /= 1024 };
+ printf "%d\n", n }' # Intentionaly round to an integer
+}
+
SysInfo_mem_units() {
mem=$1
if [ -z $1 ]; then
return
fi
- memlen=`expr ${#mem} - 1`
- memlen_alt=`expr ${#mem} - 2`
- if [ ${mem:$memlen:1} = "G" ]; then
- mem="${mem:0:$memlen}"
- if [ $mem != ${mem/./} ]; then
- mem_before=${mem/.*/}
- mem_after=${mem/*./}
- mem=$[mem_before*1024]
- if [ ${#mem_after} = 0 ]; then
- :
- elif [ ${#mem_after} = 1 ]; then
- mem=$[mem+100*$mem_after]
- elif [ ${#mem_after} = 2 ]; then
- mem=$[mem+10*$mem_after]
- elif [ ${#mem_after} = 3 ]; then
- mem=$[mem+$mem_after]
- else
- mem_after=${mem_after:0:3}
- mem=$[mem+$mem_after]
- fi
- fi
- elif [ ${mem:$memlen:1} = "M" ]; then
- mem=${mem/.*/}
- mem="${mem:0:$memlen}"
- elif [ ${mem:$memlen:1} = "k" ]; then
- mem="${mem:0:$memlen}"
- mem=${mem/.*/}
- mem=`expr $mem / 1024`
- elif [ ${mem:$memlen_alt:2} = "kB" ]; then
- mem="${mem:0:$memlen_alt}"
- mem=${mem/.*/}
- mem=`expr $mem / 1024`
- elif [ ${mem:$memlen_alt:2} = "Mb" ]; then
- mem="${mem:0:$memlen_alt}"
- mem=${mem/.*/}
- elif [ ${mem:$memlen_alt:2} = "MB" ]; then
- mem="${mem:0:$memlen_alt}"
- mem=${mem/.*/}
- fi
-
+ mem=$(SysInfo_megabytes "$1")
# Round to the next multiple of 50
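+ # e.g. 117 -> 150, 1024 -> 1050, 200 -> 200 (already a multiple)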
- memlen=`expr ${#mem} - 2`
- mem_round="${mem:$memlen:2}"
- if [ x$mem_round = x ]; then
- :
- elif [ $mem_round = "00" ]; then
- :
- elif [ $mem_round -lt "50" ]; then
- mem=$[mem+50]
- mem=$[mem-$mem_round]
-
- else
- mem=$[mem+100]
- mem=$[mem-$mem_round]
+ r=$(($mem % 50))
+ if [ $r != 0 ]; then
+ mem=$(($mem + 50 - $r))
fi
+
echo $mem
}
SysInfo_hdd_units() {
- disk_size=$1
- disk_unit=${OCF_RESKEY_disk_unit}
- if [ -z $disk_unit ]; then
- disk_unit="G"
- fi
-
- case $disk_unit in
- [Pp])
- disk_size=$((disk_size/1024/1024/1024/1024))
- ;;
- [Tt])
- disk_size=$((disk_size/1024/1024/1024))
- ;;
- [Gg])
- disk_size=$((disk_size/1024/1024))
- ;;
- [Mm])
- disk_size=$((disk_size/1024))
- ;;
- [Kk])
- #Already in KB
- ;;
- [Bb])
- disk_size=$((disk_size*1024))
- ;;
- *)
- ocf_log err "disk_unit set to invalid unit"
- exit $OCF_ERR_ARGS
+ # Defaults to size in gigabytes
+
+ case $OCF_RESKEY_disk_unit in
+ [Pp]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024 / 1024));;
+ [Tt]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024));;
+ [Gg]) echo $(($(SysInfo_megabytes "$1") / 1024));;
+ [Mm]) SysInfo_megabytes "$1";;
+ [Kk]) echo $(($(SysInfo_megabytes "$1") * 1024));;
+ [Bb]) echo $(($(SysInfo_megabytes "$1") * 1024 * 1024));;
+ *)
+ ocf_log err "Invalid value for disk_unit: $OCF_RESKEY_disk_unit"
+ echo $(($(SysInfo_megabytes "$1") / 1024));;
esac
-
- echo $disk_size
}
SysInfo_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
SysInfo_start() {
echo $OCF_RESKEY_clone > $OCF_RESKEY_pidfile
SysInfoStats
exit $OCF_SUCCESS
}
SysInfo_stop() {
rm $OCF_RESKEY_pidfile
exit $OCF_SUCCESS
}
SysInfo_monitor() {
if [ -f $OCF_RESKEY_pidfile ]; then
clone=`cat $OCF_RESKEY_pidfile`
fi
if [ x$clone = x ]; then
rm $OCF_RESKEY_pidfile
exit $OCF_NOT_RUNNING
elif [ $clone = $OCF_RESKEY_clone ]; then
SysInfoStats
exit $OCF_SUCCESS
elif [ x$OCF_RESKEY_CRM_meta_globally_unique = xtrue
-o x$OCF_RESKEY_CRM_meta_globally_unique = xTrue
-o x$OCF_RESKEY_CRM_meta_globally_unique = xyes
-o x$OCF_RESKEY_CRM_meta_globally_unique = xYes
]; then
SysInfoStats
exit $OCF_SUCCESS
fi
exit $OCF_NOT_RUNNING
}
SysInfo_validate() {
return $OCF_SUCCESS
}
if [ $# -ne 1 ]; then
SysInfo_usage
exit $OCF_ERR_ARGS
fi
: ${OCF_RESKEY_pidfile:="$HA_VARRUN/SysInfo-${OCF_RESOURCE_INSTANCE}"}
+: ${OCF_RESKEY_disk_unit:="G"}
: ${OCF_RESKEY_clone:="0"}
if [ x != x${OCF_RESKEY_delay} ]; then
OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}"
fi
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
start) SysInfo_start
;;
stop) SysInfo_stop
;;
monitor) SysInfo_monitor
;;
validate-all) SysInfo_validate
;;
usage|help) SysInfo_usage
exit $OCF_SUCCESS
;;
*) SysInfo_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
diff --git a/lib/common/ais.c b/lib/common/ais.c
index 15db59bc75..90559dc921 100644
--- a/lib/common/ais.c
+++ b/lib/common/ais.c
@@ -1,1225 +1,1229 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <bzlib.h>
#include <crm/ais.h>
#include <crm/common/ipc.h>
#include <crm/common/cluster.h>
#include <sys/utsname.h>
#include "stack.h"
#ifdef SUPPORT_COROSYNC
# include <corosync/corodefs.h>
#endif
#ifdef SUPPORT_CMAN
# include <libcman.h>
cman_handle_t pcmk_cman_handle = NULL;
#endif
#ifdef SUPPORT_CS_QUORUM
# include <sys/socket.h>
# include <netinet/in.h>
# include <arpa/inet.h>
# include <corosync/cpg.h>
# include <corosync/quorum.h>
quorum_handle_t pcmk_quorum_handle = 0;
cpg_handle_t pcmk_cpg_handle = 0;
struct cpg_name pcmk_cpg_group = {
.length = 0,
.value[0] = 0,
};
#endif
static char *pcmk_uname = NULL;
static int pcmk_uname_len = 0;
static uint32_t pcmk_nodeid = 0;
#define cs_repeat(counter, max, code) do { \
code; \
if(rc == CS_ERR_TRY_AGAIN) { \
counter++; \
crm_debug("Retrying operation after %ds", counter); \
sleep(counter); \
} \
} while(rc == CS_ERR_TRY_AGAIN && counter < max)
enum crm_ais_msg_types text2msg_type(const char *text)
{
int type = crm_msg_none;
CRM_CHECK(text != NULL, return type);
if(safe_str_eq(text, "ais")) {
type = crm_msg_ais;
} else if(safe_str_eq(text, "crm_plugin")) {
type = crm_msg_ais;
} else if(safe_str_eq(text, CRM_SYSTEM_CIB)) {
type = crm_msg_cib;
} else if(safe_str_eq(text, CRM_SYSTEM_CRMD)) {
type = crm_msg_crmd;
} else if(safe_str_eq(text, CRM_SYSTEM_DC)) {
type = crm_msg_crmd;
} else if(safe_str_eq(text, CRM_SYSTEM_TENGINE)) {
type = crm_msg_te;
} else if(safe_str_eq(text, CRM_SYSTEM_PENGINE)) {
type = crm_msg_pe;
} else if(safe_str_eq(text, CRM_SYSTEM_LRMD)) {
type = crm_msg_lrmd;
} else if(safe_str_eq(text, CRM_SYSTEM_STONITHD)) {
type = crm_msg_stonithd;
} else if(safe_str_eq(text, "stonith-ng")) {
type = crm_msg_stonith_ng;
} else if(safe_str_eq(text, "attrd")) {
type = crm_msg_attrd;
} else {
/* This will normally be a transient client rather than
* a cluster daemon. Set the type to the pid of the client
*/
int scan_rc = sscanf(text, "%d", &type);
if(scan_rc != 1) {
/* Ensure it's sane */
type = crm_msg_none;
}
}
return type;
}
char *get_ais_data(const AIS_Message *msg)
{
int rc = BZ_OK;
char *uncompressed = NULL;
unsigned int new_size = msg->size + 1;
if(msg->is_compressed == FALSE) {
crm_debug_2("Returning uncompressed message data");
uncompressed = strdup(msg->data);
} else {
crm_debug_2("Decompressing message data");
crm_malloc0(uncompressed, new_size);
rc = BZ2_bzBuffToBuffDecompress(
uncompressed, &new_size, (char*)msg->data, msg->compressed_size, 1, 0);
CRM_ASSERT(rc == BZ_OK);
CRM_ASSERT(new_size == msg->size);
}
return uncompressed;
}
#if SUPPORT_COROSYNC
int ais_fd_sync = -1;
int ais_fd_async = -1; /* never send messages via this channel */
void *ais_ipc_ctx = NULL;
hdb_handle_t ais_ipc_handle = 0;
GFDSource *ais_source = NULL;
GFDSource *ais_source_sync = NULL;
GFDSource *cman_source = NULL;
GFDSource *cpg_source = NULL;
GFDSource *quorumd_source = NULL;
static char *ais_cluster_name = NULL;
gboolean get_ais_nodeid(uint32_t *id, char **uname)
{
struct iovec iov;
int retries = 0;
int rc = CS_OK;
coroipc_response_header_t header;
struct crm_ais_nodeid_resp_s answer;
header.error = CS_OK;
header.id = crm_class_nodeid;
header.size = sizeof(coroipc_response_header_t);
CRM_CHECK(id != NULL, return FALSE);
CRM_CHECK(uname != NULL, return FALSE);
iov.iov_base = &header;
iov.iov_len = header.size;
retry:
errno = 0;
rc = coroipcc_msg_send_reply_receive(
ais_ipc_handle, &iov, 1, &answer, sizeof (answer));
if(rc == CS_OK) {
CRM_CHECK(answer.header.size == sizeof (struct crm_ais_nodeid_resp_s),
crm_err("Odd message: id=%d, size=%d, error=%d",
answer.header.id, answer.header.size, answer.header.error));
CRM_CHECK(answer.header.id == crm_class_nodeid, crm_err("Bad response id: %d", answer.header.id));
}
if(rc == CS_ERR_TRY_AGAIN && retries < 20) {
retries++;
crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries);
sleep(retries); /* Proportional back off */
goto retry;
}
if(rc != CS_OK) {
crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc));
return FALSE;
} else if(answer.header.error != CS_OK) {
crm_err("Bad response from peer: (rc=%d): %s", rc, ais_error2text(rc));
return FALSE;
}
crm_info("Server details: id=%u uname=%s cname=%s",
answer.id, answer.uname, answer.cname);
*id = answer.id;
*uname = crm_strdup(answer.uname);
ais_cluster_name = crm_strdup(answer.cname);
return TRUE;
}
gboolean crm_get_cluster_name(char **cname)
{
CRM_CHECK(cname != NULL, return FALSE);
if(ais_cluster_name) {
*cname = crm_strdup(ais_cluster_name);
return TRUE;
}
return FALSE;
}
gboolean
send_ais_text(int class, const char *data,
gboolean local, const char *node, enum crm_ais_msg_types dest)
{
static int msg_id = 0;
static int local_pid = 0;
enum cluster_type_e cluster_type = get_cluster_type();
int retries = 0;
int rc = CS_OK;
int buf_len = sizeof(coroipc_response_header_t);
char *buf = NULL;
struct iovec iov;
const char *transport = "pcmk";
coroipc_response_header_t *header = NULL;
AIS_Message *ais_msg = NULL;
enum crm_ais_msg_types sender = text2msg_type(crm_system_name);
/* There are only 6 handlers registered to crm_lib_service in plugin.c */
CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE);
if(data == NULL) {
data = "";
}
if(local_pid == 0) {
local_pid = getpid();
}
if(sender == crm_msg_none) {
sender = local_pid;
}
crm_malloc0(ais_msg, sizeof(AIS_Message));
ais_msg->id = msg_id++;
ais_msg->header.id = class;
ais_msg->header.error = CS_OK;
ais_msg->host.type = dest;
ais_msg->host.local = local;
if(node) {
ais_msg->host.size = strlen(node);
memset(ais_msg->host.uname, 0, MAX_NAME);
memcpy(ais_msg->host.uname, node, ais_msg->host.size);
ais_msg->host.id = 0;
} else {
ais_msg->host.size = 0;
memset(ais_msg->host.uname, 0, MAX_NAME);
ais_msg->host.id = 0;
}
ais_msg->sender.id = 0;
ais_msg->sender.type = sender;
ais_msg->sender.pid = local_pid;
ais_msg->sender.size = pcmk_uname_len;
memset(ais_msg->sender.uname, 0, MAX_NAME);
memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size);
ais_msg->size = 1 + strlen(data);
if(ais_msg->size < CRM_BZ2_THRESHOLD) {
failback:
crm_realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size);
memcpy(ais_msg->data, data, ais_msg->size);
} else {
char *compressed = NULL;
char *uncompressed = crm_strdup(data);
unsigned int len = (ais_msg->size * 1.1) + 600; /* recommended size */
crm_debug_5("Compressing message payload");
crm_malloc(compressed, len);
rc = BZ2_bzBuffToBuffCompress(
compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK);
crm_free(uncompressed);
if(rc != BZ_OK) {
crm_err("Compression failed: %d", rc);
crm_free(compressed);
goto failback;
}
crm_realloc(ais_msg, sizeof(AIS_Message) + len + 1);
memcpy(ais_msg->data, compressed, len);
ais_msg->data[len] = 0;
crm_free(compressed);
ais_msg->is_compressed = TRUE;
ais_msg->compressed_size = len;
crm_debug_2("Compression details: %d -> %d",
ais_msg->size, ais_data_len(ais_msg));
}
ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg);
crm_debug_3("Sending%s message %d to %s.%s (data=%d, total=%d)",
ais_msg->is_compressed?" compressed":"",
ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest),
ais_data_len(ais_msg), ais_msg->header.size);
iov.iov_base = ais_msg;
iov.iov_len = ais_msg->header.size;
crm_realloc(buf, buf_len);
do {
if(rc == CS_ERR_TRY_AGAIN) {
retries++;
crm_info("Peer overloaded or membership in flux:"
" Re-sending message (Attempt %d of 20)", retries);
sleep(retries); /* Proportional back off */
}
errno = 0;
switch(cluster_type) {
case pcmk_cluster_classic_ais:
rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len);
header = (coroipc_response_header_t *)buf;
if(rc == CS_OK) {
CRM_CHECK(header->size == sizeof (coroipc_response_header_t),
crm_err("Odd message: id=%d, size=%d, class=%d, error=%d",
header->id, header->size, class, header->error));
CRM_ASSERT(buf_len >= header->size);
CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK,
crm_err("Bad response id (%d) for request (%d)", header->id, ais_msg->header.id));
CRM_CHECK(header->error == CS_OK, rc = header->error);
}
break;
case pcmk_cluster_corosync:
case pcmk_cluster_cman:
transport = "cpg";
CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail);
rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1);
if(rc == CS_ERR_TRY_AGAIN) {
cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED;
int rc2 = cpg_flow_control_state_get (pcmk_cpg_handle, &fc_state);
if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) {
crm_warn("Connection overloaded, cannot send messages");
goto bail;
} else if (rc2 != CS_OK) {
crm_warn("Could not determin the connection state: %s (%d)", ais_error2text(rc2), rc2);
goto bail;
}
}
break;
case pcmk_cluster_unknown:
case pcmk_cluster_invalid:
case pcmk_cluster_heartbeat:
CRM_ASSERT(is_openais_cluster());
break;
}
} while (rc == CS_ERR_TRY_AGAIN && retries < 20);
bail:
if(rc != CS_OK) {
crm_perror(LOG_ERR,"Sending message %d via %s: FAILED (rc=%d): %s",
ais_msg->id, transport, rc, ais_error2text(rc));
} else {
crm_debug_4("Message %d: sent", ais_msg->id);
}
crm_free(buf);
crm_free(ais_msg);
return (rc == CS_OK);
}
gboolean
send_ais_message(xmlNode *msg,
gboolean local, const char *node, enum crm_ais_msg_types dest)
{
gboolean rc = TRUE;
char *data = NULL;
if(is_classic_ais_cluster()) {
if(ais_fd_async < 0 || ais_source == NULL) {
crm_err("Not connected to AIS: %d %p", ais_fd_async, ais_source);
return FALSE;
}
}
data = dump_xml_unformatted(msg);
rc = send_ais_text(0, data, local, node, dest);
crm_free(data);
return rc;
}
void terminate_ais_connection(void)
{
crm_notice("Disconnecting from AIS");
/* G_main_del_fd(ais_source); */
/* G_main_del_fd(ais_source_sync); */
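+ /* Disconnect from the messaging layer (plugin IPC or CPG) first, then shut down the quorum providers */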
-#ifdef SUPPORT_CMAN
- if(is_cman_cluster()) {
- cman_stop_notification(pcmk_cman_handle);
- cman_finish(pcmk_cman_handle);
+ if(is_classic_ais_cluster() == FALSE) {
+ coroipcc_service_disconnect(ais_ipc_handle);
+
+ } else {
+ cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group);
}
-#endif
if(is_corosync_cluster()) {
quorum_finalize(pcmk_quorum_handle);
}
- if(is_classic_ais_cluster() == FALSE) {
- coroipcc_service_disconnect(ais_ipc_handle);
-
- } else {
- cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group);
+#ifdef SUPPORT_CMAN
+ if(is_cman_cluster()) {
+ cman_stop_notification(pcmk_cman_handle);
+ cman_finish(pcmk_cman_handle);
}
+#endif
}
int ais_membership_timer = 0;
gboolean ais_membership_force = FALSE;
static gboolean ais_dispatch_message(
AIS_Message *msg, gboolean (*dispatch)(AIS_Message*,char*,int))
{
char *data = NULL;
char *uncompressed = NULL;
xmlNode *xml = NULL;
CRM_ASSERT(msg != NULL);
crm_debug_3("Got new%s message (size=%d, %d, %d)",
msg->is_compressed?" compressed":"",
ais_data_len(msg), msg->size, msg->compressed_size);
data = msg->data;
if(msg->is_compressed && msg->size > 0) {
int rc = BZ_OK;
unsigned int new_size = msg->size + 1;
if(check_message_sanity(msg, NULL) == FALSE) {
goto badmsg;
}
crm_debug_5("Decompressing message data");
crm_malloc0(uncompressed, new_size);
rc = BZ2_bzBuffToBuffDecompress(
uncompressed, &new_size, data, msg->compressed_size, 1, 0);
if(rc != BZ_OK) {
crm_err("Decompression failed: %d", rc);
goto badmsg;
}
CRM_ASSERT(rc == BZ_OK);
CRM_ASSERT(new_size == msg->size);
data = uncompressed;
} else if(check_message_sanity(msg, data) == FALSE) {
goto badmsg;
} else if(safe_str_eq("identify", data)) {
int pid = getpid();
char *pid_s = crm_itoa(pid);
send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais);
crm_free(pid_s);
goto done;
}
if(msg->header.id != crm_class_members) {
crm_update_peer(msg->sender.id, 0,0,0,0, msg->sender.uname, msg->sender.uname, NULL, NULL);
}
if(msg->header.id == crm_class_rmpeer) {
uint32_t id = crm_int_helper(data, NULL);
crm_info("Removing peer %s/%u", data, id);
reap_crm_member(id);
goto done;
} else if(msg->header.id == crm_class_members
|| msg->header.id == crm_class_quorum) {
xml = string2xml(data);
if(xml == NULL) {
crm_err("Invalid membership update: %s", data);
goto badmsg;
}
if(is_classic_ais_cluster() == FALSE) {
xmlNode *node = NULL;
for(node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) {
crm_update_cman_node(node, crm_peer_seq);
}
} else {
xmlNode *node = NULL;
const char *value = NULL;
gboolean quorate = FALSE;
value = crm_element_value(xml, "quorate");
CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); goto badmsg);
if(crm_is_true(value)) {
quorate = TRUE;
}
value = crm_element_value(xml, "id");
CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); goto badmsg);
crm_peer_seq = crm_int_helper(value, NULL);
if(quorate != crm_have_quorum) {
crm_notice("Membership %s: quorum %s", value, quorate?"acquired":"lost");
crm_have_quorum = quorate;
} else {
crm_info("Membership %s: quorum %s", value, quorate?"retained":"still lost");
}
for(node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) {
crm_update_ais_node(node, crm_peer_seq);
}
}
}
if(dispatch != NULL) {
dispatch(msg, data, 0);
}
done:
crm_free(uncompressed);
free_xml(xml);
return TRUE;
badmsg:
crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):"
" min=%d, total=%d, size=%d, bz2_size=%d",
msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, (int)sizeof(AIS_Message),
msg->header.size, msg->size, msg->compressed_size);
goto done;
}
gboolean ais_dispatch(int sender, gpointer user_data)
{
int rc = CS_OK;
char *buffer = NULL;
gboolean good = TRUE;
gboolean (*dispatch)(AIS_Message*,char*,int) = user_data;
- rc = coroipcc_dispatch_get (ais_ipc_handle, (void**)&buffer, 0);
+ do {
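+ /* keep reading until the queue is drained; one mainloop wakeup may cover several messages */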
+ rc = coroipcc_dispatch_get (ais_ipc_handle, (void**)&buffer, 0);
- if (rc == 0 || buffer == NULL) {
- /* Zero is a legal "no message afterall" value */
- return TRUE;
+ if (rc == 0 || buffer == NULL) {
+ /* Zero is a legal "no message after all" value */
+ return TRUE;
+
+ } else if (rc != CS_OK) {
+ crm_perror(LOG_ERR,"Receiving message body failed: (%d) %s", rc, ais_error2text(rc));
+ goto bail;
+ }
- } else if (rc != CS_OK) {
- crm_perror(LOG_ERR,"Receiving message body failed: (%d) %s", rc, ais_error2text(rc));
- goto bail;
- }
+ good = ais_dispatch_message((AIS_Message*)buffer, dispatch);
+ coroipcc_dispatch_put (ais_ipc_handle);
- good = ais_dispatch_message((AIS_Message*)buffer, dispatch);
- coroipcc_dispatch_put (ais_ipc_handle);
+ } while(good);
+
return good;
bail:
crm_err("AIS connection failed");
return FALSE;
}
static void
ais_destroy(gpointer user_data)
{
crm_err("AIS connection terminated");
ais_fd_sync = -1;
exit(1);
}
static gboolean pcmk_proc_dispatch(IPC_Channel *ch, gpointer user_data)
{
xmlNode *msg = NULL;
gboolean stay_connected = TRUE;
while(IPC_ISRCONN(ch)) {
if(ch->ops->is_message_pending(ch) == 0) {
break;
}
msg = xmlfromIPC(ch, MAX_IPC_DELAY);
if(msg) {
xmlNode *node = NULL;
for(node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) {
int id = 0;
int children = 0;
const char *uname = crm_element_value(node, "uname");
crm_element_value_int(node, "processes", &children);
crm_update_peer(id, 0, 0, 0, children, NULL, uname, NULL, NULL);
}
free_xml(msg);
}
if(ch->ch_status != IPC_CONNECT) {
break;
}
}
if (ch->ch_status != IPC_CONNECT) {
stay_connected = FALSE;
}
return stay_connected;
}
#ifdef SUPPORT_CMAN
static gboolean pcmk_cman_dispatch(int sender, gpointer user_data)
{
- int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ONE);
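+ /* CMAN_DISPATCH_ALL processes every pending event in this callback, not just the first */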
+ int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ALL);
if(rc < 0) {
crm_err("Connection to cman failed: %d", rc);
return FALSE;
}
return TRUE;
}
#define MAX_NODES 256
static void cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg)
{
int rc = 0, lpc = 0, node_count = 0;
cman_cluster_t cluster;
static cman_node_t cman_nodes[MAX_NODES];
gboolean (*dispatch)(unsigned long long, gboolean) = privdata;
switch (reason) {
case CMAN_REASON_STATECHANGE:
memset(&cluster, 0, sizeof(cluster));
rc = cman_get_cluster(pcmk_cman_handle, &cluster);
if (rc < 0) {
crm_err("Couldn't query cman cluster details: %d %d", rc, errno);
return;
}
crm_peer_seq = cluster.ci_generation;
if(arg != crm_have_quorum) {
crm_notice("Membership %llu: quorum %s", crm_peer_seq, arg?"acquired":"lost");
crm_have_quorum = arg;
} else {
crm_info("Membership %llu: quorum %s", crm_peer_seq, arg?"retained":"still lost");
}
rc = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &node_count, cman_nodes);
if (rc < 0) {
crm_err("Couldn't query cman node list: %d %d", rc, errno);
return;
}
for (lpc = 0; lpc < node_count; lpc++) {
if (cman_nodes[lpc].cn_nodeid == 0) {
/* Never allow node ID 0 to be considered a member #315711 */
cman_nodes[lpc].cn_member = 0;
}
crm_update_peer(cman_nodes[lpc].cn_nodeid, cman_nodes[lpc].cn_incarnation,
cman_nodes[lpc].cn_member?crm_peer_seq:0, 0, 0,
cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_name, NULL,
cman_nodes[lpc].cn_member?CRM_NODE_MEMBER:CRM_NODE_LOST);
}
if(dispatch) {
dispatch(crm_peer_seq, crm_have_quorum);
}
break;
case CMAN_REASON_TRY_SHUTDOWN:
/* Always reply with a negative - pacemaker needs to be stopped first */
crm_info("CMAN wants to shut down: %s", arg?"forced":"optional");
cman_replyto_shutdown(pcmk_cman_handle, 0);
break;
case CMAN_REASON_CONFIG_UPDATE:
/* Ignore */
break;
}
}
#endif
gboolean init_cman_connection(
gboolean (*dispatch)(unsigned long long, gboolean), void (*destroy)(gpointer))
{
#ifdef SUPPORT_CMAN
int rc = -1, fd = -1;
cman_cluster_t cluster;
crm_info("Configuring Pacemaker to obtain quorum from cman");
memset(&cluster, 0, sizeof(cluster));
pcmk_cman_handle = cman_init(dispatch);
if(pcmk_cman_handle == NULL || cman_is_active(pcmk_cman_handle) == FALSE) {
crm_err("Couldn't connect to cman");
goto cman_bail;
}
rc = cman_get_cluster(pcmk_cman_handle, &cluster);
if (rc < 0) {
crm_err("Couldn't query cman cluster details: %d %d", rc, errno);
goto cman_bail;
}
ais_cluster_name = crm_strdup(cluster.ci_name);
rc = cman_start_notification(pcmk_cman_handle, cman_event_callback);
if (rc < 0) {
crm_err("Couldn't register for cman notifications: %d %d", rc, errno);
goto cman_bail;
}
/* Get the current membership state */
cman_event_callback(pcmk_cman_handle, dispatch, CMAN_REASON_STATECHANGE,
cman_is_quorate(pcmk_cman_handle));
fd = cman_get_fd(pcmk_cman_handle);
crm_debug("Adding fd=%d to mainloop", fd);
cman_source = G_main_add_fd(
G_PRIORITY_HIGH, fd, FALSE, pcmk_cman_dispatch, dispatch, destroy);
cman_bail:
if (rc < 0) {
cman_finish(pcmk_cman_handle);
return FALSE;
}
#else
crm_err("cman qorum is not supported in this build");
exit(100);
#endif
return TRUE;
}
#ifdef SUPPORT_CS_QUORUM
gboolean (*pcmk_cpg_dispatch_fn)(AIS_Message*,char*,int) = NULL;
static gboolean pcmk_cpg_dispatch(int sender, gpointer user_data)
{
int rc = 0;
pcmk_cpg_dispatch_fn = user_data;
rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL);
if(rc != CS_OK) {
crm_err("Connection to the CPG API failed: %d", rc);
return FALSE;
}
return TRUE;
}
static void pcmk_cpg_deliver (
cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid,
uint32_t pid,
void *msg,
size_t msg_len)
{
AIS_Message *ais_msg = (AIS_Message*)msg;
if(ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) {
crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u",
nodeid, pid, ais_msg->sender.id);
return;
} else if(ais_msg->host.size != 0
&& safe_str_neq(ais_msg->host.uname, pcmk_uname)) {
/* Not for us */
return;
}
ais_msg->sender.id = nodeid;
if(ais_msg->sender.size == 0) {
crm_node_t *peer = crm_get_peer(nodeid, NULL);
if(peer == NULL) {
crm_err("Peer with nodeid=%u is unknown", nodeid);
} else if(peer->uname == NULL) {
crm_err("No uname for peer with nodeid=%u", nodeid);
} else {
crm_notice("Fixing uname for peer with nodeid=%u", nodeid);
ais_msg->sender.size = strlen(peer->uname);
memset(ais_msg->sender.uname, 0, MAX_NAME);
memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size);
}
}
ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn);
}
static void pcmk_cpg_membership(
cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries)
{
/* Don't care about CPG membership */
}
static gboolean pcmk_quorum_dispatch(int sender, gpointer user_data)
{
int rc = 0;
rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL);
if(rc < 0) {
crm_err("Connection to the Quorum API failed: %d", rc);
return FALSE;
}
return TRUE;
}
static void pcmk_quorum_notification(
quorum_handle_t handle,
uint32_t quorate,
uint64_t ring_id,
uint32_t view_list_entries,
uint32_t *view_list)
{
int i;
if(quorate != crm_have_quorum) {
crm_notice("Membership "U64T": quorum %s (%lu)", ring_id,
quorate?"acquired":"lost", (long unsigned int)view_list_entries);
crm_have_quorum = quorate;
} else {
crm_info("Membership "U64T": quorum %s (%lu)", ring_id,
quorate?"retained":"still lost", (long unsigned int)view_list_entries);
}
for (i=0; i<view_list_entries; i++) {
crm_debug(" %d ", view_list[i]);
}
}
cpg_callbacks_t cpg_callbacks = {
.cpg_deliver_fn = pcmk_cpg_deliver,
.cpg_confchg_fn = pcmk_cpg_membership,
};
quorum_callbacks_t quorum_callbacks = {
.quorum_notify_fn = pcmk_quorum_notification,
};
#endif
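/* init_cpg_connection: join the CPG group named after crm_system_name
* and add the CPG file descriptor to the mainloop.  The cs_repeat()
* wrappers appear to retry each CPG call (up to 30 attempts here) to
* ride out transient CS_ERR_TRY_AGAIN results - an assumption based on
* how the macro is used with a retry counter, not on its definition.
*/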
static gboolean init_cpg_connection(
gboolean (*dispatch)(AIS_Message*,char*,int), void (*destroy)(gpointer), uint32_t *nodeid)
{
#ifdef SUPPORT_CS_QUORUM
int rc = -1;
int fd = 0;
int retries = 0;
strcpy(pcmk_cpg_group.value, crm_system_name);
pcmk_cpg_group.length = strlen(crm_system_name)+1;
cs_repeat(retries, 30, rc = cpg_initialize (&pcmk_cpg_handle, &cpg_callbacks));
if (rc != CS_OK) {
crm_err("Could not connect to the Cluster Process Group API: %d\n", rc);
goto bail;
}
retries = 0;
cs_repeat(
retries, 30, rc = cpg_local_get (pcmk_cpg_handle, (unsigned int*)nodeid));
if (rc != CS_OK) {
crm_err("Could not get local node id from the CPG API");
goto bail;
}
retries = 0;
cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group));
if (rc != CS_OK) {
crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc);
goto bail;
}
rc = cpg_fd_get(pcmk_cpg_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CPG API connection: %d\n", rc);
goto bail;
}
crm_debug("Adding fd=%d to mainloop", fd);
cpg_source = G_main_add_fd(
G_PRIORITY_HIGH, fd, FALSE, pcmk_cpg_dispatch, dispatch, destroy);
bail:
if (rc != CS_OK) {
cpg_finalize(pcmk_cpg_handle);
return FALSE;
}
#else
crm_err("corosync qorum is not supported in this build");
exit(100);
#endif
return TRUE;
}
gboolean init_quorum_connection(
gboolean (*dispatch)(unsigned long long, gboolean), void (*destroy)(gpointer))
{
#ifdef SUPPORT_CS_QUORUM
int rc = -1;
int fd = 0;
int quorate = 0;
crm_info("Configuring Pacemaker to obtain quorum from Corosync");
rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks);
if ( rc != CS_OK) {
crm_err("Could not connect to the Quorum API: %d\n", rc);
goto bail;
}
rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
if ( rc != CS_OK) {
crm_err("Could not obtain the current Quorum API state: %d\n", rc);
goto bail;
}
crm_notice("Quorum %s", quorate?"acquired":"lost");
crm_have_quorum = quorate;
rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES|CS_TRACK_CURRENT);
if ( rc != CS_OK) {
crm_err("Could not setup Quorum API notifications: %d\n", rc);
goto bail;
}
rc = quorum_fd_get(pcmk_quorum_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the Quorum API connection: %d\n", rc);
goto bail;
}
quorumd_source = G_main_add_fd(
G_PRIORITY_HIGH, fd, FALSE, pcmk_quorum_dispatch, dispatch, destroy);
bail:
if (rc != CS_OK) {
quorum_finalize(pcmk_quorum_handle);
return FALSE;
}
#else
crm_err("corosync quorum is not supported in this build");
exit(100);
#endif
return TRUE;
}
static gboolean init_ais_connection_classic(
gboolean (*dispatch)(AIS_Message*,char*,int),
void (*destroy)(gpointer), char **our_uuid, char **our_uname, int *nodeid)
{
int rc;
int pid = 0;
char *pid_s = NULL;
struct utsname name;
crm_info("Creating connection to our Corosync plugin");
rc = coroipcc_service_connect(
COROSYNC_SOCKET_NAME, PCMK_SERVICE_ID,
AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE,
&ais_ipc_handle);
if(ais_ipc_handle) {
coroipcc_fd_get(ais_ipc_handle, &ais_fd_async);
}
if(ais_fd_async <= 0 && rc == CS_OK) {
crm_err("No context created, but connection reported 'ok'");
rc = CS_ERR_LIBRARY;
}
if (rc != CS_OK) {
crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, ais_error2text(rc), rc);
return FALSE;
}
if(destroy == NULL) {
destroy = ais_destroy;
}
if(dispatch) {
crm_debug("Adding fd=%d to mainloop", ais_fd_async);
ais_source = G_main_add_fd(
G_PRIORITY_HIGH, ais_fd_async, FALSE, ais_dispatch, dispatch, destroy);
}
crm_info("AIS connection established");
pid = getpid();
pid_s = crm_itoa(pid);
send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais);
crm_free(pid_s);
if(uname(&name) < 0) {
crm_perror(LOG_ERR,"Could not determin the current host");
exit(100);
}
get_ais_nodeid(&pcmk_nodeid, &pcmk_uname);
if(safe_str_neq(name.nodename, pcmk_uname)) {
crm_crit("Node name mismatch! OpenAIS supplied %s, our lookup returned %s",
pcmk_uname, name.nodename);
crm_notice("Node name mismatches usually occur when assigned automatically by DHCP servers");
crm_notice("If this node was part of the cluster with a different name,"
" you will need to remove the old entry with crm_node --remove");
}
return TRUE;
}
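/* init_ais_connection: wrapper around init_ais_connection_once() that
* retries up to 30 times while the stack reports CS_ERR_TRY_AGAIN,
* e.g. while the cluster stack is still starting up.  When running
* under the master control process (HA_mcp is set), it additionally
* subscribes to process-status updates on the "pcmk" IPC channel.
*/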
gboolean init_ais_connection(
gboolean (*dispatch)(AIS_Message*,char*,int), void (*destroy)(gpointer),
char **our_uuid, char **our_uname, int *nodeid)
{
int retries = 0;
enum cluster_type_e type = get_cluster_type();
while(retries++ < 30) {
int rc = init_ais_connection_once(type, dispatch, destroy, our_uuid, our_uname, nodeid);
switch(rc) {
case CS_OK:
if(getenv("HA_mcp")) {
IPC_Channel *ch = init_client_ipc_comms_nodispatch("pcmk");
G_main_add_IPC_Channel(G_PRIORITY_HIGH, ch, FALSE, pcmk_proc_dispatch, NULL, destroy);
}
return TRUE;
break;
case CS_ERR_TRY_AGAIN:
break;
default:
return FALSE;
}
}
crm_err("Retry count exceeded: %d", retries);
return FALSE;
}
static char *get_local_node_name(void)
{
char *name = NULL;
struct utsname res;
if(is_cman_cluster()) {
#ifdef SUPPORT_CMAN
cman_node_t us;
cman_handle_t cman;
cman = cman_init(NULL);
if(cman != NULL && cman_is_active(cman)) {
us.cn_name[0] = 0;
cman_get_node(cman, CMAN_NODEID_US, &us);
name = crm_strdup(us.cn_name);
crm_info("Using CMAN node name: %s", name);
} else {
crm_err("Couldn't determin node name from CMAN");
}
cman_finish(cman);
#endif
} else if(uname(&res) < 0) {
crm_perror(LOG_ERR,"Could not determin the current host");
exit(100);
} else {
name = crm_strdup(res.nodename);
}
return name;
}
extern int set_cluster_type(enum cluster_type_e type);
gboolean init_ais_connection_once(
enum cluster_type_e type,
gboolean (*dispatch)(AIS_Message*,char*,int),
void (*destroy)(gpointer), char **our_uuid, char **our_uname, int *nodeid)
{
enum cluster_type_e use_type = 0;
crm_peer_init();
if(type) {
set_cluster_type(type);
}
use_type = get_cluster_type();
/* Here we just initialize comms */
switch(use_type) {
case pcmk_cluster_classic_ais:
if(init_ais_connection_classic(
dispatch, destroy, our_uuid, &pcmk_uname, nodeid) == FALSE) {
goto bail;
}
break;
case pcmk_cluster_cman:
case pcmk_cluster_corosync:
if(init_cpg_connection(dispatch, destroy, &pcmk_nodeid) == FALSE) {
goto bail;
}
pcmk_uname = get_local_node_name();
break;
default:
crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(use_type), use_type);
goto bail;
break;
}
crm_info("Connection to '%s': established", name_for_cluster_type(type));
CRM_ASSERT(pcmk_uname != NULL);
pcmk_uname_len = strlen(pcmk_uname);
if(pcmk_nodeid != 0) {
/* Ensure the local node always exists */
crm_update_peer(pcmk_nodeid, 0, 0, 0, 0, pcmk_uname, pcmk_uname, NULL, NULL);
}
if(our_uuid != NULL) {
*our_uuid = crm_strdup(pcmk_uname);
}
if(our_uname != NULL) {
*our_uname = crm_strdup(pcmk_uname);
}
if(nodeid != NULL) {
*nodeid = pcmk_nodeid;
}
return TRUE;
bail:
if(type) {
set_cluster_type(pcmk_cluster_unknown);
}
return FALSE;
}
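/* check_message_sanity: validate an AIS_Message against its header -
* non-zero size, no header error, a payload length consistent with
* header.size minus the struct overhead and, for uncompressed
* messages, a NUL-terminated payload of the advertised length.
* Note that the payload-size mismatch below is logged as a warning
* only; sane is left TRUE in that case.
*/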
gboolean check_message_sanity(const AIS_Message *msg, const char *data)
{
gboolean sane = TRUE;
gboolean repaired = FALSE;
int dest = msg->host.type;
int tmp_size = msg->header.size - sizeof(AIS_Message);
if(sane && msg->header.size == 0) {
crm_warn("Message with no size");
sane = FALSE;
}
if(sane && msg->header.error != CS_OK) {
crm_warn("Message header contains an error: %d", msg->header.error);
sane = FALSE;
}
if(sane && ais_data_len(msg) != tmp_size) {
crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size);
sane = TRUE;
}
if(sane && ais_data_len(msg) == 0) {
crm_warn("Message with no payload");
sane = FALSE;
}
if(sane && data && msg->is_compressed == FALSE) {
int str_size = strlen(data) + 1;
if(ais_data_len(msg) != str_size) {
int lpc = 0;
crm_warn("Message payload is corrupted: expected %d bytes, got %d",
ais_data_len(msg), str_size);
sane = FALSE;
for(lpc = (str_size - 10); lpc < msg->size; lpc++) {
if(lpc < 0) {
lpc = 0;
}
crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]);
}
}
}
if(sane == FALSE) {
crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, msg->is_compressed, ais_data_len(msg),
msg->header.size);
} else if(repaired) {
crm_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, msg->is_compressed, ais_data_len(msg),
msg->header.size);
} else {
crm_debug_3("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, msg->is_compressed, ais_data_len(msg),
msg->header.size);
}
return sane;
}
#endif
diff --git a/lib/common/utils.c b/lib/common/utils.c
index 2f6332fdd7..cc8d3da248 100644
--- a/lib/common/utils.c
+++ b/lib/common/utils.c
@@ -1,2654 +1,2657 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <ctype.h>
#include <pwd.h>
#include <grp.h>
#include <time.h>
#include <libgen.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <crm/common/ipc.h>
#include <crm/common/iso8601.h>
#include <libxml2/libxml/relaxng.h>
#if HAVE_HB_CONFIG_H
#include <heartbeat/hb_config.h> /* for HB_COREDIR */
#endif
#if HAVE_GLUE_CONFIG_H
#include <glue_config.h> /* for HB_COREDIR */
#endif
#ifndef MAXLINE
# define MAXLINE 512
#endif
#ifdef HAVE_GETOPT_H
# include <getopt.h>
#endif
CRM_TRACE_INIT_DATA(common);
static uint ref_counter = 0;
unsigned int crm_log_level = LOG_INFO;
gboolean crm_config_error = FALSE;
gboolean crm_config_warning = FALSE;
const char *crm_system_name = "unknown";
int node_score_red = 0;
int node_score_green = 0;
int node_score_yellow = 0;
int node_score_infinity = INFINITY;
void crm_set_env_options(void);
gboolean
check_time(const char *value)
{
if(crm_get_msec(value) < 5000) {
return FALSE;
}
return TRUE;
}
gboolean
check_timer(const char *value)
{
if(crm_get_msec(value) < 0) {
return FALSE;
}
return TRUE;
}
gboolean
check_boolean(const char *value)
{
int tmp = FALSE;
if(crm_str_to_boolean(value, &tmp) != 1) {
return FALSE;
}
return TRUE;
}
gboolean
check_number(const char *value)
{
errno = 0;
if(value == NULL) {
return FALSE;
} else if(safe_str_eq(value, MINUS_INFINITY_S)) {
} else if(safe_str_eq(value, INFINITY_S)) {
} else {
crm_int_helper(value, NULL);
}
if(errno != 0) {
return FALSE;
}
return TRUE;
}
int
char2score(const char *score)
{
int score_f = 0;
if(score == NULL) {
} else if(safe_str_eq(score, MINUS_INFINITY_S)) {
score_f = -node_score_infinity;
} else if(safe_str_eq(score, INFINITY_S)) {
score_f = node_score_infinity;
} else if(safe_str_eq(score, "+"INFINITY_S)) {
score_f = node_score_infinity;
} else if(safe_str_eq(score, "red")) {
score_f = node_score_red;
} else if(safe_str_eq(score, "yellow")) {
score_f = node_score_yellow;
} else if(safe_str_eq(score, "green")) {
score_f = node_score_green;
} else {
score_f = crm_parse_int(score, NULL);
if(score_f > 0 && score_f > node_score_infinity) {
score_f = node_score_infinity;
} else if(score_f < 0 && score_f < -node_score_infinity) {
score_f = -node_score_infinity;
}
}
return score_f;
}
char *
score2char(int score)
{
if(score >= node_score_infinity) {
return crm_strdup(INFINITY_S);
} else if(score <= -node_score_infinity) {
return crm_strdup("-"INFINITY_S);
}
return crm_itoa(score);
}
const char *
cluster_option(GHashTable* options, gboolean(*validate)(const char*),
const char *name, const char *old_name, const char *def_value)
{
const char *value = NULL;
CRM_ASSERT(name != NULL);
if(options != NULL) {
value = g_hash_table_lookup(options, name);
}
if(value == NULL && old_name && options != NULL) {
value = g_hash_table_lookup(options, old_name);
if(value != NULL) {
crm_config_warn("Using deprecated name '%s' for"
" cluster option '%s'", old_name, name);
g_hash_table_insert(
options, crm_strdup(name), crm_strdup(value));
value = g_hash_table_lookup(options, old_name);
}
}
if(value == NULL) {
crm_debug_2("Using default value '%s' for cluster option '%s'",
def_value, name);
if(options == NULL) {
return def_value;
}
g_hash_table_insert(
options, crm_strdup(name), crm_strdup(def_value));
value = g_hash_table_lookup(options, name);
}
if(validate && validate(value) == FALSE) {
crm_config_err("Value '%s' for cluster option '%s' is invalid."
" Defaulting to %s", value, name, def_value);
g_hash_table_replace(options, crm_strdup(name),
crm_strdup(def_value));
value = g_hash_table_lookup(options, name);
}
return value;
}
const char *
get_cluster_pref(GHashTable *options, pe_cluster_option *option_list, int len, const char *name)
{
int lpc = 0;
const char *value = NULL;
gboolean found = FALSE;
for(lpc = 0; lpc < len; lpc++) {
if(safe_str_eq(name, option_list[lpc].name)) {
found = TRUE;
value = cluster_option(options,
option_list[lpc].is_valid,
option_list[lpc].name,
option_list[lpc].alt_name,
option_list[lpc].default_value);
}
}
CRM_CHECK(found, crm_err("No option named: %s", name));
CRM_ASSERT(value != NULL);
return value;
}
void
config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long,
pe_cluster_option *option_list, int len)
{
int lpc = 0;
fprintf(stdout, "<?xml version=\"1.0\"?>"
"<!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n"
"<resource-agent name=\"%s\">\n"
" <version>%s</version>\n"
" <longdesc lang=\"en\">%s</longdesc>\n"
" <shortdesc lang=\"en\">%s</shortdesc>\n"
" <parameters>\n", name, version, desc_long, desc_short);
for(lpc = 0; lpc < len; lpc++) {
if(option_list[lpc].description_long == NULL
&& option_list[lpc].description_short == NULL) {
continue;
}
fprintf(stdout, " <parameter name=\"%s\" unique=\"0\">\n"
" <shortdesc lang=\"en\">%s</shortdesc>\n"
" <content type=\"%s\" default=\"%s\"/>\n"
" <longdesc lang=\"en\">%s%s%s</longdesc>\n"
" </parameter>\n",
option_list[lpc].name,
option_list[lpc].description_short,
option_list[lpc].type,
option_list[lpc].default_value,
option_list[lpc].description_long?option_list[lpc].description_long:option_list[lpc].description_short,
option_list[lpc].values?" Allowed values: ":"",
option_list[lpc].values?option_list[lpc].values:"");
}
fprintf(stdout, " </parameters>\n</resource-agent>\n");
}
void
verify_all_options(GHashTable *options, pe_cluster_option *option_list, int len)
{
int lpc = 0;
for(lpc = 0; lpc < len; lpc++) {
cluster_option(options,
option_list[lpc].is_valid,
option_list[lpc].name,
option_list[lpc].alt_name,
option_list[lpc].default_value);
}
}
char *
generateReference(const char *custom1, const char *custom2)
{
const char *local_cust1 = custom1;
const char *local_cust2 = custom2;
int reference_len = 4;
char *since_epoch = NULL;
reference_len += 20; /* too big */
reference_len += 40; /* too big */
if(local_cust1 == NULL) { local_cust1 = "_empty_"; }
reference_len += strlen(local_cust1);
if(local_cust2 == NULL) { local_cust2 = "_empty_"; }
reference_len += strlen(local_cust2);
crm_malloc0(since_epoch, reference_len);
if(since_epoch != NULL) {
sprintf(since_epoch, "%s-%s-%ld-%u",
local_cust1, local_cust2,
(unsigned long)time(NULL), ref_counter++);
}
return since_epoch;
}
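/* decodeNVpair: split "name<sep>value" into two newly allocated
* strings.  For example, decodeNVpair("stonith-enabled=false", '=',
* &name, &value) yields name="stonith-enabled" and value="false";
* both are the caller's to crm_free().  Returns FALSE, with *name
* and *value NULLed, if the separator is absent or allocation fails.
*/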
gboolean
decodeNVpair(const char *srcstring, char separator, char **name, char **value)
{
int lpc = 0;
int len = 0;
const char *temp = NULL;
CRM_ASSERT(name != NULL && value != NULL);
*name = NULL;
*value = NULL;
crm_debug_4("Attempting to decode: [%s]", srcstring);
if (srcstring != NULL) {
len = strlen(srcstring);
while(lpc <= len) {
if (srcstring[lpc] == separator) {
crm_malloc0(*name, lpc+1);
if(*name == NULL) {
break; /* and return FALSE */
}
strncpy(*name, srcstring, lpc);
(*name)[lpc] = '\0';
/* this sucks but as the strtok manpage says..
* it *is* a bug
*/
len = len-lpc; len--;
if(len <= 0) {
*value = NULL;
} else {
crm_malloc0(*value, len+1);
if(*value == NULL) {
crm_free(*name);
break; /* and return FALSE */
}
temp = srcstring+lpc+1;
strncpy(*value, temp, len);
(*value)[len] = '\0';
}
return TRUE;
}
lpc++;
}
}
if(*name != NULL) {
crm_free(*name);
}
*name = NULL;
*value = NULL;
return FALSE;
}
char *
crm_concat(const char *prefix, const char *suffix, char join)
{
int len = 0;
char *new_str = NULL;
CRM_ASSERT(prefix != NULL);
CRM_ASSERT(suffix != NULL);
len = strlen(prefix) + strlen(suffix) + 2;
crm_malloc0(new_str, (len));
sprintf(new_str, "%s%c%s", prefix, join, suffix);
new_str[len-1] = 0;
return new_str;
}
char *
generate_hash_key(const char *crm_msg_reference, const char *sys)
{
char *hash_key = crm_concat(sys?sys:"none", crm_msg_reference, '_');
crm_debug_3("created hash key: (%s)", hash_key);
return hash_key;
}
char *
generate_hash_value(const char *src_node, const char *src_subsys)
{
char *hash_value = NULL;
if (src_node == NULL || src_subsys == NULL) {
return NULL;
}
if (strcasecmp(CRM_SYSTEM_DC, src_subsys) == 0) {
hash_value = crm_strdup(src_subsys);
CRM_ASSERT(hash_value);
return hash_value;
}
hash_value = crm_concat(src_node, src_subsys, '_');
crm_info("created hash value: (%s)", hash_value);
return hash_value;
}
char *
crm_itoa(int an_int)
{
int len = 32;
char *buffer = NULL;
crm_malloc0(buffer, (len+1));
if(buffer != NULL) {
snprintf(buffer, len, "%d", an_int);
}
return buffer;
}
extern int LogToLoggingDaemon(int priority, const char * buf, int bstrlen, gboolean use_pri_str);
#ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER
GLogFunc glib_log_default;
static void
crm_glib_handler(const gchar *log_domain, GLogLevelFlags flags, const gchar *message, gpointer user_data)
{
int log_level = LOG_WARNING;
GLogLevelFlags msg_level = (flags & G_LOG_LEVEL_MASK);
switch(msg_level) {
case G_LOG_LEVEL_CRITICAL:
/* log and record how we got here */
crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, message, TRUE, TRUE);
return;
case G_LOG_LEVEL_ERROR: log_level = LOG_ERR; break;
case G_LOG_LEVEL_MESSAGE: log_level = LOG_NOTICE; break;
case G_LOG_LEVEL_INFO: log_level = LOG_INFO; break;
case G_LOG_LEVEL_DEBUG: log_level = LOG_DEBUG; break;
case G_LOG_LEVEL_WARNING:
case G_LOG_FLAG_RECURSION:
case G_LOG_FLAG_FATAL:
case G_LOG_LEVEL_MASK:
log_level = LOG_WARNING;
break;
}
do_crm_log(log_level, "%s: %s", log_domain, message);
}
#endif
void crm_log_deinit(void) {
#ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER
g_log_set_default_handler(glib_log_default, NULL);
#endif
}
gboolean crm_log_init(
const char *entity, int level, gboolean coredir, gboolean to_stderr,
int argc, char **argv)
{
return crm_log_init_worker(entity, level, coredir, to_stderr, argc, argv, FALSE);
}
gboolean crm_log_init_quiet(
const char *entity, int level, gboolean coredir, gboolean to_stderr,
int argc, char **argv)
{
return crm_log_init_worker(entity, level, coredir, to_stderr, argc, argv, TRUE);
}
#if SUPPORT_TRACING
static int
update_trace_data(struct _pcmk_ddebug_query *query, struct _pcmk_ddebug *start, struct _pcmk_ddebug *stop)
{
int lpc = 0;
unsigned nfound = 0;
struct _pcmk_ddebug *dp;
const char *match = "unknown";
CRM_ASSERT(stop != NULL);
CRM_ASSERT(start != NULL);
for (dp = start; dp != stop; dp++) {
gboolean bump = FALSE;
lpc++;
/* fprintf(stderr, "checking: %-12s %20s:%u fmt:%s\n", */
/* dp->function, dp->filename, dp->lineno, dp->format); */
if (query->functions && strstr(query->functions, dp->function) != NULL) {
match = "function";
bump = TRUE;
}
if(query->files) {
char token[500];
const char *offset = NULL;
const char *next = query->files;
do {
offset = next;
next = strchrnul(offset, ',');
snprintf(token, 499, "%.*s", (int)(next-offset), offset);
if (query->files && strstr(dp->filename, token) != NULL) {
match = "file";
bump = TRUE;
} else if(next[0] != 0) {
next++;
}
} while(bump == FALSE && next != NULL && next[0] != 0);
}
if (query->formats && strstr(query->formats, dp->format) != NULL) {
match = "format";
bump = TRUE;
}
if(bump) {
nfound++;
dp->bump = LOG_NOTICE;
do_crm_log_always(LOG_INFO, "Detected '%s' match: %-12s %20s:%u fmt:%s",
match, dp->function, dp->filename, dp->lineno, dp->format);
}
}
query->total += lpc;
query->matches += nfound;
return nfound;
}
#define _GNU_SOURCE
#include <link.h>
#include <stdlib.h>
#include <stdio.h>
static int
ddebug_callback(struct dl_phdr_info *info, size_t size, void *data)
{
if(strlen(info->dlpi_name) > 0) {
struct _pcmk_ddebug_query *query = data;
void *handle;
void *start;
void *stop;
char *error;
handle = dlopen (info->dlpi_name, RTLD_LAZY);
error = dlerror();
if (!handle || error) {
crm_err("%s", error);
if(handle) {
dlclose(handle);
}
return 0;
}
start = dlsym(handle, "__start___verbose");
error = dlerror();
if (error) {
goto done;
}
stop = dlsym(handle, "__stop___verbose");
error = dlerror();
if (error) {
goto done;
} else {
unsigned long int len = (unsigned long int)stop - (unsigned long int)start;
crm_info("Checking for query matches in %lu trace symbols from: %s (offset: %p)",
len/sizeof(struct _pcmk_ddebug), info->dlpi_name, start);
update_trace_data(query, start, stop);
}
done:
dlclose(handle);
}
return 0;
}
#endif
void update_all_trace_data(void)
{
#if SUPPORT_TRACING
gboolean search = FALSE;
const char *env_value = NULL;
struct _pcmk_ddebug_query query;
memset(&query, 0, sizeof(struct _pcmk_ddebug_query));
env_value = getenv("PCMK_trace_files");
if(env_value) {
search = TRUE;
query.files = env_value;
}
env_value = getenv("PCMK_trace_formats");
if(env_value) {
search = TRUE;
query.formats = env_value;
}
env_value = getenv("PCMK_trace_functions");
if(env_value) {
search = TRUE;
query.functions = env_value;
}
if(search) {
update_trace_data(&query, __start___verbose, __stop___verbose);
dl_iterate_phdr(ddebug_callback, &query);
if(query.matches == 0) {
do_crm_log_always(LOG_DEBUG,
"no matches for query: {fn='%s', file='%s', fmt='%s'} in %llu entries",
crm_str(query.functions), crm_str(query.files), crm_str(query.formats), query.total);
} else {
do_crm_log_always(LOG_INFO,
"%llu matches for query: {fn='%s', file='%s', fmt='%s'} in %llu entries",
query.matches, crm_str(query.functions), crm_str(query.files), crm_str(query.formats),
query.total);
}
}
/* return query.matches; */
#endif
}
gboolean
crm_log_init_worker(
const char *entity, int level, gboolean coredir, gboolean to_stderr,
int argc, char **argv, gboolean quiet)
{
/* Redirect messages from glib functions to our handler */
/* cl_malloc_forced_for_glib(); */
#ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER
glib_log_default = g_log_set_default_handler(crm_glib_handler, NULL);
#endif
/* and for good measure... - this enum is a bit field (!) */
g_log_set_always_fatal((GLogLevelFlags)0); /*value out of range*/
if(entity) {
crm_system_name = entity;
} else if(argc > 0 && argv != NULL) {
crm_system_name = basename(argv[0]);
if(strstr(crm_system_name, "lt-") == crm_system_name) {
crm_system_name += 3;
}
} else if(crm_system_name == NULL) {
crm_system_name = "Unknown";
}
setenv("PCMK_service", crm_system_name, 1);
cl_log_set_entity(crm_system_name);
set_crm_log_level(level);
crm_set_env_options();
if(quiet) {
/* Nuke any syslog activity */
unsetenv("HA_logfacility");
} else {
cl_log_args(argc, argv);
if(getenv("HA_logfacility") == NULL) {
/* Set a default */
cl_log_set_facility(HA_LOG_FACILITY);
} /* else: picked up by crm_set_env_options() */
}
cl_log_enable_stderr(to_stderr);
if(coredir) {
const char *user = getenv("USER");
if(user != NULL && safe_str_neq(user, "root") && safe_str_neq(user, CRM_DAEMON_USER)) {
crm_info("Not switching to corefile directory for %s", user);
coredir = FALSE;
}
}
if(coredir) {
int user = getuid();
const char *base = HA_COREDIR;
struct passwd *pwent = getpwuid(user);
if (pwent == NULL) {
crm_perror(LOG_ERR, "Cannot get name for uid: %d", user);
} else if(safe_str_neq(pwent->pw_name, "root")
&& safe_str_neq(pwent->pw_name, "nobody")
&& safe_str_neq(pwent->pw_name, CRM_DAEMON_USER)) {
crm_debug("Don't change active directory for regular user: %s", pwent->pw_name);
} else if (chdir(base) < 0) {
crm_perror(LOG_ERR, "Cannot change active directory to %s", base);
} else if (chdir(pwent->pw_name) < 0) {
crm_perror(LOG_ERR, "Cannot change active directory to %s/%s", base, pwent->pw_name);
} else {
crm_info("Changed active directory to %s/%s", base, pwent->pw_name);
#if 0
{
char path[512];
snprintf(path, 512, "%s-%d", crm_system_name, getpid());
mkdir(path, 0750);
chdir(path);
crm_info("Changed active directory to %s/%s/%s",
base, pwent->pw_name, path);
}
#endif
}
}
update_all_trace_data();
crm_signal(DEBUG_INC, alter_debug);
crm_signal(DEBUG_DEC, alter_debug);
return TRUE;
}
/* returns the old value */
unsigned int
set_crm_log_level(unsigned int level)
{
unsigned int old = crm_log_level;
crm_log_level = level;
return old;
}
unsigned int
get_crm_log_level(void)
{
return crm_log_level;
}
static int
crm_version_helper(const char *text, char **end_text)
{
int atoi_result = -1;
CRM_ASSERT(end_text != NULL);
errno = 0;
if(text != NULL && text[0] != 0) {
atoi_result = (int)strtol(text, end_text, 10);
if(errno == EINVAL) {
crm_err("Conversion of '%s' %c failed", text, text[0]);
atoi_result = -1;
}
}
return atoi_result;
}
/*
* version1 < version2 : -1
* version1 = version2 : 0
* version1 > version2 : 1
*/
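/* Fields are compared numerically, dot by dot, with missing fields
* treated as 0: e.g. compare_version("1.0.9", "1.1") == -1 and
* compare_version("1.1", "1.1.0") == 0.
*/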
int
compare_version(const char *version1, const char *version2)
{
int rc = 0;
int lpc = 0;
char *ver1_copy = NULL, *ver2_copy = NULL;
char *rest1 = NULL, *rest2 = NULL;
if(version1 == NULL && version2 == NULL) {
return 0;
} else if(version1 == NULL) {
return -1;
} else if(version2 == NULL) {
return 1;
}
ver1_copy = crm_strdup(version1);
ver2_copy = crm_strdup(version2);
rest1 = ver1_copy;
rest2 = ver2_copy;
while(1) {
int digit1 = 0;
int digit2 = 0;
lpc++;
if(rest1 == rest2) {
break;
}
if(rest1 != NULL) {
digit1 = crm_version_helper(rest1, &rest1);
}
if(rest2 != NULL) {
digit2 = crm_version_helper(rest2, &rest2);
}
if(digit1 < digit2){
rc = -1;
crm_debug_5("%d < %d", digit1, digit2);
break;
} else if (digit1 > digit2){
rc = 1;
crm_debug_5("%d > %d", digit1, digit2);
break;
}
if(rest1 != NULL && rest1[0] == '.') {
rest1++;
}
if(rest1 != NULL && rest1[0] == 0) {
rest1 = NULL;
}
if(rest2 != NULL && rest2[0] == '.') {
rest2++;
}
if(rest2 != NULL && rest2[0] == 0) {
rest2 = NULL;
}
}
crm_free(ver1_copy);
crm_free(ver2_copy);
if(rc == 0) {
crm_debug_3("%s == %s (%d)", version1, version2, lpc);
} else if(rc < 0) {
crm_debug_3("%s < %s (%d)", version1, version2, lpc);
} else if(rc > 0) {
crm_debug_3("%s > %s (%d)", version1, version2, lpc);
}
return rc;
}
gboolean do_stderr = FALSE;
void
alter_debug(int nsig)
{
crm_signal(DEBUG_INC, alter_debug);
crm_signal(DEBUG_DEC, alter_debug);
switch(nsig) {
case DEBUG_INC:
if (crm_log_level < 100) {
crm_log_level++;
}
break;
case DEBUG_DEC:
if (crm_log_level > 0) {
crm_log_level--;
}
break;
default:
fprintf(stderr, "Unknown signal %d\n", nsig);
cl_log(LOG_ERR, "Unknown signal %d", nsig);
break;
}
}
void g_hash_destroy_str(gpointer data)
{
crm_free(data);
}
#include <sys/types.h>
/* #include <stdlib.h> */
/* #include <limits.h> */
long long
crm_int_helper(const char *text, char **end_text)
{
long long result = -1;
char *local_end_text = NULL;
int saved_errno = 0;
errno = 0;
if(text != NULL) {
#ifdef ANSI_ONLY
if(end_text != NULL) {
result = strtol(text, end_text, 10);
} else {
result = strtol(text, &local_end_text, 10);
}
#else
if(end_text != NULL) {
result = strtoll(text, end_text, 10);
} else {
result = strtoll(text, &local_end_text, 10);
}
#endif
saved_errno = errno;
/* CRM_CHECK(errno != EINVAL); */
if(errno == EINVAL) {
crm_err("Conversion of %s failed", text);
result = -1;
} else if(errno == ERANGE) {
crm_err("Conversion of %s was clipped: %lld", text, result);
} else if(errno != 0) {
crm_perror(LOG_ERR,"Conversion of %s failed:", text);
}
if(local_end_text != NULL && local_end_text[0] != '\0') {
crm_err("Characters left over after parsing '%s': '%s'", text, local_end_text);
}
errno = saved_errno;
}
return result;
}
int
crm_parse_int(const char *text, const char *default_text)
{
int atoi_result = -1;
if(text != NULL) {
atoi_result = crm_int_helper(text, NULL);
if(errno == 0) {
return atoi_result;
}
}
if(default_text != NULL) {
atoi_result = crm_int_helper(default_text, NULL);
if(errno == 0) {
return atoi_result;
}
} else {
crm_err("No default conversion value supplied");
}
return -1;
}
gboolean
safe_str_neq(const char *a, const char *b)
{
if(a == b) {
return FALSE;
} else if(a==NULL || b==NULL) {
return TRUE;
} else if(strcasecmp(a, b) == 0) {
return FALSE;
}
return TRUE;
}
char *
crm_strdup_fn(const char *src, const char *file, const char *fn, int line)
{
char *dup = NULL;
CRM_CHECK(src != NULL,
crm_err("Could not perform copy at %s:%d (%s)", file, line, fn);
return NULL);
crm_malloc0(dup, strlen(src) + 1);
return strcpy(dup, src);
}
#define ENV_PREFIX "HA_"
void
crm_set_env_options(void)
{
cl_inherit_logging_environment(500);
cl_log_set_logd_channel_source(NULL, NULL);
if(debug_level > 0 && (debug_level+LOG_INFO) > (int)crm_log_level) {
set_crm_log_level(LOG_INFO + debug_level);
}
}
gboolean
crm_is_true(const char * s)
{
gboolean ret = FALSE;
if(s != NULL) {
crm_str_to_boolean(s, &ret);
}
return ret;
}
int
crm_str_to_boolean(const char * s, int * ret)
{
if(s == NULL) {
return -1;
} else if (strcasecmp(s, "true") == 0
|| strcasecmp(s, "on") == 0
|| strcasecmp(s, "yes") == 0
|| strcasecmp(s, "y") == 0
|| strcasecmp(s, "1") == 0){
*ret = TRUE;
return 1;
} else if (strcasecmp(s, "false") == 0
|| strcasecmp(s, "off") == 0
|| strcasecmp(s, "no") == 0
|| strcasecmp(s, "n") == 0
|| strcasecmp(s, "0") == 0){
*ret = FALSE;
return 1;
}
return -1;
}
#ifndef NUMCHARS
# define NUMCHARS "0123456789."
#endif
#ifndef WHITESPACE
# define WHITESPACE " \t\n\r\f"
#endif
unsigned long long
crm_get_interval(const char * input)
{
ha_time_t *interval = NULL;
char *input_copy = crm_strdup(input);
char *input_copy_mutable = input_copy;
unsigned long long msec = 0;
if(input == NULL) {
return 0;
} else if(input[0] != 'P') {
crm_free(input_copy);
return crm_get_msec(input);
}
interval = parse_time_duration(&input_copy_mutable);
msec = date_in_seconds(interval);
free_ha_date(interval);
crm_free(input_copy);
return msec * 1000;
}
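/* crm_get_msec: convert a textual duration to milliseconds.  Bare
* numbers are taken as seconds, so "90" -> 90000, while explicit units
* are honoured: "500ms" -> 500, "2min" -> 120000, "1h" -> 3600000.
* Unrecognised units, or input with no leading digits, yield -1.
*/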
long long
crm_get_msec(const char * input)
{
const char *cp = input;
const char *units;
long long multiplier = 1000;
long long divisor = 1;
long long msec = -1;
char *end_text = NULL;
/* double dret; */
if(input == NULL) {
return msec;
}
cp += strspn(cp, WHITESPACE);
units = cp + strspn(cp, NUMCHARS);
units += strspn(units, WHITESPACE);
if (strchr(NUMCHARS, *cp) == NULL) {
return msec;
}
if (strncasecmp(units, "ms", 2) == 0
|| strncasecmp(units, "msec", 4) == 0) {
multiplier = 1;
divisor = 1;
} else if (strncasecmp(units, "us", 2) == 0
|| strncasecmp(units, "usec", 4) == 0) {
multiplier = 1;
divisor = 1000;
} else if (strncasecmp(units, "s", 1) == 0
|| strncasecmp(units, "sec", 3) == 0) {
multiplier = 1000;
divisor = 1;
} else if (strncasecmp(units, "m", 1) == 0
|| strncasecmp(units, "min", 3) == 0) {
multiplier = 60*1000;
divisor = 1;
} else if (strncasecmp(units, "h", 1) == 0
|| strncasecmp(units, "hr", 2) == 0) {
multiplier = 60*60*1000;
divisor = 1;
} else if (*units != EOS && *units != '\n' && *units != '\r') {
return msec;
}
msec = crm_int_helper(cp, &end_text);
msec *= multiplier;
msec /= divisor;
/* dret += 0.5; */
/* msec = (long long)dret; */
return msec;
}
const char *
op_status2text(op_status_t status)
{
switch(status) {
case LRM_OP_PENDING:
return "pending";
break;
case LRM_OP_DONE:
return "complete";
break;
case LRM_OP_ERROR:
return "Error";
break;
case LRM_OP_TIMEOUT:
return "Timed Out";
break;
case LRM_OP_NOTSUPPORTED:
return "NOT SUPPORTED";
break;
case LRM_OP_CANCELLED:
return "Cancelled";
break;
}
crm_err("Unknown status: %d", status);
return "UNKNOWN!";
}
char *
generate_op_key(const char *rsc_id, const char *op_type, int interval)
{
int len = 35;
char *op_id = NULL;
CRM_CHECK(rsc_id != NULL, return NULL);
CRM_CHECK(op_type != NULL, return NULL);
len += strlen(op_type);
len += strlen(rsc_id);
crm_malloc0(op_id, len);
CRM_CHECK(op_id != NULL, return NULL);
sprintf(op_id, "%s_%s_%d", rsc_id, op_type, interval);
return op_id;
}
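/* parse_op_key: inverse of generate_op_key().  Given a key such as
* "myrsc_monitor_10000" it produces rsc_id="myrsc", op_type="monitor"
* and interval=10000, stripping any "_pre_notify"/"_post_notify"
* suffix from the resource name.  Both output strings are allocated
* and must be freed by the caller.
*/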
gboolean
parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval)
{
char *notify = NULL;
char *mutable_key = NULL;
char *mutable_key_ptr = NULL;
int len = 0, offset = 0, ch = 0;
CRM_CHECK(key != NULL, return FALSE);
*interval = 0;
len = strlen(key);
offset = len-1;
crm_debug_3("Source: %s", key);
while(offset > 0 && isdigit(key[offset])) {
int digits = len-offset;
ch = key[offset] - '0';
CRM_CHECK(ch < 10, return FALSE);
CRM_CHECK(ch >= 0, return FALSE);
while(digits > 1) {
digits--;
ch = ch * 10;
}
*interval += ch;
offset--;
}
crm_debug_3(" Interval: %d", *interval);
CRM_CHECK(key[offset] == '_', return FALSE);
mutable_key = crm_strdup(key);
mutable_key[offset] = 0;
offset--;
while(offset > 0 && key[offset] != '_') {
offset--;
}
CRM_CHECK(key[offset] == '_',
crm_free(mutable_key); return FALSE);
mutable_key_ptr = mutable_key+offset+1;
crm_debug_3(" Action: %s", mutable_key_ptr);
*op_type = crm_strdup(mutable_key_ptr);
mutable_key[offset] = 0;
offset--;
CRM_CHECK(mutable_key != mutable_key_ptr,
crm_free(mutable_key); return FALSE);
notify = strstr(mutable_key, "_post_notify");
if(safe_str_eq(notify, "_post_notify")) {
notify[0] = 0;
}
notify = strstr(mutable_key, "_pre_notify");
if(safe_str_eq(notify, "_pre_notify")) {
notify[0] = 0;
}
crm_debug_3(" Resource: %s", mutable_key);
*rsc_id = mutable_key;
return TRUE;
}
char *
generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type)
{
int len = 12;
char *op_id = NULL;
CRM_CHECK(rsc_id != NULL, return NULL);
CRM_CHECK(op_type != NULL, return NULL);
CRM_CHECK(notify_type != NULL, return NULL);
len += strlen(op_type);
len += strlen(rsc_id);
len += strlen(notify_type);
crm_malloc0(op_id, len);
if(op_id != NULL) {
sprintf(op_id, "%s_%s_notify_%s_0", rsc_id, notify_type, op_type);
}
return op_id;
}
char *
generate_transition_magic_v202(const char *transition_key, int op_status)
{
int len = 80;
char *fail_state = NULL;
CRM_CHECK(transition_key != NULL, return NULL);
len += strlen(transition_key);
crm_malloc0(fail_state, len);
if(fail_state != NULL) {
snprintf(fail_state, len, "%d:%s", op_status,transition_key);
}
return fail_state;
}
char *
generate_transition_magic(const char *transition_key, int op_status, int op_rc)
{
int len = 80;
char *fail_state = NULL;
CRM_CHECK(transition_key != NULL, return NULL);
len += strlen(transition_key);
crm_malloc0(fail_state, len);
if(fail_state != NULL) {
snprintf(fail_state, len, "%d:%d;%s",
op_status, op_rc, transition_key);
}
return fail_state;
}
gboolean
decode_transition_magic(
const char *magic, char **uuid, int *transition_id, int *action_id,
int *op_status, int *op_rc, int *target_rc)
{
int res = 0;
char *key = NULL;
gboolean result = TRUE;
CRM_CHECK(magic != NULL, return FALSE);
CRM_CHECK(op_rc != NULL, return FALSE);
CRM_CHECK(op_status != NULL, return FALSE);
crm_malloc0(key, strlen(magic)+1);
res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key);
if(res != 3) {
crm_crit("Only found %d items in: %s", res, magic);
result = FALSE;
goto bail;
}
CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id, target_rc),
result = FALSE;
goto bail;
);
bail:
crm_free(key);
return result;
}
char *
generate_transition_key(int transition_id, int action_id, int target_rc, const char *node)
{
int len = 40;
char *fail_state = NULL;
CRM_CHECK(node != NULL, return NULL);
len += strlen(node);
crm_malloc0(fail_state, len);
if(fail_state != NULL) {
snprintf(fail_state, len, "%d:%d:%d:%s",
action_id, transition_id, target_rc, node);
}
return fail_state;
}
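/* decode_transition_key: parse the "action:transition:target_rc:uuid"
* format produced by generate_transition_key() above, falling back to
* the older "action:transition:uuid" and "transition:uuid" layouts
* used before Pacemaker 0.6 and Heartbeat 2.0.8 respectively.  Any
* field that cannot be recovered is reported as -1.
*/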
gboolean
decode_transition_key(
const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc)
{
int res = 0;
gboolean done = FALSE;
CRM_CHECK(uuid != NULL, return FALSE);
CRM_CHECK(target_rc != NULL, return FALSE);
CRM_CHECK(action_id != NULL, return FALSE);
CRM_CHECK(transition_id != NULL, return FALSE);
crm_malloc0(*uuid, strlen(key)+1);
res = sscanf(key, "%d:%d:%d:%s", action_id, transition_id, target_rc, *uuid);
switch(res) {
case 4:
/* Post Pacemaker 0.6 */
done = TRUE;
break;
case 3:
case 2:
/* this can be tricky - the UUID might start with an integer */
/* Until Pacemaker 0.6 */
done = TRUE;
*target_rc = -1;
res = sscanf(key, "%d:%d:%s", action_id, transition_id, *uuid);
if(res == 2) {
*action_id = -1;
res = sscanf(key, "%d:%s", transition_id, *uuid);
CRM_CHECK(res == 2, done = FALSE);
} else if(res != 3) {
CRM_CHECK(res == 3, done = FALSE);
}
break;
case 1:
/* Prior to Heartbeat 2.0.8 */
done = TRUE;
*action_id = -1;
*target_rc = -1;
res = sscanf(key, "%d:%s", transition_id, *uuid);
CRM_CHECK(res == 2, done = FALSE);
break;
default:
crm_crit("Unhandled sscanf result (%d) for %s", res, key);
}
if(strlen(*uuid) != 36) {
crm_warn("Bad UUID (%s) in sscanf result (%d) for %s", *uuid, res, key);
}
if(done == FALSE) {
crm_err("Cannot decode '%s' rc=%d", key, res);
crm_free(*uuid);
*uuid = NULL;
*target_rc = -1;
*action_id = -1;
*transition_id = -1;
}
return done;
}
void
filter_action_parameters(xmlNode *param_set, const char *version)
{
char *key = NULL;
char *timeout = NULL;
char *interval = NULL;
const char *attr_filter[] = {
XML_ATTR_ID,
XML_ATTR_CRM_VERSION,
XML_LRM_ATTR_OP_DIGEST,
};
gboolean do_delete = FALSE;
int lpc = 0;
static int meta_len = 0;
if(meta_len == 0) {
meta_len = strlen(CRM_META);
}
if(param_set == NULL) {
return;
}
for(lpc = 0; lpc < DIMOF(attr_filter); lpc++) {
xml_remove_prop(param_set, attr_filter[lpc]);
}
key = crm_meta_name(XML_LRM_ATTR_INTERVAL);
interval = crm_element_value_copy(param_set, key);
crm_free(key);
key = crm_meta_name(XML_ATTR_TIMEOUT);
timeout = crm_element_value_copy(param_set, key);
xml_prop_name_iter(param_set, prop_name,
do_delete = FALSE;
if(strncasecmp(prop_name, CRM_META, meta_len) == 0) {
do_delete = TRUE;
}
if(do_delete) {
xml_remove_prop(param_set, prop_name);
}
);
if(crm_get_msec(interval) > 0 && compare_version(version, "1.0.8") > 0) {
/* Re-instate the operation's timeout value */
if(timeout != NULL) {
crm_xml_add(param_set, key, timeout);
}
}
crm_free(interval);
crm_free(timeout);
crm_free(key);
}
void
filter_reload_parameters(xmlNode *param_set, const char *restart_string)
{
int len = 0;
char *name = NULL;
char *match = NULL;
if(param_set == NULL) {
return;
}
xml_prop_name_iter(param_set, prop_name,
name = NULL;
len = strlen(prop_name) + 3;
crm_malloc0(name, len);
sprintf(name, " %s ", prop_name);
name[len-1] = 0;
match = strstr(restart_string, name);
if(match == NULL) {
crm_debug_3("%s not found in %s",
prop_name, restart_string);
xml_remove_prop(param_set, prop_name);
}
crm_free(name);
);
}
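/* crm_abort: report a failed assertion.  For non-fatal asserts the
* process forks and lets the child abort(), so a core file can be
* captured while the parent (which waits on the child) carries on;
* fatal asserts abort() in the original process.
*/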
void
crm_abort(const char *file, const char *function, int line,
const char *assert_condition, gboolean do_core, gboolean do_fork)
{
int rc = 0;
int pid = 0;
int status = 0;
if(do_core == FALSE) {
do_crm_log(LOG_ERR, "%s: Triggered assert at %s:%d : %s",
function, file, line, assert_condition);
return;
} else if(do_fork) {
pid=fork();
} else {
do_crm_log(LOG_ERR, "%s: Triggered fatal assert at %s:%d : %s",
function, file, line, assert_condition);
}
switch(pid) {
case -1:
do_crm_log(LOG_CRIT, "%s: Cannot create core for non-fatal assert at %s:%d : %s",
function, file, line, assert_condition);
return;
default: /* Parent */
do_crm_log(LOG_ERR,
"%s: Forked child %d to record non-fatal assert at %s:%d : %s",
function, pid, file, line, assert_condition);
do {
rc = waitpid(pid, &status, 0);
if(rc < 0 && errno != EINTR) {
crm_perror(LOG_ERR,"%s: Cannot wait on forked child %d", function, pid);
}
} while(rc < 0 && errno == EINTR);
return;
case 0: /* Child */
abort();
break;
}
}
char *
generate_series_filename(
const char *directory, const char *series, int sequence, gboolean bzip)
{
int len = 40;
char *filename = NULL;
const char *ext = "raw";
CRM_CHECK(directory != NULL, return NULL);
CRM_CHECK(series != NULL, return NULL);
len += strlen(directory);
len += strlen(series);
crm_malloc0(filename, len);
CRM_CHECK(filename != NULL, return NULL);
if(bzip) {
ext = "bz2";
}
sprintf(filename, "%s/%s-%d.%s", directory, series, sequence, ext);
return filename;
}
int
get_last_sequence(const char *directory, const char *series)
{
FILE *file_strm = NULL;
int start = 0, length = 0, read_len = 0;
char *series_file = NULL;
char *buffer = NULL;
int seq = 0;
int len = 36;
CRM_CHECK(directory != NULL, return 0);
CRM_CHECK(series != NULL, return 0);
len += strlen(directory);
len += strlen(series);
crm_malloc0(series_file, len);
CRM_CHECK(series_file != NULL, return 0);
sprintf(series_file, "%s/%s.last", directory, series);
file_strm = fopen(series_file, "r");
if(file_strm == NULL) {
crm_debug("Series file %s does not exist", series_file);
crm_free(series_file);
return 0;
}
/* see how big the file is */
start = ftell(file_strm);
fseek(file_strm, 0L, SEEK_END);
length = ftell(file_strm);
fseek(file_strm, start, SEEK_SET);
CRM_ASSERT(length >= 0);
CRM_ASSERT(start == ftell(file_strm));
crm_debug_3("Reading %d bytes from file", length);
crm_malloc0(buffer, (length+1));
read_len = fread(buffer, 1, length, file_strm);
if(read_len != length) {
crm_err("Calculated and read bytes differ: %d vs. %d",
length, read_len);
crm_free(buffer);
buffer = NULL;
} else if(length <= 0) {
crm_info("%s was not valid", series_file);
crm_free(buffer);
buffer = NULL;
}
crm_free(series_file);
seq = crm_parse_int(buffer, "0");
crm_free(buffer);
fclose(file_strm);
return seq;
}
void
write_last_sequence(
const char *directory, const char *series, int sequence, int max)
{
int rc = 0;
int len = 36;
FILE *file_strm = NULL;
char *series_file = NULL;
CRM_CHECK(directory != NULL, return);
CRM_CHECK(series != NULL, return);
if(max == 0) {
return;
}
while(max > 0 && sequence > max) {
sequence -= max;
}
len += strlen(directory);
len += strlen(series);
crm_malloc0(series_file, len);
sprintf(series_file, "%s/%s.last", directory, series);
file_strm = fopen(series_file, "w");
if(file_strm == NULL) {
crm_err("Cannout open series file %s for writing", series_file);
goto bail;
}
rc = fprintf(file_strm, "%d", sequence);
if(rc < 0) {
crm_perror(LOG_ERR,"Cannot write to series file %s", series_file);
}
bail:
if(file_strm != NULL) {
fflush(file_strm);
fclose(file_strm);
}
crm_free(series_file);
}
#define LOCKSTRLEN 11
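/* crm_pid_active: determine whether pid refers to a live instance of
* this executable.  After the kill(pid, 0) liveness probe, it compares
* /proc/<pid>/exe with /proc/<self>/exe so a recycled PID belonging to
* some other program is not mistaken for a running daemon.  Returns 1
* if active, 0 if not, -1 for an invalid pid.
*/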
int crm_pid_active(long pid)
{
int rc = 0;
int running = 0;
char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX];
if(pid <= 0) {
return -1;
} else if (kill(pid, 0) < 0 && errno == ESRCH) {
return 0;
}
#ifndef HAVE_PROC_PID
return 1;
#endif
/* check to make sure pid hasn't been reused by another process */
snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", pid);
rc = readlink(proc_path, exe_path, PATH_MAX-1);
if(rc < 0) {
crm_perror(LOG_ERR, "Could not read from %s", proc_path);
goto bail;
}
exe_path[rc] = 0;
snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid());
rc = readlink(proc_path, myexe_path, PATH_MAX-1);
if(rc < 0) {
crm_perror(LOG_ERR, "Could not read from %s", proc_path);
goto bail;
}
myexe_path[rc] = 0;
if(strcmp(exe_path, myexe_path) == 0) {
running = 1;
}
bail:
return running;
}
int
crm_read_pidfile(const char *filename)
{
int fd;
long pid = -1;
char buf[LOCKSTRLEN+1];
if ((fd = open(filename, O_RDONLY)) < 0) {
goto bail;
}
if (read(fd, buf, sizeof(buf)) < 1) {
goto bail;
}
if (sscanf(buf, "%lu", &pid) > 0) {
if (pid <= 0){
pid = -LSB_STATUS_STOPPED;
}
}
bail:
if(fd >= 0) { close(fd); }
return pid;
}
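/* crm_lock_pidfile: take a pidfile lock using the create-and-link
* protocol.  A stale lock (whose owner fails crm_pid_active()) is
* removed; our PID is then written to a unique temporary file which
* is hard-linked to the real name, an operation that succeeds
* atomically for at most one contender.  Returns 0 on success, -1 if
* a live process holds the lock, -2/-3 on link or I/O anomalies.
*/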
int
crm_lock_pidfile(const char *filename)
{
struct stat sbuf;
int fd = 0, rc = 0;
long pid = 0, mypid = 0;
char lf_name[256], tf_name[256], buf[LOCKSTRLEN+1];
mypid = (unsigned long) getpid();
snprintf(lf_name, sizeof(lf_name), "%s",filename);
snprintf(tf_name, sizeof(tf_name), "%s.%lu", filename, mypid);
if ((fd = open(lf_name, O_RDONLY)) >= 0) {
if (fstat(fd, &sbuf) >= 0 && sbuf.st_size < LOCKSTRLEN) {
sleep(1); /* if someone was about to create one,
* give'm a sec to do so
* Though if they follow our protocol,
* this won't happen. They should really
* put the pid in, then link, not the
* other way around.
*/
}
if (read(fd, buf, sizeof(buf)) > 0) {
if (sscanf(buf, "%lu", &pid) > 0) {
if (pid > 1 && pid != getpid() && crm_pid_active(pid)) {
/* locked by existing process - give up */
close(fd);
return -1;
}
}
}
unlink(lf_name);
close(fd);
}
if ((fd = open(tf_name, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) {
/* Hmmh, why did we fail? Anyway, nothing we can do about it */
return -3;
}
/* Slight overkill with the %*d format ;-) */
snprintf(buf, sizeof(buf), "%*lu\n", LOCKSTRLEN-1, mypid);
if (write(fd, buf, LOCKSTRLEN) != LOCKSTRLEN) {
/* Again, nothing we can do about this */
rc = -3;
close(fd);
goto out;
}
close(fd);
if (link(tf_name, lf_name) < 0) {
/* link(2) returns -1 on failure; the cause is in errno */
rc = (errno == EEXIST) ? -1 : -3;
} else if (stat(tf_name, &sbuf) < 0) {
/* something weird happened */
rc = -3;
} else if (sbuf.st_nlink < 2) {
/* somehow, it didn't get through - NFS trouble? */
rc = -2;
} else {
rc = 0;
}
out:
unlink(tf_name);
return rc;
}
void
crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile)
{
long pid;
const char *devnull = "/dev/null";
if(daemonize == FALSE) {
return;
}
pid = fork();
if (pid < 0) {
fprintf(stderr, "%s: could not start daemon\n", name);
crm_perror(LOG_ERR,"fork");
exit(LSB_EXIT_GENERIC);
} else if (pid > 0) {
exit(LSB_EXIT_OK);
}
if (crm_lock_pidfile(pidfile) < 0 ) {
pid = crm_read_pidfile(pidfile);
if(crm_pid_active(pid) > 0) {
crm_warn("%s: already running [pid %ld] (%s).\n", name, pid, pidfile);
exit(LSB_EXIT_OK);
}
}
umask(022);
close(STDIN_FILENO);
(void)open(devnull, O_RDONLY); /* Stdin: fd 0 */
close(STDOUT_FILENO);
(void)open(devnull, O_WRONLY); /* Stdout: fd 1 */
close(STDERR_FILENO);
(void)open(devnull, O_WRONLY); /* Stderr: fd 2 */
}
gboolean
crm_is_writable(const char *dir, const char *file,
const char *user, const char *group, gboolean need_both)
{
int s_res = -1;
struct stat buf;
char *full_file = NULL;
const char *target = NULL;
gboolean pass = TRUE;
gboolean readwritable = FALSE;
CRM_ASSERT(dir != NULL);
if(file != NULL) {
full_file = crm_concat(dir, file, '/');
target = full_file;
s_res = stat(full_file, &buf);
if( s_res == 0 && S_ISREG(buf.st_mode) == FALSE ) {
crm_err("%s must be a regular file", target);
pass = FALSE;
goto out;
}
}
if (s_res != 0) {
target = dir;
s_res = stat(dir, &buf);
if(s_res != 0) {
crm_err("%s must exist and be a directory", dir);
pass = FALSE;
goto out;
} else if( S_ISDIR(buf.st_mode) == FALSE ) {
crm_err("%s must be a directory", dir);
pass = FALSE;
}
}
if(user) {
struct passwd *sys_user = NULL;
sys_user = getpwnam(user);
readwritable = (sys_user != NULL
&& buf.st_uid == sys_user->pw_uid
&& (buf.st_mode & (S_IRUSR|S_IWUSR)));
if(readwritable == FALSE) {
crm_err("%s must be owned and r/w by user %s",
target, user);
if(need_both) {
pass = FALSE;
}
}
}
if(group) {
struct group *sys_grp = getgrnam(group);
readwritable = (
sys_grp != NULL
&& buf.st_gid == sys_grp->gr_gid
&& (buf.st_mode & (S_IRGRP|S_IWGRP)));
if(readwritable == FALSE) {
if(need_both || user == NULL) {
pass = FALSE;
crm_err("%s must be owned and r/w by group %s",
target, group);
} else {
crm_warn("%s should be owned and r/w by group %s",
target, group);
}
}
}
out:
crm_free(full_file);
return pass;
}
static unsigned long long crm_bit_filter = 0; /* 0x00000002ULL; */
static unsigned int bit_log_level = LOG_DEBUG_5;
long long
crm_clear_bit(const char *function, long long word, long long bit)
{
unsigned int level = bit_log_level;
if(bit & crm_bit_filter) {
level = LOG_ERR;
}
do_crm_log_unlikely(level, "Bit 0x%.16llx cleared by %s", bit, function);
word &= ~bit;
return word;
}
long long
crm_set_bit(const char *function, long long word, long long bit)
{
unsigned int level = bit_log_level;
if(bit & crm_bit_filter) {
level = LOG_ERR;
}
do_crm_log_unlikely(level, "Bit 0x%.16llx set by %s", bit, function);
word |= bit;
return word;
}
const char *
name_for_cluster_type(enum cluster_type_e type)
{
switch(type) {
case pcmk_cluster_classic_ais:
return "classic openais (with plugin)";
case pcmk_cluster_cman:
return "cman";
case pcmk_cluster_corosync:
return "corosync";
case pcmk_cluster_heartbeat:
return "heartbeat";
case pcmk_cluster_unknown:
return "unknown";
case pcmk_cluster_invalid:
return "invalid";
}
crm_err("Invalid cluster type: %d", type);
return "invalid";
}
/* Do not expose these two */
int set_cluster_type(enum cluster_type_e type);
static enum cluster_type_e cluster_type = pcmk_cluster_unknown;
int set_cluster_type(enum cluster_type_e type)
{
if(cluster_type == pcmk_cluster_unknown) {
crm_info("Cluster type set to: %s", name_for_cluster_type(cluster_type));
cluster_type = type;
return 0;
} else if(cluster_type == type) {
return 0;
} else if(pcmk_cluster_unknown == type) {
cluster_type = type;
return 0;
}
crm_err("Cluster type already set to %s", name_for_cluster_type(cluster_type));
return -1;
}
enum cluster_type_e
get_cluster_type(void)
{
if(cluster_type == pcmk_cluster_unknown) {
const char *cluster = getenv("HA_cluster_type");
cluster_type = pcmk_cluster_invalid;
if(cluster) {
crm_info("Cluster type is: '%s'.", cluster);
+ } else {
+ cluster = "heartbeat";
}
- if(cluster == NULL || safe_str_eq(cluster, "heartbeat")) {
+
+ if(safe_str_eq(cluster, "heartbeat")) {
#if SUPPORT_HEARTBEAT
cluster_type = pcmk_cluster_heartbeat;
#else
crm_crit("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.",
cluster);
exit(100);
#endif
} else if(safe_str_eq(cluster, "openais")) {
#if SUPPORT_COROSYNC
cluster_type = pcmk_cluster_classic_ais;
#else
crm_crit("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.",
cluster);
exit(100);
#endif
} else if(safe_str_eq(cluster, "corosync")) {
#if SUPPORT_COROSYNC
cluster_type = pcmk_cluster_corosync;
#else
crm_crit("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.",
cluster);
exit(100);
#endif
} else if(safe_str_eq(cluster, "cman")) {
#if SUPPORT_CMAN
cluster_type = pcmk_cluster_cman;
#else
crm_crit("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.",
cluster);
exit(100);
#endif
} else {
crm_crit("Unknown cluster type: '%s'. Terminating.", cluster);
exit(100);
}
}
return cluster_type;
}
gboolean is_cman_cluster(void)
{
return get_cluster_type() == pcmk_cluster_cman;
}
gboolean is_corosync_cluster(void)
{
return get_cluster_type() == pcmk_cluster_corosync;
}
gboolean is_classic_ais_cluster(void)
{
return get_cluster_type() == pcmk_cluster_classic_ais;
}
gboolean is_openais_cluster(void)
{
enum cluster_type_e type = get_cluster_type();
if(type == pcmk_cluster_classic_ais) {
return TRUE;
} else if(type == pcmk_cluster_corosync) {
return TRUE;
} else if(type == pcmk_cluster_cman) {
return TRUE;
}
return FALSE;
}
gboolean is_heartbeat_cluster(void)
{
return get_cluster_type() == pcmk_cluster_heartbeat;
}
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
{
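/* Note: even when use_case is set, only the first character is compared
 * case-sensitively; the remainder still goes through the case-insensitive
 * strcasecmp() below */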
if(a == b) {
return TRUE;
} else if(a == NULL || b == NULL) {
/* shouldn't be comparing NULLs */
return FALSE;
} else if(use_case && a[0] != b[0]) {
return FALSE;
} else if(strcasecmp(a, b) == 0) {
return TRUE;
}
return FALSE;
}
char *crm_meta_name(const char *field)
{
int lpc = 0;
int max = 0;
char *crm_name = NULL;
CRM_CHECK(field != NULL, return NULL);
crm_name = crm_concat(CRM_META, field, '_');
/* Massage the names so they can be used as shell variables */
max = strlen(crm_name);
for(; lpc < max; lpc++) {
switch(crm_name[lpc]) {
case '-':
crm_name[lpc] = '_';
break;
}
}
return crm_name;
}
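/* e.g. crm_meta_name("migration-threshold") yields
 * "CRM_meta_migration_threshold", which is a legal shell variable name */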
const char *crm_meta_value(GHashTable *hash, const char *field)
{
char *key = NULL;
const char *value = NULL;
key = crm_meta_name(field);
if(key) {
value = g_hash_table_lookup(hash, key);
crm_free(key);
}
return value;
}
static struct crm_option *crm_long_options = NULL;
static const char *crm_app_description = NULL;
static const char *crm_short_options = NULL;
static const char *crm_app_usage = NULL;
static struct option *crm_create_long_opts(struct crm_option *long_options)
{
struct option *long_opts = NULL;
#ifdef HAVE_GETOPT_H
int index = 0, lpc = 0;
/*
* A previous, possibly poor, choice of '?' as the short form of --help
* means that getopt_long() returns '?' for both --help and for "unknown option"
*
* This dummy entry allows us to differentiate between the two in crm_get_option()
* and exit with the correct error code
*/
crm_realloc(long_opts, (index+1) * sizeof(struct option));
long_opts[index].name = "__dummy__";
long_opts[index].has_arg = 0;
long_opts[index].flag = 0;
long_opts[index].val = '_';
index++;
for(lpc = 0; long_options[lpc].name != NULL; lpc++) {
if(long_options[lpc].name[0] == '-') {
continue;
}
crm_realloc(long_opts, (index+1) * sizeof(struct option));
/*fprintf(stderr, "Creating %d %s = %c\n", index,
* long_options[lpc].name, long_options[lpc].val); */
long_opts[index].name = long_options[lpc].name;
long_opts[index].has_arg = long_options[lpc].has_arg;
long_opts[index].flag = long_options[lpc].flag;
long_opts[index].val = long_options[lpc].val;
index++;
}
/* Now create the list terminator */
crm_realloc(long_opts, (index+1) * sizeof(struct option));
long_opts[index].name = NULL;
long_opts[index].has_arg = 0;
long_opts[index].flag = 0;
long_opts[index].val = 0;
#endif
return long_opts;
}
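/* Minimal usage sketch for these option wrappers (hypothetical tool; the
 * option names, letters and descriptions are illustrative, not part of
 * this change):
 *
 *   static struct crm_option my_opts[] = {
 *       {"help",    0, 0, '?', "Display this help"},
 *       {"verbose", 0, 0, 'V', "Increase debug output"},
 *       {0, 0, 0, 0}
 *   };
 *
 *   int argerr = 0, flag = 0, index = 0;
 *   crm_set_options("V?", "[options]", my_opts, "An example client");
 *   while ((flag = crm_get_option(argc, argv, &index)) != -1) {
 *       switch (flag) {
 *           case 'V': / * raise verbosity * / break;
 *           case '?': crm_help(flag, 0); break;
 *           default:  argerr++; break;
 *       }
 *   }
 */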
void crm_set_options(const char *short_options, const char *app_usage, struct crm_option *long_options, const char *app_desc)
{
if(short_options) {
crm_short_options = short_options;
}
if(long_options) {
crm_long_options = long_options;
}
if(app_desc) {
crm_app_description = app_desc;
}
if(app_usage) {
crm_app_usage = app_usage;
}
}
int crm_get_option(int argc, char **argv, int *index)
{
#ifdef HAVE_GETOPT_H
static struct option *long_opts = NULL;
if(long_opts == NULL && crm_long_options) {
long_opts = crm_create_long_opts(crm_long_options);
}
if(long_opts) {
int flag = getopt_long(argc, argv, crm_short_options, long_opts, index);
switch(flag) {
case 0: return long_opts[*index].val;
case -1: /* End of option processing */ break;
case ':': crm_debug_2("Missing argument"); crm_help('?', 1); break;
case '?': crm_help('?', *index?0:1); break;
}
return flag;
}
#endif
if(crm_short_options) {
return getopt(argc, argv, crm_short_options);
}
return -1;
}
void crm_help(char cmd, int exit_code)
{
int i = 0;
FILE *stream = (exit_code ? stderr : stdout);
if(cmd == 'v' || cmd == '$') {
fprintf(stream, "Pacemaker %s\n", VERSION);
fprintf(stream, "Written by Andrew Beekhof\n");
goto out;
}
if(cmd == '!') {
fprintf(stream, "Pacemaker %s (Build: %s): %s\n", VERSION, BUILD_VERSION, CRM_FEATURES);
goto out;
}
fprintf(stream, "%s - %s\n", crm_system_name, crm_app_description);
if(crm_app_usage) {
fprintf(stream, "Usage: %s %s\n", crm_system_name, crm_app_usage);
}
if(crm_long_options) {
fprintf(stream, "Options:\n");
for(i = 0; crm_long_options[i].name != NULL; i++) {
if(crm_long_options[i].flags & pcmk_option_hidden) {
} else if(crm_long_options[i].flags & pcmk_option_paragraph) {
fprintf(stream, "%s\n\n", crm_long_options[i].desc);
} else if(crm_long_options[i].flags & pcmk_option_example) {
fprintf(stream, "\t#%s\n\n", crm_long_options[i].desc);
} else if(crm_long_options[i].val == '-' && crm_long_options[i].desc) {
fprintf(stream, "%s\n", crm_long_options[i].desc);
} else {
fprintf(stream, " -%c, --%s%c%s\t%s\n", crm_long_options[i].val, crm_long_options[i].name,
crm_long_options[i].has_arg?'=':' ',crm_long_options[i].has_arg?"value":"",
crm_long_options[i].desc?crm_long_options[i].desc:"");
}
}
} else if(crm_short_options) {
fprintf(stream, "Usage: %s - %s\n", crm_system_name, crm_app_description);
for(i = 0; crm_short_options[i] != 0; i++) {
int has_arg = FALSE;
if(crm_short_options[i+1] == ':') {
has_arg = TRUE;
}
fprintf(stream, " -%c %s\n", crm_short_options[i], has_arg?"{value}":"");
if(has_arg) {
i++;
}
}
}
fprintf(stream, "\nReport bugs to %s\n", PACKAGE_BUGREPORT);
out:
if(exit_code >= 0) {
exit(exit_code);
}
}
#include <../../tools/attrd.h>
gboolean attrd_update_delegate(IPC_Channel *cluster, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen, const char *user_name)
{
gboolean success = FALSE;
const char *reason = "Cluster connection failed";
/* remap common aliases */
if(safe_str_eq(section, "reboot")) {
section = XML_CIB_TAG_STATUS;
} else if(safe_str_eq(section, "forever")) {
section = XML_CIB_TAG_NODES;
}
if(cluster == NULL) {
reason = "No connection to the cluster";
} else {
xmlNode *update = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(update, F_TYPE, T_ATTRD);
crm_xml_add(update, F_ORIG, crm_system_name);
if(name == NULL && command == 'U') {
command = 'R';
}
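/* Single-letter commands map onto attrd tasks: 'D', 'U' and 'v' all become
 * an "update" (a delete is just an update with no value), 'R' a refresh
 * and 'q' a query */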
switch(command) {
case 'D':
case 'U':
case 'v':
crm_xml_add(update, F_ATTRD_TASK, "update");
crm_xml_add(update, F_ATTRD_ATTRIBUTE, name);
break;
case 'R':
crm_xml_add(update, F_ATTRD_TASK, "refresh");
break;
case 'q':
crm_xml_add(update, F_ATTRD_TASK, "query");
break;
}
crm_xml_add(update, F_ATTRD_VALUE, value);
crm_xml_add(update, F_ATTRD_DAMPEN, dampen);
crm_xml_add(update, F_ATTRD_SECTION, section);
crm_xml_add(update, F_ATTRD_HOST, host);
crm_xml_add(update, F_ATTRD_SET, set);
#if ENABLE_ACL
if (user_name) {
crm_xml_add(update, F_ATTRD_USER, user_name);
}
#endif
success = send_ipc_message(cluster, update);
free_xml(update);
}
if(success) {
crm_debug("Sent update: %s=%s for %s", name, value, host?host:"localhost");
return TRUE;
}
crm_info("Could not send update: %s=%s for %s", name, value, host?host:"localhost");
return FALSE;
}
gboolean attrd_lazy_update(char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen)
{
int max = 5;
gboolean updated = FALSE;
static IPC_Channel *cluster = NULL;
while(updated == 0 && max > 0) {
if(cluster == NULL) {
crm_info("Connecting to cluster... %d retries remaining", max);
cluster = init_client_ipc_comms_nodispatch(T_ATTRD);
}
if(cluster != NULL) {
updated = attrd_update(cluster, command, host, name, value, section, set, dampen);
}
if(updated == 0) {
cluster = NULL;
sleep(2);
max--;
}
}
return updated;
}
gboolean attrd_update_no_mainloop(int *connection, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen)
{
int max = 5;
gboolean updated = FALSE;
static IPC_Channel *cluster = NULL;
if(connection && *connection == 0 && cluster) {
crm_info("Forcing a new connection to the cluster");
cluster = NULL;
}
while(updated == 0 && max > 0) {
if(cluster == NULL) {
crm_info("Connecting to cluster... %d retries remaining", max);
cluster = init_client_ipc_comms_nodispatch(T_ATTRD);
}
if(connection) {
if(cluster != NULL) {
*connection = cluster->ops->get_recv_select_fd(cluster);
} else {
*connection = 0;
}
}
if(cluster != NULL) {
updated = attrd_update(cluster, command, host, name, value, section, set, dampen);
}
if(updated == 0) {
cluster = NULL;
sleep(2);
max--;
}
}
return updated;
}
#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
static void
append_digest(lrm_op_t *op, xmlNode *update, const char *version, const char *magic, int level)
{
/* this digest will let us determine later whether the
 * resource's parameters have changed, in which case we
 * must force a restart
 */
char *digest = NULL;
xmlNode *args_xml = NULL;
if(op->params == NULL) {
return;
}
args_xml = create_xml_node(NULL, XML_TAG_PARAMS);
g_hash_table_foreach(op->params, hash2field, args_xml);
filter_action_parameters(args_xml, version);
digest = calculate_operation_digest(args_xml, version);
#if 0
if(level < crm_log_level
&& op->interval == 0
&& crm_str_eq(op->op_type, CRMD_ACTION_START, TRUE)) {
char *digest_source = dump_xml_unformatted(args_xml);
do_crm_log(level, "Calculated digest %s for %s (%s). Source: %s\n",
digest, ID(update), magic, digest_source);
crm_free(digest_source);
}
#endif
crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest);
free_xml(args_xml);
crm_free(digest);
}
xmlNode *
create_operation_update(
xmlNode *parent, lrm_op_t *op, const char *caller_version, int target_rc, const char *origin, int level)
{
char *magic = NULL;
const char *task = NULL;
xmlNode *xml_op = NULL;
char *op_id = NULL;
char *local_user_data = NULL;
gboolean dc_munges_migrate_ops = (compare_version(caller_version, "3.0.3") < 0);
CRM_CHECK(op != NULL, return NULL);
do_crm_log(level, "%s: Updating resource %s after %s %s op (interval=%d)",
origin, op->rsc_id, op_status2text(op->op_status), op->op_type, op->interval);
if(op->op_status == LRM_OP_CANCELLED) {
crm_debug_3("Ignoring cancelled op");
return NULL;
}
crm_debug_3("DC version: %s", caller_version);
task = op->op_type;
/* remap the task name under various scenarios
 * to make life easier for the PE when it is trying to determine the current state
 */
if(crm_str_eq(task, "reload", TRUE)) {
if(op->op_status == LRM_OP_DONE) {
task = CRMD_ACTION_START;
} else {
task = CRMD_ACTION_STATUS;
}
} else if(dc_munges_migrate_ops
&& crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) {
/* if the migrate_from fails it will have enough info to do the right thing */
if(op->op_status == LRM_OP_DONE) {
task = CRMD_ACTION_STOP;
} else {
task = CRMD_ACTION_STATUS;
}
} else if(dc_munges_migrate_ops
&& op->op_status == LRM_OP_DONE
&& crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) {
task = CRMD_ACTION_START;
} else if(crm_str_eq(task, CRMD_ACTION_NOTIFY, TRUE)) {
const char *n_type = crm_meta_value(op->params, "notify_type");
const char *n_task = crm_meta_value(op->params, "notify_operation");
CRM_LOG_ASSERT(n_type != NULL);
CRM_LOG_ASSERT(n_task != NULL);
op_id = generate_notify_key(op->rsc_id, n_type, n_task);
/* these are not yet allowed to fail */
op->op_status = LRM_OP_DONE;
op->rc = 0;
}
if (op_id == NULL) {
op_id = generate_op_key(op->rsc_id, task, op->interval);
}
xml_op = find_entity(parent, XML_LRM_TAG_RSC_OP, op_id);
if(xml_op != NULL) {
crm_log_xml(LOG_DEBUG, "Replacing existing entry", xml_op);
} else {
xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP);
}
if(op->user_data == NULL) {
crm_debug("Generating fake transition key for:"
" %s_%s_%d %d from %s",
op->rsc_id, op->op_type, op->interval, op->call_id,
op->app_name);
local_user_data = generate_transition_key(-1, op->call_id, target_rc, FAKE_TE_ID);
op->user_data = local_user_data;
}
magic = generate_transition_magic(op->user_data, op->op_status, op->rc);
crm_xml_add(xml_op, XML_ATTR_ID, op_id);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task);
crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin);
crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic);
crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc);
crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status);
crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval);
if(compare_version("2.1", caller_version) <= 0) {
if(op->t_run || op->t_rcchange || op->exec_time || op->queue_time) {
crm_debug_2("Timing data (%s_%s_%d): last=%lu change=%lu exec=%lu queue=%lu",
op->rsc_id, op->op_type, op->interval,
op->t_run, op->t_rcchange, op->exec_time, op->queue_time);
if(op->interval == 0) {
crm_xml_add_int(xml_op, "last-run", op->t_run);
}
crm_xml_add_int(xml_op, "last-rc-change", op->t_rcchange);
crm_xml_add_int(xml_op, "exec-time", op->exec_time);
crm_xml_add_int(xml_op, "queue-time", op->queue_time);
}
}
if(crm_str_eq(op->op_type, CRMD_ACTION_MIGRATE, TRUE)
|| crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) {
/*
* Record migrate_source and migrate_target always for migrate ops.
*/
const char *name = XML_LRM_ATTR_MIGRATE_SOURCE;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
name = XML_LRM_ATTR_MIGRATE_TARGET;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
}
append_digest(op, xml_op, caller_version, magic, LOG_DEBUG);
if(local_user_data) {
crm_free(local_user_data);
op->user_data = NULL;
}
crm_free(magic);
crm_free(op_id);
return xml_op;
}
void
free_lrm_op(lrm_op_t *op)
{
g_hash_table_destroy(op->params);
crm_free(op->user_data);
crm_free(op->output);
crm_free(op->rsc_id);
crm_free(op->op_type);
crm_free(op->app_name);
crm_free(op);
}
#if ENABLE_ACL
void
determine_request_user(char **user, IPC_Channel *channel, xmlNode *request, const char *field)
{
/* Get our internal validation out of the way first */
CRM_CHECK(user != NULL && channel != NULL && field != NULL, return);
if(*user == NULL) {
/* Figure out who our peer is and cache it... */
struct passwd *pwent = getpwuid(channel->farside_uid);
if(pwent == NULL) {
crm_perror(LOG_ERR, "Cannot get password entry of uid: %d", channel->farside_uid);
} else {
*user = crm_strdup(pwent->pw_name);
}
}
/* If our peer is a privileged user, we might be doing something on behalf of someone else */
if(is_privileged(*user) == FALSE) {
/* We're not a privileged user, set or overwrite any existing value for $field */
crm_xml_replace(request, field, *user);
} else if(crm_element_value(request, field) == NULL) {
/* Even if we're privileged, make sure there is always a value set */
crm_xml_replace(request, field, *user);
/* } else { Legal delegation */
}
crm_debug_2("Processing msg for user '%s'", crm_element_value(request, field));
}
#endif
diff --git a/pengine/pengine.c b/pengine/pengine.c
index 0d9224b218..45c37b4b6c 100644
--- a/pengine/pengine.c
+++ b/pengine/pengine.c
@@ -1,297 +1,297 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/msg.h>
#include <glib.h>
#include <crm/pengine/status.h>
#include <pengine.h>
#include <allocate.h>
#include <lib/pengine/utils.h>
#include <utils.h>
xmlNode * do_calculations(
pe_working_set_t *data_set, xmlNode *xml_input, ha_time_t *now);
gboolean show_scores = FALSE;
int scores_log_level = LOG_DEBUG_2;
gboolean show_utilization = FALSE;
int utilization_log_level = LOG_DEBUG_2;
extern int transition_id;
#define get_series() (was_processing_error ? 1 : was_processing_warning ? 2 : 3)
typedef struct series_s
{
int id;
const char *name;
const char *param;
int wrap;
} series_t;
series_t series[] = {
{ 0, "pe-unknown", "_dont_match_anything_", -1 },
{ 0, "pe-error", "pe-error-series-max", -1 },
{ 0, "pe-warn", "pe-warn-series-max", 200 },
{ 0, "pe-input", "pe-input-series-max", 400 },
};
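/* get_series() indexes this table: a run with errors selects series[1]
 * ("pe-error"), one with warnings series[2] ("pe-warn"), and a clean run
 * series[3] ("pe-input"); series[0] is a catch-all whose param never matches */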
gboolean
process_pe_message(xmlNode *msg, xmlNode *xml_data, IPC_Channel *sender)
{
gboolean send_via_disk = FALSE;
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
const char *op = crm_element_value(msg, F_CRM_TASK);
const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE);
crm_debug_3("Processing %s op (ref=%s)...", op, ref);
if(op == NULL){
/* error */
} else if(strcasecmp(op, CRM_OP_HELLO) == 0) {
/* ignore */
} else if(safe_str_eq(crm_element_value(msg, F_CRM_MSG_TYPE),
XML_ATTR_RESPONSE)) {
/* ignore */
} else if(sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_PENGINE) != 0) {
crm_debug_3("Bad sys-to %s", crm_str(sys_to));
return FALSE;
} else if(strcasecmp(op, CRM_OP_PECALC) == 0) {
int seq = -1;
int series_id = 0;
int series_wrap = 0;
char *filename = NULL;
char *graph_file = NULL;
const char *value = NULL;
pe_working_set_t data_set;
xmlNode *converted = NULL;
xmlNode *reply = NULL;
gboolean process = TRUE;
#if HAVE_BZLIB_H
gboolean compress = TRUE;
#else
gboolean compress = FALSE;
#endif
crm_config_error = FALSE;
crm_config_warning = FALSE;
was_processing_error = FALSE;
was_processing_warning = FALSE;
graph_file = crm_strdup(CRM_STATE_DIR"/graph.XXXXXX");
graph_file = mktemp(graph_file);
set_working_set_defaults(&data_set);
converted = copy_xml(xml_data);
if(cli_config_update(&converted, NULL, TRUE) == FALSE) {
data_set.graph = create_xml_node(NULL, XML_TAG_GRAPH);
crm_xml_add_int(data_set.graph, "transition_id", 0);
crm_xml_add_int(data_set.graph, "cluster-delay", 0);
process = FALSE;
}
if(process) {
do_calculations(&data_set, converted, NULL);
}
series_id = get_series();
series_wrap = series[series_id].wrap;
value = pe_pref(data_set.config_hash, series[series_id].param);
if(value != NULL) {
series_wrap = crm_int_helper(value, NULL);
if(errno != 0) {
series_wrap = series[series_id].wrap;
}
} else {
crm_config_warn("No value specified for cluster"
" preference: %s",
series[series_id].param);
}
seq = get_last_sequence(PE_STATE_DIR, series[series_id].name);
data_set.input = NULL;
reply = create_reply(msg, data_set.graph);
CRM_ASSERT(reply != NULL);
filename = generate_series_filename(
PE_STATE_DIR, series[series_id].name, seq, compress);
crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename);
crm_xml_add_int(reply, "graph-errors", was_processing_error);
crm_xml_add_int(reply, "graph-warnings", was_processing_warning);
crm_xml_add_int(reply, "config-errors", crm_config_error);
crm_xml_add_int(reply, "config-warnings", crm_config_warning);
if(send_ipc_message(sender, reply) == FALSE) {
if(sender && sender->ops->get_chan_status(sender) == IPC_CONNECT) {
send_via_disk = TRUE;
crm_err("Answer could not be sent via IPC, send via the disk instead");
- crm_info("Writing the TE graph to %s", graph_file);
+ crm_notice("Writing the TE graph to %s", graph_file);
if(write_xml_file(data_set.graph, graph_file, FALSE) < 0) {
- crm_err("TE graph could not be written to disk");
+ crm_err("TE graph could not be written to disk");
}
} else {
crm_info("Peer disconnected, discarding transition graph");
}
}
free_xml(reply);
cleanup_alloc_calculations(&data_set);
if(series_wrap != 0) {
write_xml_file(xml_data, filename, compress);
write_last_sequence(PE_STATE_DIR, series[series_id].name,
seq+1, series_wrap);
}
if(was_processing_error) {
crm_err("Transition %d:"
" ERRORs found during PE processing."
" PEngine Input stored in: %s",
transition_id, filename);
} else if(was_processing_warning) {
crm_warn("Transition %d:"
" WARNINGs found during PE processing."
" PEngine Input stored in: %s",
transition_id, filename);
} else {
- crm_info("Transition %d: PEngine Input stored in: %s",
- transition_id, filename);
+ crm_notice("Transition %d: PEngine Input stored in: %s",
+ transition_id, filename);
}
if(crm_config_error) {
- crm_info("Configuration ERRORs found during PE processing."
- " Please run \"crm_verify -L\" to identify issues.");
+ crm_notice("Configuration ERRORs found during PE processing."
+ " Please run \"crm_verify -L\" to identify issues.");
} else if(crm_config_warning) {
- crm_info("Configuration WARNINGs found during PE processing."
- " Please run \"crm_verify -L\" to identify issues.");
+ crm_notice("Configuration WARNINGs found during PE processing."
+ " Please run \"crm_verify -L\" to identify issues.");
}
if(send_via_disk) {
reply = create_reply(msg, NULL);
crm_xml_add(reply, F_CRM_TGRAPH, graph_file);
crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename);
CRM_ASSERT(reply != NULL);
if(send_ipc_message(sender, reply) == FALSE) {
crm_err("Answer could not be sent");
}
free_xml(reply);
}
free_xml(converted);
crm_free(graph_file);
crm_free(filename);
} else if(strcasecmp(op, CRM_OP_QUIT) == 0) {
crm_warn("Received quit message, terminating");
exit(0);
}
return TRUE;
}
xmlNode *
do_calculations(pe_working_set_t *data_set, xmlNode *xml_input, ha_time_t *now)
{
GListPtr gIter = NULL;
- int rsc_log_level = LOG_NOTICE;
+ int rsc_log_level = LOG_INFO;
/* pe_debug_on(); */
CRM_ASSERT(xml_input || is_set(data_set->flags, pe_flag_have_status));
if(is_set(data_set->flags, pe_flag_have_status) == FALSE) {
set_working_set_defaults(data_set);
data_set->input = xml_input;
data_set->now = now;
if(data_set->now == NULL) {
data_set->now = new_ha_date(TRUE);
}
} else {
crm_trace("Already have status - reusing");
}
crm_debug_5("Calculate cluster status");
stage0(data_set);
gIter = data_set->resources;
for(; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t*)gIter->data;
if(is_set(rsc->flags, pe_rsc_orphan) && rsc->role == RSC_ROLE_STOPPED) {
continue;
}
rsc->fns->print(rsc, NULL, pe_print_log, &rsc_log_level);
}
crm_trace("Applying placement constraints");
stage2(data_set);
crm_trace("Create internal constraints");
stage3(data_set);
crm_trace("Check actions");
stage4(data_set);
crm_trace("Allocate resources");
stage5(data_set);
crm_trace("Processing fencing and shutdown cases");
stage6(data_set);
crm_trace("Applying ordering constraints");
stage7(data_set);
crm_trace("Create transition graph");
stage8(data_set);
crm_trace("=#=#=#=#= Summary =#=#=#=#=");
crm_trace("\t========= Set %d (Un-runnable) =========", -1);
if(crm_log_level > LOG_DEBUG) {
gIter = data_set->actions;
for(; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t*)gIter->data;
if(is_set(action->flags, pe_action_optional) == FALSE
&& is_set(action->flags, pe_action_runnable) == FALSE
&& is_set(action->flags, pe_action_pseudo) == FALSE) {
log_action(LOG_DEBUG_2, "\t", action, TRUE);
}
}
}
return data_set->graph;
}
diff --git a/tools/crm_report.in b/tools/crm_report.in
index cd6e63fd1d..15df3ec7ba 100755
--- a/tools/crm_report.in
+++ b/tools/crm_report.in
@@ -1,400 +1,429 @@
#!/bin/sh
# Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Note the quotes around `$TEMP': they are essential!
TEMP=`getopt \
- -o hv?xl:f:t:n:T:Lpc:dSACHu:MV \
- --long help,cts:,node:,nodes:,from:,to:logfile:,as-directory,single-node,cluster:,user:,version,features \
- -n 'pcmk_report' -- "$@"`
+ -o hv?xl:f:t:n:T:Lpc:dSACHu:MVs \
+ --long help,cts:,cts-log:,node:,nodes:,from:,to:,logfile:,as-directory,single-node,cluster:,user:,version,features \
+ -n 'crm_report' -- "$@"`
eval set -- "$TEMP"
times=""
tests=""
nodes=""
compress=1
cluster="any"
ssh_user="root"
search_logs=1
report_data=`dirname $0`
extra_logs=""
-sanitize_patterns="passw.*"
+sanitize_patterns=""
log_patterns="CRIT: ERROR:"
usage() {
cat<<EOF
usage: `basename $0` -f {YYYY-M-D H:M:S} [-t {YYYY-M-D H:M:S}] [optional options] [dest]
-v increase verbosity
-f, --from time time to start from: YYYY-M-D H:M:S
-t, --to time time to finish at (default: now)
-T, --cts test CTS test or set of tests to extract
--cts-log CTS master logfile
-n, --nodes nodes node names for this cluster
only needed if the cluster is not active on the current machine
accepts both -n "a b" and -n a -n b
-l, --logfile file log file to collect, normally this will be determined automatically
-p patt additional regular expression to match variables to be removed
(default: "passw.*")
-L patt additional regular expression to match in log files for analysis
(default: $log_patterns)
-M collect only the logs specified by -l
-S, --single-node single node operation; don't try to start report collectors on other nodes
-c, --cluster type force the cluster type (corosync,openais,heartbeat,logmaster)
-A, --openais force the cluster type to be OpenAIS
-C, --corosync force the cluster type to be CoroSync
-H, --heartbeat force the cluster type to be Heartbeat
-u, --user user ssh username for cluster nodes (default: root)
dest a custom destination directory
EOF
}
case "$1" in
-v|--version) echo "@VERSION@ - @BUILD_VERSION@"; exit 0;;
--features) echo "@VERSION@ - @BUILD_VERSION@: @PKG_FEATURES@"; exit 0;;
-h|--help) usage; exit 0;;
esac
# Prefer helpers in the same directory if they exist, to simplify development
if [ ! -f $report_data/report.common ]; then
report_data=@datadir@/@PACKAGE@
else
echo "Using local helpers"
fi
. $report_data/report.common
while true; do
case "$1" in
-x) set -x; shift;;
-v) verbose=`expr $verbose + 1`; shift;;
-T|--cts) tests="$tests $2"; shift; shift;;
--cts-log) ctslog="$2"; shift; shift;;
-f|--from) start_time=`get_time "$2"`; shift; shift;;
-t|--to) end_time=`get_time "$2"`; shift; shift;;
-n|--node|--nodes) nodes="$nodes $2"; shift; shift;;
-S|--single-node) nodes="$nodes $host"; shift;;
-E|-l|--logfile) extra_logs="$extra_logs $2"; shift; shift;;
-p) sanitize_patterns="$sanitize_patterns $2"; shift; shift;;
-L) log_patterns="$log_patterns `echo $2 | sed 's/ /\\\W/g'`"; shift; shift;;
-d|--as-directory) compress=0; shift;;
-A|--openais) cluster="openais"; shift;;
-C|--corosync) cluster="corosync"; shift;;
-H|--heartbeat) cluster="heartbeat"; shift;;
-c|--cluster) cluster="$2"; shift; shift;;
-u|--user) ssh_user="$2"; shift; shift;;
-v|--version) echo "@VERSION@ - @BUILD_VERSION@"; exit 0; shift;;
--features) echo "@VERSION@ - @BUILD_VERSION@: @CRM_FEATURES@"; exit 0; shift;;
-M) search_logs=0; shift;;
--) DESTDIR=$2; break;;
-h|--help) usage; exit 0;;
+ # Options for compatibility with hb_report
+ -s) shift;;
+
*) echo "Unknown argument: $1"; usage; exit 1;;
esac
done
collect_data() {
label="$1"
start=`expr $2 - 10`
end=`expr $3 + 10`
masterlog=$4
if [ "x$DESTDIR" != x ]; then
debug "Using custom scratch dir: $DESTDIR"
l_base=$DESTDIR
r_base=$DESTDIR
else
l_base=$HOME/$label
r_base=$label
fi
mkdir -p $l_base
if [ "x$masterlog" != "x" ]; then
dumplogset "$masterlog" $start $end > "$l_base/$HALOG_F"
fi
cat<<EOF>$l_base/.env
LABEL="$label"
REPORT_HOME="$r_base"
REPORT_MASTER="$host"
LOG_START=$start
LOG_END=$end
REMOVE=1
SANITIZE="$sanitize_patterns"
CLUSTER=$cluster
LOG_PATTERNS="$log_patterns"
EXTRA_LOGS="$extra_logs"
SEARCH_LOGS=$search_logs
verbose=$verbose
EOF
for node in $nodes; do
if [ `uname -n` = $node ]; then
cat $l_base/.env $report_data/report.common $report_data/report.collector > $r_base/collector
bash $r_base/collector
else
cat $l_base/.env $report_data/report.common $report_data/report.collector \
| ssh -l $ssh_user -T $node -- "mkdir -p $r_base; cat > $r_base/collector; bash $r_base/collector" | (cd $l_base && tar xf -)
fi
done
analyze $l_base > $l_base/$ANALYSIS_F
if [ -f $l_base/$HALOG_F ]; then
node_events $l_base/$HALOG_F > $l_base/$EVENTS_F
fi
for node in $nodes; do
cat $l_base/$node/$ANALYSIS_F >> $l_base/$ANALYSIS_F
if [ -s $l_base/$node/$EVENTS_F ]; then
cat $l_base/$node/$EVENTS_F >> $l_base/$EVENTS_F
elif [ -s $l_base/$HALOG_F ]; then
awk "\$4==\"$nodes\"" $l_base/$EVENTS_F >> $l_base/$n/$EVENTS_F
fi
done
log " "
if [ $compress = 1 ]; then
fname=`shrink $l_base`
rm -rf $l_base
log "Collected results are available in $fname"
log " "
log "Please create a bug entry at"
log " http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker"
log "Include a description of your problem and attach this tarball"
log " "
log "Thank you for taking time to create this report."
else
log "Collected results are available in $l_base"
fi
log " "
}
#
# check if files have same content in the cluster
#
cibdiff() {
d1=`dirname $1`
d2=`dirname $2`
if [ -f $d1/RUNNING -a -f $d2/RUNNING ] ||
[ -f $d1/STOPPED -a -f $d2/STOPPED ]; then
if which crm_diff > /dev/null 2>&1; then
crm_diff -c -n $1 -o $2
else
info "crm_diff(8) not found, cannot diff CIBs"
fi
else
echo "can't compare cibs from running and stopped systems"
fi
}
diffcheck() {
[ -f "$1" ] || {
echo "$1 does not exist"
return 1
}
[ -f "$2" ] || {
echo "$2 does not exist"
return 1
}
case `basename $1` in
$CIB_F) cibdiff $1 $2;;
$B_CONF) diff -u $1 $2;; # confdiff?
*) diff -u $1 $2;;
esac
}
#
# remove duplicates if files are same, make links instead
#
consolidate() {
for n in $NODES; do
if [ -f $1/$2 ]; then
rm $1/$n/$2
else
mv $1/$n/$2 $1
fi
ln -s ../$2 $1/$n
done
}
analyze_one() {
rc=0
node0=""
for n in $NODES; do
if [ "$node0" ]; then
diffcheck $1/$node0/$2 $1/$n/$2
rc=$(($rc+$?))
else
node0=$n
fi
done
return $rc
}
analyze() {
flist="$HOSTCACHE $MEMBERSHIP_F $CIB_F $CRM_MON_F $B_CONF logd.cf $SYSINFO_F"
for f in $flist; do
printf "Diff $f... "
ls $1/*/$f >/dev/null 2>&1 || {
echo "no $1/*/$f :/"
continue
}
if analyze_one $1 $f; then
echo "OK"
[ "$f" != $CIB_F ] && consolidate $1 $f
else
echo ""
fi
done
}
do_cts() {
- if [ x$ctslog = x ]; then
- ctslog=`findmsg 1 "CTS: Stack:"`
- fi
- if [ x$ctslog = x ]; then
- fatal "No CTS control file detected"
- fi
-
- if [ -z "$nodes" ]; then
- debug "Using CTS control file: $ctslog"
- nodes=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '`
- fi
-
test_sets=`echo $tests | tr ',' ' '`
for test_set in $test_sets; do
+
+ start_time=0
start_test=`echo $test_set | tr '-' ' ' | awk '{print $1}'`
+
+ end_time=0
end_test=`echo $test_set | tr '-' ' ' | awk '{print $2}'`
if [ x$end_test = x ]; then
msg="Extracting test $start_test"
label="CTS-`date +"%a-%d-%b-%Y"`-$start_test"
end_test=`expr $start_test + 1`
else
- msg="Extracting set $start_test to $end_test..."
+ msg="Extracting set $start_test to $end_test"
label="CTS-`date +"%a-%d-%b-%Y"`-$start_test-$end_test"
end_test=`expr $end_test + 1`
fi
if [ $start_test = 0 ]; then
start_pat="BEGINNING [0-9].* TESTS"
else
start_pat="Running test.*\[ *$start_test\]"
fi
- ctslog=`findmsg 1 "$start_pat"`
+
+ if [ x$ctslog = x ]; then
+ ctslog=`findmsg 1 "$start_pat"`
+ fi
+ if [ x$ctslog = x ]; then
+ fatal "No CTS control file detected"
+ fi
+
line=`grep -n "$start_pat" $ctslog | tail -1 | sed 's/:.*//'`
- start_time=`linetime $ctslog $line`
+ if [ ! -z "$line" ]; then
+ start_time=`linetime $ctslog $line`
+ fi
- ctslog=`findmsg 1 "Running test.*\[ *$end_test\]"`
line=`grep -n "Running test.*\[ *$end_test\]" $ctslog | tail -1 | sed 's/:.*//'`
- end_time=`linetime $ctslog $line`
+ if [ ! -z "$line" ]; then
+ end_time=`linetime $ctslog $line`
+ fi
+ if [ -z "$nodes" ]; then
+ debug "Using CTS control file: $ctslog"
+ nodes=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '`
+ fi
+
if [ $end_time -lt $start_time ]; then
debug "Test didn't complete, grabbing everything up to now"
end_time=`date +%s`
fi
- log "$msg (`time2str $start_time` to `time2str $end_time`)"
- collect_data $label $start_time $end_time $ctslog
+ if [ $start_time != 0 ];then
+ log "$msg (`time2str $start_time` to `time2str $end_time`)"
+ collect_data $label $start_time $end_time $ctslog
+ else
+ fatal "$msg failed: not found"
+ fi
done
}
getnodes() {
if [ -z $1 ]; then
cluster=`get_cluster_type`
else
cluster=$1
fi
cluster_cf=`find_cluster_cf $cluster`
# 1. Live
if
ps -ef | egrep -qs [c]rmd
then
debug "Querying CRM for nodes"
cibadmin -Ql -o nodes | awk '
/type="normal"/ {
for( i=1; i<=NF; i++ )
if( $i~/^uname=/ ) {
sub("uname=.","",$i);
sub("\".*","",$i);
print $i;
next;
}
}
'
# 2. hostcache
elif [ -f $HA_STATE_DIR/hostcache ]; then
debug "Reading nodes from $HA_STATE_DIR/hostcache"
awk '{print $1}' $HA_STATE_DIR/hostcache
# 3. ha.cf
elif [ "x$cluster" = "xheartbeat" ]; then
debug "Reading nodes from $cluster_cf"
getcfvar $cluster node $cluster_cf
else
# Look in the logs...
logfile=`findmsg 1 "crm_update_peer"`
debug "Reading nodes from $logfile"
if [ ! -z "$logfile" ]; then
grep crm_update_peer: $logfile | sed s/.*crm_update_peer// | sed s/://g | awk '{print $2}' | grep -v "(null)" | sort -u | tr '\n' ' '
fi
fi
}
if [ "x$tests" != "x" ]; then
do_cts
elif [ "x$start_time" != "x" ]; then
masterlog=""
+
+ if [ -z "$sanitize_patterns" ]; then
+ log "WARNING: The tarball produced by this program may contain"
+ log " sensitive information such as passwords."
+ log ""
+ log "We will attempt to remove such information if you use the"
+ log "-p option. For example: -p \"pass.*\" -p \"user.*\""
+ log ""
+ log "However, doing this may reduce the ability for the recipients"
+ log "to diagnose issues and generally provide assistance."
+ log ""
+ log "IT IS YOUR RESPONSIBILITY TO PROTECT SENSITIVE DATA FROM EXPOSURE"
+ log ""
+ fi
+
if [ -z "$nodes" ]; then
nodes=`getnodes $cluster`
log "Calculated node list: $nodes"
fi
if [ -z "$nodes" ]; then
fatal "Cannot determine node list, please specify manually with --nodes"
fi
if
echo $nodes | grep -qs $host
then
debug "We are a cluster node"
else
debug "We are a log master"
masterlog=`findmsg 1 "crmd\\|CTS"`
fi
if [ -z $end_time ]; then
end_time=`perl -e 'print time()'`
fi
label="pcmk-`date +"%a-%d-%b-%Y"`"
log "Collecting data from $nodes (`time2str $start_time` to `time2str $end_time`)"
collect_data $label $start_time $end_time $masterlog
else
- fatal "Not sure what to do, no tests or times to extract"
+ fatal "Not sure what to do, no tests or time ranges to extract"
fi
diff --git a/tools/hb2openais.sh.in b/tools/hb2openais.sh.in
index 2acd9816bc..6723c3d789 100755
--- a/tools/hb2openais.sh.in
+++ b/tools/hb2openais.sh.in
@@ -1,804 +1,804 @@
#!/bin/bash
# Copyright (C) 2008,2009 Dejan Muhamedagic <dmuhamedagic@suse.de>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
. @sysconfdir@/ha.d/shellfuncs
# utillib.sh moved (sigh!)
# cluster-glue doesn't make its shared data dir available
# we guess, and assume that guessing is safe, that the datadir is the same
testdirs="@datadir@/cluster-glue $HA_NOARCHBIN"
for d in $testdirs; do
if [ -f $d/utillib.sh ]; then
NOARCH_DIR=$d
break
fi
done
test -f $NOARCH_DIR/utillib.sh || {
echo "sorry, could not find utillib.sh in $testdirs"
exit 1
}
. $NOARCH_DIR/utillib.sh
. $NOARCH_DIR/ha_cf_support.sh
PROG=`basename $0`
PROGDIR=`dirname $0`
# the default syslog facility is not (yet) exported by heartbeat
# to shell scripts
#
DEFAULT_HA_LOGFACILITY="daemon"
export DEFAULT_HA_LOGFACILITY
: ${SSH_OPTS="-T"}
usage() {
cat<<EOF
usage: $PROG [-UF] [-u user] [-T directory] [revert]
-U: skip upgrading the CIB to v1.0
-F: force conversion even if it was already done
-u user: a user to sudo with (otherwise, you'd
have to run this as root)
-C: force conversion to corosync (default is openais)
-T directory: a directory containing ha.cf/logd.cf/cib.xml/hostcache
(use for testing); with this option files are not
copied to other nodes and there are no destructive
commands executed; you may run as unprivileged uid
EOF
exit
}
SUDO_USER=""
MYSUDO=""
TEST_DIR=""
FORCE=""
UPGRADE="1"
while getopts UFCu:T:h o; do
case "$o" in
h) usage;;
U) UPGRADE="";;
F) FORCE=1;;
u) SUDO_USER="$OPTARG";;
T) TEST_DIR="$OPTARG";;
C) COROSYNC="1";;
[?]) usage;;
esac
done
shift $(($OPTIND-1))
[ $# -gt 1 ] && usage
[ "$TEST_DIR" ] && [ $# -ne 0 ] && usage
if [ "$COROSYNC" -o -d /etc/corosync ]; then
AIS_CONF=/etc/corosync/corosync.conf
AIS_KEYF=/etc/corosync/authkey
KEYGEN_PROG="corosync-keygen"
COROSYNC=1
else
AIS_CONF=/etc/ais/openais.conf
AIS_KEYF=/etc/ais/authkey
KEYGEN_PROG="ais-keygen"
COROSYNC=""
fi
AIS_CONF_BASE=`basename $AIS_CONF`
PRODUCT=`basename $AIS_CONF_BASE .conf`
AUTHENTICATION=on
MAXINTERFACE=2
MCASTPORT=5405
RRP_MODE=active
SUPPORTED_RESPAWNS="pingd evmsd"
PY_HELPER=$HA_BIN/hb2openais-helper.py
CRM_VARLIB=$HA_VARLIB/crm
CIB=$CRM_VARLIB/cib.xml
CIBSIG=$CRM_VARLIB/cib.xml.sig
CIBLAST=$CRM_VARLIB/cib.xml.last
CIBLAST_SIG=$CRM_VARLIB/cib.xml.sig.last
HOSTCACHE=$HA_VARLIB/hostcache
HB_UUID=$HA_VARLIB/hb_uuid
DONE_F=$HA_VARRUN/heartbeat/.$PROG.conv_done
BACKUPDIR=/var/tmp/`basename $PROG .sh`.backup
RM_FILES=" $CIBSIG $CIBLAST $CIBLAST_SIG"
REMOTE_RM_FILES=" $CIB $RM_FILES"
BACKUP_FILES=" $AIS_CONF $AIS_KEYF $REMOTE_RM_FILES "
DIST_FILES=" $AIS_CONF $AIS_KEYF $DONE_F "
MAN_TARF=/var/tmp/`basename $PROG .sh`.tar.gz
if [ "$TEST_DIR" ]; then
cp $TEST_DIR/cib.xml $TEST_DIR/cib-out.xml
CIB=$TEST_DIR/cib-out.xml
HOSTCACHE=$TEST_DIR/hostcache
HA_CF=$TEST_DIR/ha.cf
AIS_CONF=$TEST_DIR/$AIS_CONF_BASE
if [ "$SUDO_USER" ]; then
warning "-u option ignored when used with -T"
fi
else
ps -ef | grep -wqs [c]rmd &&
fatal "you must first stop heartbeat on _all_ nodes"
if [ "$SUDO_USER" ]; then
MYSUDO="sudo -u $SUDO_USER"
fi
fi
CIB_file=$CIB
CONF=$HA_CF
LOGD_CF=`findlogdcf $TEST_DIR /etc $HA_DIR`
export CIB_file LOGD_CF
prerequisites() {
test -f $HA_CF ||
fatal "$HA_CF does not exist: cannot proceed"
iscfvartrue crm || grep -w "^crm" $HA_CF | grep -wqs respawn ||
fatal "crm is not enabled: we cannot convert v1 configurations"
$DRY test -f $CIB ||
fatal "CIB $CIB does not exist: cannot proceed"
[ "$FORCE" ] && rm -f "$DONE_F"
if [ -f "$DONE_F" ]; then
info "Conversion to OpenAIS already done, exiting"
exit 0
fi
}
# some notes about unsupported stuff
unsupported() {
respawned_progs=`awk '/^respawn/{print $3}' $HA_CF |while read p; do basename $p; done`
grep -qs "^serial" $HA_CF &&
warning "serial media is not supported by OpenAIS"
for prog in $respawned_progs; do
case $prog in
mgmtd|pingd|evmsd) : these are fine
;;
*)
warning "program $prog is being controlled by heartbeat (thru respawn)"
warning "you have to find another way of running it"
;;
esac
done
}
#
# find nodes for this cluster
#
getnodes() {
# 1. hostcache
if [ -f $HOSTCACHE ]; then
awk '{print $1}' $HOSTCACHE
return
fi
# 2. ha.cf
getcfvar node
}
#
# does ssh work?
#
testsshuser() {
if [ "$2" ]; then
ssh -T -o Batchmode=yes $2@$1 true 2>/dev/null
else
ssh -T -o Batchmode=yes $1 true 2>/dev/null
fi
}
findsshuser() {
for u in "" $TRY_SSH; do
rc=0
for n in `getnodes`; do
[ "$node" = "$WE" ] && continue
testsshuser $n $u || {
rc=1
break
}
done
if [ $rc -eq 0 ]; then
echo $u
return 0
fi
done
return 1
}
important() {
echo "IMPORTANT: $*" >&2
}
newportinfo() {
important "the multicast port number on $1 is set to $2"
important "please update your firewall rules (if any)"
}
changemediainfo() {
important "$PRODUCT uses multicast for communication"
important "please make sure that your network infrastructure supports it"
}
multicastinfo() {
info "multicast for $PRODUCT ring $1 set to $2:$3"
}
netaddrinfo() {
info "network address for $PRODUCT ring $1 set to $2"
}
backup_files() {
[ "$TEST_DIR" ] && return
info "backing up $BACKUP_FILES to $BACKUPDIR"
$DRY mkdir $BACKUPDIR || {
echo sorry, could not create $BACKUPDIR directory
echo please cleanup
exit 1
}
if [ -z "$DRY" ]; then
tar cf - $BACKUP_FILES | gzip > $BACKUPDIR/$WE.tar.gz || {
echo sorry, could not create $BACKUPDIR/$WE.tar.gz
exit 1
}
else
$DRY "tar cf - $BACKUP_FILES | gzip > $BACKUPDIR/$WE.tar.gz"
fi
}
revert() {
[ "$TEST_DIR" ] && return
test -d $BACKUPDIR || {
echo sorry, there is no $BACKUPDIR directory
echo cannot revert
exit 1
}
info "restoring $BACKUP_FILES from $BACKUPDIR/$WE.tar.gz"
gzip -dc $BACKUPDIR/$WE.tar.gz | (cd / && tar xf -) || {
echo sorry, could not unpack $BACKUPDIR/$WE.tar.gz
exit 1
}
}
pls_press_enter() {
[ "$TEST_DIR" ] && return
cat<<EOF
Please press enter to continue or ^C to exit ...
EOF
read junk
echo ""
}
introduction() {
cat<<EOF
This is a Heartbeat to OpenAIS conversion tool.
* IMPORTANT * IMPORTANT * IMPORTANT * IMPORTANT * IMPORTANT *
Please read this and don't proceed before understanding what
we are trying to do and what is required.
1. You need to know your cluster in detail. This program will
inform you on changes it makes. It is up to you to verify
that the changes are meaningful. We will also probably ask
some questions now and again.
2. This procedure is supposed to be run on one node only.
Although the main cluster configuration (the CIB) is
automatically replicated, there are some things which have to
be copied by other means. For that to work, we need sshd
running on all nodes and root access working.
3. Do not run this procedure on more than one node!
EOF
pls_press_enter
cat<<EOF
The procedure consists of two parts: the OpenAIS
configuration and the Pacemaker/CRM CIB configuration.
The first part is obligatory. The second part may be skipped
unless your cluster configuration requires changes due to the
change from Heartbeat to OpenAIS.
We will try to analyze your configuration and let you know
whether the CIB configuration should be changed as well.
However, you will still have a choice to skip the CIB
mangling part in case you want to do that yourself.
The next step is to create the OpenAIS configuration. If you
want to leave, now is the time to interrupt the program.
EOF
pls_press_enter
}
confirm() {
while :; do
printf "$1 (y/n) "
read ans
if echo $ans | grep -iqs '^[yn]'; then
echo $ans | grep -iqs '^y'
return $?
else
echo Please answer with y or n
fi
done
}
want_to_proceed() {
[ "$TEST_DIR" ] && return 0
confirm "Do you want to proceed?"
}
intro_part2() {
cat<<EOF
The second part of the configuration deals with the CIB.
According to our analysis (you should have seen some
messages), this step is necessary.
EOF
want_to_proceed || return
}
gethbmedia() {
grep "^[bum]cast" $HA_CF
}
pl_ipcalc() {
perl -e '
# stolen from internet!
my $ipaddr=$ARGV[0];
my $nmask=$ARGV[1];
my @addrarr=split(/\./,$ipaddr);
my ( $ipaddress ) = unpack( "N", pack( "C4",@addrarr ) );
my @maskarr=split(/\./,$nmask);
my ( $netmask ) = unpack( "N", pack( "C4",@maskarr ) );
# Calculate network address by logical AND operation of addr &
# netmask
# and convert network address to IP address format
my $netadd = ( $ipaddress & $netmask );
my @netarr=unpack( "C4", pack( "N",$netadd ) );
my $netaddress=join(".",@netarr);
print "$netaddress\n";
' $1 $2
}
get_if_val() {
test "$1" || return
awk -v key=$1 '
{ for( i=1; i<=NF; i++ )
if( match($i,key) ) {
sub(key,"",$i);
print $i
exit
}
}'
}
netaddress() {
ip=`ifconfig $1 | grep 'inet addr:' | get_if_val addr:`
mask=`ifconfig $1 | grep 'Mask:' | get_if_val Mask:`
if test "$mask"; then
pl_ipcalc $ip $mask
else
warning "could not get the network mask for interface $1"
fi
}
sw=0
do_tabs() {
for i in `seq $sw`; do printf "\t"; done
}
newstanza() {
do_tabs
printf "%s {\n" $1
- let sw=sw+1
+ sw=$(($sw+1))
}
endstanza() {
- let sw=sw-1
+ sw=$(($sw-1))
do_tabs
printf "}\n"
}
setvalue() {
name=$1
val=$2
test "$val" || {
warning "sorry, no value set for $name"
}
do_tabs
echo "$name: $val"
}
setcomment() {
do_tabs
echo "# $*"
}
setdebug() {
[ "$HA_LOGLEVEL" = debug ] &&
echo "on" || echo "off"
}
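# Sketch of the stanza helpers above: the call sequence
#   newstanza totem; setvalue version 2; endstanza
# emits the indented block
#   totem {
#           version: 2
#   }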
WE=`uname -n` # who am i?
if [ "$1" = revert ]; then
revert
exit
fi
test -d $BACKUPDIR &&
fatal "please remove the backup directory: $BACKUPDIR"
prerequisites
introduction
backup_files
unsupported
# 1. Generate the openais.conf
prochbmedia() {
while read media_type iface address rest; do
info "Processing interface $iface of type $media_type ..."
case "$media_type" in
ucast|bcast) mcastaddr=226.94.1.1 ;;
mcast) mcastaddr=$address ;;
esac
if [ -z "$local_mcastport" ]; then
local_mcastport="$MCASTPORT"
fi
netaddress="`netaddress $iface`"
if [ "$netaddress" ]; then
local_mcastport=$(($local_mcastport+1))
newportinfo $iface $local_mcastport
echo "$netaddress" "$mcastaddr" "$local_mcastport"
else
warning "cannot process interface $iface!"
fi
done
}
openaisconf() {
info "Generating $AIS_CONF from $HA_CF ..."
# the totem stanza
cpunum=`grep -c ^processor /proc/cpuinfo`
setcomment "Generated by hb2openais on `date`"
setcomment "Please read the $AIS_CONF_BASE.5 manual page"
[ "$COROSYNC" ] &&
setvalue compatibility whitetank
newstanza aisexec
setcomment "Run as root - this is necessary to be able to manage resources with Pacemaker"
setvalue user root
setvalue group root
endstanza
newstanza service
setcomment "Load the Pacemaker Cluster Resource Manager"
setvalue name pacemaker
setvalue ver 0
if uselogd; then
setvalue use_logd yes
important "Make sure that the logd service is started (chkconfig logd on)"
fi
if grep -qs "^respawn.*mgmtd" $HA_CF; then
setvalue use_mgmtd yes
fi
endstanza
newstanza totem
setvalue version 2
setcomment "How long before declaring a token lost (ms)"
setvalue token 1000
setcomment "How many token retransmits before forming a new configuration"
setvalue token_retransmits_before_loss_const 20
setcomment "How long to wait for join messages in the membership protocol (ms)"
setvalue join 50
setcomment "How long to wait for consensus to be achieved before"
setcomment "starting a new round of membership configuration (ms)"
setvalue consensus 1200
setcomment "Turn off the virtual synchrony filter"
setvalue vsftype none
setcomment "Number of messages that may be sent by one processor on receipt of the token"
setvalue max_messages 20
setcomment "Limit generated nodeids to 31-bits (positive signed integers)"
setvalue clear_node_high_bit yes
setcomment "Enable encryption"
setvalue secauth $AUTHENTICATION
if [ "$AUTHENTICATION" = on ]; then
setvalue threads $cpunum
else
setvalue threads 0
fi
setcomment "Optionally assign a fixed node id (integer)"
setcomment "nodeid: 1234"
ring=0
gethbmedia | prochbmedia |
sort -u |
while read network addr port; do
if [ $ring -ge $MAXINTERFACE ]; then
warning "$PRODUCT supports only $MAXINTERFACE rings!"
info "consider bonding interfaces"
warning "skipping communication link on $network"
setcomment "$network skipped: too many rings"
continue
fi
newstanza interface
setvalue ringnumber $ring
setvalue bindnetaddr $network
netaddrinfo $ring $network
multicastinfo $ring $addr $port
setvalue mcastport $port
setvalue mcastaddr $addr
- let ring=$ring+1
+ ring=$(($ring+1))
endstanza
done
mediacnt=`gethbmedia 2>/dev/null | prochbmedia 2>/dev/null | sort -u | wc -l`
if [ $mediacnt -ge 2 ]; then
setvalue rrp_mode $RRP_MODE
fi
changemediainfo
endstanza
# the logging stanza
getlogvars
# enforce some syslog facility
[ "$COROSYNC" ] &&
TO_FILE=to_logfile ||
TO_FILE=to_file
debugsetting=`setdebug`
newstanza logging
setvalue debug $debugsetting
setvalue fileline off
setvalue to_stderr no
setvalue timestamp off
if [ "$HA_LOGFILE" ]; then
setvalue $TO_FILE yes
setvalue logfile $HA_LOGFILE
else
setvalue $TO_FILE no
fi
if [ "$HA_LOGFACILITY" ]; then
setvalue to_syslog yes
setvalue syslog_facility $HA_LOGFACILITY
else
setvalue to_syslog no
fi
newstanza logger_subsys
setvalue subsys AMF
setvalue debug $debugsetting
endstanza
endstanza
newstanza amf
setvalue mode disabled
endstanza
}
if [ -z "$DRY" ]; then
openaisconf > $AIS_CONF ||
fatal "cannot create $AIS_CONF"
grep -wqs interface $AIS_CONF ||
fatal "no media found in $HA_CF"
else
openaisconf
fi
[ "$AIS_KEYF" ] && {
info "Generating a key for OpenAIS authentication ..."
if [ "$TEST_DIR" ]; then
echo would run: $DRY $KEYGEN_PROG
else
$DRY $KEYGEN_PROG ||
fatal "cannot generate the key using $KEYGEN_PROG"
fi
}
# remove various files which could get in a way
if [ -z "$TEST_DIR" ]; then
$DRY rm -f $RM_FILES
fi
fixcibperms() {
[ "$TEST_DIR" ] && return
uid=`ls -ldn $CRM_VARLIB | awk '{print $3}'`
gid=`ls -ldn $CRM_VARLIB | awk '{print $4}'`
$DRY $MYSUDO chown $uid:$gid $CIB
}
upgrade_cib() {
$DRY $MYSUDO cibadmin --upgrade --force
$DRY $MYSUDO crm_verify -V -x $CIB_file
}
py_proc_cib() {
tmpfile=`maketempfile`
$MYSUDO sh -c "python $PY_HELPER $* <$CIB >$tmpfile" ||
fatal "cannot process cib: $PY_HELPER $*"
$DRY $MYSUDO mv $tmpfile $CIB
}
set_property() {
py_proc_cib set_property $*
}
# remove the nodes section from the CIB
py_proc_cib set_node_ids
info "Edited the nodes' ids in the CIB"
numnodes=`getnodes | wc -w`
[ $numnodes -eq 2 ] &&
set_property no-quorum-policy ignore
set_property expected-nodes $numnodes overwrite
info "Done converting ha.cf to $AIS_CONF_BASE"
important "Please check the resulting $AIS_CONF"
important "and in particular interface stanzas and logging."
important "If you find problems, please edit $AIS_CONF now!"
#
# first part done (openais), on to the CIB
analyze_cib() {
info "Analyzing the CIB..."
$MYSUDO sh -c "python $PY_HELPER analyze_cib <$CIB"
}
check_respawns() {
rc=1
for p in $SUPPORTED_RESPAWNS; do
grep -qs "^respawn.*$p" $HA_CF && {
info "a $p resource has to be created"
rc=0
}
done
return $rc
}
part2() {
intro_part2 || return 0
opts="-c $HA_CF"
[ "$TEST_DIR" ] && opts="-T $opts"
py_proc_cib $opts convert_cib
info "Processed the CIB successfully"
}
# make the user believe that something's happening :)
some_dots_idle() {
[ "$TEST_DIR" ] && return
cnt=0
printf "$2 ."
while [ $cnt -lt $1 ]; do
sleep 1
printf "."
cnt=$((cnt+1))
done
echo
}
print_dc() {
crm_mon -1 | awk '/Current DC/{print $3}'
}
dcidle() {
dc=`$MYSUDO print_dc`
if [ "$dc" = "$WE" ]; then
maxcnt=60 cnt=0
while [ $cnt -lt $maxcnt ]; do
stat=`$MYSUDO crmadmin -S $dc`
echo $stat | grep -qs S_IDLE && break
[ "$1" = "-v" ] && echo $stat
sleep 1
printf "."
cnt=$((cnt+1))
done
echo $stat | grep -qs S_IDLE
else
some_dots_idle 10 #just wait for 10 seconds
fi
}
wait_crm() {
[ "$TEST_DIR" ] && return
cnt=10
dc=""
while [ -z "$dc" -a $cnt -gt 0 ]; do
dc=`$MYSUDO print_dc`
cnt=$((cnt-1))
done
if [ x = x"$dc" ]; then
echo "sorry, no dc found/elected"
exit 1
fi
dcidle
}
manage_cluster() {
if [ "$TEST_DIR" ]; then
echo would run: /etc/init.d/openais $1
else
$DRY /etc/init.d/openais $1
fi
}
tune_ocfs2() {
cat<<EOF
The ocfs2 metadata has to change to reflect the cluster stack
change. To do that, we have to start the cluster stack on
this node.
EOF
pls_press_enter
py_proc_cib manage_ocfs2 stop
manage_cluster start
some_dots_idle 10 "waiting for crm to start"
if $DRY wait_crm; then
for fsdev; do
info "converting the ocfs2 meta-data on $fsdev"
if [ "$TEST_DIR" ]; then
echo would run: tunefs.ocfs2 --update-cluster-stack -y $fsdev
else
$DRY tunefs.ocfs2 --update-cluster-stack -y $fsdev
fi
done
else
fatal "could not start pacemaker; please check the logs"
fi
manage_cluster stop
py_proc_cib manage_ocfs2 start
}
convert_csm() {
info "converting all EVMS2 CSM containers"
if [ "$TEST_DIR" ]; then
echo would run: /usr/sbin/csm-converter --scan
else
$DRY /usr/sbin/csm-converter --scan ||
fatal "CSM conversion failed! Aborting"
fi
}
analyze_cib
rc=$?
[ $rc -gt 1 ] && fatal "error while analyzing CIB"
if [ $rc -eq 1 ] || check_respawns; then
part2
else
info "No need to process CIB further"
fi
# upgrade the CIB to v1.0
if [ "$UPGRADE" ]; then
upgrade_cib
info "Upgraded the CIB to v1.0"
else
info "Skipped upgrading the CIB to v1.0"
important "You should do this sooner rather than later!"
fi
fixcibperms
convert_csm
ocfs2_devs=`$MYSUDO sh -c "python $PY_HELPER $opts print_ocfs2_devs <$CIB"`
[ "$ocfs2_devs" ] &&
tune_ocfs2 $ocfs2_devs
[ "$TEST_DIR" ] && exit
$DRY touch $DONE_F
# finally, copy files to all nodes
info "Copying files to other nodes ..."
info "(please provide root password if prompted)"
ssh_opts="-l root $SSH_OPTS"
rc=0
for node in `getnodes`; do
[ "$node" = "$WE" ] &&
continue
if [ "$DRY" ]; then
$DRY "(cd / && tar cf - $DIST_FILES) |
ssh $ssh_opts $node \"rm -f $REMOTE_RM_FILES &&
cd / && tar xf -\""
else
echo "Copying to node $node ..."
(cd / && tar cf - $DIST_FILES) |
ssh $ssh_opts $node "rm -f $REMOTE_RM_FILES &&
cd / && tar xf -"
- let rc=$rc+$?
+ rc=$(($rc+$?))
fi
done
info "Done transfering files"
if [ $rc -ne 0 ]; then
warning "we could not update some ssh nodes"
important "before starting the cluster stack on those nodes:"
important "copy and unpack $MAN_TARF (from the / directory)"
important "and execute: rm -f $REMOTE_RM_FILES"
(cd / && tar cf - $DIST_FILES | gzip > $MAN_TARF)
fi
diff --git a/tools/report.collector b/tools/report.collector
index c708723372..930047e433 100644
--- a/tools/report.collector
+++ b/tools/report.collector
@@ -1,651 +1,654 @@
# Copyright (C) 2007 Dejan Muhamedagic <dmuhamedagic@suse.de>
# Almost everything as part of hb_report
# Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
# Cleanups, refactoring, extensions
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
if
echo $REPORT_HOME | grep -qs '^/'
then
- debug "Using full path to working directory"
+ debug "Using full path to working directory: $REPORT_HOME"
else
- debug "Canonicalizing working directory path"
REPORT_HOME="$HOME/$REPORT_HOME"
+ debug "Canonicalizing working directory path: $REPORT_HOME"
fi
findlogdcf() {
for f in \
`test -x $CRM_DAEMON_DIR/ha_logd &&
which strings > /dev/null 2>&1 &&
strings $CRM_DAEMON_DIR/ha_logd | grep 'logd\.cf'` \
`for d; do echo $d/logd.cf $d/ha_logd.cf; done`
do
if [ -f "$f" ]; then
echo $f
debug "Located logd.cf at: $f"
return 0
fi
done
debug "Could not determine logd.cf location"
return 1
}
#
# find files newer than a and older than b
#
isnumber() {
echo "$*" | grep -qs '^[0-9][0-9]*$'
}
touchfile() {
t=`mktemp` &&
perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' &&
echo $t
}
find_files_clean() {
[ -z "$from_stamp" ] || rm -f "$from_stamp"
[ -z "$to_stamp" ] || rm -f "$to_stamp"
from_stamp=""
to_stamp=""
}
find_files() {
dirs=$1
from_time=$2
to_time=$3
isnumber "$from_time" && [ "$from_time" -gt 0 ] || {
warning "sorry, can't find files in [ $1 ] based on time if you don't supply time"
return
}
trap find_files_clean 0
if ! from_stamp=`touchfile $from_time`; then
warning "sorry, can't create temporary file for find_files"
return
fi
findexp="-newer $from_stamp"
if isnumber "$to_time" && [ "$to_time" -gt 0 ]; then
if ! to_stamp=`touchfile $to_time`; then
warning "sorry, can't create temporary file for find_files"
find_files_clean
return
fi
findexp="$findexp ! -newer $to_stamp"
fi
find $dirs -type f $findexp
find_files_clean
trap "" 0
}
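# Usage sketch (hypothetical path and epoch timestamps): list files under
# /var/log touched between the two times; a missing or zero $3 means no
# upper bound:
#   find_files /var/log 1285372800 1285376400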
#
# check permissions of files/dirs
#
pl_checkperms() {
perl -e '
# check permissions and ownership
# uid and gid are numeric
# everything must match exactly
# no error checking! (file should exist, etc)
($filename, $perms, $in_uid, $in_gid) = @ARGV;
($mode,$uid,$gid) = (stat($filename))[2,4,5];
$p=sprintf("%04o", $mode & 07777);
$p ne $perms and exit(1);
$uid ne $in_uid and exit(1);
$gid ne $in_gid and exit(1);
' $*
}
num_id() {
getent $1 $2 | awk -F: '{print $3}'
}
chk_id() {
[ "$2" ] && return 0
echo "$1: id not found"
return 1
}
check_perms() {
while read type f p uid gid; do
[ -$type $f ] || {
echo "$f wrong type or doesn't exist"
continue
}
n_uid=`num_id passwd $uid`
chk_id "$uid" "$n_uid" || continue
n_gid=`num_id group $gid`
chk_id "$gid" "$n_gid" || continue
pl_checkperms $f $p $n_uid $n_gid || {
echo "wrong permissions or ownership for $f:"
ls -ld $f
}
done
}
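# check_perms reads one "type file perms uid gid" line per object from
# stdin -- the format essential_files emits below -- and the type letter
# becomes the test(1) flag via -$type.  Illustrative input:
#
#   check_perms <<EOF
#   d /var/lib/pengine 0750 hacluster haclient
#   EOF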
#
# coredumps
#
findbinary() {
random_binary=`which cat 2>/dev/null` # any binary will do: gdb only needs it to read the core's "Core was generated by" note
binary=`gdb $random_binary $1 < /dev/null 2>/dev/null |
grep 'Core was generated' | awk '{print $5}' |
sed "s/^.//;s/[.':]*$//"`
if [ x = x"$binary" ]; then
debug "Could not detect the program name for core $1 from the gdb output; will try with file(1)"
binary=$(file $1 | awk '/from/{
for( i=1; i<=NF; i++ )
if( $i == "from" ) {
print $(i+1)
break
}
}')
binary=`echo $binary | tr -d "'"`
binary=$(echo $binary | tr -d '`')
if [ "$binary" ]; then
binary=`which $binary 2>/dev/null`
fi
fi
if [ x = x"$binary" ]; then
warning "Could not find the program path for core $1"
return
fi
fullpath=`which $binary 2>/dev/null`
if [ x = x"$fullpath" ]; then
if [ -x $CRM_DAEMON_DIR/$binary ]; then
echo $CRM_DAEMON_DIR/$binary
debug "Found the program at $CRM_DAEMON_DIR/$binary for core $1"
else
warning "Could not find the program path for core $1"
fi
else
echo $fullpath
debug "Found the program at $fullpath for core $1"
fi
}
getbt() {
which gdb > /dev/null 2>&1 || {
warning "Please install gdb to get backtraces"
return
}
for corefile; do
absbinpath=`findbinary $corefile`
[ x = x"$absbinpath" ] && continue
echo "====================== start backtrace ======================"
ls -l $corefile
gdb -batch -n -quiet -ex "${BT_OPTS:-thread apply all bt full}" -ex quit \
$absbinpath $corefile 2>/dev/null
echo "======================= end backtrace ======================="
done
}
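# getbt drives gdb in batch mode; the backtrace commands can be
# overridden via BT_OPTS (see above).  A hypothetical invocation
# against a single core file:
#
#   BT_OPTS="bt" getbt /var/lib/heartbeat/cores/root/core.1234
#
# The default, "thread apply all bt full", dumps a full backtrace for
# every thread, which is usually what you want from a daemon core.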
getconfig() {
target=$1; shift;
for cf in $*; do
- if [ -f "$cf" ]; then
- cp -p "$cf" $target/
+ if [ -e "$cf" ]; then
+ cp -a "$cf" $target/
fi
done
crm_uuid -r > $target/$HB_UUID_F 2>&1
if
ps -ef | egrep -qs '[c]rmd'
then
crm_mon -1 2>&1 | grep -v '^Last upd' > $target/$CRM_MON_F
- cibadmin -Ql > $target/${CIB_F}.live
+ cibadmin -Ql 2>/dev/null > $target/${CIB_F}.live
crm_node -p > $target/$MEMBERSHIP_F 2>&1
echo "$host" > $target/RUNNING
else
echo "$host" > $target/STOPPED
fi
if [ -f "$target/$CIB_F" ]; then
crm_verify -V -x $target/$CIB_F >$target/$CRM_VERIFY_F 2>&1
CIB_file=$target/$CIB_F crm configure show >$target/$CIB_TXT_F 2>&1
fi
}
#
# remove values of sensitive attributes
#
# this is not proper xml parsing, but it will work under the
# circumstances
sanitize_xml_attrs() {
sed $(
for patt in $SANITIZE; do
echo "-e /name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/"
done
)
}
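# For illustration, with SANITIZE set to, say, "passw.*", the generated
# sed program is:
#
#   sed -e '/name="passw.*"/s/value="[^"]*"/value="****"/'
#
# so   <nvpair name="password" value="secret"/>
# becomes  <nvpair name="password" value="****"/>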
sanitize_hacf() {
awk '
$1=="stonith_host"{ for( i=5; i<=NF; i++ ) $i="****"; }
{print}
'
}
sanitize_one_clean() {
[ -z "$tmp" ] || rm -f "$tmp"
tmp=""
[ -z "$ref" ] || rm -f "$ref"
ref=""
}
sanitize() {
file=$1
compress=""
if [ -z "$SANITIZE" ]; then
return
fi
echo $file | grep -qs 'gz$' && compress=gzip
echo $file | grep -qs 'bz2$' && compress=bzip2
if [ "$compress" ]; then
decompress="$compress -dc"
else
compress=cat
decompress=cat
fi
trap sanitize_one_clean 0
tmp=`mktemp`
ref=`mktemp`
if [ -z "$tmp" -o -z "$ref" ]; then
sanitize_one_clean
fatal "cannot create temporary files"
fi
touch -r $file $ref # save the mtime
if [ "`basename $file`" = ha.cf ]; then
sanitize_hacf
else
$decompress | sanitize_xml_attrs | $compress
fi < $file > $tmp
mv $tmp $file
# note: cleaning $tmp up is still needed even after it's renamed
# because its temp directory is still there.
touch -r $ref $file
sanitize_one_clean
trap "" 0
}
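# Usage sketch: sanitize rewrites a file in place while preserving its
# mtime (the log-slicing code relies on untouched file times), e.g.:
#
#   sanitize $REPORT_HOME/$host/cib.xml
#
# Compressed files (.gz/.bz2) are transparently decompressed, filtered
# and recompressed.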
pickfirst() {
for x; do
which $x >/dev/null 2>&1 && {
echo $x
return 0
}
done
return 1
}
#
# get some system info
#
distro() {
if
which lsb_release >/dev/null 2>&1
then
lsb_release -d
debug "Using lsb_release for distribution info"
return
fi
relf=`ls /etc/debian_version 2>/dev/null` ||
relf=`ls /etc/slackware-version 2>/dev/null` ||
relf=`ls -d /etc/*-release 2>/dev/null` && {
for f in $relf; do
test -f $f && {
echo "`ls $f` `cat $f`"
debug "Found `echo $relf | tr '\n' ' '` distribution release file(s)"
return
}
done
}
warning "No lsb_release, no /etc/*-release, no /etc/debian_version: no distro information"
}
pkg_ver() {
if which dpkg >/dev/null 2>&1 ; then
pkg_mgr="deb"
elif which rpm >/dev/null 2>&1 ; then
pkg_mgr="rpm"
elif which pkg_info >/dev/null 2>&1 ; then
pkg_mgr="pkg_info"
elif which pkginfo >/dev/null 2>&1 ; then
pkg_mgr="pkginfo"
else
warning "Unknown package manager"
return
fi
debug "The package manager is: $pkg_mgr"
echo "The package manager is: $pkg_mgr"
# for Linux .deb based systems
case $pkg_mgr in
deb)
dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W
for pkg in $*; do
if dpkg-query -W $pkg 2>/dev/null ; then
debug "Verifying installation of: $pkg"
echo "Verifying installation of: $pkg"
debsums -s $pkg 2>/dev/null
fi
done
;;
rpm)
rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n'
for pkg in $*; do
if rpm -q $pkg >/dev/null 2>&1 ; then
debug "Verifying installation of: $pkg"
echo "Verifying installation of: $pkg"
rpm --verify $pkg
fi
done
;;
pkg_info)
pkg_info
;;
pkginfo)
pkginfo | awk '{print $3}' # format?
;;
esac
}
getbacktraces() {
debug "Looking for backtraces: $*"
flist=$(
for f in `find_files "$CRM_CORE_DIRS" $1 $2`; do
bf=`basename $f`
test `expr match $bf core` -gt 0 &&
echo $f
done)
if [ "$flist" ]; then
log "Found core files: `echo $flist | tr '\n' ' '`"
getbt "$flist"
fi
}
getpeinputs() {
flist=$(
find_files $PE_STATE_DIR $1 $2 | sed "s,`dirname $PE_STATE_DIR`/,,g"
)
if [ "$flist" ]; then
(cd `dirname $PE_STATE_DIR` && tar cf - $flist) | (cd $3 && tar xf -)
debug "found `echo $flist | wc -w` pengine input files in $PE_STATE_DIR"
fi
}
#
# some basic system info and stats
#
sys_info() {
cluster=$1; shift
echo "Platform: `uname`"
echo "Kernel release: `uname -r`"
echo "Architecture: `uname -m`"
if [ `uname` = Linux ]; then
echo "Distribution: `distro`"
fi
cibadmin --version 2>&1
cibadmin -! 2>&1
case $1 in
openais)
: echo "openais version: how?"
;;
corosync)
/usr/sbin/corosync -v 2>&1
;;
heartbeat)
echo "heartbeat version: `$CRM_DAEMON_DIR/heartbeat -V 2>&1`"
;;
esac
- # TODO: Get cluster-glue build version
+ # Cluster glue version hash (if available)
+ stonith -V
+
+ # Resource agents version hash
echo "resource-agents: `grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs`"
pkg_ver $*
}
sys_stats() {
set -x
uname -n
uptime
ps axf
ps auxw
top -b -n 1
ifconfig -a
ip addr list
netstat -i
arp -an
test -d /proc && {
cat /proc/cpuinfo
}
lsscsi
lspci
mount
df
set +x
}
dlm_dump() {
if which dlm_tool >/dev/null 2>&1 ; then
echo NOTICE - Lockspace overview:
dlm_tool ls
dlm_tool ls | grep name |
while read X N ; do
echo NOTICE - Lockspace $N:
dlm_tool lockdump $N
done
echo NOTICE - Lockspace history:
dlm_tool dump
fi
}
iscfvarset() {
test "`getcfvar $1 $2`"
}
iscfvartrue() {
getcfvar $1 $2 $3 | egrep -qsi "^(true|y|yes|on|1)"
}
uselogd() {
cf_file=$2
case $1 in
heartbeat)
iscfvartrue $1 use_logd $cf_file && return 0 # if use_logd true
iscfvarset $1 logfacility $cf_file ||
iscfvarset $1 logfile $cf_file ||
iscfvarset $1 debugfile $cf_file ||
return 0 # or none of the log options set
false
;;
*)
iscfvartrue $1 use_logd $cf_file
;;
esac
}
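# Decision table for the heartbeat branch above -- uselogd answers
# "should the logd.cf settings be consulted?":
#
#   use_logd true in ha.cf                   -> yes (return 0)
#   any of logfacility/logfile/debugfile set -> no  (falls through to false)
#   no log options set at all                -> yes (heartbeat defers to logd)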
get_logfile() {
cf_type=$1
cf_file="$2"
cf_logd="$3"
facility_var="logfacility"
if [ -f "$cf_logd" ]; then
if uselogd $cf_type "$cf_file"; then
cf_file="$cf_logd"
cf_type="logd"
fi
fi
debug "Reading $cf_type log settings"
case $cf_type in
openais|corosync)
debug "Reading log settings from $cf_file"
if iscfvartrue $cf_type to_syslog $cf_file; then
facility_var=syslog_facility
elif iscfvartrue $cf_type to_file $cf_file; then
logfile=`getcfvar $cf_type logfile $cf_file`
fi
;;
heartbeat|logd)
debug "Reading log settings from $cf_file"
if
iscfvartrue $cf_type debug $cf_file
then
logfile=`getcfvar $cf_type debugfile $cf_file`
else
logfile=`getcfvar $cf_type logfile $cf_file`
fi
;;
*) debug "Unknown cluster type: $cf_type"
echo "/var/log/messages"
;;
esac
if [ "x$logfile" != "x" -a -f "$logfile" ]; then
echo $logfile
else
facility=`getcfvar $cf_type $facility_var $cf_file`
[ "" = "$facility" ] && facility="daemon"
if [ "none" = "$facility" ]; then
fatal "No logging is configured"
fi
msg="Mark:pcmk:`perl -e 'print time()'`"
logger -p $facility.info $msg >/dev/null 2>&1
findmsg 1 "$msg"
fi
}
essential_files() {
cat<<EOF
d $HA_STATE_DIR 0755 root root
d $PE_STATE_DIR 0750 hacluster haclient
d $CRM_STATE_DIR 0750 hacluster haclient
EOF
case $1 in
openais|corosync)
;;
heartbeat)
cat<<EOF
d $HA_STATE_DIR/ccm 0750 hacluster haclient
EOF
;;
esac
}
debug "Initializing $host subdir"
mkdir -p $REPORT_HOME/$host
cd $REPORT_HOME/$host
case $CLUSTER in
any) cluster=`get_cluster_type`;;
*) cluster=$CLUSTER;;
esac
logd_cf=`findlogdcf`
cluster_cf=`find_cluster_cf $cluster`
if [ $SEARCH_LOGS = 1 ]; then
logfile=`get_logfile $cluster "$cluster_cf" "$logd_cf"`
fi
debug "Config: $cluster $cluster_cf $logd_cf $logfile"
sys_info $cluster $PACKAGES > $SYSINFO_F
essential_files $cluster | check_perms > $PERMISSIONS_F 2>&1
-getconfig "$REPORT_HOME/$host" "$cluster_cf" "$logd_cf" "$HA_STATE_DIR/crm/$CIB_F" "$HA_STATE_DIR/hostcache"
-
-dlm_dump > $DLM_DUMP_F 2>&1
-sys_stats > $SYSSTATS_F 2>&1
+getconfig "$REPORT_HOME/$host" "$cluster_cf" "$logd_cf" "$HA_STATE_DIR/crm/$CIB_F" "$HA_STATE_DIR/hostcache" "/etc/drbd.conf" "/etc/drbd.d"
getpeinputs $LOG_START $LOG_END $REPORT_HOME/$host
getbacktraces $LOG_START $LOG_END > $REPORT_HOME/$host/$BT_F
dc=`crm_mon -1 2>/dev/null | awk '/Current DC/ {print $3}'`
if [ "$host" = "$dc" ]; then
echo "$host" > DC
fi
+dlm_dump > $DLM_DUMP_F 2>&1
+sys_stats > $SYSSTATS_F 2>&1
+
debug "Sanitizing files"
#
# replace sensitive info with '****'
#
for f in `basename $cluster_cf` $CIB_F $CIB_TXT_F $CIB_F.live pengine/*; do
if [ -f "$f" ]; then
sanitize $f
fi
done
# Grab logs
#debug "Gathering logs: $logfile $EXTRA_LOGS"
trap '[ -z "$pattfile" ] || rm -f "$pattfile"' 0
pattfile=`mktemp` || fatal "cannot create temporary files"
for p in $LOG_PATTERNS; do
echo "$p"
done > $pattfile
for l in $logfile $EXTRA_LOGS; do
b=`basename $l`
if [ ! -f "$l" ]; then
# Not a file
continue
elif [ -f "$b" ]; then
# We already have it
continue
fi
dumplogset "$l" $LOG_START $LOG_END > "$b"
echo "Log patterns $host:" > $ANALYSIS_F
cat $b | grep -f $pattfile >> $ANALYSIS_F
done
rm -f $pattfile
trap "" 0
# Purge files containing no information
for f in `ls -1`; do
if [ -d "$f" ]; then
continue
elif [ ! -s "$f" ]; then
debug "Removing empty file: $f"
rm -f $f
fi
done
# Parse for events
for l in $logfile $EXTRA_LOGS; do
node_events `basename $l` >> $EVENTS_F
# Link the first logfile to a standard name if it doesn't yet exist
if [ ! -e $HALOG_F ]; then
ln -s `basename $l` $HALOG_F
fi
done
if [ "$REPORT_MASTER" != "$host" ]; then
debug "Streaming report back to $REPORT_MASTER"
(cd $REPORT_HOME && tar cf - $host)
if [ "$REMOVE" = "1" ]; then
cd
rm -rf $REPORT_HOME
fi
fi
diff --git a/tools/report.common b/tools/report.common
index b700a46df0..991c165110 100644
--- a/tools/report.common
+++ b/tools/report.common
@@ -1,629 +1,633 @@
# Copyright (C) 2007 Dejan Muhamedagic <dmuhamedagic@suse.de>
# Almost everything as part of hb_report
# Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
# Cleanups, refactoring, extensions
#
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
host=`uname -n`
shorthost=`echo $host | sed s:\\\\..*::`
if [ -z "$verbose" ]; then
verbose=0
fi
# Target Files
EVENTS_F=events.txt
ANALYSIS_F=analysis.txt
DESCRIPTION_F=description.txt
HALOG_F=cluster-log.txt
BT_F=backtraces.txt
SYSINFO_F=sysinfo.txt
SYSSTATS_F=sysstats.txt
DLM_DUMP_F=dlm_dump.txt
CRM_MON_F=crm_mon.txt
MEMBERSHIP_F=members.txt
HB_UUID_F=hb_uuid.txt
HOSTCACHE=hostcache
CRM_VERIFY_F=crm_verify.txt
PERMISSIONS_F=permissions.txt
CIB_F=cib.xml
CIB_TXT_F=cib.txt
EVENT_PATTERNS="
state do_state_transition
membership pcmk_peer_update.*(lost|memb):
-quorum crmd.*ais.disp.*quorum.(lost|ac?quir)
+quorum crmd.*crm_update_quorum|crmd.*ais.disp.*quorum.(lost|ac?quir)
pause Process.pause.detected
resources lrmd.*rsc:(start|stop)
-stonith stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=)
-start_stop Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete
+stonith te_fence_node|stonith-ng.*log_oper.*report|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=)
+start_stop Starting.heartbeat|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete
"
PACKAGES="pacemaker pacemaker-libs libpacemaker3
pacemaker-pygui pacemaker-pymgmt pymgmt-client
openais libopenais2 libopenais3 corosync libcorosync4
resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord
heartbeat heartbeat-common heartbeat-resources libheartbeat2
ocfs2-tools ocfs2-tools-o2cb ocfs2console
ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace
drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace
drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen
lvm2 lvm2-clvm cmirrord
libdlm libdlm2 libdlm3
hawk ruby lighttpd
kernel-default kernel-pae kernel-xen
glibc
"
#
# keep the user posted
#
log() {
printf "%-10s $*\n" "$shorthost:" 1>&2
}
debug() {
if [ $verbose -gt 0 ]; then
log "Debug: $*"
fi
}
info() {
log "$*"
}
warning() {
log "WARN: $*"
}
fatal() {
log "ERROR: $*"
exit 1
}
detect_host() {
local_state_dir=/var
if [ -d $local_state_dir/run/crm ]; then
CRM_STATE_DIR=$local_state_dir/run/crm
else
for d in `find / -type d -name run`; do
if [ -d $d/crm ]; then
CRM_STATE_DIR=$d/crm
local_state_dir=`dirname $d`
fi
done
fi
if [ ! -d "$CRM_STATE_DIR" ]; then
fatal "Non-standard Pacemaker installation: State directory not found"
fi
debug "Machine state directory: $local_state_dir"
debug "State files located in: $CRM_STATE_DIR"
if [ -d $local_state_dir/lib/pengine ]; then
PE_STATE_DIR=$local_state_dir/lib/pengine
else
for d in `find / -type d -name pengine`; do
PE_STATE_DIR=$d
break
done
fi
if [ -z "$PE_STATE_DIR" ]; then
fatal "Non-standard Pacemaker installation: Policy Engine directory not found"
fi
debug "PE files located in: $PE_STATE_DIR"
HA_STATE_DIR=$local_state_dir/lib/heartbeat
if [ ! -d $HA_STATE_DIR ]; then
# TODO: Go looking
fatal "Non-standard Heartbeat installation: Heartbeat state directory not found"
fi
debug "Heartbeat state files located in: $HA_STATE_DIR"
CRM_CORE_DIRS=""
for d in $HA_STATE_DIR/cores $local_state_dir/lib/corosync $local_state_dir/lib/openais; do
if [ -d $d ]; then
CRM_CORE_DIRS="$CRM_CORE_DIRS $d"
fi
done
debug "Core files located under: $CRM_CORE_DIRS"
for d in /usr/lib/heartbeat /usr/lib64/heartbeat; do
if [ -f $d/crmd ]; then
CRM_DAEMON_DIR=$d
break
fi
done
if [ ! -d "$CRM_DAEMON_DIR" ]; then
for d in `find / -type d -name heartbeat`; do
if [ -f $d/crmd ]; then
CRM_DAEMON_DIR=$d
break
fi
done
fi
if [ ! -d "$CRM_DAEMON_DIR" ]; then
for f in `find / -type f -name crmd`; do
if [ -f $f ]; then
CRM_DAEMON_DIR=`dirname $f`
fi
done
fi
if [ ! -d "$CRM_DAEMON_DIR" ]; then
fatal "Non-standard Pacemaker installation: daemons not found"
fi
debug "Pacemaker daemons located under: $CRM_DAEMON_DIR"
}
time2str() {
perl -e "use POSIX; print strftime('%x %X',localtime($1));"
}
get_time() {
perl -e "\$time='$*';" -e '
eval "use Date::Parse";
if (!$@) {
print str2time($time);
} else {
eval "use Date::Manip";
if (!$@) {
print UnixDate(ParseDateString($time), "%s");
}
}
'
}
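# get_time uses whichever of Date::Parse / Date::Manip is installed and
# prints a Unix timestamp.  A quick sanity check (the exact value
# depends on the local year and timezone, so treat it as illustrative):
#
#   get_time "Jan 12 10:35:56"	# -> e.g. 1263288956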
get_time_() {
warning "No time format specified for: $*"
}
get_time_syslog() {
awk '{print $1,$2,$3}'
}
get_time_legacy() {
awk '{print $2}' | sed 's/_/ /'
}
get_time_format() {
t=0 l="" func=""
trycnt=10
while [ $trycnt -gt 0 ] && read l; do
t=$(get_time `echo $l | get_time_syslog`)
if [ "$t" ]; then
func="syslog"
break
fi
t=$(get_time `echo $l | get_time_legacy`)
if [ "$t" ]; then
func="legacy"
break
fi
trycnt=$(($trycnt-1))
done
#debug "Logfile uses the $func time format"
echo $func
}
linetime() {
format=`get_time_format < $1`
l=`tail -n +$2 $1 | grep ":[0-5][0-9]:" | head -1 | get_time_$format`
get_time "$l"
}
# Find pattern in a logfile somewhere
# Return $max ordered results by age (newest first)
findmsg() {
max=$1
pattern=$2
logfiles=""
syslogdirs="/var/log /var/logs /var/syslog /var/adm /var/log/ha /var/log/cluster"
for d in $syslogdirs; do
if [ -d $d ]; then
logfiles=`grep -l -e "$pattern" $d/*` && break
fi
done 2>/dev/null
if [ "x$logfiles" != "x" ]; then
list=`ls -t $logfiles | head -n $max | tr '\n' ' '`
echo $list
debug "Pattern \'$pattern\' found in: [ $list ]"
else
debug "Pattern \'$pattern\' not found anywhere"
fi
}
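# e.g. `findmsg 1 "Mark:pcmk:"` returns the newest syslog file
# containing the marker that get_logfile (report.collector) logs.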
node_events() {
Epatt=`echo "$EVENT_PATTERNS" |
while read title p; do [ -n "$p" ] && echo -n "|$p"; done |
sed 's/.//'
`
grep -E "$Epatt" $1
}
pickfirst() {
for x; do
which $x >/dev/null 2>&1 && {
echo $x
return 0
}
done
return 1
}
shrink() {
src=$*
target=$1.tar
tar_options=cf
variant=`pickfirst bzip2 gzip false`
case $variant in
bz*)
tar_options="jcf"
target="$target.bz2"
;;
gz*)
tar_options="zcf"
target="$target.gz"
;;
*)
warning "Could not find a compression program, the resulting tarball may be huge"
;;
esac
tar $tar_options $target $src >/dev/null 2>&1
echo $target
}
findln_by_time() {
local logf=$1
local tm=$2
local first=1
local last=`wc -l < $logf`
while [ $first -le $last ]; do
mid=$((($last+$first)/2))
trycnt=10
while [ $trycnt -gt 0 ]; do
tmid=`linetime $logf $mid`
[ "$tmid" ] && break
warning "cannot extract time: $logf:$mid; will try the next one"
trycnt=$(($trycnt-1))
# shift the whole first-last segment
first=$(($first-1))
last=$(($last-1))
mid=$((($last+$first)/2))
done
if [ -z "$tmid" ]; then
warning "giving up on log..."
return
fi
if [ $tmid -gt $tm ]; then
last=$(($mid-1))
elif [ $tmid -lt $tm ]; then
first=$(($mid+1))
else
break
fi
done
echo $mid
}
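# findln_by_time is a plain binary search over line numbers, with
# linetime mapping a line to its timestamp; the inner retry loop slides
# the whole window down when a line yields no parsable time (wrapped
# messages, binary junk).  Hypothetical call:
#
#   findln_by_time /var/log/ha-log 1262305000	# -> line number nearest that time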
dumplog() {
local logf=$1
local from_line=$2
local to_line=$3
[ "$from_line" ] ||
return
tail -n +$from_line $logf |
if [ "$to_line" ]; then
head -$(($to_line-$from_line+1))
else
cat
fi
}
#
# find log/set of logs which are interesting for us
#
#
# find log slices
#
find_decompressor() {
if echo $1 | grep -qs 'bz2$'; then
echo "bzip2 -dc"
elif echo $1 | grep -qs 'gz$'; then
echo "gzip -dc"
else
echo "cat"
fi
}
#
# check if the log contains a piece of our segment
#
is_our_log() {
local logf=$1
local from_time=$2
local to_time=$3
local cat=`find_decompressor $logf`
local format=`$cat $logf | get_time_format`
local first_time=$(get_time "`$cat $logf | head -1 | get_time_$format`")
local last_time=$(get_time "`$cat $logf | tail -1 | get_time_$format`")
if [ x = "x$first_time" -o x = "x$last_time" ]; then
return 0 # skip (empty log?)
fi
if [ $from_time -gt $last_time ]; then
# we shouldn't get here anyway if the logs are in order
return 2 # we're past good logs; exit
fi
if [ $from_time -ge $first_time ]; then
return 3 # this is the last good log
fi
# have to go further back
if [ x = "x$to_time" -o $to_time -ge $first_time ]; then
return 1 # include this log
else
return 0 # don't include this log
fi
}
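# Worked example for the return codes above, with a hypothetical window
# [from_time=200, to_time=300] and per-log first..last timestamps:
#
#   log covers 350..400 -> 0 (all newer than the window; keep scanning back)
#   log covers 250..350 -> 1 (overlaps; include it, keep scanning back)
#   log covers 100..250 -> 3 (contains from_time; include it and stop)
#   log covers  10..90  -> 2 (entirely before the window; stop)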
#
# go through archived logs (timewise backwards) and see if there
# are lines belonging to us
# (we rely on untouched log files, i.e. that modify time
# hasn't been changed)
#
arch_logs() {
local logf=$1
local from_time=$2
local to_time=$3
# look for files such as: ha-log-20090308 or
# ha-log-20090308.gz (.bz2) or ha-log.0, etc
ls -t $logf $logf*[0-9z] 2>/dev/null |
while read next_log; do
is_our_log $next_log $from_time $to_time
case $? in
0) ;; # noop, continue
1) echo $next_log # include log and continue
debug "Found log $next_log"
;;
2) break;; # don't go through older logs!
3) echo $next_log # include log and continue
debug "Found log $next_log"
break
;; # don't go through older logs!
esac
done
}
#
# print part of the log
#
drop_tmp_file() {
[ -z "$tmp" ] || rm -f "$tmp"
}
print_logseg() {
local logf=$1
local from_time=$2
local to_time=$3
# uncompress to a temp file (if necessary)
local cat=`find_decompressor $logf`
if [ "$cat" != "cat" ]; then
tmp=`mktemp`
$cat $logf > $tmp
trap drop_tmp_file 0
sourcef=$tmp
else
sourcef=$logf
tmp=""
fi
if [ "$from_time" = 0 ]; then
FROM_LINE=1
else
FROM_LINE=`findln_by_time $sourcef $from_time`
fi
if [ -z "$FROM_LINE" ]; then
warning "couldn't find line for time $from_time; corrupt log file?"
return
fi
TO_LINE=""
if [ "$to_time" != 0 ]; then
TO_LINE=`findln_by_time $sourcef $to_time`
if [ -z "$TO_LINE" ]; then
warning "couldn't find line for time $to_time; corrupt log file?"
return
fi
fi
- dumplog $sourcef $FROM_LINE $TO_LINE
- log "Including segment [$FROM_LINE-$TO_LINE] from $logf"
+ if [ -z "$TO_LINE" ] || [ $FROM_LINE -lt $TO_LINE ]; then
+ dumplog $sourcef $FROM_LINE $TO_LINE
+ log "Including segment [$FROM_LINE-$TO_LINE] from $logf"
+ else
+ log "Segment from $logf finished before it started, line: $FROM_LINE to $TO_LINE"
+ fi
drop_tmp_file
trap "" 0
}
#
# find log/set of logs which are interesting for us
#
dumplogset() {
local logf=$1
local from_time=$2
local to_time=$3
local logf_set=`arch_logs $logf $from_time $to_time`
if [ x = "x$logf_set" ]; then
return
fi
local num_logs=`echo "$logf_set" | wc -l`
local oldest=`echo $logf_set | awk '{print $NF}'`
local newest=`echo $logf_set | awk '{print $1}'`
local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'`
# the first logfile: from $from_time to $to_time (or end)
# logfiles in the middle: all
# the last logfile: from beginning to $to_time (or end)
case $num_logs in
1) print_logseg $newest $from_time $to_time;;
*)
print_logseg $oldest $from_time 0
for f in $mid_logfiles; do
`find_decompressor $f` $f
debug "including complete $f logfile"
done
print_logseg $newest 0 $to_time
;;
esac
}
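# Assembly sketch: if arch_logs returns (newest first)
#   ha-log ha-log.1.gz ha-log.2.gz
# then dumplogset emits
#   ha-log.2.gz from from_time to its end,
#   ha-log.1.gz in full,
#   ha-log from its start up to to_time,
# i.e. one continuous slice across the rotated set.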
# cut out a stanza
getstanza() {
awk -v name="$1" '
!in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start
if ($1 == name)
in_stanza = 1
}
in_stanza { print }
in_stanza && NF==1 && $1 == "}" { exit }
'
}
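# Example: for a corosync-style config on stdin,
#
#   totem {
#   version: 2
#   }
#   logging {
#   to_syslog: yes
#   }
#
# `getstanza logging` prints just the logging { ... } block.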
# supply cluster type in $1, variable name in $2, and the config
# file in the rest (for corosync/openais an optional stanza name
# may precede the file)
getcfvar() {
cf_type=$1; shift;
cf_var=$1; shift;
cf_file=$*
[ -f "$cf_file" ] || return
case $cf_type in
corosync|openais)
sed 's/#.*//' < $cf_file |
if [ $# -eq 2 ]; then
getstanza "$cf_var"
shift 1
else
cat
fi |
awk -v varname="$cf_var" '
NF==2 && match($1,varname":$")==1 { print $2; exit; }
'
;;
heartbeat)
sed 's/#.*//' < $cf_file |
grep -w "^$cf_var" |
sed 's/^[^[:space:]]*[[:space:]]*//'
;;
logd)
sed 's/#.*//' < $cf_file |
grep -w "^$cf_var" |
sed 's/^[^[:space:]]*[[:space:]]*//'
;;
esac
}
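# Typical lookups (the same shapes used elsewhere in these scripts):
#
#   getcfvar corosync to_syslog /etc/corosync/corosync.conf
#   getcfvar heartbeat use_logd /etc/ha.d/ha.cf
#
# Output is the raw value (e.g. "yes"), or nothing when the file or
# variable is missing.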
#
# figure out the cluster type, depending on the process list
# and existence of configuration files
#
get_cluster_type() {
if
ps -ef | egrep -qs '[c]orosync'
then
stack="corosync"
elif
ps -ef | egrep -qs '[a]isexec'
then
stack="openais"
elif
- ps -ef | egrep -qs '[h]eartbeat'
+ ps -ef | grep -v -e grep -e "eartbeat/[clasp]" | egrep -qs '[h]eartbeat'
then
stack="heartbeat"
# Now we're guessing...
# TODO: Technically these could be anywhere :-/
elif [ -f /etc/corosync/corosync.conf ]; then
stack="corosync"
elif [ -f /etc/ais/openais.conf ]; then
stack="openais"
else
stack="heartbeat"
fi
- debug "Detected the $stack cluster stack"
+ debug "Detected the '$stack' cluster stack"
echo $stack
}
find_cluster_cf() {
case $1 in
corosync)
best_size=0
best_file=""
# TODO: Technically these could be anywhere :-/
for cf in /etc/ais/openais.conf /etc/corosync/corosync.conf; do
if [ -f $cf ]; then
size=`wc -l $cf | awk '{print $1}'`
if [ $size -gt $best_size ]; then
best_size=$size
best_file=$cf
fi
fi
done
echo "$best_file"
;;
openais)
# TODO: Technically it could be anywhere :-/
cf="/etc/ais/openais.conf"
if [ -f $cf ]; then
echo "$cf"
fi
;;
heartbeat)
cf="/etc/ha.d/ha.cf"
if [ -f $cf ]; then
echo "$cf"
fi
;;
*)
warning "Unknown cluster type: $1"
;;
esac
}
#
# check for the major prereq for a) parameter parsing and b)
# parsing logs
#
t=`get_time "12:00"`
if [ "$t" = "" ]; then
fatal "please install the perl Date::Parse module"
fi
detect_host
