No OneTemporary
Actions

Size

70 KB

Referenced Files

None

Subscribers

None

View Options

	diff --git a/heartbeat/galera b/heartbeat/galera
	index 270bdaf1b..4f341ceef 100755
	--- a/heartbeat/galera
	+++ b/heartbeat/galera
	@@ -1,975 +1,977 @@
	#!/bin/sh
	#
	# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
	# All Rights Reserved.
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of version 2 of the GNU General Public License as
	# published by the Free Software Foundation.
	#
	# This program is distributed in the hope that it would be useful, but
	# WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
	#
	# Further, this software is distributed without any warranty that it is
	# free of the rightful claim of any third person regarding infringement
	# or the like. Any license provided herein, whether implied or
	# otherwise, applies only to this software file. Patent licenses, if
	# any, provided herein do not apply to combinations of this program with
	# other software, or any other product whatsoever.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program; if not, write the Free Software Foundation,
	# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
	#

	##
	# README.
	#
	# This agent only supports being configured as a multistate Master
	# resource.
	#
	# Slave vs Master role:
	#
	# During the 'Slave' role, galera instances are in read-only mode and
	# will not attempt to connect to the cluster. This role exists only as
	# a means to determine which galera instance is the most up-to-date. The
	# most up-to-date node will be used to bootstrap a galera cluster that
	# has no current members.
	#
	# The galera instances will only begin to be promoted to the Master role
	# once all the nodes in the 'wsrep_cluster_address' connection address
	# have entered read-only mode. At that point the node containing the
	# database that is most current will be promoted to Master. Once the first
	# Master instance bootstraps the galera cluster, the other nodes will be
	# promoted to Master as well.
	#
	# Example: Create a galera cluster using nodes rhel7-auto1 rhel7-auto2 rhel7-auto3
	#
	# pcs resource create db galera enable_creation=true \
	# wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master
	#
	# By setting the 'enable_creation' option, the database will be automatically
	# generated at startup. The meta attribute 'master-max=3' means that all 3
	# nodes listed in the wsrep_cluster_address list will be allowed to connect
	# to the galera cluster and perform replication.
	#
	# NOTE: If you have more nodes in the pacemaker cluster than you wish
	# to have in the galera cluster, make sure to use location constraints to prevent
	# pacemaker from attempting to place a galera instance on a node that is
	# not in the 'wsrep_cluster_address' list.
	#
	##

	#######################################################################
	# Initialization:

	# Load the generic OCF shell helpers; OCF_FUNCTIONS_DIR defaults to
	# ${OCF_ROOT}/lib/heartbeat when it is not already set.
	: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
	. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
	-. ${OCF_FUNCTIONS_DIR}/mysql-common.sh

	-NODENAME=$(ocf_attribute_target)
	# For the meta-data action, mysql-common.sh is not sourced and NODENAME
	# is not resolved.
	# NOTE(review): meta_data() expands ${OCF_RESKEY_*_default}; if those
	# defaults are defined by mysql-common.sh they will be empty in the
	# meta-data output -- confirm.
	+if [ "$__OCF_ACTION" != "meta-data" ]; then
	+ . ${OCF_FUNCTIONS_DIR}/mysql-common.sh
	+ NODENAME=$(ocf_attribute_target)
	+fi

	# Some galera deployments keep the credentials of the user used to
	# query status in a clustercheck file. Source the first one that
	# exists; the sysconfig location takes precedence over the default one.
	for __galera_clustercheck in "/etc/sysconfig/clustercheck" "/etc/default/clustercheck"; do
	    if [ -f "$__galera_clustercheck" ]; then
	        . "$__galera_clustercheck"
	        break
	    fi
	done
	unset __galera_clustercheck

	#######################################################################

	# Print the command synopsis and a one-line description of each
	# operation on stdout. $0 is expanded inside the here-document.
	usage() {
	cat <<UEND
	usage: $0 (start\|stop\|validate-all\|meta-data\|monitor\|promote\|demote)

	$0 manages a galera Database as an HA resource.

	The 'start' operation starts the database.
	The 'stop' operation stops the database.
	The 'status' operation reports whether the database is running
	The 'monitor' operation reports whether the database seems to be working
	The 'promote' operation makes this mysql server run as master
	The 'demote' operation makes this mysql server run as slave
	The 'validate-all' operation reports whether the parameters are valid

	UEND
	}

	# Write the OCF resource-agent metadata (parameters and actions) as XML
	# to stdout. The default="..." attributes are filled in from the
	# ${OCF_RESKEY_*_default} shell variables at the time of the call.
	# NOTE(review): "galara" in the longdesc/shortdesc below looks like a
	# typo for "galera".
	meta_data() {
	cat <<END
	<?xml version="1.0"?>
	<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
	<resource-agent name="galera">
	<version>1.0</version>

	<longdesc lang="en">
	Resource script for managing galara database.
	</longdesc>
	<shortdesc lang="en">Manages a galara instance</shortdesc>
	<parameters>

	<parameter name="binary" unique="0" required="0">
	<longdesc lang="en">
	Location of the MySQL server binary
	</longdesc>
	<shortdesc lang="en">MySQL server binary</shortdesc>
	<content type="string" default="${OCF_RESKEY_binary_default}" />
	</parameter>

	<parameter name="client_binary" unique="0" required="0">
	<longdesc lang="en">
	Location of the MySQL client binary
	</longdesc>
	<shortdesc lang="en">MySQL client binary</shortdesc>
	<content type="string" default="${OCF_RESKEY_client_binary_default}" />
	</parameter>

	<parameter name="config" unique="0" required="0">
	<longdesc lang="en">
	Configuration file
	</longdesc>
	<shortdesc lang="en">MySQL config</shortdesc>
	<content type="string" default="${OCF_RESKEY_config_default}" />
	</parameter>

	<parameter name="datadir" unique="0" required="0">
	<longdesc lang="en">
	Directory containing databases
	</longdesc>
	<shortdesc lang="en">MySQL datadir</shortdesc>
	<content type="string" default="${OCF_RESKEY_datadir_default}" />
	</parameter>

	<parameter name="user" unique="0" required="0">
	<longdesc lang="en">
	User running MySQL daemon
	</longdesc>
	<shortdesc lang="en">MySQL user</shortdesc>
	<content type="string" default="${OCF_RESKEY_user_default}" />
	</parameter>

	<parameter name="group" unique="0" required="0">
	<longdesc lang="en">
	Group running MySQL daemon (for logfile and directory permissions)
	</longdesc>
	<shortdesc lang="en">MySQL group</shortdesc>
	<content type="string" default="${OCF_RESKEY_group_default}"/>
	</parameter>

	<parameter name="log" unique="0" required="0">
	<longdesc lang="en">
	The logfile to be used for mysqld.
	</longdesc>
	<shortdesc lang="en">MySQL log file</shortdesc>
	<content type="string" default="${OCF_RESKEY_log_default}"/>
	</parameter>

	<parameter name="pid" unique="0" required="0">
	<longdesc lang="en">
	The pidfile to be used for mysqld.
	</longdesc>
	<shortdesc lang="en">MySQL pid file</shortdesc>
	<content type="string" default="${OCF_RESKEY_pid_default}"/>
	</parameter>

	<parameter name="socket" unique="0" required="0">
	<longdesc lang="en">
	The socket to be used for mysqld.
	</longdesc>
	<shortdesc lang="en">MySQL socket</shortdesc>
	<content type="string" default="${OCF_RESKEY_socket_default}"/>
	</parameter>

	<parameter name="enable_creation" unique="0" required="0">
	<longdesc lang="en">
	If the MySQL database does not exist, it will be created
	</longdesc>
	<shortdesc lang="en">Create the database if it does not exist</shortdesc>
	<content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
	</parameter>

	<parameter name="additional_parameters" unique="0" required="0">
	<longdesc lang="en">
	Additional parameters which are passed to the mysqld on startup.
	(e.g. --skip-external-locking or --skip-grant-tables)
	</longdesc>
	<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
	<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
	</parameter>


	<parameter name="wsrep_cluster_address" unique="0" required="1">
	<longdesc lang="en">
	The galera cluster address. This takes the form of:
	gcomm://node,node,node

	Only nodes present in this node list will be allowed to start a galera instance.
	The galera node names listed in this address are expected to match valid
	pacemaker node names. If both names need to differ, you must provide a
	mapping in option cluster_host_map.
	</longdesc>
	<shortdesc lang="en">Galera cluster address</shortdesc>
	<content type="string" default=""/>
	</parameter>

	<parameter name="cluster_host_map" unique="0" required="0">
	<longdesc lang="en">
	A mapping of pacemaker node names to galera node names.

	To be used when both pacemaker and galera names need to differ,
	(e.g. when galera names map to IP from a specific network interface)
	This takes the form of:
	pcmk1:node.1.galera;pcmk2:node.2.galera;pcmk3:node.3.galera

	where the galera resource started on node pcmk1 would be named
	node.1.galera in the wsrep_cluster_address
	</longdesc>
	<shortdesc lang="en">Pacemaker to Galera name mapping</shortdesc>
	<content type="string" default=""/>
	</parameter>

	<parameter name="check_user" unique="0" required="0">
	<longdesc lang="en">
	Cluster check user.
	</longdesc>
	<shortdesc lang="en">MySQL test user</shortdesc>
	<content type="string" default="root" />
	</parameter>

	<parameter name="check_passwd" unique="0" required="0">
	<longdesc lang="en">
	Cluster check user password
	</longdesc>
	<shortdesc lang="en">check password</shortdesc>
	<content type="string" default="" />
	</parameter>

	</parameters>

	<actions>
	<action name="start" timeout="120s" />
	<action name="stop" timeout="120s" />
	<action name="status" timeout="60s" />
	<action name="monitor" depth="0" timeout="30s" interval="20s" />
	<action name="monitor" role="Master" depth="0" timeout="30s" interval="10s" />
	<action name="monitor" role="Slave" depth="0" timeout="30s" interval="30s" />
	<action name="promote" timeout="300s" />
	<action name="demote" timeout="120s" />
	<action name="validate-all" timeout="5s" />
	<action name="meta-data" timeout="5s" />
	</actions>
	</resource-agent>
	END
	}

	# Query a server variable through the mysql client.
	# Arguments: $1 - variable name (used as a LIKE pattern)
	# Globals:   MYSQL, MYSQL_OPTIONS_CHECK (read)
	# Outputs:   the last line printed by the client, on stdout
	get_option_variable()
	{
	    local key=$1

	    # the client output is piped through tail so that only its final
	    # line is reported
	    $MYSQL $MYSQL_OPTIONS_CHECK -e "SHOW VARIABLES like '$key';" | tail -1
	}

	# Query a server status counter through the mysql client.
	# Arguments: $1 - status variable name (used as a LIKE pattern)
	# Globals:   MYSQL, MYSQL_OPTIONS_CHECK (read)
	# Outputs:   the last line printed by the client, on stdout
	get_status_variable()
	{
	    local key=$1

	    # the client output is piped through tail so that only its final
	    # line is reported
	    $MYSQL $MYSQL_OPTIONS_CHECK -e "show status like '$key';" | tail -1
	}

	# Flag a node as the bootstrap node by setting its reboot-lifetime
	# "<instance>-bootstrap" attribute to "true".
	# Arguments: $1 - node name, resolved through ocf_attribute_target
	set_bootstrap_node()
	{
	    local attr_name="${INSTANCE_ATTR_NAME}-bootstrap"
	    local target=$(ocf_attribute_target $1)

	    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr_name" -v "true"
	}

	# Delete the "<instance>-bootstrap" attribute of the local node.
	clear_bootstrap_node()
	{
	    local attr_name="${INSTANCE_ATTR_NAME}-bootstrap"

	    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr_name" -D
	}

	# Print the value of the local node's "<instance>-bootstrap" attribute.
	# Errors from crm_attribute are discarded; its exit status is returned.
	is_bootstrap()
	{
	    local attr_name="${INSTANCE_ATTR_NAME}-bootstrap"

	    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr_name" --quiet 2>/dev/null
	}

	# Set the local node's "<instance>-no-grastate" attribute to "true".
	set_no_grastate()
	{
	    local attr_name="${INSTANCE_ATTR_NAME}-no-grastate"

	    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr_name" -v "true"
	}

	# Delete the local node's "<instance>-no-grastate" attribute.
	clear_no_grastate()
	{
	    local attr_name="${INSTANCE_ATTR_NAME}-no-grastate"

	    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr_name" -D
	}

	# Print the "<instance>-no-grastate" attribute of a node.
	# Arguments: $1 - node name, resolved through ocf_attribute_target
	# Returns:   exit status of crm_attribute (its stderr is discarded)
	is_no_grastate()
	{
	    local attr_name="${INSTANCE_ATTR_NAME}-no-grastate"
	    local target=$(ocf_attribute_target $1)

	    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr_name" --quiet 2>/dev/null
	}

	# Delete the local node's "<instance>-last-committed" attribute.
	clear_last_commit()
	{
	    local attr_name="${INSTANCE_ATTR_NAME}-last-committed"

	    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr_name" -D
	}

	# Record a value in the local node's "<instance>-last-committed" attribute.
	# Arguments: $1 - value to store
	set_last_commit()
	{
	    local attr_name="${INSTANCE_ATTR_NAME}-last-committed"

	    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr_name" -v $1
	}

	# Print the "<instance>-last-committed" attribute of a node.
	# Arguments: $1 - node name, resolved through ocf_attribute_target;
	#                 when the resolved name is empty, NODENAME is used
	# Returns:   exit status of crm_attribute (its stderr is discarded)
	get_last_commit()
	{
	    local target=$(ocf_attribute_target $1)

	    [ -n "$target" ] || target=$NODENAME
	    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
	}

	# Delete the local node's "<instance>-safe-to-bootstrap" attribute.
	clear_safe_to_bootstrap()
	{
	    local attr_name="${INSTANCE_ATTR_NAME}-safe-to-bootstrap"

	    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr_name" -D
	}

	# Record a value in the local node's "<instance>-safe-to-bootstrap" attribute.
	# Arguments: $1 - value to store
	set_safe_to_bootstrap()
	{
	    local attr_name="${INSTANCE_ATTR_NAME}-safe-to-bootstrap"

	    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr_name" -v $1
	}

	# Print the "<instance>-safe-to-bootstrap" attribute of a node.
	# Arguments: $1 - node name, resolved through ocf_attribute_target;
	#                 when the resolved name is empty, NODENAME is used
	# Returns:   exit status of crm_attribute (its stderr is discarded)
	get_safe_to_bootstrap()
	{
	    local target=$(ocf_attribute_target $1)

	    [ -n "$target" ] || target=$NODENAME
	    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null
	}

	# Block until wsrep_local_state reads "4", polling once per second.
	# There is no timeout inside this function.
	wait_for_sync()
	{
	    local wsrep_state=$(get_status_variable "wsrep_local_state")

	    ocf_log info "Waiting for database to sync with the cluster. "
	    until [ "$wsrep_state" = "4" ]; do
	        sleep 1
	        wsrep_state=$(get_status_variable "wsrep_local_state")
	    done
	    ocf_log info "Database synced."
	}

	# Succeed when wsrep_cluster_status is "Primary".
	# Globals: cluster_status (written)
	# Returns: 0 when Primary; 1 otherwise, after reporting either an exit
	#          reason (status could not be read) or the observed status
	is_primary()
	{
	    cluster_status=$(get_status_variable "wsrep_cluster_status")

	    case "$cluster_status" in
	        Primary)
	            return 0
	            ;;
	        "")
	            ocf_exit_reason "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status"
	            ;;
	        *)
	            ocf_log info "Galera instance wsrep_cluster_status=${cluster_status}"
	            ;;
	    esac
	    return 1
	}

	# Succeed when the server reports read_only as true AND
	# wsrep_cluster_status is "Disconnected".
	# Globals: cluster_status (written, only when read_only is true)
	# Returns: 0 when both conditions hold, 1 otherwise
	is_readonly()
	{
	    local read_only=$(get_option_variable "read_only")

	    ocf_is_true "$read_only" || return 1

	    cluster_status=$(get_status_variable "wsrep_cluster_status")
	    [ "$cluster_status" = "Disconnected" ]
	}

	# Tell whether crm_mon reports an active, non-orphaned, non-failed
	# Master instance of this resource.
	# Globals: __OCF_ACTION, INSTANCE_ATTR_NAME (read)
	# Returns: 0 when such an instance is listed, non-zero otherwise
	#          (always 1 during a demote)
	master_exists()
	{
	    if [ "$__OCF_ACTION" = "demote" ]; then
	        # We don't want to detect master instances during demote.
	        # 1. we could be detecting ourselves as being master, which is no longer the case.
	        # 2. we could be detecting other master instances that are in the process of shutting down.
	        # by not detecting other master instances in "demote" we are deferring this check
	        # to the next recurring monitor operation which will be much more accurate
	        return 1
	    fi
	    # determine if a master instance is already up and is healthy;
	    # the attributes are matched in the order they appear in the XML
	    crm_mon --as-xml | grep "resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
	    return $?
	}

	# Delete the master score, either for the node resolved from $1 or,
	# when that resolves to an empty name, without naming a node.
	# Arguments: $1 - optional node name
	clear_master_score()
	{
	    local target=$(ocf_attribute_target $1)

	    if [ -n "$target" ]; then
	        $CRM_MASTER -D -N $target
	    else
	        $CRM_MASTER -D
	    fi
	}

	# Set the master score to 100, either for the node resolved from $1
	# or, when that resolves to an empty name, without naming a node.
	# Arguments: $1 - optional node name
	set_master_score()
	{
	    local target=$(ocf_attribute_target $1)

	    if [ -n "$target" ]; then
	        $CRM_MASTER -N $target -v 100
	    else
	        $CRM_MASTER -v 100
	    fi
	}

	# Give every node listed in wsrep_cluster_address a master score.
	# Globals: OCF_RESKEY_wsrep_cluster_address (read)
	# Stops at the first galera name that cannot be mapped to a pacemaker
	# node; nodes processed before it keep their score.
	promote_everyone()
	{
	    local node
	    local pcmk_node

	    # strip the gcomm:// prefix and blanks, then split on commas
	    for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
	        pcmk_node=$(galera_to_pcmk_name $node)
	        if [ -z "$pcmk_node" ]; then
	            ocf_log err "Could not determine pacemaker node from galera name <${node}>."
	            return
	        fi

	        set_master_score $pcmk_node
	    done
	}

	# Numeric "$1 >= $2" for values too large for the shell's -gt/-ge.
	# The comparison is delegated to awk.
	# Arguments: $1, $2 - the two numbers
	# Returns:   0 when $1 >= $2, 1 otherwise
	greater_than_equal_long()
	{
	    # there are values we need to compare in this script
	    # that are too large for shell -gt to process
	    awk -v n1="$1" -v n2="$2" 'BEGIN { exit (n1 >= n2) ? 0 : 1 }'
	}

	# Translate a galera node name into the matching pacemaker node name.
	# Arguments: $1 - galera node name
	# Globals:   OCF_RESKEY_cluster_host_map (read), entries of the form
	#            pcmk:galera separated by ';'
	# Outputs:   the pacemaker name on stdout; $1 itself when no map is
	#            configured; nothing when the map has no matching entry
	galera_to_pcmk_name()
	{
	    local galera=$1
	    if [ -z "$OCF_RESKEY_cluster_host_map" ]; then
	        printf '%s\n' "$galera"
	    else
	        # one "pcmk galera" pair per line; the name is handed to awk as
	        # a variable rather than spliced into the program text
	        echo "$OCF_RESKEY_cluster_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' \
	            | awk -F' ' -v name="$galera" '$2 == (name "") {print $1; exit}'
	    fi
	}

	# Translate a pacemaker node name into the matching galera node name.
	# Arguments: $1 - pacemaker node name
	# Globals:   OCF_RESKEY_cluster_host_map (read), entries of the form
	#            pcmk:galera separated by ';'
	# Outputs:   the galera name on stdout; $1 itself when no map is
	#            configured; nothing when the map has no matching entry
	pcmk_to_galera_name()
	{
	    local pcmk=$1
	    if [ -z "$OCF_RESKEY_cluster_host_map" ]; then
	        printf '%s\n' "$pcmk"
	    else
	        # one "pcmk galera" pair per line; the name is handed to awk as
	        # a variable rather than spliced into the program text
	        echo "$OCF_RESKEY_cluster_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' \
	            | awk -F' ' -v name="$pcmk" '$1 == (name "") {print $2; exit}'
	    fi
	}


	# Choose the node that will bootstrap the galera cluster and mark it
	# (master score + bootstrap attribute).
	#
	# A node flagged safe_to_bootstrap=1 is chosen immediately. Otherwise
	# the node with the highest last-committed value wins, and nothing is
	# promoted until every node has reported a value.
	# Globals: OCF_RESKEY_wsrep_cluster_address (read)
	detect_first_master()
	{
	    local best_commit=0
	    local last_commit=0
	    local missing_nodes=0
	    local nodes=""
	    local nodes_recovered=""
	    local all_nodes
	    local best_node_gcomm
	    local best_node
	    local safe_to_bootstrap
	    local node
	    local pcmk_node

	    # strip the gcomm:// prefix and blanks, then split on commas
	    all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
	    # the last node of the list is the initial candidate
	    best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/')
	    best_node=$(galera_to_pcmk_name $best_node_gcomm)
	    if [ -z "$best_node" ]; then
	        ocf_log err "Could not determine initial best node from galera name <${best_node_gcomm}>."
	        return
	    fi

	    # avoid selecting a recovered node as bootstrap if possible
	    for node in $all_nodes; do
	        pcmk_node=$(galera_to_pcmk_name $node)
	        if [ -z "$pcmk_node" ]; then
	            ocf_log err "Could not determine pacemaker node from galera name <${node}>."
	            return
	        fi
	        node=$pcmk_node

	        if is_no_grastate $node; then
	            nodes_recovered="$nodes_recovered $node"
	        else
	            nodes="$nodes $node"
	        fi
	    done

	    # recovered nodes are examined first: on equal commit values the
	    # ">=" comparison below lets a later (non-recovered) node win
	    for node in $nodes_recovered $nodes; do
	        safe_to_bootstrap=$(get_safe_to_bootstrap $node)

	        if [ "$safe_to_bootstrap" = "1" ]; then
	            # Galera marked the node as safe to bootstrap during shutdown. Let's just
	            # pick it as our bootstrap node.
	            ocf_log info "Node <${node}> is marked as safe to bootstrap."
	            best_node=$node

	            # We don't need to wait for the other nodes to report state in this case
	            missing_nodes=0
	            break
	        fi

	        last_commit=$(get_last_commit $node)

	        if [ -z "$last_commit" ]; then
	            ocf_log info "Waiting on node <${node}> to report database status before Master instances can start."
	            missing_nodes=1
	            continue
	        fi

	        # this means -1, or that no commit has occurred yet.
	        if [ "$last_commit" = "18446744073709551615" ]; then
	            last_commit="0"
	        fi

	        greater_than_equal_long "$last_commit" "$best_commit"
	        if [ $? -eq 0 ]; then
	            best_node=$(ocf_attribute_target $node)
	            best_commit=$last_commit
	        fi

	    done

	    if [ $missing_nodes -eq 1 ]; then
	        return
	    fi

	    ocf_log info "Promoting $best_node to be our bootstrap node"
	    set_master_score $best_node
	    set_bootstrap_node $best_node
	}

	# Mirror the safe_to_bootstrap flag of grastate.dat into the local
	# node's attribute. The attribute is set when the file holds 0 or 1
	# and cleared in every other case (no file, no flag, other value).
	# Globals: OCF_RESKEY_datadir (read)
	detect_safe_to_bootstrap()
	{
	    local safe_to_bootstrap=""
	    local grastate="${OCF_RESKEY_datadir}/grastate.dat"

	    if [ -f "$grastate" ]; then
	        ocf_log info "attempting to read safe_to_bootstrap flag from ${OCF_RESKEY_datadir}/grastate.dat"
	        # keep whatever follows "safe_to_bootstrap:" and optional blanks
	        safe_to_bootstrap=$(sed -n 's/^safe_to_bootstrap:[[:space:]]*\(.*\)$/\1/p' < "$grastate")
	    fi

	    case "$safe_to_bootstrap" in
	        0|1)
	            set_safe_to_bootstrap $safe_to_bootstrap
	            ;;
	        *)
	            clear_safe_to_bootstrap
	            ;;
	    esac
	}

	# Determine the last committed sequence number of the local database
	# and publish it in the last-committed node attribute.
	#
	# The value is read from grastate.dat; when it is missing or -1 the
	# server binary is run with --wsrep-recover and the position is parsed
	# from its error log. If that log points to prepared transactions, a
	# second recovery run rolls them back.
	# Globals: OCF_RESKEY_datadir, OCF_RESKEY_config, OCF_RESKEY_pid,
	#          OCF_RESKEY_socket, OCF_RESKEY_user, OCF_RESKEY_group,
	#          OCF_RESKEY_binary, NODENAME (read)
	# Returns: OCF_SUCCESS when a value was found, OCF_ERR_GENERIC otherwise
	detect_last_commit()
	{
	    local last_commit
	    local tmp
	    local recovery_file
	    local grastate="${OCF_RESKEY_datadir}/grastate.dat"
	    local gvwstate="${OCF_RESKEY_datadir}/gvwstate.dat"
	    local recover_args="--defaults-file=$OCF_RESKEY_config \
	                        --pid-file=$OCF_RESKEY_pid \
	                        --socket=$OCF_RESKEY_socket \
	                        --datadir=$OCF_RESKEY_datadir \
	                        --user=$OCF_RESKEY_user"
	    # path given after --log_error= on the "position recovery" log line
	    local recovery_file_regex='s/.*WSREP\:.*position\s*recovery.*--log_error='\''\([^'\'']*\)'\''.*/\1/p'
	    # text after the last colon of the "Recovered position" log line
	    local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p'

	    # codership/galera#354
	    # Some ungraceful shutdowns can leave an empty gvwstate.dat on
	    # disk. This will prevent galera to join the cluster if it is
	    # configured to attempt PC recovery. Removing that file makes the
	    # node fall back to the normal, unoptimized joining process.
	    if [ -f "$gvwstate" ] && [ ! -s "$gvwstate" ]; then
	        ocf_log warn "empty ${OCF_RESKEY_datadir}/gvwstate.dat detected, removing it to prevent PC recovery failure at next restart"
	        rm -f "$gvwstate"
	    fi

	    ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
	    if [ -f "$grastate" ]; then
	        last_commit="$(sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p' "$grastate")"
	    fi
	    if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
	        tmp=$(mktemp)
	        if [ -z "$tmp" ]; then
	            ocf_exit_reason "Unable to create a temporary log file for wsrep recovery"
	            clear_last_commit
	            return $OCF_ERR_GENERIC
	        fi
	        chown $OCF_RESKEY_user:$OCF_RESKEY_group "$tmp"

	        # if we pass here because grastate.dat doesn't exist,
	        # try not to bootstrap from this node if possible
	        if [ ! -f "$grastate" ]; then
	            set_no_grastate
	        fi

	        ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"

	        ${OCF_RESKEY_binary} $recover_args --wsrep-recover --log-error="$tmp" 2>/dev/null

	        last_commit="$(sed -n "$recovered_position_regex" "$tmp" | tail -1)"
	        if [ -z "$last_commit" ]; then
	            # Galera uses InnoDB's 2pc transactions internally. If
	            # server was stopped in the middle of a replication, the
	            # recovery may find a "prepared" XA transaction in the
	            # redo log, and mysql won't recover automatically

	            recovery_file="$(sed -n "$recovery_file_regex" "$tmp")"
	            # an empty name must not be tested: "[ -e ]" would be true
	            if [ -n "$recovery_file" ] && [ -e "$recovery_file" ]; then
	                grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' "$recovery_file" 2>/dev/null
	                if [ $? -eq 0 ]; then
	                    # we can only rollback the transaction, but that's OK
	                    # since the DB will get resynchronized anyway
	                    ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover"
	                    ${OCF_RESKEY_binary} $recover_args --wsrep-recover \
	                        --tc-heuristic-recover=rollback --log-error="$tmp" 2>/dev/null

	                    last_commit="$(sed -n "$recovered_position_regex" "$tmp" | tail -1)"
	                    if [ ! -z "$last_commit" ]; then
	                        ocf_log warn "State recovered. force SST at next restart for full resynchronization"
	                        rm -f "$grastate"
	                        # try not to bootstrap from this node if possible
	                        set_no_grastate
	                    fi
	                fi
	            fi
	        fi
	        rm -f "$tmp"
	    fi

	    if [ ! -z "$last_commit" ]; then
	        ocf_log info "Last commit version found: $last_commit"
	        set_last_commit $last_commit
	        return $OCF_SUCCESS
	    else
	        ocf_exit_reason "Unable to detect last known write sequence number"
	        clear_last_commit
	        return $OCF_ERR_GENERIC
	    fi
	}

	# For galera, promote is really start
	#
	# Start mysqld so that it either joins the existing cluster (a healthy
	# Master exists) or bootstraps a new one (this node carries the
	# bootstrap attribute). Fails when neither applies.
	# Returns: OCF_SUCCESS, or an OCF error code
	galera_promote()
	{
	    local rc
	    local extra_opts
	    local bootstrap
	    master_exists
	    if [ $? -eq 0 ]; then
	        # join without bootstrapping
	        extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}"
	    else
	        bootstrap=$(is_bootstrap)

	        if ocf_is_true $bootstrap; then
	            # The best node for bootstrapping wasn't cleanly shutdown. Allow
	            # bootstrapping anyways
	            if [ "$(get_safe_to_bootstrap)" = "0" ]; then
	                # "-i -e" rather than "-ie": the latter is parsed as -i
	                # with backup suffix "e" and leaves grastate.date behind
	                sed -i -e 's/^\(safe_to_bootstrap:\) 0/\1 1/' "${OCF_RESKEY_datadir}/grastate.dat"
	            fi
	            ocf_log info "Node <${NODENAME}> is bootstrapping the cluster"
	            extra_opts="--wsrep-cluster-address=gcomm://"
	        else
	            ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected."
	            clear_last_commit
	            return $OCF_ERR_GENERIC
	        fi
	    fi

	    galera_monitor
	    if [ $? -eq $OCF_RUNNING_MASTER ]; then
	        if ocf_is_true $bootstrap; then
	            promote_everyone
	            clear_bootstrap_node
	            ocf_log info "boostrap node already up, promoting the rest of the galera instances."
	        fi
	        clear_safe_to_bootstrap
	        clear_last_commit
	        return $OCF_SUCCESS
	    fi

	    # last commit/safe_to_bootstrap flag are no longer relevant once promoted
	    clear_last_commit
	    clear_safe_to_bootstrap

	    mysql_common_prepare_dirs
	    mysql_common_start "$extra_opts"
	    rc=$?
	    if [ $rc != $OCF_SUCCESS ]; then
	        return $rc
	    fi

	    galera_monitor
	    rc=$?
	    if [ $rc != $OCF_SUCCESS ] && [ $rc != $OCF_RUNNING_MASTER ]; then
	        ocf_exit_reason "Failed initial monitor action"
	        return $rc
	    fi

	    is_readonly
	    if [ $? -eq 0 ]; then
	        ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration."
	        return $OCF_ERR_GENERIC
	    fi

	    is_primary
	    if [ $? -ne 0 ]; then
	        ocf_exit_reason "Failure. Master instance started, but is not in Primary mode."
	        return $OCF_ERR_GENERIC
	    fi

	    if ocf_is_true $bootstrap; then
	        promote_everyone
	        clear_bootstrap_node
	        # clear attribute no-grastate. if last shutdown was
	        # not clean, we cannot be extra-cautious by requesting a SST
	        # since this is the bootstrap node
	        clear_no_grastate
	        ocf_log info "Bootstrap complete, promoting the rest of the galera instances."
	    else
	        # if this is not the bootstrap node, make sure this instance
	        # syncs with the rest of the cluster before promotion returns.
	        wait_for_sync
	        # sync is done, clear info about last startup
	        clear_no_grastate
	    fi

	    ocf_log info "Galera started"
	    return $OCF_SUCCESS
	}

	# Demote the local instance: stop mysqld, drop the transient node
	# attributes and the master score, then record safe_to_bootstrap and
	# the last commit again for the next promotion.
	# Returns: the mysql_common_stop status when stopping failed,
	#          otherwise the status of detect_last_commit
	galera_demote()
	{
	    local rc

	    mysql_common_stop
	    rc=$?
	    if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
	        ocf_exit_reason "Failed to stop Master galera instance during demotion to Slave"
	        return $rc
	    fi

	    # if this node was previously a bootstrap node, that is no longer the case.
	    clear_bootstrap_node
	    clear_last_commit
	    clear_no_grastate
	    clear_safe_to_bootstrap

	    # Clear master score here rather than letting pacemaker do so once
	    # demote finishes. This way a promote cannot take place right
	    # after this demote even if pacemaker is requested to do so. It
	    # will first have to run a start/monitor op, to reprobe the state
	    # of the other galera nodes and act accordingly.
	    clear_master_score

	    # record last commit for next promotion
	    detect_safe_to_bootstrap
	    detect_last_commit
	    rc=$?
	    return $rc
	}

	# Start the resource in its Slave role. mysqld itself is not started
	# here: the function checks that the node belongs to the cluster
	# address, records safe_to_bootstrap and the last commit, and then
	# either sets a master score (a Master already exists) or runs the
	# bootstrap-node election.
	# Returns: OCF_SUCCESS, OCF_ERR_CONFIGURED, OCF_ERR_GENERIC or the
	#          failure status of detect_last_commit
	galera_start()
	{
	    local rc
	    local galera_node

	    galera_node=$(pcmk_to_galera_name $NODENAME)
	    if [ -z "$galera_node" ]; then
	        ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>."
	        return $OCF_ERR_CONFIGURED
	    fi

	    # fixed-string (substring) match of the galera name in the address
	    printf '%s\n' "$OCF_RESKEY_wsrep_cluster_address" | grep -q -F -- "$galera_node"
	    if [ $? -ne 0 ]; then
	        ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}> to start this galera instance"
	        return $OCF_ERR_CONFIGURED
	    fi

	    galera_monitor
	    if [ $? -eq $OCF_RUNNING_MASTER ]; then
	        ocf_exit_reason "master galera instance started outside of the cluster's control"
	        return $OCF_ERR_GENERIC
	    fi

	    mysql_common_prepare_dirs

	    detect_safe_to_bootstrap
	    detect_last_commit
	    rc=$?
	    if [ $rc -ne $OCF_SUCCESS ]; then
	        return $rc
	    fi

	    master_exists
	    if [ $? -eq 0 ]; then
	        ocf_log info "Master instances are already up, setting master score so this instance will join galera cluster."
	        set_master_score $NODENAME
	    else
	        clear_master_score
	        detect_first_master
	    fi

	    return $OCF_SUCCESS
	}

	# Report the state of the local galera instance.
	#
	# When mysqld is not running but a last-committed attribute exists, the
	# instance counts as started in Slave mode; the function then also
	# drives promotion (master score or bootstrap election). When mysqld
	# is running it must be a member of the cluster address and in
	# Primary state to be reported as Master.
	# Returns: OCF_NOT_RUNNING, OCF_SUCCESS (slave), OCF_RUNNING_MASTER,
	#          OCF_ERR_CONFIGURED, OCF_ERR_GENERIC or another
	#          mysql_common_status failure code
	galera_monitor()
	{
	    local rc
	    local galera_node
	    local last_commit
	    local status_loglevel="err"

	    # Set loglevel to info during probe
	    if ocf_is_probe; then
	        status_loglevel="info"
	    fi

	    mysql_common_status $status_loglevel
	    rc=$?

	    if [ $rc -eq $OCF_NOT_RUNNING ]; then
	        # no argument: query the local node's attribute
	        last_commit=$(get_last_commit)
	        if [ -n "$last_commit" ]; then
	            # if last commit is set, this instance is considered started in slave mode
	            rc=$OCF_SUCCESS
	            master_exists
	            if [ $? -ne 0 ]; then
	                detect_first_master
	            else
	                # a master instance exists and is healthy, promote this
	                # local read only instance
	                # so it can join the master galera cluster.
	                set_master_score
	            fi
	        fi
	        return $rc
	    elif [ $rc -ne $OCF_SUCCESS ]; then
	        return $rc
	    fi

	    # if we make it here, mysql is running. Check cluster status now.
	    galera_node=$(pcmk_to_galera_name $NODENAME)
	    if [ -z "$galera_node" ]; then
	        ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>."
	        return $OCF_ERR_CONFIGURED
	    fi

	    # fixed-string (substring) match of the galera name in the address
	    printf '%s\n' "$OCF_RESKEY_wsrep_cluster_address" | grep -q -F -- "$galera_node"
	    if [ $? -ne 0 ]; then
	        ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
	        return $OCF_ERR_GENERIC
	    fi

	    is_primary
	    if [ $? -eq 0 ]; then

	        if ocf_is_probe; then
	            # restore master score during probe
	            # if we detect this is a master instance
	            set_master_score
	        fi
	        rc=$OCF_RUNNING_MASTER
	    else
	        ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
	        rc=$OCF_ERR_GENERIC
	    fi

	    return $rc
	}

	# Stop mysqld and remove every transient node attribute and the
	# master score kept for this resource.
	# Returns: the exit status of mysql_common_stop
	galera_stop()
	{
	    local rc

	    # make sure the process is stopped
	    mysql_common_stop
	    # capture the stop result before the cleanup calls overwrite $?
	    rc=$?

	    clear_safe_to_bootstrap
	    clear_last_commit
	    clear_master_score
	    clear_bootstrap_node
	    clear_no_grastate
	    return $rc
	}

	# Check the resource configuration: it must be a multistate resource
	# and wsrep_cluster_address must be set; the remaining checks are
	# delegated to mysql_common_validate.
	# Returns: OCF_ERR_CONFIGURED on a failed check, otherwise the status
	#          of mysql_common_validate
	galera_validate()
	{
	    ocf_is_ms || {
	        ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource."
	        return $OCF_ERR_CONFIGURED
	    }

	    [ -n "$OCF_RESKEY_wsrep_cluster_address" ] || {
	        ocf_exit_reason "Galera must be configured with a wsrep_cluster_address value."
	        return $OCF_ERR_CONFIGURED
	    }

	    mysql_common_validate
	}

	# meta-data, usage and help are answered before the configuration is
	# validated.
	case "$1" in
	    meta-data)
	        meta_data
	        exit $OCF_SUCCESS;;
	    usage|help)
	        usage
	        exit $OCF_SUCCESS;;
	esac

	galera_validate
	rc=$?
	LSB_STATUS_STOPPED=3
	if [ $rc -ne 0 ]; then
	    # validation failed: stop/monitor/status report "stopped", every
	    # other action fails with the validation status
	    case "$1" in
	        stop) exit $OCF_SUCCESS;;
	        monitor) exit $OCF_NOT_RUNNING;;
	        status) exit $LSB_STATUS_STOPPED;;
	        *) exit $rc;;
	    esac
	fi

	if [ -z "${OCF_RESKEY_check_passwd}" ]; then
	    # MYSQL_PASSWORD may have been set by the clustercheck file sourced above
	    OCF_RESKEY_check_passwd=${MYSQL_PASSWORD}
	fi
	if [ -z "${OCF_RESKEY_check_user}" ]; then
	    # MYSQL_USERNAME may have been set by the clustercheck file sourced above
	    OCF_RESKEY_check_user=${MYSQL_USERNAME}
	fi
	: ${OCF_RESKEY_check_user="root"}

	# Options for the status queries issued through $MYSQL.
	# NOTE(review): the password is passed as a command-line option and
	# is therefore visible in the process list.
	MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
	if [ -n "${OCF_RESKEY_check_passwd}" ]; then
	    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
	fi

	# MYSQL_HOST may have been set by the clustercheck file sourced above
	if [ -n "${MYSQL_HOST}" ]; then
	    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}"
	fi

	# MYSQL_PORT may have been set by the clustercheck file sourced above
	if [ -n "${MYSQL_PORT}" ]; then
	    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}"
	fi



	# What kind of method was invoked?
	case "$1" in
	    start) galera_start;;
	    stop) galera_stop;;
	    status) mysql_common_status err;;
	    monitor) galera_monitor;;
	    promote) galera_promote;;
	    demote) galera_demote;;
	    validate-all) exit $OCF_SUCCESS;;

	    *) usage
	       exit $OCF_ERR_UNIMPLEMENTED;;
	esac

	# vi:sw=4:ts=4:et:
	diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
	index 9ff49e075..54a16c941 100755
	--- a/heartbeat/rabbitmq-cluster
	+++ b/heartbeat/rabbitmq-cluster
	@@ -1,549 +1,551 @@
	#!/bin/sh
	#
	# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
	# All Rights Reserved.
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of version 2 of the GNU General Public License as
	# published by the Free Software Foundation.
	#
	# This program is distributed in the hope that it would be useful, but
	# WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
	#
	# Further, this software is distributed without any warranty that it is
	# free of the rightful claim of any third person regarding infringement
	# or the like. Any license provided herein, whether implied or
	# otherwise, applies only to this software file. Patent licenses, if
	# any, provided herein do not apply to combinations of this program with
	# other software, or any other product whatsoever.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program; if not, write the Free Software Foundation,
	# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
	#

	#######################################################################
	# Initialization:

	# Load the generic OCF shell helpers; OCF_FUNCTIONS_DIR defaults to
	# ${OCF_ROOT}/lib/heartbeat when it is not already set.
	: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
	. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

	#######################################################################

	# Fixed paths of the rabbitmq executables and of its data, pid and
	# log locations.
	RMQ_SERVER=/usr/sbin/rabbitmq-server
	RMQ_CTL=/usr/sbin/rabbitmqctl
	RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia"
	RMQ_PID_DIR="/var/run/rabbitmq"
	RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid"
	RMQ_LOG_DIR="/var/log/rabbitmq"
	-NODENAME=$(ocf_attribute_target)
	# NODENAME is not resolved for the meta-data action.
	+if [ "$__OCF_ACTION" != "meta-data" ]; then
	+ NODENAME=$(ocf_attribute_target)
	+fi

	# this attr represents the current active local rmq node name.
	# when rmq stops or the node is fenced, this attr disappears
	RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}"
	# this attr represents the last known active local rmq node name.
	# when rmq stops or the node is fenced, the attr stays forever so
	# we can continue to map an offline pcmk node to its rmq node name
	# equivalent.
	RMQ_CRM_ATTR_COOKIE_LAST_KNOWN="rmq-node-attr-last-known-${OCF_RESOURCE_INSTANCE}"

	# Write the OCF resource-agent metadata (the set_policy parameter and
	# the supported actions) as XML to stdout.
	meta_data() {
	cat <<END
	<?xml version="1.0"?>
	<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
	<resource-agent name="rabbitmq-cluster">
	<version>1.0</version>

	<longdesc lang="en">
	Starts cloned rabbitmq cluster instance. NB: note that this RA
	cannot be spawned across a mix of pacemaker and pacemaker-remote nodes.
	Only on pacemaker or pacemaker-remote nodes exclusively.
	</longdesc>
	<shortdesc lang="en">rabbitmq clustered</shortdesc>

	<parameters>
	<parameter name="set_policy" unique="1">
	<longdesc lang="en">
	Policy string to pass to 'rabbitmqctl set_policy' right after bootstrapping the first rabbitmq instance.
	</longdesc>
	<shortdesc lang="en">rabbitmqctl set_policy args</shortdesc>
	<content type="string" default="" />
	</parameter>

	</parameters>

	<actions>
	<action name="start" timeout="100s" />
	<action name="stop" timeout="90s" />
	<action name="monitor" timeout="40s" interval="10s" depth="0" />
	<action name="meta-data" timeout="10s" />
	<action name="validate-all" timeout="20s" />
	</actions>
	</resource-agent>
	END
	}

	#######################################################################

	# Print a short usage summary on stdout; $0 is expanded by the
	# here-document.
	# NOTE(review): the "\|" sequences in the usage line are emitted
	# verbatim by the here-document; presumably a plain "|" separator was
	# intended - confirm against the pristine file.
	rmq_usage() {
	cat <<END
	usage: $0 {start\|stop\|monitor\|notify\|validate-all\|meta-data}

	Expects to have a fully populated OCF RA-compliant environment set.
	END
	}

	# Recursively delete the local RabbitMQ data directory ($RMQ_DATA_DIR).
	# Output and errors of rm are discarded.
	rmq_wipe_data()
	{
		# Nothing to wipe when the path is empty (rm -rf without operands
		# was a silent no-op before as well).
		[ -n "$RMQ_DATA_DIR" ] || return 0

		# Quoted so that a path containing whitespace is removed as one
		# path instead of being split into several unrelated targets.
		rm -rf -- "$RMQ_DATA_DIR" > /dev/null 2>&1
	}

	# Print the local RabbitMQ node name on stdout.
	#
	# The name is taken from the "Status of node <name> ..." line printed by
	# "$RMQ_CTL status" (surrounding single quotes are removed). If that
	# yields nothing, the RABBITMQ_NODENAME= entry of
	# /etc/rabbitmq/rabbitmq-env.conf is used. Prints an empty line when
	# neither source provides a name.
	rmq_local_node()
	{
		local node_name

		# The capture group is required by the \1 back-reference; the
		# pipeline stages must be real pipes, not arguments to rabbitmqctl.
		node_name=$($RMQ_CTL status 2>&1 | sed -n -e "s/^.*[S|s]tatus of node \(.*\)\s.*$/\1/p" | tr -d "'")

		if [ -z "$node_name" ]; then
			node_name=$(cat /etc/rabbitmq/rabbitmq-env.conf 2>/dev/null | grep "\s*RABBITMQ_NODENAME=" | awk -F= '{print $2}')
		fi

		echo "$node_name"
	}

	# Print, space separated on one line, the rmq node names this instance
	# may join.
	#
	# First the CIB is queried for the $RMQ_CRM_ATTR_COOKIE attribute of
	# node_state entries with crmd='online'. If that yields nothing, the
	# attribute is read for all node_state entries and the result is
	# restricted to names matching a node that crm_mon reports as online
	# and not in standby.
	rmq_join_list()
	{
		local join_list
		local remote_join_list
		local filter

		join_list=$(cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p")
		# If join_list is empty we want to check if there are any remote nodes
		# where rabbitmq is allowed to run (i.e. nodes without the crmd=online selector)
		if [ -z "$join_list" ]; then
			# Get all the nodes written in the ATTR_COOKIE no matter if
			# they are online or not. This will be one line per node like
			# rabbit@overcloud-rabbit-0
			# rabbit@overcloud-rabbit-1
			# ...
			remote_join_list=$(cibadmin -Q --xpath "//node_state//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p")
			# The following expression prepares a filter like '-e overcloud-rabbit-0 -e overcloud-rabbit-1 -e ...'
			filter=$(crm_mon -r --as-xml | xmllint --format --xpath "//nodes//node[@online='true' and @standby='false']/@name" - | xargs -n1 echo | awk -F= '{print "-e "$2}')
			# export the intersection which gives us only the nodes that
			# a) wrote their name in the cib attrd
			# b) run on nodes where pacemaker_remote is enabled
			if [ -n "$remote_join_list" ] && [ -n "$filter" ]; then
				# One candidate per line, so grep drops the nodes that are
				# not online instead of passing or rejecting the whole list.
				# $filter is intentionally unquoted: it holds several
				# "-e <name>" options.
				join_list="$(printf '%s\n' "$remote_join_list" | grep $filter)"
			fi
		fi

		# Unquoted on purpose: collapses the per-line list onto one line.
		echo $join_list
	}

	# Record the local rmq node name as node attributes of $NODENAME.
	# Exits the agent with $OCF_ERR_GENERIC when the name cannot be
	# determined.
	rmq_write_nodename()
	{
		local rmq_name
		rmq_name=$(rmq_local_node)

		[ -n "$rmq_name" ] || {
			ocf_log err "Failed to determine rabbitmq node name, exiting"
			exit $OCF_ERR_GENERIC
		}

		# Transient (reboot lifetime) pcmk-node -> rmq-node mapping; lets the
		# join list be retrieved with a simple xpath.
		${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$rmq_name"

		# Permanent copy of the same mapping, so offline nodes can still be
		# mapped to their rmq node name.
		${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --name "$RMQ_CRM_ATTR_COOKIE_LAST_KNOWN" -v "$rmq_name"
	}

	# Delete the transient (reboot lifetime) $RMQ_CRM_ATTR_COOKIE attribute
	# of $NODENAME. The "forever" last-known attribute is not touched here.
	rmq_delete_nodename()
	{
		${HA_SBIN_DIR}/crm_attribute \
			-N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -D
	}

	# Create directory $1 (with parents) if it does not exist yet, hand it
	# to rabbitmq:rabbitmq and set its mode to 755. An existing directory is
	# left untouched. An empty argument is a no-op.
	prepare_dir () {
		local dir="$1"

		[ -n "$dir" ] || return 0

		# Quoted throughout so that paths containing whitespace work; the
		# unquoted test made "[" fail and silently skipped the creation.
		if [ ! -d "$dir" ] ; then
			mkdir -p "$dir"
			chown -R rabbitmq:rabbitmq "$dir"
			chmod 755 "$dir"
		fi
	}

	# Delete the pid file $RMQ_PID_FILE; a missing file is not an error and
	# any output of rm is discarded.
	remove_pid () {
		# Quoted so a path with whitespace is removed as one file.
		rm -f -- "${RMQ_PID_FILE}" > /dev/null 2>&1
	}

	# Check the local RabbitMQ server with "$RMQ_CTL cluster_status".
	#
	# Returns:
	#   $OCF_SUCCESS      exit code 0; the node name attributes are refreshed
	#   $OCF_NOT_RUNNING  exit code 2, 68, 69, 70, 75 or 78; the transient
	#                     node name attribute is deleted
	#   $OCF_ERR_GENERIC  any other exit code; the transient node name
	#                     attribute is deleted
	rmq_monitor() {
		local rc

		$RMQ_CTL cluster_status > /dev/null 2>&1
		rc=$?
		case "$rc" in
		0)
			ocf_log debug "RabbitMQ server is running normally"
			rmq_write_nodename

			return $OCF_SUCCESS
			;;
		# Alternation must use a bare "|"; an escaped "\|" is a literal
		# character and would never match any of these exit codes.
		2|68|69|70|75|78)
			ocf_log info "RabbitMQ server is not running"
			rmq_delete_nodename
			return $OCF_NOT_RUNNING
			;;
		*)
			ocf_log err "Unexpected return code from '$RMQ_CTL cluster_status' exit code: $rc"
			rmq_delete_nodename
			return $OCF_ERR_GENERIC
			;;
		esac
	}

	# Launch rabbitmq-server in its own session and block until
	# "$RMQ_CTL wait" reports on the pid file.
	#
	# Returns $OCF_ERR_GENERIC when the wait fails (the pid file is removed
	# again), otherwise whatever rmq_monitor returns.
	rmq_init_and_wait()
	{
		local wait_status

		# Runtime directories and a clean slate for the pid file.
		prepare_dir $RMQ_PID_DIR
		prepare_dir $RMQ_LOG_DIR
		remove_pid

		# the server startup script uses this environment variable
		export RABBITMQ_PID_FILE="$RMQ_PID_FILE"

		# Detached start; stdout/stderr end up in the log directory.
		setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" &

		ocf_log info "Waiting for server to start"
		$RMQ_CTL wait $RMQ_PID_FILE
		wait_status=$?

		if [ $wait_status -eq $OCF_SUCCESS ]; then
			rmq_monitor
			return $?
		fi

		remove_pid
		ocf_log info "rabbitmq-server start failed: $wait_status"
		return $OCF_ERR_GENERIC
	}

	# Run "$RMQ_CTL set_policy" with all arguments passed through
	# unchanged; output is discarded and the exit status of the command is
	# returned.
	rmq_set_policy()
	{
		$RMQ_CTL set_policy "$@" \
			> /dev/null 2>&1
	}

	# Bootstrap a new cluster on this node: wipe the local data, start the
	# server and, if OCF_RESKEY_set_policy is non-empty, apply that policy.
	#
	# Returns $OCF_SUCCESS on success, $OCF_ERR_GENERIC when the server does
	# not come up or the policy cannot be set.
	rmq_start_first()
	{
		ocf_log info "Bootstrapping rabbitmq cluster"
		rmq_wipe_data

		if ! rmq_init_and_wait; then
			ocf_log info "failed to bootstrap cluster. Check SELINUX policy"
			return $OCF_ERR_GENERIC
		fi
		ocf_log info "cluster bootstrapped"

		# No policy configured: bootstrapping alone is the whole job.
		[ -n "$OCF_RESKEY_set_policy" ] || return $OCF_SUCCESS

		# do not quote set_policy, we are passing in arguments
		if rmq_set_policy $OCF_RESKEY_set_policy > /dev/null 2>&1; then
			ocf_log info "Policy set: $OCF_RESKEY_set_policy"
			return $OCF_SUCCESS
		fi

		ocf_log err "Failed to set policy: $OCF_RESKEY_set_policy"
		return $OCF_ERR_GENERIC
	}

	# Succeeds (status 0) when evaluating rabbit_mnesia:is_clustered() on the
	# local node prints "true", fails otherwise.
	rmq_is_clustered()
	{
		# A real pipe is required here: with an escaped "\|" grep would be
		# handed to rabbitmqctl as an argument and never inspect the output.
		$RMQ_CTL eval 'rabbit_mnesia:is_clustered().' | grep -q true
	}

	# Start the local server and make it a member of an existing cluster.
	#
	# Arguments: $1 - whitespace/newline separated list of rmq nodes to try
	# Returns:   $OCF_SUCCESS when the node re-joined automatically or was
	#            joined via one of the listed nodes and the app started;
	#            $OCF_ERR_GENERIC otherwise.
	rmq_join_existing()
	{
		local join_list="$1"
		local rc=$OCF_ERR_GENERIC
		local node

		ocf_log info "Joining existing cluster with [ $(echo $join_list | tr '\n' ' ') ] nodes."
		rmq_init_and_wait
		if [ $? -ne 0 ]; then
			return $OCF_ERR_GENERIC
		fi

		if rmq_is_clustered; then
			ocf_log info "Successfully re-joined existing rabbitmq cluster automatically"
			return $OCF_SUCCESS
		fi

		# unconditionally join the cluster
		$RMQ_CTL stop_app > /dev/null 2>&1
		# Try the candidates in order; the first successful join_cluster
		# ends the loop whether or not start_app then succeeds.
		for node in $join_list; do
			ocf_log info "Attempting to join cluster with target node $node"
			$RMQ_CTL join_cluster $node
			if [ $? -eq 0 ]; then
				ocf_log info "Joined cluster by connecting to node $node, starting app"
				$RMQ_CTL start_app
				rc=$?
				if [ $rc -ne 0 ]; then
					ocf_log err "'$RMQ_CTL start_app' failed"
				fi
				break;
			fi
		done

		# rc still holds $OCF_ERR_GENERIC when no candidate accepted the join.
		if [ "$rc" -ne 0 ]; then
			ocf_log info "Join process incomplete, shutting down."
			return $OCF_ERR_GENERIC
		fi

		ocf_log info "Successfully joined existing rabbitmq cluster"
		return $OCF_SUCCESS
	}

	# Ask running cluster members to forget a node.
	#
	# Arguments: $1 - list of running rmq nodes to contact, tried in order
	#            $2 - rmq node name to forget
	# Stops at the first node on which forget_cluster_node succeeds;
	# failures are logged and the next node is tried.
	rmq_forget_cluster_node_remotely() {
		local running_cluster_nodes="$1"
		local node_to_forget="$2"
		local running_cluster_node

		ocf_log info "Forgetting $node_to_forget via nodes [ $(echo $running_cluster_nodes | tr '\n' ' ') ]."
		for running_cluster_node in $running_cluster_nodes; do
			$RMQ_CTL -n $running_cluster_node forget_cluster_node $node_to_forget
			if [ $? = 0 ]; then
				ocf_log info "Succeeded forgetting $node_to_forget via $running_cluster_node."
				return
			else
				ocf_log err "Failed to forget node $node_to_forget via $running_cluster_node."
			fi
		done
	}

	# Handle clone notifications. Only "post-stop" is acted upon: when the
	# local server is running, every pcmk node listed in
	# OCF_RESKEY_CRM_meta_notify_stop_uname is mapped to its last known rmq
	# node name and forgotten via "$RMQ_CTL forget_cluster_node".
	# Always returns $OCF_SUCCESS.
	rmq_notify() {
		node_list="${OCF_RESKEY_CRM_meta_notify_stop_uname}"
		mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"

		# When notifications are on, this agent is going to "forget" nodes once they
		# leave the cluster. This is thought to resolve some issues where rabbitmq
		# blocks trying to sync with an offline node after a fencing action occurs.
		[ "${mode}" = "post-stop" ] || return $OCF_SUCCESS

		# only run forget when we are for sure active
		rmq_monitor
		[ $? -eq $OCF_SUCCESS ] || return $OCF_SUCCESS

		# forget each stopped rmq instance in the provided pcmk node in the list.
		for node in $node_list; do
			local rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $(ocf_attribute_target $node) -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)"
			if [ -n "$rmq_node" ]; then
				ocf_log notice "Forgetting stopped node $rmq_node"
				$RMQ_CTL forget_cluster_node $rmq_node || \
					ocf_log warn "Unable to forget offline node $rmq_node."
			else
				ocf_log warn "Unable to map pcmk node $node to a known rmq node."
			fi
		done
		return $OCF_SUCCESS
	}

	# Start action.
	#
	# Returns $OCF_SUCCESS right away when rmq_monitor already reports
	# success. With an empty join list the node bootstraps a new cluster via
	# rmq_start_first and that result is returned. Otherwise the local
	# instance is stopped, its data wiped, the node's last known rmq name is
	# forgotten on the running members and the node joins them; a failed
	# join returns $OCF_ERR_GENERIC. After a successful join the Erlang
	# snippet below restores users, user permissions and policies from the
	# *.erl files next to $RMQ_DATA_DIR, deleting each file after reading it.
	rmq_start() {
	local join_list=""
	local rc

	rmq_monitor
	if [ $? -eq $OCF_SUCCESS ]; then
	return $OCF_SUCCESS
	fi

	join_list=$(rmq_join_list)

	# No join list means no active instances are up. This instance
	# is the first, so it needs to bootstrap the rest
	if [ -z "$join_list" ]; then
	rmq_start_first
	rc=$?
	return $rc
	fi

	# Try to join existing cluster
	ocf_log info "wiping data directory before joining"
	# Read the last known rmq name of this node before anything is wiped.
	local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)"

	rmq_stop
	rmq_wipe_data
	rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node"
	rmq_join_existing "$join_list"
	rc=$?

	if [ $rc -ne 0 ]; then
	ocf_log info "node failed to join even after reseting local data. Check SELINUX policy"
	return $OCF_ERR_GENERIC
	fi

	# Restore users, user permissions, and policies (if any)
	# BaseDataDir is the parent of $RMQ_DATA_DIR; it is interpolated into the
	# Erlang code below. The exit status of the eval is not checked.
	BaseDataDir=`dirname $RMQ_DATA_DIR`
	$RMQ_CTL eval "
	%% Run only if Mnesia is ready.
	lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
	begin
	Restore = fun(Table, PostprocessFun, Filename) ->
	case file:consult(Filename) of
	{error, _} ->
	ok;
	{ok, [Result]} ->
	lists:foreach(fun(X) -> mnesia:dirty_write(Table, PostprocessFun(X)) end, Result),
	file:delete(Filename)
	end
	end,

	%% Restore users

	Upgrade = fun
	({internal_user, A, B, C}) -> {internal_user, A, B, C, rabbit_password_hashing_md5};
	({internal_user, A, B, C, D}) -> {internal_user, A, B, C, D}
	end,

	Downgrade = fun
	({internal_user, A, B, C}) -> {internal_user, A, B, C};
	({internal_user, A, B, C, rabbit_password_hashing_md5}) -> {internal_user, A, B, C};
	%% Incompatible scheme, so we will loose user's password ('B' value) during conversion.
	%% Unfortunately, this case will require manual intervention - user have to run:
	%% rabbitmqctl change_password <A> <somenewpassword>
	({internal_user, A, B, C, _}) -> {internal_user, A, B, C}
	end,

	%% Check db scheme first
	[WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
	case WildPattern of
	%% Version < 3.6.0
	{internal_user,'_','_','_'} ->
	Restore(rabbit_user, Downgrade, \"$BaseDataDir/users.erl\");
	%% Version >= 3.6.0
	{internal_user,'_','_','_','_'} ->
	Restore(rabbit_user, Upgrade, \"$BaseDataDir/users.erl\")
	end,

	NoOp = fun(X) -> X end,

	%% Restore user permissions
	Restore(rabbit_user_permission, NoOp, \"$BaseDataDir/users_perms.erl\"),

	%% Restore policies
	Restore(rabbit_runtime_parameters, NoOp, \"$BaseDataDir/policies.erl\")
	end.
	"
	return $OCF_SUCCESS
	}

	# Stop action.
	#
	# First tries to back up users (except "guest"), user permissions and
	# policies into *.erl files next to $RMQ_DATA_DIR; the backup eval runs
	# before the running state is checked and its exit status is ignored.
	# Then returns $OCF_SUCCESS if the server is not running, otherwise
	# issues "$RMQ_CTL stop" (returning its exit code on failure) and polls
	# rmq_monitor once per second until it reports $OCF_NOT_RUNNING.
	# Note: an unexpected monitor result during the wait terminates the
	# agent with "exit", not "return". rc and stop_wait are not declared
	# local in this function.
	rmq_stop() {
	# Backup users, user permissions, and policies
	BaseDataDir=`dirname $RMQ_DATA_DIR`
	$RMQ_CTL eval "
	%% Run only if Mnesia is still available.
	lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
	begin
	Backup = fun(Table, SelectPattern, Filter, Filename) ->
	Result = case catch mnesia:dirty_select(Table, [{SelectPattern, [Filter], ['\\\$_']}]) of
	{'EXIT', _} -> [];
	Any -> Any
	end,
	Result /= [] andalso file:write_file(Filename, io_lib:fwrite(\"~p.~n\", [Result]))
	end,

	%% Backup users
	%% Check db scheme first
	[WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
	UsersSelectPattern = case WildPattern of
	%% Version < 3.6.0
	{internal_user,'_','_','_'} -> {internal_user, '\\\$1', '_', '_'};
	%% Version >= 3.6.0
	{internal_user,'_','_','_','_'} -> {internal_user, '\\\$1', '_', '_', '_'}
	end,
	Backup(rabbit_user, UsersSelectPattern, {'/=', '\\\$1', <<\"guest\">>}, \"$BaseDataDir/users.erl\"),

	%% Backup user permissions
	Backup(rabbit_user_permission, {'\\\$1', {'\\\$2', '\\\$3','\\\$4'}, '\\\$5'}, {'/=', '\\\$3', <<\"guest\">>}, \"$BaseDataDir/users_perms.erl\"),

	%% Backup policies
	Backup(rabbit_runtime_parameters, {runtime_parameters, {'_', '\\\$1', '_'}, '_'}, {'==', '\\\$1', <<\"policy\">>}, \"$BaseDataDir/policies.erl\")
	end.
	"

	# Already stopped: nothing more to do.
	rmq_monitor
	if [ $? -eq $OCF_NOT_RUNNING ]; then
	return $OCF_SUCCESS
	fi

	$RMQ_CTL stop
	rc=$?

	if [ $rc -ne 0 ]; then
	ocf_log err "rabbitmq-server stop command failed: $RMQ_CTL stop, $rc"
	return $rc
	fi

	#TODO add kill logic
	# Poll until the monitor reports "not running"; there is no upper bound
	# on the number of iterations in this loop itself.
	stop_wait=1
	while [ $stop_wait = 1 ]; do
	rmq_monitor
	rc=$?
	if [ "$rc" -eq $OCF_NOT_RUNNING ]; then
	stop_wait=0
	break
	elif [ "$rc" -ne $OCF_SUCCESS ]; then
	ocf_log info "rabbitmq-server stop failed: $rc"
	exit $OCF_ERR_GENERIC
	fi
	sleep 1
	done

	remove_pid
	return $OCF_SUCCESS
	}

	# validate-all action: make sure the server and control binaries are
	# available (via check_binary), then return $OCF_SUCCESS.
	rmq_validate() {
		# Required executables.
		check_binary $RMQ_SERVER
		check_binary $RMQ_CTL

		# Checks that are not implemented yet:
		#
		# This resource only makes sense as a clone right now. at some point
		# we may want to verify the following.
		#TODO verify cloned
		#TODO verify ordered=true
		#
		# Given that this resource does the cluster join explicitly,
		# having a cluster_nodes list in the static config file will
		# likely conflict with this agent.
		#TODO verify no cluster list in rabbitmq conf
		#cat /etc/rabbitmq/rabbitmq.config | grep "cluster_nodes"

		return $OCF_SUCCESS
	}

	# Dispatch on the requested OCF action. meta-data, usage/help and
	# unknown actions exit directly; every other action falls through to the
	# common exit below, which logs and returns the handler's status.
	case $__OCF_ACTION in
	meta-data)	meta_data
			exit $OCF_SUCCESS
			;;
	start)		rmq_start;;
	stop)		rmq_stop;;
	monitor)	rmq_monitor;;
	validate-all)	rmq_validate;;
	notify)		rmq_notify;;
	# Bare "|" separates alternatives; an escaped "\|" would only match the
	# literal string "usage|help".
	usage|help)	rmq_usage
			exit $OCF_SUCCESS
			;;
	*)		rmq_usage
			exit $OCF_ERR_UNIMPLEMENTED
			;;
	esac
	rc=$?
	ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
	exit $rc

	diff --git a/heartbeat/redis.in b/heartbeat/redis.in
	index d5eb8f664..ddc62d8a7 100644
	--- a/heartbeat/redis.in
	+++ b/heartbeat/redis.in
	@@ -1,709 +1,711 @@
	#!@BASH_SHELL@
	#
	# Resource agent script for redis server.
	#
	# Copyright (c) 2013 Patrick Hemmer <patrick.hemmer@gmail.com>
	# All Rights Reserved.
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of version 2 of the GNU General Public License as
	# published by the Free Software Foundation.
	#
	# This program is distributed in the hope that it would be useful, but
	# WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
	#
	# Further, this software is distributed without any warranty that it is
	# free of the rightful claim of any third person regarding infringement
	# or the like. Any license provided herein, whether implied or
	# otherwise, applies only to this software file. Patent licenses, if
	# any, provided herein do not apply to combinations of this program with
	# other software, or any other product whatsoever.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program; if not, write the Free Software Foundation,
	# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	#

	#######################################################################
	# Initialization:

	# Load the OCF shell function library. OCF_FUNCTIONS_DIR defaults to
	# ${OCF_ROOT}/lib/heartbeat unless the caller already set it.
	: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
	. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

	# Defaults for resource parameters that are unset or empty.
	: ${OCF_RESKEY_bin:=/usr/bin/redis-server}
	: ${OCF_RESKEY_client_bin:=/usr/bin/redis-cli}
	: ${OCF_RESKEY_user:=redis}
	: ${OCF_RESKEY_rundir:=/var/run/redis}
	: ${OCF_RESKEY_pidfile_name:=redis-server.pid}
	: ${OCF_RESKEY_socket_name:=redis.sock}
	: ${OCF_RESKEY_port:=6379}
	: ${OCF_RESKEY_tunnel_host:=127.0.0.1}

	# Config file default: /etc/redis.conf if that file exists, otherwise
	# /etc/redis/redis.conf.
	if [ -z "$OCF_RESKEY_config" ]; then
	if [ -f "/etc/redis.conf" ]; then
	OCF_RESKEY_config="/etc/redis.conf"
	else
	OCF_RESKEY_config="/etc/redis/redis.conf"
	fi
	fi

	# When 1, redis_monitor additionally verifies the replication link of a
	# slave; redis_demote toggles it.
	CHECK_SLAVE_STATE=0

	# Short names for the effective settings used throughout the agent.
	REDIS_CHECK_DUMP="/usr/bin/redis-check-dump"
	REDIS_SERVER="$OCF_RESKEY_bin"
	REDIS_CLIENT="$OCF_RESKEY_client_bin"
	REDIS_CONFIG="$OCF_RESKEY_config"
	REDIS_USER="$OCF_RESKEY_user"
	REDIS_RUNDIR="$OCF_RESKEY_rundir"
	REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name"
	REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name"
	REDIS_REPLICATION_PORT="$OCF_RESKEY_port"

	# Locate the dump checker: the fixed path first, then redis-check-dump
	# on PATH, then redis-check-rdb on PATH. The variable may end up empty.
	if ! [ -f $REDIS_CHECK_DUMP ]; then
	REDIS_CHECK_DUMP="$(which redis-check-dump 2>/dev/null)"
	fi
	if [ -z "$REDIS_CHECK_DUMP" ]; then
	REDIS_CHECK_DUMP="$(which redis-check-rdb 2>/dev/null)"
	fi

	# Take the dump directory ("dir") and dump file name ("dbfilename") from
	# the redis config when it is readable; fall back to /var/lib/redis/ and
	# dump.rdb when a value is missing or empty.
	if [ -r "$REDIS_CONFIG" ]; then
		# grep must feed awk through a real pipe; awk prints the second
		# whitespace-separated field of each matching line.
		REDIS_DUMP_DIR="$(grep "^\s*dir\s" < "$REDIS_CONFIG" | awk '{ print $2 }' 2>/dev/null)"
		REDIS_DUMP_FILE="$(grep "^\s*dbfilename\s" < "$REDIS_CONFIG" | awk '{ print $2 }' 2>/dev/null)"
	fi
	: ${REDIS_DUMP_DIR:=/var/lib/redis/}
	: ${REDIS_DUMP_FILE:=dump.rdb}

	# Print the OCF resource-agent metadata (XML) for redis on stdout. The
	# here-document is unquoted, so the ${OCF_RESKEY_*} defaults computed at
	# the top of the script are expanded into the "default" attributes.
	redis_meta_data() {
	cat <<EOI
	<?xml version="1.0"?>
	<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
	<resource-agent name="redis">
	<version>1.0</version>

	<longdesc lang="en">
	Resource agent script for redis server.

	This resource fully supports master/slave replication. The master preference of a node is determined by the 'slave_priority' parameter of the redis config.
	When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000.
	</longdesc>

	<shortdesc lang="en">Redis server</shortdesc>

	<parameters>
	<parameter name="bin" unique="0" required="0">
	<longdesc lang="en">
	Path to \`redis-server\`
	</longdesc>
	<shortdesc lang="en">Path to \`redis-server\`</shortdesc>
	<content type="string" default="${OCF_RESKEY_bin}" />
	</parameter>

	<parameter name="client_bin" unique="0" required="0">
	<longdesc lang="en">
	Path to \`redis-cli\`
	</longdesc>
	<shortdesc lang="en">Path to \`redis-cli\`</shortdesc>
	<content type="string" default="${OCF_RESKEY_client_bin}" />
	</parameter>

	<parameter name="config" unique="1" required="0">
	<longdesc lang="en">
	Path to 'redis.conf'
	</longdesc>
	<shortdesc lang="en">Path to 'redis.conf'</shortdesc>
	<content type="string" default="${OCF_RESKEY_config}" />
	</parameter>

	<parameter name="user" unique="0" required="0">
	<longdesc lang="en">
	User to run redis as
	</longdesc>
	<shortdesc lang="en">Redis user</shortdesc>
	<content type="string" default="${OCF_RESKEY_user}" />
	</parameter>

	<parameter name="rundir" unique="1" required="0">
	<longdesc lang="en">
	Directory to store socket and pid file in
	</longdesc>
	<shortdesc lang="en">Redis var/run dir</shortdesc>
	<content type="string" default="${OCF_RESKEY_rundir}"/>
	</parameter>

	<parameter name="pidfile_name" unique="0" required="0">
	<longdesc lang="en">
	The filename to use for the pidfile. Will be created in the rundir.
	Should only be a basename, not a full path.
	</longdesc>
	<shortdesc lang="en">Redis pidfile name</shortdesc>
	<content type="string" default="${OCF_RESKEY_pidfile_name}"/>
	</parameter>

	<parameter name="socket_name" unique="0" required="0">
	<longdesc lang="en">
	The filename to use for the socket. Will be crated in the rundir.
	Should only be a basename, not a full path.
	</longdesc>
	<shortdesc lang="en">Redis socket name</shortdesc>
	<content type="string" default="${OCF_RESKEY_socket_name}"/>
	</parameter>

	<parameter name="port" unique="0" required="0">
	<longdesc lang="en">
	Port for replication client to connect to on remote server
	</longdesc>
	<shortdesc lang="en">Replication port</shortdesc>
	<content type="string" default="${OCF_RESKEY_port}"/>
	</parameter>

	<parameter name="tunnel_host" unique="0" required="0">
	<longdesc lang="en">
	When replication traffic is tunnelled, this is the host to target
	to forward outgoing traffic to the redis master. The resource
	agent configures the redis slave to target the master via
	tunnel_host:tunnel_port.

	Note that in order to enable replication traffic tunneling,
	parameter {tunnel_port_map} must be populated.
	</longdesc>
	<shortdesc lang="en">Tunnel host for replication traffic</shortdesc>
	<content type="string" default="${OCF_RESKEY_tunnel_host}"/>
	</parameter>

	<parameter name="tunnel_port_map" unique="0" required="0">
	<longdesc lang="en">
	A mapping of pacemaker node names to redis port number.

	To be used when redis servers need to tunnel replication traffic.
	On every node where the redis resource is running, the redis server
	listens to a different port. Each redis server can access its peers
	for replication traffic via a tunnel accessible at {tunnel_host}:port.

	The mapping the form of:
	pcmk1-name:port-for-redis1;pcmk2-name:port-for-redis2;pcmk3-name:port-for-redis3

	where the redis resource started on node pcmk1-name would listen on
	port port-for-redis1
	</longdesc>
	<shortdesc lang="en">Mapping of Redis server name to redis port</shortdesc>
	<content type="string" default=""/>
	</parameter>

	<parameter name="wait_last_known_master" unique="0" required="0">
	<longdesc lang="en">
	During redis cluster bootstrap, wait for the last known master to be
	promoted before allowing any other instances in the cluster to be
	promoted. This lessens the risk of data loss when persistent data
	is in use.
	</longdesc>
	<shortdesc lang="en">Wait for last known master</shortdesc>
	<content type="boolean" default="false"/>
	</parameter>
	</parameters>

	<actions>
	<action name="start" timeout="120s" />
	<action name="stop" timeout="120s" />
	<action name="status" timeout="60s" />
	<action name="monitor" depth="0" timeout="60s" interval="45s" />
	<action name="monitor" role="Master" depth="0" timeout="60s" interval="20s" />
	<action name="monitor" role="Slave" depth="0" timeout="60s" interval="60s" />
	<action name="promote" timeout="120s" />
	<action name="demote" timeout="120s" />
	<action name="notify" timeout="90s" />
	<action name="validate-all" timeout="5s" />
	<action name="meta-data" timeout="5s" />
	</actions>
	</resource-agent>
	EOI
	}

	# Resource name without the ":<n>" clone suffix; used to build attribute
	# names. The echo output must reach awk through a real pipe.
	INSTANCE_ATTR_NAME=$(echo "${OCF_RESOURCE_INSTANCE}" | awk -F : '{print $1}')
	# crm_attribute command prefix for the cluster-wide
	# "<instance>_REPL_INFO" property, which stores the name of the master.
	CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s redis_replication"
	# Caches used by set_master/last_known_master and master_is_active.
	MASTER_HOST=""
	MASTER_ACTIVE_CACHED=""
	MASTER_ACTIVE=""

	# Succeeds (status 0) when "crm_mon --as-xml" lists this resource
	# instance with role Master, active, not orphaned and not failed.
	# The result of the first call is cached in MASTER_ACTIVE for the rest
	# of this agent invocation.
	master_is_active()
	{
		if [ -z "$MASTER_ACTIVE_CACHED" ]; then
			# determine if a master instance is already up and is healthy.
			# ".*" between the attributes lets the pattern span whatever
			# other attributes crm_mon prints in between.
			crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
			MASTER_ACTIVE=$?
			MASTER_ACTIVE_CACHED="true"
		fi
		return $MASTER_ACTIVE
	}

	# Remember $1 as the current master, both in MASTER_HOST and in the
	# cluster-wide replication-info attribute. Returns the exit status of
	# the crm_attribute call.
	set_master()
	{
		local new_master="$1"

		MASTER_HOST="$new_master"
		${CRM_ATTR_REPL_INFO} -v "$new_master" -q
	}

	# Print the last known master host. MASTER_HOST is used when already
	# set; otherwise it is filled from the replication-info attribute
	# (query errors are silenced).
	last_known_master()
	{
		[ -n "$MASTER_HOST" ] || MASTER_HOST="$(${CRM_ATTR_REPL_INFO} --query -q 2>/dev/null)"
		echo "$MASTER_HOST"
	}

	# Run crm_master for the node returned by ocf_attribute_target with a
	# "reboot" lifetime, passing all arguments through unchanged.
	crm_master_reboot() {
		"${HA_SBIN_DIR}/crm_master" -N "$(ocf_attribute_target)" -l reboot "$@"
	}

	# Print the master score for this node.
	#
	# Arguments: $1 - preferred score (may be empty)
	#            $2 - number of connected clients
	# With wait_last_known_master enabled both arguments are ignored. When
	# no preferred score remains, the base is 1000 if this node is the last
	# known master and 1 otherwise. The client count is added to the base.
	calculate_score()
	{
		perf_score="$1"
		connected_clients="$2"

		# only set perferred score by slave_priority if
		# we are not waiting for the last known master. Otherwise
		# we want the agent to have complete control over the scoring.
		if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then
			perf_score=""
			connected_clients="0"
		fi

		if [[ -n "$perf_score" ]]; then
			: # caller-supplied base score is kept
		elif [[ "$(last_known_master)" == "$NODENAME" ]]; then
			perf_score=1000
		else
			perf_score=1
		fi

		perf_score=$(( perf_score + connected_clients ))
		echo "$perf_score"
	}

	# Publish master score $1 via crm_master_reboot.
	#
	# With wait_last_known_master enabled and no active master, the score is
	# withheld (only a log line is written) as long as a different node is
	# recorded as the last known master.
	set_score()
	{
		local score="$1"
		local last_master

		if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then
			if ! master_is_active; then
				last_master="$(last_known_master)"
				if [[ -n "$last_master" && "$last_master" != "$NODENAME" ]]; then
					ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted"
					return
				fi
			fi
		fi

		ocf_log debug "monitor: Setting master score to '$score'"
		crm_master_reboot -v "$score"
	}

	# Run redis-cli against the local unix socket with the given arguments
	# and print its output with carriage returns removed. The password from
	# $clientpasswd is passed with -a when it is set.
	redis_client() {
		ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $*"
		# The output has to go through sed via a real pipe; otherwise the
		# "\r" line endings stay in and sed becomes a redis-cli argument.
		if [ -n "$clientpasswd" ]; then
			"$REDIS_CLIENT" -s "$REDIS_SOCKET" -a "$clientpasswd" "$@" | sed 's/\r//'
		else
			"$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//'
		fi
	}

	# Cheap liveness check based on the pid file.
	#
	# Returns $OCF_NOT_RUNNING when the pid file is missing or the pid it
	# holds is not among the pids reported by pidof for $REDIS_SERVER,
	# $OCF_SUCCESS otherwise.
	simple_status() {
		local pid

		if ! [ -f "$REDIS_PIDFILE" ]; then
			return $OCF_NOT_RUNNING
		fi

		pid="$(<"$REDIS_PIDFILE")"
		# \< and \> anchor the pid as a whole word so that e.g. 42 does not
		# match 142.
		pidof "$REDIS_SERVER" | grep -q "\<$pid\>" || return $OCF_NOT_RUNNING

		ocf_log debug "monitor: redis-server running under pid $pid"

		return $OCF_SUCCESS
	}

	# Monitor action.
	#
	# Returns:
	#   the simple_status result        when the process check fails
	#   $OCF_ERR_GENERIC                when "redis-cli info" yields no role,
	#                                   or (CHECK_SLAVE_STATE=1 only) the
	#                                   slave link is down or points at a
	#                                   master other than the last known one
	#   $OCF_RUNNING_MASTER             master/slave resource in master role
	#   $OCF_SUCCESS                    otherwise
	# For master/slave resources a missing master score is computed and set.
	redis_monitor() {
		local res
		local master_name
		local last_known_master_port

		simple_status
		res=$?
		if (( res != OCF_SUCCESS )); then
			return $res
		fi

		# Parse "key:value" lines of the info output into an associative
		# array; section headers ("# ...") and lines without ":" are skipped.
		typeset -A info
		while read -r line; do
			[[ "$line" == "#"* ]] && continue
			[[ "$line" != *":"* ]] && continue
			IFS=':' read -r key value <<< "$line"
			info[$key]="$value"
		done < <(redis_client info)
		if [[ -z "${info[role]}" ]]; then
			ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`"
			return $OCF_ERR_GENERIC
		fi

		if ocf_is_ms; then
			# Here we see if a score has already been set.
			# If score isn't set we use the redis setting 'slave_priority'.
			# If that isn't set, we default to 1000 for a master, and 1 for slave.
			# We then add 1 for each connected client
			score="$(crm_master_reboot -G --quiet 2>/dev/null)"
			if [[ -z "$score" ]]; then
				score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}")
				set_score "$score"
			fi

			if [[ "${info[role]}" == "master" ]]; then
				# Only a probe records this node as the master.
				if ocf_is_probe; then
					set_master "$NODENAME"
				fi
				return $OCF_RUNNING_MASTER
			fi

			if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then
				if [[ "${info[master_link_status]}" != "up" ]]; then
					ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})"
					return $OCF_ERR_GENERIC
				fi
				if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then
					if [ -n "${OCF_RESKEY_tunnel_port_map}" ]; then
						# Tunnelled replication: the slave talks to
						# tunnel_host on the port mapped to the master.
						master_name=$(port_to_redis_node ${info[master_port]})
						last_known_master_port=$(redis_node_to_port $(last_known_master))
						if [[ "${info[master_host]}" != "${OCF_RESKEY_tunnel_host}" ]] ||
						   [[ "${info[master_port]}" != "${last_known_master_port}" ]]; then
							ocf_log err "monitor: Slave mode current tunnelled connection to redis server does not match running master. tunnelled='${info[master_host]}:${info[master_port]} (${master_name})', running='$(last_known_master)'"
							return $OCF_ERR_GENERIC
						fi
					else
						ocf_log err "monitor: Slave mode current master does not match running master. current=${info[master_host]}, running=$(last_known_master)"
						return $OCF_ERR_GENERIC
					fi
				fi
			fi
		fi
		return $OCF_SUCCESS
	}

	# Print the port that OCF_RESKEY_tunnel_port_map
	# ("name:port;name:port;...") assigns to node name $1; prints nothing
	# when the node is not listed.
	redis_node_to_port()
	{
		local node=$1
		# One "name port" pair per line; the name is handed to awk as a
		# variable instead of being spliced into the program text, and both
		# sides are forced to string comparison.
		echo "$OCF_RESKEY_tunnel_port_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' -v want="$node" '($1 "") == (want "") {print $2;exit}'
	}

	# Print the node name that OCF_RESKEY_tunnel_port_map
	# ("name:port;name:port;...") assigns to port $1; prints nothing when
	# the port is not listed.
	port_to_redis_node()
	{
		local port=$1
		# One "name port" pair per line; the port is handed to awk as a
		# variable instead of being spliced into the program text, and both
		# sides are forced to string comparison.
		echo "$OCF_RESKEY_tunnel_port_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' -v want="$port" '($2 "") == (want "") {print $1;exit}'
	}

	# Query the permanent "<instance>-tunnel-port" node attribute of node $1
	# and print its value; query errors are silenced.
	get_tunnel_port_from_master()
	{
		local master_name=$1

		crm_attribute --node "$master_name" -l forever \
			--name ${INSTANCE_ATTR_NAME}-tunnel-port --query -q 2>/dev/null
	}

	# Query the permanent "<instance>-tunnel-port" node attribute of the
	# node named by $1 and print its value; query errors are silenced.
	# NOTE(review): the body is identical to get_tunnel_port_from_master,
	# i.e. it maps a node name to a port rather than a port to a master as
	# the function name suggests - confirm the intended behaviour.
	get_master_from_tunnel_port()
	{
	local master_name=$1
	crm_attribute --node "$master_name" -l forever --name ${INSTANCE_ATTR_NAME}-tunnel-port --query -q 2>/dev/null
	}

	# Run the dump checker on the configured dump file and print its
	# combined stdout/stderr. Returns 0 without output when no checker
	# binary is available, otherwise the checker's exit status.
	check_dump_file()
	{
		if ! have_binary "$REDIS_CHECK_DUMP"; then
			return 0
		fi
		# Quoted so that a dump directory or file name containing
		# whitespace is passed as a single path.
		"$REDIS_CHECK_DUMP" "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" 2>&1
	}

	# Start action.
	#
	# Returns $OCF_SUCCESS when redis already runs or comes up (as slave or
	# master); $OCF_ERR_GENERIC when the server process vanishes while
	# waiting; otherwise the failing status of the final redis_monitor call.
	# A zero-byte dump file is deleted before the server is launched.
	redis_start() {
		local size

		redis_monitor
		status=$?

		if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
			ocf_log info "start: redis is already running"
			return $OCF_SUCCESS
		fi

		# The run directory must exist and belong to the redis user.
		[[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR"
		chown -R "$REDIS_USER" "$REDIS_RUNDIR"
		if have_binary "restorecon"; then
			restorecon -Rv "$REDIS_RUNDIR"
		fi


		# check for 0 byte database dump file. This is an unrecoverable start
		# condition that we can avoid by deleting the 0 byte database file.
		if [ -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" ]; then
			size="$(stat --format "%s" "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}")"
			if [ "$?" -eq "0" ] && [ "$size" -eq "0" ]; then
				ocf_log notice "Detected 0 byte ${REDIS_DUMP_FILE}, deleting zero length file to avoid start failure."
				rm -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}"
			fi
		fi

		ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'"
		output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)"

		while true; do
			# wait for redis to start
			# Parse "key:value" info lines; headers and lines without ":"
			# are skipped.
			typeset -A info
			while read -r line; do
				[[ "$line" == "#"* ]] && continue
				[[ "$line" != *":"* ]] && continue
				IFS=':' read -r key value <<< "$line"
				info[$key]="$value"
			done < <(redis_client info)

			# NOTE(review): an unset info[loading] evaluates to 0 in
			# arithmetic context, so this branch is also taken when
			# redis-cli returned nothing - confirm that is intended.
			if (( info[loading] == 0 )); then
				break
			elif (( info[loading] == 1 )); then
				sleep "${info[loading_eta_seconds]}"
			elif pidof "$REDIS_SERVER" >/dev/null; then
				# unknown error, but the process still exists.
				# This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail
				# See https://github.com/antirez/redis/issues/2368
				# It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out
				sleep 1
			else
				check_output="$(check_dump_file)"
				ocf_log err "start: Unknown error waiting for redis to start. redis-check-dump output=${check_output//$'\n'/; }"
				return $OCF_ERR_GENERIC
			fi
		done

		while ! [ -s "$REDIS_PIDFILE" ]; do
			ocf_log debug "start: Waiting for pid file '$REDIS_PIDFILE' to appear"
			sleep 1
		done

		ocf_is_ms && redis_demote # pacemaker expects resources to start in slave mode

		redis_monitor
		status=$?
		if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
			return $OCF_SUCCESS
		fi

		check_output="$(check_dump_file)"
		ocf_log err "start: Unknown error starting redis. redis-server output=${output//$'\n'/; } redis-check-dump output=${check_output//$'\n'/; }"
		return $status
	}

	# Stop action: send SIGTERM to the pid from the pid file and wait, one
	# second at a time, until simple_status reports the server as gone.
	# The master score is deleted before returning $OCF_SUCCESS. When the
	# server is already stopped only the score is deleted.
	redis_stop() {
		redis_monitor
		status=$?

		if (( status == OCF_NOT_RUNNING )); then
			ocf_log info "stop: redis is already stopped"
		else
			pid="$(<"$REDIS_PIDFILE")"
			kill -TERM "$pid"

			# No timeout of its own: loops until the process has gone.
			while simple_status; status=$?; (( status != OCF_NOT_RUNNING )); do
				sleep 1
			done
		fi

		crm_master_reboot -D
		return $OCF_SUCCESS
	}

	# Promote action: turn a running slave into a master with
	# "slaveof no one" and record this node as the master.
	#
	# Returns $OCF_SUCCESS when the node is (or becomes) master,
	# $OCF_ERR_GENERIC when it is not running as a slave beforehand or does
	# not report the master role afterwards.
	redis_promote() {
		redis_monitor
		status=$?

		if (( status != OCF_RUNNING_MASTER && status != OCF_SUCCESS )); then
			ocf_log err "promote: Node is not running as a slave"
			return $OCF_ERR_GENERIC
		fi

		if (( status == OCF_RUNNING_MASTER )); then
			ocf_log info "promote: Already running as master"
		else
			redis_client slaveof no one

			# Verify that the role really changed.
			redis_monitor
			status=$?
			if (( status != OCF_RUNNING_MASTER )); then
				ocf_log err "promote: Unknown error while promoting to master (status=$status)"
				return $OCF_ERR_GENERIC
			fi
		fi

		set_master "$NODENAME"
		return $OCF_SUCCESS
	}

	# Demote action (also used to re-point a slave at a new master).
	#
	# Returns $OCF_SUCCESS when the instance already is, or becomes, a
	# healthy slave and $OCF_NOT_RUNNING when redis is not running. The wait
	# for the replication link has no timeout of its own; the operation
	# timeout configured in Pacemaker bounds it.
	redis_demote() {
		local master_host
		local master_port
		local tunnel_port

		# client kill is only supported in Redis 2.8.12 or greater
		# (the last field of "redis-cli -v" is taken as the version; a
		# result of 2 from ocf_version_cmp enables PAUSE/KILL below)
		version=$(redis_client -v | awk '{print $NF}')
		ocf_version_cmp "$version" "2.8.11"
		client_kill=$?

		CHECK_SLAVE_STATE=1
		redis_monitor
		status=$?

		if (( status == OCF_SUCCESS )); then
			ocf_log info "demote: Already running as slave"
			return $OCF_SUCCESS
		elif (( status == OCF_NOT_RUNNING )); then
			ocf_log err "demote: Failed to demote, redis not running."
			return $OCF_NOT_RUNNING
		fi

		master_host="$(last_known_master)"
		master_port="${REDIS_REPLICATION_PORT}"

		# The elected master has to remain a slave during startup.
		# During this period a placeholder master host is assigned.
		if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then
			CHECK_SLAVE_STATE=0
			master_host="no-such-master"
		elif ! master_is_active; then
			# no master has been promoted yet. we'll be notified when the
			# master starts.
			CHECK_SLAVE_STATE=0
			master_host="no-such-master"
		fi

		if [ -n "${OCF_RESKEY_tunnel_port_map}" ]; then
			# master_host can be the special marker "no-such-master"
			# while a master is being selected. In this case, no
			# tunnel port is returned, but this is not fatal.
			tunnel_port=$(redis_node_to_port "$master_host")
			if [ -n "$tunnel_port" ]; then
				ocf_log info "demote: Setting master to '$master_host' via local tunnel '${OCF_RESKEY_tunnel_host}' on port '$tunnel_port'"
				master_host="${OCF_RESKEY_tunnel_host}"
				master_port="$tunnel_port"
			fi
		else
			ocf_log info "demote: Setting master to '$master_host'"
		fi

		redis_client slaveof "$master_host" "$master_port"

		# Wait forever for the slave to connect to the master and finish the
		# sync. Timeout is controlled by Pacemaker "op start timeout=XX".
		#
		# hint: redis master_link_status will only come "up" when
		# the SYNC with the master has completed.
		# This can take an arbitrary time (data) and should
		# only be parametrized by the start operation timeout
		# by the administrator, not by this resource agent code
		#
		# The loop is left only through the return below, so no statement
		# after it could ever run.
		while true; do
			# Wait infinite if replication is syncing
			# Then start/demote operation timeout determines timeout
			if [ "$client_kill" -eq 2 ]; then
				redis_client CLIENT PAUSE 2000
			fi
			redis_monitor
			status=$?
			if (( status == OCF_SUCCESS )); then
				if [ "$client_kill" -eq 2 ]; then
					redis_client CLIENT KILL type normal
				fi
				return $OCF_SUCCESS
			fi

			sleep 1
		done
	}

	# redis_notify - react to a cluster notification.
	#
	# Builds "<notify_type>-<notify_operation>" from the
	# OCF_RESKEY_CRM_meta_notify_* variables. On post-demote and
	# post-promote, an instance whose monitor reports OCF_SUCCESS re-runs
	# redis_demote; every other notification is ignored.
	#
	# Globals written: status
	# Returns: always OCF_SUCCESS.
	redis_notify() {
		local mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"

		case "$mode" in
		post-demote|post-promote) # change the master
			redis_monitor
			status=$?
			if (( status == OCF_SUCCESS )); then # we're a slave
				# calling demote updates the slave's connection
				# to the newly appointed Master instance.
				redis_demote
			fi
			;;
		esac
		return $OCF_SUCCESS
	}

	# redis_validate - sanity-check the agent's configuration.
	#
	# Globals read: REDIS_SERVER, REDIS_CLIENT, REDIS_CONFIG, REDIS_USER
	# Returns:
	#   OCF_ERR_INSTALLED   server or client binary is missing / not executable
	#   OCF_ERR_CONFIGURED  config file is missing, or the user is unknown
	#   OCF_SUCCESS         every check passed
	redis_validate() {
		if [[ ! -x "$REDIS_SERVER" ]]; then
			ocf_log err "validate: $REDIS_SERVER does not exist or is not executable"
			return $OCF_ERR_INSTALLED
		fi
		if [[ ! -x "$REDIS_CLIENT" ]]; then
			ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable"
			return $OCF_ERR_INSTALLED
		fi
		if [[ ! -f "$REDIS_CONFIG" ]]; then
			ocf_log err "validate: $REDIS_CONFIG does not exist"
			return $OCF_ERR_CONFIGURED
		fi
		# The user must be resolvable through the system's passwd database.
		if ! getent passwd "$REDIS_USER" >/dev/null 2>&1; then
			ocf_log err "validate: $REDIS_USER is not a valid user"
			return $OCF_ERR_CONFIGURED
		fi
		return $OCF_SUCCESS
	}

	# Entry point: resolve runtime settings, then dispatch the requested
	# action and exit with its status.

	# The node name is looked up for every action except meta-data.
	# NOTE(review): presumably because meta-data must work without a
	# running cluster - confirm.
	if [ "$__OCF_ACTION" != "meta-data" ]; then
		NODENAME=$(ocf_attribute_target)
	fi

	# Use the last "requirepass" directive found in the config file, if any,
	# as the client password.
	if [ -r "$REDIS_CONFIG" ]; then
		clientpasswd="$(sed -n -e 's/^\s*requirepass\s*\(.*\)\s*$/\1/p' < "$REDIS_CONFIG" | tail -n 1)"
	fi

	ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}"

	# The first command-line argument selects the action; it falls back to
	# $__OCF_ACTION when no argument is given.
	case "${1:-$__OCF_ACTION}" in
		status|monitor)
			redis_monitor
			;;
		start)
			redis_start
			;;
		stop)
			redis_stop
			;;
		restart)
			redis_stop && redis_start
			;;
		promote)
			redis_promote
			;;
		demote)
			redis_demote
			;;
		notify)
			redis_notify
			;;
		meta-data)
			redis_meta_data
			;;
		validate-all)
			redis_validate
			;;
		*)
			echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}"
			exit $OCF_ERR_UNIMPLEMENTED
			;;
	esac
	# Propagate the action's exit status to the caller.
	status=$?
	ocf_log debug "exit_status=$status"
	exit $status

File Metadata

Mime Type: text/x-diff
Expires: Wed, Feb 26, 11:23 AM (20 h, 21 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 1465295
Default Alt Text: (70 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions