Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/heartbeat/galera b/heartbeat/galera
index 270bdaf1b..4f341ceef 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -1,975 +1,977 @@
#!/bin/sh
#
# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
##
# README.
#
# This agent only supports being configured as a multistate Master
# resource.
#
# Slave vs Master role:
#
# During the 'Slave' role, galera instances are in read-only mode and
# will not attempt to connect to the cluster. This role exists only as
# a means to determine which galera instance is the most up-to-date. The
# most up-to-date node will be used to bootstrap a galera cluster that
# has no current members.
#
# The galera instances will only begin to be promoted to the Master role
# once all the nodes in the 'wsrep_cluster_address' connection address
# have entered read-only mode. At that point the node containing the
# database that is most current will be promoted to Master. Once the first
# Master instance bootstraps the galera cluster, the other nodes will be
# promoted to Master as well.
#
# Example: Create a galera cluster using nodes rhel7-auto1 rhel7-auto2 rhel7-auto3
#
# pcs resource create db galera enable_creation=true \
# wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master
#
# By setting the 'enable_creation' option, the database will be automatically
# generated at startup. The meta attribute 'master-max=3' means that all 3
# nodes listed in the wsrep_cluster_address list will be allowed to connect
# to the galera cluster and perform replication.
#
# NOTE: If you have more nodes in the pacemaker cluster than you wish
# to have in the galera cluster, make sure to use location constraints to prevent
# pacemaker from attempting to place a galera instance on a node that is
# not in the 'wsrep_cluster_address' list.
#
##
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
-. ${OCF_FUNCTIONS_DIR}/mysql-common.sh
-NODENAME=$(ocf_attribute_target)
+if [ "$__OCF_ACTION" != "meta-data" ]; then
+ . ${OCF_FUNCTIONS_DIR}/mysql-common.sh
+ NODENAME=$(ocf_attribute_target)
+fi
# Some galera deployments keep the credentials of the user that is
# allowed to query the server status in a "clustercheck" file.
# Source the first of the two known locations that exists, so that any
# variables it defines become visible to the rest of this agent.
# NOTE(review): the option-setup code further down reads MYSQL_USERNAME,
# MYSQL_PASSWORD, MYSQL_HOST and MYSQL_PORT - presumably those come from
# this file; confirm against the deployed clustercheck file.
if [ -f "/etc/sysconfig/clustercheck" ]; then
. /etc/sysconfig/clustercheck
elif [ -f "/etc/default/clustercheck" ]; then
. /etc/default/clustercheck
fi
#######################################################################
# Print the command-line synopsis and a one-line description of every
# action this agent accepts. The synopsis lists 'status' as well, since
# that action is both described below and handled by the dispatcher.
usage() {
    cat <<UEND
usage: $0 (start|stop|status|validate-all|meta-data|monitor|promote|demote)
$0 manages a galera Database as an HA resource.
The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'status' operation reports whether the database is running
The 'monitor' operation reports whether the database seems to be working
The 'promote' operation makes this mysql server run as master
The 'demote' operation makes this mysql server run as slave
The 'validate-all' operation reports whether the parameters are valid
UEND
}
# Print the OCF resource-agent metadata (parameters and supported actions)
# as XML on stdout. Parameter defaults are interpolated from the
# OCF_RESKEY_*_default variables at the time the function is called.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="galera">
<version>1.0</version>
<longdesc lang="en">
Resource script for managing galera database.
</longdesc>
<shortdesc lang="en">Manages a galera instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL server binary
</longdesc>
<shortdesc lang="en">MySQL server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MySQL config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">MySQL datadir</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MySQL daemon
</longdesc>
<shortdesc lang="en">MySQL user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MySQL daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MySQL group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="log" unique="0" required="0">
<longdesc lang="en">
The logfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_default}"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="enable_creation" unique="0" required="0">
<longdesc lang="en">
If the MySQL database does not exist, it will be created
</longdesc>
<shortdesc lang="en">Create the database if it does not exist</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="wsrep_cluster_address" unique="0" required="1">
<longdesc lang="en">
The galera cluster address. This takes the form of:
gcomm://node,node,node
Only nodes present in this node list will be allowed to start a galera instance.
The galera node names listed in this address are expected to match valid
pacemaker node names. If both names need to differ, you must provide a
mapping in option cluster_host_map.
</longdesc>
<shortdesc lang="en">Galera cluster address</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="cluster_host_map" unique="0" required="0">
<longdesc lang="en">
A mapping of pacemaker node names to galera node names.
To be used when both pacemaker and galera names need to differ,
(e.g. when galera names map to IP from a specific network interface)
This takes the form of:
pcmk1:node.1.galera;pcmk2:node.2.galera;pcmk3:node.3.galera
where the galera resource started on node pcmk1 would be named
node.1.galera in the wsrep_cluster_address
</longdesc>
<shortdesc lang="en">Pacemaker to Galera name mapping</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="check_user" unique="0" required="0">
<longdesc lang="en">
Cluster check user.
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="root" />
</parameter>
<parameter name="check_passwd" unique="0" required="0">
<longdesc lang="en">
Cluster check user password
</longdesc>
<shortdesc lang="en">check password</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="20s" />
<action name="monitor" role="Master" depth="0" timeout="30s" interval="10s" />
<action name="monitor" role="Slave" depth="0" timeout="30s" interval="30s" />
<action name="promote" timeout="300s" />
<action name="demote" timeout="120s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}
# Run "SHOW VARIABLES like '<name>'" through the mysql client using the
# check-user options and print only the last line of the client output.
#   $1 - name (or LIKE pattern) of the server variable
get_option_variable()
{
    local var_name=$1
    local query="SHOW VARIABLES like '$var_name';"

    $MYSQL $MYSQL_OPTIONS_CHECK -e "$query" | tail -1
}
# Run "show status like '<name>'" through the mysql client using the
# check-user options and print only the last line of the client output.
#   $1 - name (or LIKE pattern) of the status variable
get_status_variable()
{
    local status_name=$1
    local query="show status like '$status_name';"

    $MYSQL $MYSQL_OPTIONS_CHECK -e "$query" | tail -1
}
# Flag a node as the bootstrap node by setting the transient (reboot
# lifetime) "<instance>-bootstrap" attribute to "true" on it.
#   $1 - node name, translated through ocf_attribute_target first
set_bootstrap_node()
{
    local target
    local attr="${INSTANCE_ATTR_NAME}-bootstrap"

    target=$(ocf_attribute_target $1)
    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr" -v "true"
}
# Delete the transient "<instance>-bootstrap" attribute from the local
# node ($NODENAME).
clear_bootstrap_node()
{
    local attr="${INSTANCE_ATTR_NAME}-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Print the value of the transient "<instance>-bootstrap" attribute of
# the local node ($NODENAME); crm_attribute diagnostics are discarded.
is_bootstrap()
{
    local attr="${INSTANCE_ATTR_NAME}-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" --quiet 2>/dev/null
}
# Set the transient "<instance>-no-grastate" attribute to "true" on the
# local node ($NODENAME).
set_no_grastate()
{
    local attr="${INSTANCE_ATTR_NAME}-no-grastate"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -v "true"
}
# Delete the transient "<instance>-no-grastate" attribute from the local
# node ($NODENAME).
clear_no_grastate()
{
    local attr="${INSTANCE_ATTR_NAME}-no-grastate"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Query the transient "<instance>-no-grastate" attribute of a node; the
# value is printed and crm_attribute's exit status is returned, with its
# diagnostics discarded.
#   $1 - node name, translated through ocf_attribute_target first
is_no_grastate()
{
    local target
    local attr="${INSTANCE_ATTR_NAME}-no-grastate"

    target=$(ocf_attribute_target $1)
    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr" --quiet 2>/dev/null
}
# Delete the transient "<instance>-last-committed" attribute from the
# local node ($NODENAME).
clear_last_commit()
{
    local attr="${INSTANCE_ATTR_NAME}-last-committed"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Record a value in the transient "<instance>-last-committed" attribute
# of the local node ($NODENAME).
#   $1 - value to store
set_last_commit()
{
    local attr="${INSTANCE_ATTR_NAME}-last-committed"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -v $1
}
# Print the transient "<instance>-last-committed" attribute of a node.
#   $1 - optional node name, translated through ocf_attribute_target;
#        when the translation yields nothing the local node ($NODENAME)
#        is queried instead
# crm_attribute diagnostics are discarded.
get_last_commit()
{
    local target
    local attr="${INSTANCE_ATTR_NAME}-last-committed"

    target=$(ocf_attribute_target $1)
    if [ -z "$target" ]; then
        target=$NODENAME
    fi
    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr" --quiet 2>/dev/null
}
# Delete the transient "<instance>-safe-to-bootstrap" attribute from the
# local node ($NODENAME).
clear_safe_to_bootstrap()
{
    local attr="${INSTANCE_ATTR_NAME}-safe-to-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Record a value in the transient "<instance>-safe-to-bootstrap"
# attribute of the local node ($NODENAME).
#   $1 - value to store
set_safe_to_bootstrap()
{
    local attr="${INSTANCE_ATTR_NAME}-safe-to-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -v $1
}
# Print the transient "<instance>-safe-to-bootstrap" attribute of a node.
#   $1 - optional node name, translated through ocf_attribute_target;
#        when the translation yields nothing the local node ($NODENAME)
#        is queried instead
# crm_attribute diagnostics are discarded.
get_safe_to_bootstrap()
{
    local target
    local attr="${INSTANCE_ATTR_NAME}-safe-to-bootstrap"

    target=$(ocf_attribute_target $1)
    if [ -z "$target" ]; then
        target=$NODENAME
    fi
    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr" --quiet 2>/dev/null
}
# Block until the wsrep_local_state status variable reads "4", polling
# once per second. There is no internal timeout in this function.
wait_for_sync()
{
    local wsrep_state

    wsrep_state=$(get_status_variable "wsrep_local_state")
    ocf_log info "Waiting for database to sync with the cluster. "
    until [ "$wsrep_state" = "4" ]; do
        sleep 1
        wsrep_state=$(get_status_variable "wsrep_local_state")
    done
    ocf_log info "Database synced."
}
# Succeed when wsrep_cluster_status is exactly "Primary".
# On failure, report either that the status could not be read at all
# (empty answer) or which non-primary status was seen, and return 1.
# Note: cluster_status is deliberately left as a global variable, as in
# the rest of this agent.
is_primary()
{
    cluster_status=$(get_status_variable "wsrep_cluster_status")

    case "$cluster_status" in
        Primary)
            return 0
            ;;
        "")
            ocf_exit_reason "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status"
            ;;
        *)
            ocf_log info "Galera instance wsrep_cluster_status=${cluster_status}"
            ;;
    esac
    return 1
}
# Succeed only when the server reports read_only as true AND
# wsrep_cluster_status is "Disconnected"; return 1 otherwise.
# Note: cluster_status is deliberately left as a global variable.
is_readonly()
{
    local read_only_value

    read_only_value=$(get_option_variable "read_only")
    ocf_is_true "$read_only_value" || return 1

    cluster_status=$(get_status_variable "wsrep_cluster_status")
    [ "$cluster_status" = "Disconnected" ] || return 1

    return 0
}
# Succeed when crm_mon reports at least one instance of this resource
# that is in the Master role, active, not orphaned and not failed.
master_exists()
{
    local healthy_master

    # Never look for masters while demoting:
    #  1. we might detect ourselves, although we are about to stop being
    #     a master;
    #  2. we might detect other masters that are shutting down as well.
    # The next recurring monitor gives a far more reliable answer.
    if [ "$__OCF_ACTION" = "demote" ]; then
        return 1
    fi

    healthy_master="resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\""
    crm_mon --as-xml | grep "$healthy_master" > /dev/null 2>&1
}
# Remove the master score via $CRM_MASTER.
#   $1 - optional node name, translated through ocf_attribute_target;
#        when the translation yields nothing no -N option is passed
clear_master_score()
{
    local target

    target=$(ocf_attribute_target $1)
    if [ -n "$target" ]; then
        $CRM_MASTER -D -N $target
    else
        $CRM_MASTER -D
    fi
}
# Set the master score to 100 via $CRM_MASTER.
#   $1 - optional node name, translated through ocf_attribute_target;
#        when the translation yields nothing no -N option is passed
set_master_score()
{
    local target

    target=$(ocf_attribute_target $1)
    if [ -n "$target" ]; then
        $CRM_MASTER -N $target -v 100
    else
        $CRM_MASTER -v 100
    fi
}
# Give a master score to every node listed in wsrep_cluster_address.
# Each galera name is first mapped to its pacemaker name; the loop stops
# at the first name that cannot be mapped.
# Note: the loop variable "node" is not local, as in the rest of the agent.
promote_everyone()
{
    local galera_nodes
    local pcmk_node

    # "gcomm://a, b,c" -> "a b c"
    galera_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')

    for node in $galera_nodes; do
        pcmk_node=$(galera_to_pcmk_name $node)
        if [ -z "$pcmk_node" ]; then
            ocf_log err "Could not determine pacemaker node from galera name <${node}>."
            return
        fi
        node=$pcmk_node
        set_master_score $node
    done
}
# Succeed when $1 >= $2.
# The comparison is delegated to awk because some of the values compared
# in this script are too large for the shell's own -ge/-gt operators.
greater_than_equal_long()
{
    local verdict

    verdict=$(awk -v n1="$1" -v n2="$2" 'BEGIN { if (n1>=n2) printf ("true"); else printf ("false"); }')
    [ "$verdict" = "true" ]
}
# Translate a galera node name into the matching pacemaker node name.
#   $1 - galera node name
# Without a cluster_host_map the name is printed unchanged. Otherwise the
# map ("pcmk:galera;pcmk:galera;...") is searched for the first entry
# whose galera side equals $1 and its pacemaker side is printed; nothing
# is printed when no entry matches.
galera_to_pcmk_name()
{
    local galera_name=$1

    if [ -n "$OCF_RESKEY_cluster_host_map" ]; then
        echo "$OCF_RESKEY_cluster_host_map" \
            | tr -d ' ' \
            | tr ';' '\n' \
            | sed 's/:/ /' \
            | awk -F' ' '$2=="'"$galera_name"'" {print $1;exit}'
        return
    fi
    echo $galera_name
}
# Translate a pacemaker node name into the matching galera node name.
#   $1 - pacemaker node name
# Without a cluster_host_map the name is printed unchanged. Otherwise the
# map ("pcmk:galera;pcmk:galera;...") is searched for the first entry
# whose pacemaker side equals $1 and its galera side is printed; nothing
# is printed when no entry matches.
pcmk_to_galera_name()
{
    local pcmk_name=$1

    if [ -n "$OCF_RESKEY_cluster_host_map" ]; then
        echo "$OCF_RESKEY_cluster_host_map" \
            | tr -d ' ' \
            | tr ';' '\n' \
            | sed 's/:/ /' \
            | awk -F' ' '$1=="'"$pcmk_name"'" {print $2;exit}'
        return
    fi
    echo $pcmk_name
}
# Elect the node that will bootstrap the galera cluster and, once every
# node has reported its state, give it a master score and flag it as the
# bootstrap node.
#
# Election rules, as implemented below:
#  - a node whose safe-to-bootstrap attribute is "1" is picked at once;
#  - otherwise the node with the highest last-committed value wins; the
#    comparison is ">=", so among equal values the node visited last wins;
#  - nodes flagged no-grastate are visited first, which lets any other
#    node with an equal commit replace them;
#  - if any visited node has not published a last-committed value yet,
#    nothing is promoted and the function simply returns.
# The loop variable "node" is not declared local.
detect_first_master()
{
local best_commit=0
local last_commit=0
local missing_nodes=0
local nodes=""
local nodes_recovered=""
local all_nodes
local best_node_gcomm
local best_node
local safe_to_bootstrap
# "gcomm://a, b,c" -> "a b c"
all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
# initial candidate: the last entry of the address list
best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/')
best_node=$(galera_to_pcmk_name $best_node_gcomm)
if [ -z "$best_node" ]; then
ocf_log err "Could not determine initial best node from galera name <${best_node_gcomm}>."
return
fi
# avoid selecting a recovered node as bootstrap if possible:
# split the pacemaker node names into "recovered" (no-grastate set)
# and regular ones
for node in $all_nodes; do
local pcmk_node=$(galera_to_pcmk_name $node)
if [ -z "$pcmk_node" ]; then
ocf_log err "Could not determine pacemaker node from galera name <${node}>."
return
else
node=$pcmk_node
fi
if is_no_grastate $node; then
nodes_recovered="$nodes_recovered $node"
else
nodes="$nodes $node"
fi
done
# recovered nodes go first so that regular nodes can override them
for node in $nodes_recovered $nodes; do
safe_to_bootstrap=$(get_safe_to_bootstrap $node)
if [ "$safe_to_bootstrap" = "1" ]; then
# Galera marked the node as safe to bootstrap during shutdown. Let's just
# pick it as our bootstrap node.
ocf_log info "Node <${node}> is marked as safe to bootstrap."
best_node=$node
# We don't need to wait for the other nodes to report state in this case
missing_nodes=0
break
fi
last_commit=$(get_last_commit $node)
if [ -z "$last_commit" ]; then
ocf_log info "Waiting on node <${node}> to report database status before Master instances can start."
missing_nodes=1
continue
fi
# this means -1, or that no commit has occurred yet.
if [ "$last_commit" = "18446744073709551615" ]; then
last_commit="0"
fi
greater_than_equal_long "$last_commit" "$best_commit"
if [ $? -eq 0 ]; then
best_node=$(ocf_attribute_target $node)
best_commit=$last_commit
fi
done
# at least one node has not reported yet: do not promote anything now
if [ $missing_nodes -eq 1 ]; then
return
fi
ocf_log info "Promoting $best_node to be our bootstrap node"
set_master_score $best_node
set_bootstrap_node $best_node
}
# Publish the safe_to_bootstrap flag found in <datadir>/grastate.dat as a
# node attribute. The attribute is set when the flag reads "0" or "1" and
# cleared in every other case (file missing, flag absent, other value).
detect_safe_to_bootstrap()
{
    local flag=""
    local grastate=${OCF_RESKEY_datadir}/grastate.dat

    if [ -f $grastate ]; then
        ocf_log info "attempting to read safe_to_bootstrap flag from ${OCF_RESKEY_datadir}/grastate.dat"
        flag=$(sed -n 's/^safe_to_bootstrap:\s*\(.*\)$/\1/p' < $grastate)
    fi

    case "$flag" in
        0|1)
            set_safe_to_bootstrap $flag
            ;;
        *)
            clear_safe_to_bootstrap
            ;;
    esac
}
# Determine the last write sequence number known to the local database
# and publish it as the last-committed node attribute.
#
# The value is read from <datadir>/grastate.dat. When it is missing or
# "-1", the server binary is run with --wsrep-recover and the recovered
# position is parsed from its error log. If that fails because prepared
# transactions were left behind, recovery is retried with
# --tc-heuristic-recover=rollback.
#
# Returns $OCF_SUCCESS when a value was found and stored; otherwise the
# attribute is cleared and $OCF_ERR_GENERIC is returned.
detect_last_commit()
{
    local last_commit
    local tmp
    local recovery_file
    local grastate="${OCF_RESKEY_datadir}/grastate.dat"
    local gvwstate="${OCF_RESKEY_datadir}/gvwstate.dat"
    # expanded unquoted on purpose: one word per option
    local recover_args="--defaults-file=$OCF_RESKEY_config \
        --pid-file=$OCF_RESKEY_pid \
        --socket=$OCF_RESKEY_socket \
        --datadir=$OCF_RESKEY_datadir \
        --user=$OCF_RESKEY_user"
    local recovery_file_regex='s/.*WSREP\:.*position\s*recovery.*--log_error='\''\([^'\'']*\)'\''.*/\1/p'
    local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p'

    # codership/galera#354
    # Some ungraceful shutdowns can leave an empty gvwstate.dat on
    # disk. This will prevent galera to join the cluster if it is
    # configured to attempt PC recovery. Removing that file makes the
    # node fall back to the normal, unoptimized joining process.
    if [ -f "$gvwstate" ] && [ ! -s "$gvwstate" ]; then
        ocf_log warn "empty ${OCF_RESKEY_datadir}/gvwstate.dat detected, removing it to prevent PC recovery failure at next restart"
        rm -f "$gvwstate"
    fi

    ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
    last_commit="$(cat "$grastate" | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
    if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
        tmp=$(mktemp)
        chown "$OCF_RESKEY_user:$OCF_RESKEY_group" "$tmp"

        # if we pass here because grastate.dat doesn't exist,
        # try not to bootstrap from this node if possible
        if [ ! -f "$grastate" ]; then
            set_no_grastate
        fi

        ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
        ${OCF_RESKEY_binary} $recover_args --wsrep-recover --log-error="$tmp" 2>/dev/null

        # the sed programs are quoted so that the shell never tries to
        # glob-expand the '*' and '[...]' they contain
        last_commit="$(cat "$tmp" | sed -n "$recovered_position_regex" | tail -1)"
        if [ -z "$last_commit" ]; then
            # Galera uses InnoDB's 2pc transactions internally. If
            # server was stopped in the middle of a replication, the
            # recovery may find a "prepared" XA transaction in the
            # redo log, and mysql won't recover automatically
            recovery_file="$(cat "$tmp" | sed -n "$recovery_file_regex")"
            # an empty name must not pass the existence test: "[ -e ]"
            # alone is true, and "cat" without operand would read stdin
            if [ -n "$recovery_file" ] && [ -e "$recovery_file" ]; then
                cat "$recovery_file" | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null
                if [ $? -eq 0 ]; then
                    # we can only rollback the transaction, but that's OK
                    # since the DB will get resynchronized anyway
                    ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover"
                    ${OCF_RESKEY_binary} $recover_args --wsrep-recover \
                        --tc-heuristic-recover=rollback --log-error="$tmp" 2>/dev/null

                    last_commit="$(cat "$tmp" | sed -n "$recovered_position_regex" | tail -1)"
                    if [ ! -z "$last_commit" ]; then
                        ocf_log warn "State recovered. force SST at next restart for full resynchronization"
                        rm -f "$grastate"
                        # try not to bootstrap from this node if possible
                        set_no_grastate
                    fi
                fi
            fi
        fi
        rm -f "$tmp"
    fi

    if [ ! -z "$last_commit" ]; then
        ocf_log info "Last commit version found: $last_commit"
        set_last_commit $last_commit
        return $OCF_SUCCESS
    else
        ocf_exit_reason "Unable to detect last known write sequence number"
        clear_last_commit
        return $OCF_ERR_GENERIC
    fi
}
# For galera, promote is really start
# Promote action. For galera, promote is really start: the server is
# started either joining the existing cluster (when a healthy master
# already exists) or bootstrapping a new one (when this node carries the
# bootstrap attribute).
#
# Returns $OCF_SUCCESS once the instance is a working master,
# $OCF_ERR_GENERIC when promotion is attempted before a bootstrap node was
# elected or when the started instance is read-only / not Primary, or the
# failing status of the start / monitor step.
galera_promote()
{
    local rc
    local extra_opts
    local bootstrap

    master_exists
    if [ $? -eq 0 ]; then
        # join without bootstrapping
        extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}"
    else
        bootstrap=$(is_bootstrap)

        if ocf_is_true $bootstrap; then
            # The best node for bootstrapping wasn't cleanly shutdown. Allow
            # bootstrapping anyways
            if [ "$(get_safe_to_bootstrap)" = "0" ]; then
                # "-i -e", not "-ie": the latter is read as "-i" with backup
                # suffix "e" and leaves a stray grastate.date file behind
                sed -i -e 's/^\(safe_to_bootstrap:\) 0/\1 1/' "${OCF_RESKEY_datadir}/grastate.dat"
            fi
            ocf_log info "Node <${NODENAME}> is bootstrapping the cluster"
            extra_opts="--wsrep-cluster-address=gcomm://"
        else
            ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected."
            clear_last_commit
            return $OCF_ERR_GENERIC
        fi
    fi

    # already running as master: nothing to start
    galera_monitor
    if [ $? -eq $OCF_RUNNING_MASTER ]; then
        if ocf_is_true $bootstrap; then
            promote_everyone
            clear_bootstrap_node
            ocf_log info "boostrap node already up, promoting the rest of the galera instances."
        fi
        clear_safe_to_bootstrap
        clear_last_commit
        return $OCF_SUCCESS
    fi

    # last commit/safe_to_bootstrap flag are no longer relevant once promoted
    clear_last_commit
    clear_safe_to_bootstrap

    mysql_common_prepare_dirs
    mysql_common_start "$extra_opts"
    rc=$?
    if [ $rc != $OCF_SUCCESS ]; then
        return $rc
    fi

    galera_monitor
    rc=$?
    if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
        ocf_exit_reason "Failed initial monitor action"
        return $rc
    fi

    is_readonly
    if [ $? -eq 0 ]; then
        ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration."
        return $OCF_ERR_GENERIC
    fi

    is_primary
    if [ $? -ne 0 ]; then
        ocf_exit_reason "Failure. Master instance started, but is not in Primary mode."
        return $OCF_ERR_GENERIC
    fi

    if ocf_is_true $bootstrap; then
        promote_everyone
        clear_bootstrap_node
        # clear attribute no-grastate. if last shutdown was
        # not clean, we cannot be extra-cautious by requesting a SST
        # since this is the bootstrap node
        clear_no_grastate
        ocf_log info "Bootstrap complete, promoting the rest of the galera instances."
    else
        # if this is not the bootstrap node, make sure this instance
        # syncs with the rest of the cluster before promotion returns.
        wait_for_sync
        # sync is done, clear info about last startup
        clear_no_grastate
    fi

    ocf_log info "Galera started"
    return $OCF_SUCCESS
}
# Demote action: stop the running master instance, drop every transient
# attribute describing it, then record the state needed for the next
# promotion (safe_to_bootstrap flag and last commit).
#
# Returns the stop status when stopping fails with anything other than
# "not running"; otherwise the status of detect_last_commit.
galera_demote()
{
    local rc

    mysql_common_stop
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
        ocf_exit_reason "Failed to stop Master galera instance during demotion to Slave"
        return $rc
    fi

    # if this node was previously a bootstrap node, that is no longer the case.
    clear_bootstrap_node
    clear_last_commit
    clear_no_grastate
    clear_safe_to_bootstrap

    # Clear master score here rather than letting pacemaker do so once
    # demote finishes. This way a promote cannot take place right
    # after this demote even if pacemaker is requested to do so. It
    # will first have to run a start/monitor op, to reprobe the state
    # of the other galera nodes and act accordingly.
    clear_master_score

    # record last commit for next promotion
    detect_safe_to_bootstrap
    detect_last_commit
    rc=$?
    return $rc
}
# Start action. The database itself is not started here: the node only
# publishes its state (safe_to_bootstrap flag, last commit) and then
# either asks to join the running masters or takes part in the election
# of the bootstrap node.
#
# Returns $OCF_ERR_CONFIGURED when the local node has no galera name or is
# not part of wsrep_cluster_address, $OCF_ERR_GENERIC when a master is
# already running outside of the cluster's control, the status of
# detect_last_commit when that fails, $OCF_SUCCESS otherwise.
galera_start()
{
    local rc
    local galera_node

    galera_node=$(pcmk_to_galera_name $NODENAME)
    if [ -z "$galera_node" ]; then
        ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>."
        return $OCF_ERR_CONFIGURED
    fi

    if ! echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node; then
        ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}> to start this galera instance"
        return $OCF_ERR_CONFIGURED
    fi

    galera_monitor
    rc=$?
    if [ $rc -eq $OCF_RUNNING_MASTER ]; then
        ocf_exit_reason "master galera instance started outside of the cluster's control"
        return $OCF_ERR_GENERIC
    fi

    mysql_common_prepare_dirs
    detect_safe_to_bootstrap

    detect_last_commit
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    if master_exists; then
        ocf_log info "Master instances are already up, setting master score so this instance will join galera cluster."
        set_master_score $NODENAME
    else
        clear_master_score
        detect_first_master
    fi

    return $OCF_SUCCESS
}
# Monitor action.
#
# When mysqld is not running, the instance still counts as started (in
# slave mode) as long as the local node has published a last-committed
# value; in that case the node also either takes part in the election of
# the bootstrap node or asks to join the existing masters.
# When mysqld is running, it must be a member of wsrep_cluster_address
# and be in Primary state.
#
# Returns $OCF_NOT_RUNNING, $OCF_SUCCESS (slave), $OCF_RUNNING_MASTER,
# $OCF_ERR_CONFIGURED, $OCF_ERR_GENERIC, or any other status reported by
# mysql_common_status.
galera_monitor()
{
    local rc
    local galera_node
    local last_commit
    local status_loglevel="err"

    # Set loglevel to info during probe
    if ocf_is_probe; then
        status_loglevel="info"
    fi

    mysql_common_status $status_loglevel
    rc=$?

    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        # Always query the local node. No node argument is passed, so a
        # "node" loop variable leaked by another function can never
        # redirect this lookup to a different node.
        last_commit=$(get_last_commit)
        if [ -n "$last_commit" ]; then
            # if last commit is set, this instance is considered started in slave mode
            rc=$OCF_SUCCESS
            master_exists
            if [ $? -ne 0 ]; then
                detect_first_master
            else
                # a master instance exists and is healthy, promote this
                # local read only instance
                # so it can join the master galera cluster.
                set_master_score
            fi
        fi
        return $rc
    elif [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # if we make it here, mysql is running. Check cluster status now.
    galera_node=$(pcmk_to_galera_name $NODENAME)
    if [ -z "$galera_node" ]; then
        ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>."
        return $OCF_ERR_CONFIGURED
    fi

    echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node
    if [ $? -ne 0 ]; then
        ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
        return $OCF_ERR_GENERIC
    fi

    is_primary
    if [ $? -eq 0 ]; then
        if ocf_is_probe; then
            # restore master score during probe
            # if we detect this is a master instance
            set_master_score
        fi
        rc=$OCF_RUNNING_MASTER
    else
        ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
        rc=$OCF_ERR_GENERIC
    fi

    return $rc
}
# Stop action: stop mysqld, then drop every transient attribute and the
# master score of the local node.
# Returns the status of mysql_common_stop, so that a failed stop is
# reported to the caller even though the cleanup itself succeeds.
galera_stop()
{
    local rc

    # make sure the process is stopped
    mysql_common_stop
    rc=$?

    clear_safe_to_bootstrap
    clear_last_commit
    clear_master_score
    clear_bootstrap_node
    clear_no_grastate

    return $rc
}
# Validate the resource configuration: the resource must be a multistate
# (Master/Slave) resource and wsrep_cluster_address must be set. The
# remaining checks are delegated to mysql_common_validate, whose status
# is returned.
galera_validate()
{
    ocf_is_ms || {
        ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource."
        return $OCF_ERR_CONFIGURED
    }

    [ -n "$OCF_RESKEY_wsrep_cluster_address" ] || {
        ocf_exit_reason "Galera must be configured with a wsrep_cluster_address value."
        return $OCF_ERR_CONFIGURED
    }

    mysql_common_validate
}
# Actions that only print text are served before any validation is run.
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage|help) usage
exit $OCF_SUCCESS;;
esac
# Every other action requires a valid configuration.
galera_validate
rc=$?
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
# With an invalid configuration, stop/monitor/status answer as if the
# resource were stopped; any other action fails with the validation
# status.
case "$1" in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
*) exit $rc;;
esac
fi
# Build MYSQL_OPTIONS_CHECK, the client options used for every status
# query. Resource parameters take precedence; values missing there fall
# back to the MYSQL_* variables, which are automatically sourced from the
# clustercheck file (/etc/sysconfig/clustercheck or
# /etc/default/clustercheck) if available.
if [ -z "${OCF_RESKEY_check_passwd}" ]; then
    OCF_RESKEY_check_passwd=${MYSQL_PASSWORD}
fi
if [ -z "${OCF_RESKEY_check_user}" ]; then
    OCF_RESKEY_check_user=${MYSQL_USERNAME}
fi
# ":=" rather than "=": the variable is always set at this point (possibly
# to the empty string), so the documented default "root" has to apply to
# the empty case as well.
: ${OCF_RESKEY_check_user:="root"}

MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
if [ -n "${OCF_RESKEY_check_passwd}" ]; then
    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
fi
if [ -n "${MYSQL_HOST}" ]; then
    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}"
fi
if [ -n "${MYSQL_PORT}" ]; then
    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}"
fi
# What kind of method was invoked?
# Each action is mapped to its handler. Except for validate-all and the
# unknown-action fallback, which exit explicitly, the handler is the last
# command of this dispatch, so its status is what the case statement
# yields.
case "$1" in
start) galera_start;;
stop) galera_stop;;
status) mysql_common_status err;;
monitor) galera_monitor;;
promote) galera_promote;;
demote) galera_demote;;
validate-all) exit $OCF_SUCCESS;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
# vi:sw=4:ts=4:et:
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 9ff49e075..54a16c941 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -1,549 +1,551 @@
#!/bin/sh
#
# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Fixed locations of the rabbitmq executables and working directories
# used by the functions of this agent.
RMQ_SERVER=/usr/sbin/rabbitmq-server
RMQ_CTL=/usr/sbin/rabbitmqctl
# removed recursively by rmq_wipe_data
RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia"
RMQ_PID_DIR="/var/run/rabbitmq"
# handed to the server start-up script as RABBITMQ_PID_FILE
RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid"
# receives the startup_log / startup_err files of the server
RMQ_LOG_DIR="/var/log/rabbitmq"
-NODENAME=$(ocf_attribute_target)
+if [ "$__OCF_ACTION" != "meta-data" ]; then
+ NODENAME=$(ocf_attribute_target)
+fi
# this attr represents the current active local rmq node name.
# when rmq stops or the node is fenced, this attr disappears
RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}"
# this attr represents the last known active local rmq node name
# when rmq stops or the node is fenced, the attr stays forever so
# we can continue to map an offline pcmk node to its rmq node name
# equivalent.
RMQ_CRM_ATTR_COOKIE_LAST_KNOWN="rmq-node-attr-last-known-${OCF_RESOURCE_INSTANCE}"
# Print the OCF resource-agent metadata of rabbitmq-cluster (its single
# set_policy parameter and the supported actions) as XML on stdout.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="rabbitmq-cluster">
<version>1.0</version>
<longdesc lang="en">
Starts cloned rabbitmq cluster instance. NB: note that this RA
cannot be spawned across a mix of pacemaker and pacemaker-remote nodes.
Only on pacemaker *or* pacemaker-remote nodes exclusively.
</longdesc>
<shortdesc lang="en">rabbitmq clustered</shortdesc>
<parameters>
<parameter name="set_policy" unique="1">
<longdesc lang="en">
Policy string to pass to 'rabbitmqctl set_policy' right after bootstrapping the first rabbitmq instance.
</longdesc>
<shortdesc lang="en">rabbitmqctl set_policy args</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="100s" />
<action name="stop" timeout="90s" />
<action name="monitor" timeout="40s" interval="10s" depth="0" />
<action name="meta-data" timeout="10s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
}
#######################################################################
# Print the command-line synopsis of this agent on stdout.
# NOTE(review): the synopsis lists "notify", which the metadata above does
# not advertise - confirm against the action dispatcher.
rmq_usage() {
cat <<END
usage: $0 {start|stop|monitor|notify|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
# Recursively delete the rabbitmq data directory ($RMQ_DATA_DIR).
# All output of rm is discarded; its exit status is returned.
rmq_wipe_data()
{
    {
        rm -r -f $RMQ_DATA_DIR
    } > /dev/null 2>&1
}
# Print the rabbitmq node name of the local instance.
# The name is taken from the "Status of node ..." line printed by
# "$RMQ_CTL status"; when that yields nothing, the RABBITMQ_NODENAME
# entry of /etc/rabbitmq/rabbitmq-env.conf is used instead. The output is
# empty when neither source provides a name.
rmq_local_node()
{
    local rmq_name

    rmq_name=$($RMQ_CTL status 2>&1 | sed -n -e "s/^.*[S|s]tatus of node \(.*\)\s.*$/\1/p" | tr -d "'")
    [ -n "$rmq_name" ] ||
        rmq_name=$(cat /etc/rabbitmq/rabbitmq-env.conf 2>/dev/null | grep "\s*RABBITMQ_NODENAME=" | awk -F= '{print $2}')

    echo "$rmq_name"
}
# Print, space separated on one line, the rabbitmq node names this
# instance may join.
#
# The names are the values of the $RMQ_CRM_ATTR_COOKIE attribute found in
# the CIB for nodes whose crmd is online. When that list is empty (e.g.
# rabbitmq runs on pacemaker-remote nodes, which carry no crmd=online
# marker), the names written by any node are used instead, restricted to
# the nodes crm_mon reports as online and not in standby.
rmq_join_list()
{
    local join_list
    local remote_join_list
    local filter

    join_list=$(cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p")

    # If join_list is empty we want to check if there are any remote nodes
    # where rabbitmq is allowed to run (i.e. nodes without the crmd=online selector)
    if [ -z "$join_list" ]; then
        # Get all the nodes written in the ATTR_COOKIE no matter if
        # they are online or not. This will be one line per node like
        # rabbit@overcloud-rabbit-0
        # rabbit@overcloud-rabbit-1
        # ...
        remote_join_list=$(cibadmin -Q --xpath "//node_state//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p")

        # The following expression prepares a filter like '-e overcloud-rabbit-0 -e overcloud-rabbit-1 -e ...'
        filter=$(crm_mon -r --as-xml | xmllint --format --xpath "//nodes//node[@online='true' and @standby='false']/@name" - | xargs -n1 echo | awk -F= '{print "-e "$2}')

        # export the intersection which gives us only the nodes that
        # a) wrote their name in the cib attrd
        # b) run on nodes where pacemaker_remote is enabled
        # The candidate list has to stay one name per line: grep selects
        # whole lines, so a list collapsed onto a single line would be
        # returned in full as soon as one name matches.
        # $filter is expanded unquoted on purpose (several "-e NAME" words);
        # without any online node there is nothing to select.
        if [ -n "$filter" ]; then
            join_list="$(printf '%s\n' "$remote_join_list" | grep $filter)"
        fi
    fi

    echo $join_list
}
# Publish the local rabbitmq node name as node attributes of $NODENAME.
# Exits the agent with $OCF_ERR_GENERIC when the name cannot be determined.
rmq_write_nodename()
{
    local rmq_name

    rmq_name=$(rmq_local_node)
    if [ -z "$rmq_name" ]; then
        ocf_log err "Failed to determine rabbitmq node name, exiting"
        exit $OCF_ERR_GENERIC
    fi

    # transient copy of the pcmknode to rmq node mapping: this is what lets
    # the join list be retrieved with a simple xpath.
    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$rmq_name"

    # permanent copy of the same mapping: offline nodes can still be mapped
    # to their equivalent rmq node name.
    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --name "$RMQ_CRM_ATTR_COOKIE_LAST_KNOWN" -v "$rmq_name"
}
# Delete the transient rabbitmq node-name attribute ($RMQ_CRM_ATTR_COOKIE)
# of the local node ($NODENAME).
rmq_delete_nodename()
{
    local attr="$RMQ_CRM_ATTR_COOKIE"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Create a directory owned by rabbitmq:rabbitmq with mode 755, unless it
# already exists (an existing directory is left untouched).
#   $1 - directory path
prepare_dir () {
    if [ -d ${1} ] ; then
        return 0
    fi

    mkdir -p ${1}
    chown -R rabbitmq:rabbitmq ${1}
    chmod 755 ${1}
}
# Delete the rabbitmq pid file ($RMQ_PID_FILE); all output of rm is
# discarded.
remove_pid () {
    {
        rm -f ${RMQ_PID_FILE}
    } > /dev/null 2>&1
}
# Monitor action: map the exit status of "$RMQ_CTL cluster_status" to an
# OCF status and keep the node-name attribute in line with it.
#   0                   -> $OCF_SUCCESS, node name (re)published
#   2,68,69,70,75,78    -> $OCF_NOT_RUNNING, node name removed
#   anything else       -> $OCF_ERR_GENERIC, node name removed
rmq_monitor() {
    local status

    $RMQ_CTL cluster_status > /dev/null 2>&1
    status=$?

    if [ "$status" = "0" ]; then
        ocf_log debug "RabbitMQ server is running normally"
        rmq_write_nodename
        return $OCF_SUCCESS
    fi

    case "$status" in
        2|68|69|70|75|78)
            ocf_log info "RabbitMQ server is not running"
            rmq_delete_nodename
            return $OCF_NOT_RUNNING
            ;;
    esac

    ocf_log err "Unexpected return code from '$RMQ_CTL cluster_status' exit code: $status"
    rmq_delete_nodename
    return $OCF_ERR_GENERIC
}
# Launch the rabbitmq server in the background and wait until
# "$RMQ_CTL wait" reports on its pid file.
# Returns $OCF_ERR_GENERIC when the wait fails, otherwise the status of
# rmq_monitor.
rmq_init_and_wait()
{
    local rc

    prepare_dir $RMQ_PID_DIR
    prepare_dir $RMQ_LOG_DIR
    remove_pid

    # the server startup script uses this environment variable
    export RABBITMQ_PID_FILE="$RMQ_PID_FILE"
    setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" &

    ocf_log info "Waiting for server to start"
    $RMQ_CTL wait $RMQ_PID_FILE
    rc=$?
    [ $rc -eq $OCF_SUCCESS ] || {
        remove_pid
        ocf_log info "rabbitmq-server start failed: $rc"
        return $OCF_ERR_GENERIC
    }

    rmq_monitor
}
rmq_set_policy()
{
    # Forward all arguments to "$RMQ_CTL set_policy", discarding its
    # output. The exit status of the command is the function's status.
    $RMQ_CTL set_policy "$@" >/dev/null 2>&1
}
rmq_start_first()
{
    # Bootstrap a brand new cluster on this node: wipe local data, start
    # the server and, when OCF_RESKEY_set_policy is set, apply the policy.
    # Returns OCF_SUCCESS, or OCF_ERR_GENERIC if the start or the policy
    # step fails.
    ocf_log info "Bootstrapping rabbitmq cluster"
    rmq_wipe_data

    if ! rmq_init_and_wait; then
        ocf_log info "failed to bootstrap cluster. Check SELINUX policy"
        return $OCF_ERR_GENERIC
    fi

    ocf_log info "cluster bootstrapped"

    # Nothing more to do without a configured policy.
    [ -n "$OCF_RESKEY_set_policy" ] || return $OCF_SUCCESS

    # do not quote set_policy, we are passing in arguments
    if rmq_set_policy $OCF_RESKEY_set_policy > /dev/null 2>&1; then
        ocf_log info "Policy set: $OCF_RESKEY_set_policy"
        return $OCF_SUCCESS
    fi

    ocf_log err "Failed to set policy: $OCF_RESKEY_set_policy"
    return $OCF_ERR_GENERIC
}
rmq_is_clustered()
{
    # Succeeds when the output of rabbit_mnesia:is_clustered() contains
    # the text "true".
    $RMQ_CTL eval 'rabbit_mnesia:is_clustered().' | grep -q true
    return $?
}
rmq_join_existing()
{
    # Start the local server and make it a member of the cluster formed
    # by the nodes in $1 (whitespace separated rmq node names).
    # Returns OCF_SUCCESS once joined, OCF_ERR_GENERIC otherwise.
    local join_list="$1"
    local rc=$OCF_ERR_GENERIC

    ocf_log info "Joining existing cluster with [ $(echo $join_list | tr '\n' ' ') ] nodes."

    if ! rmq_init_and_wait; then
        return $OCF_ERR_GENERIC
    fi

    # The server may already have re-joined on its own.
    if rmq_is_clustered; then
        ocf_log info "Successfully re-joined existing rabbitmq cluster automatically"
        return $OCF_SUCCESS
    fi

    # unconditionally join the cluster
    $RMQ_CTL stop_app > /dev/null 2>&1

    # Try each candidate in turn; the first successful join ends the loop,
    # whether or not the following start_app works.
    for node in $(echo "$join_list"); do
        ocf_log info "Attempting to join cluster with target node $node"
        if ! $RMQ_CTL join_cluster $node; then
            continue
        fi

        ocf_log info "Joined cluster by connecting to node $node, starting app"
        $RMQ_CTL start_app
        rc=$?
        [ $rc -eq 0 ] || ocf_log err "'$RMQ_CTL start_app' failed"
        break
    done

    # rc still holds OCF_ERR_GENERIC when no node accepted the join.
    if [ "$rc" -ne 0 ]; then
        ocf_log info "Join process incomplete, shutting down."
        return $OCF_ERR_GENERIC
    fi

    ocf_log info "Successfully joined existing rabbitmq cluster"
    return $OCF_SUCCESS
}
rmq_forget_cluster_node_remotely() {
    # Ask running cluster members, one after another, to forget a node.
    #   $1 - whitespace separated list of running rmq nodes to ask
    #   $2 - rmq node name that should be forgotten
    # Stops at the first member that succeeds; failures are only logged.
    local running_cluster_nodes="$1"
    local node_to_forget="$2"

    ocf_log info "Forgetting $node_to_forget via nodes [ $(echo $running_cluster_nodes | tr '\n' ' ') ]."

    for running_cluster_node in $running_cluster_nodes; do
        if $RMQ_CTL -n $running_cluster_node forget_cluster_node $node_to_forget; then
            ocf_log info "Succeeded forgetting $node_to_forget via $running_cluster_node."
            return
        fi
        ocf_log err "Failed to forget node $node_to_forget via $running_cluster_node."
    done
}
rmq_notify() {
    # Notification handler. Only "post-stop" notifications are acted on,
    # and only while the local broker is healthy. Always returns
    # OCF_SUCCESS.
    node_list="${OCF_RESKEY_CRM_meta_notify_stop_uname}"
    mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"

    # When notifications are on, this agent is going to "forget" nodes once they
    # leave the cluster. This is thought to resolve some issues where rabbitmq
    # blocks trying to sync with an offline node after a fencing action occurs.
    [ "${mode}" = "post-stop" ] || return $OCF_SUCCESS

    # only run forget when we are for sure active
    rmq_monitor
    [ $? -eq $OCF_SUCCESS ] || return $OCF_SUCCESS

    # forget each stopped rmq instance in the provided pcmk node in the list.
    for node in $(echo "$node_list"); do
        # Translate the pacemaker node into the rmq node name it last
        # published as a permanent attribute.
        local rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $(ocf_attribute_target $node) -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)"

        if [ -z "$rmq_node" ]; then
            ocf_log warn "Unable to map pcmk node $node to a known rmq node."
            continue
        fi

        ocf_log notice "Forgetting stopped node $rmq_node"
        $RMQ_CTL forget_cluster_node $rmq_node || ocf_log warn "Unable to forget offline node $rmq_node."
    done

    return $OCF_SUCCESS
}
# Start action.
#
# Returns OCF_SUCCESS right away when rmq_monitor already reports success.
# With an empty join list the node bootstraps a new cluster through
# rmq_start_first and that result is returned. Otherwise the local state is
# stopped and wiped, running members are asked to forget this node, and the
# node joins through rmq_join_existing (OCF_ERR_GENERIC on failure). After a
# successful join, an Erlang snippet restores users, permissions and policies
# from files in the parent directory of RMQ_DATA_DIR. The exit status of that
# restore step is not checked.
rmq_start() {
local join_list=""
local rc
rmq_monitor
if [ $? -eq $OCF_SUCCESS ]; then
return $OCF_SUCCESS
fi
join_list=$(rmq_join_list)
# No join list means no active instances are up. This instance
# is the first, so it needs to bootstrap the rest
if [ -z "$join_list" ]; then
rmq_start_first
rc=$?
return $rc
fi
# Try to join existing cluster
ocf_log info "wiping data directory before joining"
# Read the rmq node name this pacemaker node last published (permanent
# attribute) before local state is wiped.
local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)"
rmq_stop
rmq_wipe_data
rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node"
rmq_join_existing "$join_list"
rc=$?
if [ $rc -ne 0 ]; then
ocf_log info "node failed to join even after reseting local data. Check SELINUX policy"
return $OCF_ERR_GENERIC
fi
# Restore users, user permissions, and policies (if any)
# The files read here (users.erl, users_perms.erl, policies.erl) live in
# the parent directory of RMQ_DATA_DIR; each is deleted after it has been
# restored.
BaseDataDir=`dirname $RMQ_DATA_DIR`
$RMQ_CTL eval "
%% Run only if Mnesia is ready.
lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
begin
Restore = fun(Table, PostprocessFun, Filename) ->
case file:consult(Filename) of
{error, _} ->
ok;
{ok, [Result]} ->
lists:foreach(fun(X) -> mnesia:dirty_write(Table, PostprocessFun(X)) end, Result),
file:delete(Filename)
end
end,
%% Restore users
Upgrade = fun
({internal_user, A, B, C}) -> {internal_user, A, B, C, rabbit_password_hashing_md5};
({internal_user, A, B, C, D}) -> {internal_user, A, B, C, D}
end,
Downgrade = fun
({internal_user, A, B, C}) -> {internal_user, A, B, C};
({internal_user, A, B, C, rabbit_password_hashing_md5}) -> {internal_user, A, B, C};
%% Incompatible scheme, so we will loose user's password ('B' value) during conversion.
%% Unfortunately, this case will require manual intervention - user have to run:
%% rabbitmqctl change_password <A> <somenewpassword>
({internal_user, A, B, C, _}) -> {internal_user, A, B, C}
end,
%% Check db scheme first
[WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
case WildPattern of
%% Version < 3.6.0
{internal_user,'_','_','_'} ->
Restore(rabbit_user, Downgrade, \"$BaseDataDir/users.erl\");
%% Version >= 3.6.0
{internal_user,'_','_','_','_'} ->
Restore(rabbit_user, Upgrade, \"$BaseDataDir/users.erl\")
end,
NoOp = fun(X) -> X end,
%% Restore user permissions
Restore(rabbit_user_permission, NoOp, \"$BaseDataDir/users_perms.erl\"),
%% Restore policies
Restore(rabbit_runtime_parameters, NoOp, \"$BaseDataDir/policies.erl\")
end.
"
return $OCF_SUCCESS
}
# Stop action.
#
# First runs an Erlang snippet that dumps users (except "guest"), user
# permissions and policies to files in the parent directory of RMQ_DATA_DIR.
# This happens before the running state is checked. Returns OCF_SUCCESS when
# the broker is not running, or the exit code of "$RMQ_CTL stop" when that
# command fails. Otherwise rmq_monitor is polled once per second until it
# reports OCF_NOT_RUNNING. The polling loop has no timeout of its own.
rmq_stop() {
# Backup users, user permissions, and policies
BaseDataDir=`dirname $RMQ_DATA_DIR`
$RMQ_CTL eval "
%% Run only if Mnesia is still available.
lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
begin
Backup = fun(Table, SelectPattern, Filter, Filename) ->
Result = case catch mnesia:dirty_select(Table, [{SelectPattern, [Filter], ['\\\$_']}]) of
{'EXIT', _} -> [];
Any -> Any
end,
Result /= [] andalso file:write_file(Filename, io_lib:fwrite(\"~p.~n\", [Result]))
end,
%% Backup users
%% Check db scheme first
[WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
UsersSelectPattern = case WildPattern of
%% Version < 3.6.0
{internal_user,'_','_','_'} -> {internal_user, '\\\$1', '_', '_'};
%% Version >= 3.6.0
{internal_user,'_','_','_','_'} -> {internal_user, '\\\$1', '_', '_', '_'}
end,
Backup(rabbit_user, UsersSelectPattern, {'/=', '\\\$1', <<\"guest\">>}, \"$BaseDataDir/users.erl\"),
%% Backup user permissions
Backup(rabbit_user_permission, {'\\\$1', {'\\\$2', '\\\$3','\\\$4'}, '\\\$5'}, {'/=', '\\\$3', <<\"guest\">>}, \"$BaseDataDir/users_perms.erl\"),
%% Backup policies
Backup(rabbit_runtime_parameters, {runtime_parameters, {'_', '\\\$1', '_'}, '_'}, {'==', '\\\$1', <<\"policy\">>}, \"$BaseDataDir/policies.erl\")
end.
"
# Already stopped: nothing else to do.
rmq_monitor
if [ $? -eq $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS
fi
$RMQ_CTL stop
rc=$?
if [ $rc -ne 0 ]; then
ocf_log err "rabbitmq-server stop command failed: $RMQ_CTL stop, $rc"
return $rc
fi
#TODO add kill logic
# Poll until the monitor reports "not running". Any status other than
# success or not-running terminates the agent (exit, not return) with
# OCF_ERR_GENERIC.
stop_wait=1
while [ $stop_wait = 1 ]; do
rmq_monitor
rc=$?
if [ "$rc" -eq $OCF_NOT_RUNNING ]; then
stop_wait=0
break
elif [ "$rc" -ne $OCF_SUCCESS ]; then
ocf_log info "rabbitmq-server stop failed: $rc"
exit $OCF_ERR_GENERIC
fi
sleep 1
done
remove_pid
return $OCF_SUCCESS
}
rmq_validate() {
    # Validate the environment: both the server and the control binary
    # must pass check_binary. Returns OCF_SUCCESS afterwards.
    check_binary $RMQ_SERVER
    check_binary $RMQ_CTL

    # This resource only makes sense as a clone right now. at some point
    # we may want to verify the following.
    #TODO verify cloned
    #TODO verify ordered=true

    # Given that this resource does the cluster join explicitly,
    # having a cluster_nodes list in the static config file will
    # likely conflict with this agent.
    #TODO verify no cluster list in rabbitmq conf
    #cat /etc/rabbitmq/rabbitmq.config | grep "cluster_nodes"

    return $OCF_SUCCESS
}
# Dispatch the requested OCF action. meta-data, usage/help and unknown
# actions exit directly; every other action falls through so that its
# status is logged before the agent exits with it.
case $__OCF_ACTION in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    start)
        rmq_start
        ;;
    stop)
        rmq_stop
        ;;
    monitor)
        rmq_monitor
        ;;
    validate-all)
        rmq_validate
        ;;
    notify)
        rmq_notify
        ;;
    usage|help)
        rmq_usage
        exit $OCF_SUCCESS
        ;;
    *)
        rmq_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
rc=$?

ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/redis.in b/heartbeat/redis.in
index d5eb8f664..ddc62d8a7 100644
--- a/heartbeat/redis.in
+++ b/heartbeat/redis.in
@@ -1,709 +1,711 @@
#!@BASH_SHELL@
#
# Resource agent script for redis server.
#
# Copyright (c) 2013 Patrick Hemmer <patrick.hemmer@gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
#######################################################################
# Initialization:
# Load the shared OCF shell helper library.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults for resource parameters that were not supplied (unset or empty).
: ${OCF_RESKEY_bin:=/usr/bin/redis-server}
: ${OCF_RESKEY_client_bin:=/usr/bin/redis-cli}
: ${OCF_RESKEY_user:=redis}
: ${OCF_RESKEY_rundir:=/var/run/redis}
: ${OCF_RESKEY_pidfile_name:=redis-server.pid}
: ${OCF_RESKEY_socket_name:=redis.sock}
: ${OCF_RESKEY_port:=6379}
: ${OCF_RESKEY_tunnel_host:=127.0.0.1}
# Without an explicit config parameter, prefer /etc/redis.conf when that file
# exists and fall back to /etc/redis/redis.conf otherwise.
if [ -z "$OCF_RESKEY_config" ]; then
if [ -f "/etc/redis.conf" ]; then
OCF_RESKEY_config="/etc/redis.conf"
else
OCF_RESKEY_config="/etc/redis/redis.conf"
fi
fi
# When set to 1, redis_monitor additionally verifies the replication link of
# a slave instance.
CHECK_SLAVE_STATE=0
# Short names for the effective settings used throughout the agent.
REDIS_CHECK_DUMP="/usr/bin/redis-check-dump"
REDIS_SERVER="$OCF_RESKEY_bin"
REDIS_CLIENT="$OCF_RESKEY_client_bin"
REDIS_CONFIG="$OCF_RESKEY_config"
REDIS_USER="$OCF_RESKEY_user"
REDIS_RUNDIR="$OCF_RESKEY_rundir"
REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name"
REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name"
REDIS_REPLICATION_PORT="$OCF_RESKEY_port"
# Locate the dump checker: the fixed path first, then redis-check-dump on
# PATH, then redis-check-rdb on PATH. The variable may end up empty.
if ! [ -f $REDIS_CHECK_DUMP ]; then
REDIS_CHECK_DUMP="$(which redis-check-dump 2>/dev/null)"
fi
if [ -z "$REDIS_CHECK_DUMP" ]; then
REDIS_CHECK_DUMP="$(which redis-check-rdb 2>/dev/null)"
fi
# Take the dump directory and file name from the "dir" and "dbfilename"
# lines of the redis config when it is readable; the defaults below apply
# when nothing was found.
if [ -r "$REDIS_CONFIG" ]; then
REDIS_DUMP_DIR="$(grep "^\s*dir\s" < "$REDIS_CONFIG" | awk '{ print $2 }' 2>/dev/null)"
REDIS_DUMP_FILE="$(grep "^\s*dbfilename\s" < "$REDIS_CONFIG" | awk '{ print $2 }' 2>/dev/null)"
fi
: ${REDIS_DUMP_DIR:=/var/lib/redis/}
: ${REDIS_DUMP_FILE:=dump.rdb}
redis_meta_data() {
    # Print the OCF resource-agent metadata (XML) on stdout.
    # The here-document is unquoted on purpose: the default="..." values
    # are filled in from the current OCF_RESKEY_* variables, which is why
    # the literal backticks in the text are backslash-escaped.
    cat <<EOI
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="redis">
<version>1.0</version>
<longdesc lang="en">
Resource agent script for redis server.
This resource fully supports master/slave replication. The master preference of a node is determined by the 'slave_priority' parameter of the redis config.
When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000.
</longdesc>
<shortdesc lang="en">Redis server</shortdesc>
<parameters>
<parameter name="bin" unique="0" required="0">
<longdesc lang="en">
Path to \`redis-server\`
</longdesc>
<shortdesc lang="en">Path to \`redis-server\`</shortdesc>
<content type="string" default="${OCF_RESKEY_bin}" />
</parameter>
<parameter name="client_bin" unique="0" required="0">
<longdesc lang="en">
Path to \`redis-cli\`
</longdesc>
<shortdesc lang="en">Path to \`redis-cli\`</shortdesc>
<content type="string" default="${OCF_RESKEY_client_bin}" />
</parameter>
<parameter name="config" unique="1" required="0">
<longdesc lang="en">
Path to 'redis.conf'
</longdesc>
<shortdesc lang="en">Path to 'redis.conf'</shortdesc>
<content type="string" default="${OCF_RESKEY_config}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User to run redis as
</longdesc>
<shortdesc lang="en">Redis user</shortdesc>
<content type="string" default="${OCF_RESKEY_user}" />
</parameter>
<parameter name="rundir" unique="1" required="0">
<longdesc lang="en">
Directory to store socket and pid file in
</longdesc>
<shortdesc lang="en">Redis var/run dir</shortdesc>
<content type="string" default="${OCF_RESKEY_rundir}"/>
</parameter>
<parameter name="pidfile_name" unique="0" required="0">
<longdesc lang="en">
The filename to use for the pidfile. Will be created in the rundir.
Should only be a basename, not a full path.
</longdesc>
<shortdesc lang="en">Redis pidfile name</shortdesc>
<content type="string" default="${OCF_RESKEY_pidfile_name}"/>
</parameter>
<parameter name="socket_name" unique="0" required="0">
<longdesc lang="en">
The filename to use for the socket. Will be created in the rundir.
Should only be a basename, not a full path.
</longdesc>
<shortdesc lang="en">Redis socket name</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_name}"/>
</parameter>
<parameter name="port" unique="0" required="0">
<longdesc lang="en">
Port for replication client to connect to on remote server
</longdesc>
<shortdesc lang="en">Replication port</shortdesc>
<content type="string" default="${OCF_RESKEY_port}"/>
</parameter>
<parameter name="tunnel_host" unique="0" required="0">
<longdesc lang="en">
When replication traffic is tunnelled, this is the host to target
to forward outgoing traffic to the redis master. The resource
agent configures the redis slave to target the master via
tunnel_host:tunnel_port.
Note that in order to enable replication traffic tunneling,
parameter {tunnel_port_map} must be populated.
</longdesc>
<shortdesc lang="en">Tunnel host for replication traffic</shortdesc>
<content type="string" default="${OCF_RESKEY_tunnel_host}"/>
</parameter>
<parameter name="tunnel_port_map" unique="0" required="0">
<longdesc lang="en">
A mapping of pacemaker node names to redis port number.
To be used when redis servers need to tunnel replication traffic.
On every node where the redis resource is running, the redis server
listens to a different port. Each redis server can access its peers
for replication traffic via a tunnel accessible at {tunnel_host}:port.
The mapping takes the form of:
pcmk1-name:port-for-redis1;pcmk2-name:port-for-redis2;pcmk3-name:port-for-redis3
where the redis resource started on node pcmk1-name would listen on
port port-for-redis1
</longdesc>
<shortdesc lang="en">Mapping of Redis server name to redis port</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="wait_last_known_master" unique="0" required="0">
<longdesc lang="en">
During redis cluster bootstrap, wait for the last known master to be
promoted before allowing any other instances in the cluster to be
promoted. This lessens the risk of data loss when persistent data
is in use.
</longdesc>
<shortdesc lang="en">Wait for last known master</shortdesc>
<content type="boolean" default="false"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="120s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="60s" interval="45s" />
<action name="monitor" role="Master" depth="0" timeout="60s" interval="20s" />
<action name="monitor" role="Slave" depth="0" timeout="60s" interval="60s" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
EOI
}
# Resource instance name up to (not including) the first ':'.
INSTANCE_ATTR_NAME="${OCF_RESOURCE_INSTANCE%%:*}"

# crm_attribute command prefix used to read and write the cluster-wide
# "<instance>_REPL_INFO" property that records the current master.
CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s redis_replication"

# Per-invocation caches, filled lazily by last_known_master and
# master_is_active.
MASTER_HOST=""
MASTER_ACTIVE_CACHED=""
MASTER_ACTIVE=""
master_is_active()
{
    # Succeeds (status 0) when crm_mon lists this resource as an active,
    # non-orphaned, non-failed Master. The grep result is computed once
    # and cached in MASTER_ACTIVE for the rest of this invocation.
    if [ -n "$MASTER_ACTIVE_CACHED" ]; then
        return $MASTER_ACTIVE
    fi

    # determine if a master instance is already up and is healthy
    crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
    MASTER_ACTIVE=$?
    MASTER_ACTIVE_CACHED="true"

    return $MASTER_ACTIVE
}
set_master()
{
    # Remember $1 as the master host, both in the MASTER_HOST cache and in
    # the cluster property behind CRM_ATTR_REPL_INFO.
    MASTER_HOST="$1"
    ${CRM_ATTR_REPL_INFO} -v "$MASTER_HOST" -q
}
last_known_master()
{
    # Print the last recorded master host. The value is queried through
    # CRM_ATTR_REPL_INFO only while MASTER_HOST is still empty.
    [ -n "$MASTER_HOST" ] || MASTER_HOST="$(${CRM_ATTR_REPL_INFO} --query -q 2>/dev/null)"
    echo "$MASTER_HOST"
}
crm_master_reboot() {
    # Run crm_master with reboot lifetime against the node reported by
    # ocf_attribute_target, appending the caller's arguments.
    local target

    target=$(ocf_attribute_target)
    "${HA_SBIN_DIR}/crm_master" -N "$target" -l reboot "$@"
}
calculate_score()
{
    # Print the master score for this node.
    #   $1 - preferred base score (may be empty)
    #   $2 - number of connected clients, added on top of the base score
    # Without a base score, the last known master gets 1000 and every
    # other node gets 1.
    perf_score="$1"
    connected_clients="$2"

    if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then
        # While waiting for the last known master the agent keeps complete
        # control over the scoring: both inputs are discarded.
        perf_score=""
        connected_clients="0"
    fi

    if [[ -z "$perf_score" ]]; then
        case "$(last_known_master)" in
            "$NODENAME") perf_score=1000 ;;
            *)           perf_score=1 ;;
        esac
    fi

    perf_score=$(( perf_score + connected_clients ))
    echo "$perf_score"
}
set_score()
{
    # Set this node's master score to $1, unless wait_last_known_master is
    # enabled, no master is active yet, and a different node is recorded
    # as the last known master. In that case the score is left unset.
    local score="$1"
    local last_master

    if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then
        if ! master_is_active; then
            last_master="$(last_known_master)"
            if [[ -n "$last_master" && "$last_master" != "$NODENAME" ]]; then
                ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted"
                return
            fi
        fi
    fi

    ocf_log debug "monitor: Setting master score to '$score'"
    crm_master_reboot -v "$score"
}
redis_client() {
    # Run the redis client against the local unix socket with the given
    # arguments and strip a carriage return from each output line.
    # "-a <password>" is added only when clientpasswd is non-empty.
    local -a auth_opts=()

    ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $*"

    if [ -n "$clientpasswd" ]; then
        auth_opts=(-a "$clientpasswd")
    fi

    "$REDIS_CLIENT" -s "$REDIS_SOCKET" "${auth_opts[@]}" "$@" | sed 's/\r//'
}
simple_status() {
    # Process-level liveness check: OCF_SUCCESS when the pid stored in the
    # pid file is among the pids that pidof reports for the server binary,
    # OCF_NOT_RUNNING otherwise (including a missing pid file).
    local pid

    [ -f "$REDIS_PIDFILE" ] || return $OCF_NOT_RUNNING

    pid="$(<"$REDIS_PIDFILE")"
    if ! pidof "$REDIS_SERVER" | grep -q "\<$pid\>"; then
        return $OCF_NOT_RUNNING
    fi

    ocf_log debug "monitor: redis-server running under pid $pid"
    return $OCF_SUCCESS
}
# Monitor action.
#
# Returns the simple_status result when the process check fails,
# OCF_ERR_GENERIC when no role can be read from "redis-cli info",
# OCF_RUNNING_MASTER for a master in a master/slave setup, and OCF_SUCCESS
# otherwise. With CHECK_SLAVE_STATE=1 a slave must also have its replication
# link up and be attached to the last known master (directly or through the
# configured tunnel), else OCF_ERR_GENERIC is returned.
redis_monitor() {
local res
local master_name
local last_known_master_port
simple_status
res=$?
if (( res != OCF_SUCCESS )); then
return $res
fi
# Parse "key:value" lines of the info output into the associative array
# "info"; comment lines and lines without ':' are skipped.
typeset -A info
while read line; do
[[ "$line" == "#"* ]] && continue
[[ "$line" != *":"* ]] && continue
IFS=':' read -r key value <<< "$line"
info[$key]="$value"
done < <(redis_client info)
if [[ -z "${info[role]}" ]]; then
ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`"
return $OCF_ERR_GENERIC
fi
if ocf_is_ms; then
# Here we see if a score has already been set.
# If score isn't set we use the redis setting 'slave_priority'.
# If that isn't set, we default to 1000 for a master, and 1 for slave.
# We then add 1 for each connected client
score="$(crm_master_reboot -G --quiet 2>/dev/null)"
if [[ -z "$score" ]]; then
score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}")
set_score "$score"
fi
if [[ "${info[role]}" == "master" ]]; then
# During a probe, record this node as the master.
if ocf_is_probe; then
set_master "$NODENAME"
fi
return $OCF_RUNNING_MASTER
fi
if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then
if [[ "${info[master_link_status]}" != "up" ]]; then
ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})"
return $OCF_ERR_GENERIC
fi
if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then
# With a tunnel port map the slave points at tunnel_host and the
# port mapped to the last known master, so compare those instead
# of the master's node name.
if [ -n "${OCF_RESKEY_tunnel_port_map}" ]; then
master_name=$(port_to_redis_node ${info[master_port]})
last_known_master_port=$(redis_node_to_port $(last_known_master))
if [[ "${info[master_host]}" != "${OCF_RESKEY_tunnel_host}" ]] ||
[[ "${info[master_port]}" != "${last_known_master_port}" ]]; then
ocf_log err "monitor: Slave mode current tunnelled connection to redis server does not match running master. tunnelled='${info[master_host]}:${info[master_port]} (${master_name})', running='$(last_known_master)'"
return $OCF_ERR_GENERIC
fi
else
ocf_log err "monitor: Slave mode current master does not match running master. current=${info[master_host]}, running=$(last_known_master)"
return $OCF_ERR_GENERIC
fi
fi
fi
fi
return $OCF_SUCCESS
}
redis_node_to_port()
{
    # Print the port mapped to pacemaker node $1 in
    # OCF_RESKEY_tunnel_port_map ("name:port;name:port;..."), or nothing
    # when the node has no entry. Only the first matching entry is used.
    #
    # The node name is handed to awk as a variable instead of being
    # spliced into the program text, so a name containing a double quote
    # cannot alter the awk program. Appending "" to both operands forces a
    # string comparison, as with the quoted literal used previously.
    local node=$1
    echo "$OCF_RESKEY_tunnel_port_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' -v node="$node" '($1 "") == (node "") {print $2;exit}'
}
port_to_redis_node()
{
    # Print the pacemaker node name mapped to port $1 in
    # OCF_RESKEY_tunnel_port_map ("name:port;name:port;..."), or nothing
    # when no entry uses that port. Only the first matching entry is used.
    #
    # The port is handed to awk as a variable instead of being spliced
    # into the program text, so an unexpected value cannot alter the awk
    # program. Appending "" to both operands forces a string comparison,
    # as with the quoted literal used previously.
    local port=$1
    echo "$OCF_RESKEY_tunnel_port_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' -v port="$port" '($2 "") == (port "") {print $1;exit}'
}
get_tunnel_port_from_master()
{
    # Query the permanent "<instance>-tunnel-port" attribute of node $1
    # and print its value; crm_attribute errors are silenced.
    local master_name=$1
    local attr_name="${INSTANCE_ATTR_NAME}-tunnel-port"

    crm_attribute --node "$master_name" -l forever --name ${attr_name} --query -q 2>/dev/null
}
get_master_from_tunnel_port()
{
    # Query the permanent "<instance>-tunnel-port" attribute of node $1
    # and print its value; crm_attribute errors are silenced.
    # NOTE(review): this performs the same lookup as
    # get_tunnel_port_from_master (by node name, returning the attribute
    # value) despite what the name suggests - confirm intent.
    local master_name=$1
    local attr_name="${INSTANCE_ATTR_NAME}-tunnel-port"

    crm_attribute --node "$master_name" -l forever --name ${attr_name} --query -q 2>/dev/null
}
check_dump_file()
{
    # Run the dump checker on the configured dump file and print its
    # combined stdout/stderr. When no checker binary is available the
    # function does nothing and succeeds.
    have_binary "$REDIS_CHECK_DUMP" || return 0

    $REDIS_CHECK_DUMP ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE} 2>&1
}
# Start action.
#
# Returns OCF_SUCCESS when redis already runs (as slave or master). Otherwise
# prepares the run directory, removes a zero-length dump file, starts the
# daemonized server as REDIS_USER, waits for it to finish loading and for the
# pid file to appear, demotes it in master/slave setups and finally checks it
# with redis_monitor. On failure, the server and dump-checker output is
# logged and the monitor status is returned.
redis_start() {
local size
redis_monitor
status=$?
if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
ocf_log info "start: redis is already running"
return $OCF_SUCCESS
fi
# Make sure the run directory exists and belongs to the redis user; the
# SELinux context is restored when restorecon is available.
[[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR"
chown -R "$REDIS_USER" "$REDIS_RUNDIR"
if have_binary "restorecon"; then
restorecon -Rv "$REDIS_RUNDIR"
fi
# check for 0 byte database dump file. This is an unrecoverable start
# condition that we can avoid by deleting the 0 byte database file.
if [ -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" ]; then
size="$(stat --format "%s" ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE})"
if [ "$?" -eq "0" ] && [ "$size" -eq "0" ]; then
ocf_log notice "Detected 0 byte ${REDIS_DUMP_FILE}, deleting zero length file to avoid start failure."
rm -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}"
fi
fi
ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'"
# The server's combined output is kept in "output" for the error message at
# the end of the function.
output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)"
while true; do
# wait for redis to start
typeset -A info
while read line; do
[[ "$line" == "#"* ]] && continue
[[ "$line" != *":"* ]] && continue
IFS=':' read -r key value <<< "$line"
info[$key]="$value"
done < <(redis_client info)
# NOTE(review): an unset info[loading] evaluates to 0 in an arithmetic
# context, so this first branch is presumably also taken when the info
# query returned nothing - confirm that the branches below are reachable
# in that situation.
if (( info[loading] == 0 )); then
break
elif (( info[loading] == 1 )); then
sleep "${info[loading_eta_seconds]}"
elif pidof "$REDIS_SERVER" >/dev/null; then
# unknown error, but the process still exists.
# This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail
# See https://github.com/antirez/redis/issues/2368
# It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out
sleep 1
else
check_output="$(check_dump_file)"
ocf_log err "start: Unknown error waiting for redis to start. redis-check-dump output=${check_output//$'\n'/; }"
return $OCF_ERR_GENERIC
fi
done
# Wait (without a timeout of its own) for a non-empty pid file.
while ! [ -s "$REDIS_PIDFILE" ]; do
ocf_log debug "start: Waiting for pid file '$REDIS_PIDFILE' to appear"
sleep 1
done
ocf_is_ms && redis_demote # pacemaker expects resources to start in slave mode
redis_monitor
status=$?
if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
return $OCF_SUCCESS
fi
check_output="$(check_dump_file)"
ocf_log err "start: Unknown error starting redis. redis-server output=${output//$'\n'/; } redis-check-dump output=${check_output//$'\n'/; }"
return $status
}
redis_stop() {
    # Stop action. Sends SIGTERM to the pid from the pid file and polls
    # simple_status once per second until the process is gone. In every
    # case the master score is deleted and OCF_SUCCESS is returned. The
    # wait has no timeout of its own.
    redis_monitor
    status=$?

    if (( status != OCF_NOT_RUNNING )); then
        pid="$(<"$REDIS_PIDFILE")"
        kill -TERM "$pid"

        # Loop while the process is still reported as present.
        while simple_status; status=$?; (( status != OCF_NOT_RUNNING )); do
            sleep 1
        done
    else
        ocf_log info "stop: redis is already stopped"
    fi

    crm_master_reboot -D
    return $OCF_SUCCESS
}
redis_promote() {
    # Promote action. Turns a running slave into a master with
    # "slaveof no one" and records this node as the master.
    # Returns OCF_SUCCESS when the node is (or becomes) master and
    # OCF_ERR_GENERIC otherwise.
    redis_monitor
    status=$?

    if (( status == OCF_RUNNING_MASTER )); then
        ocf_log info "promote: Already running as master"
        set_master "$NODENAME"
        return $OCF_SUCCESS
    fi

    if (( status != OCF_SUCCESS )); then
        ocf_log err "promote: Node is not running as a slave"
        return $OCF_ERR_GENERIC
    fi

    redis_client slaveof no one

    # Verify that the role switch took effect.
    redis_monitor
    status=$?
    if (( status != OCF_RUNNING_MASTER )); then
        ocf_log err "promote: Unknown error while promoting to master (status=$status)"
        return $OCF_ERR_GENERIC
    fi

    set_master "$NODENAME"
    return $OCF_SUCCESS
}
# Demote action (also used to re-point a slave at a new master).
#
# Returns OCF_SUCCESS when redis already is a correctly attached slave and
# OCF_NOT_RUNNING when redis is not running. Otherwise the instance is made a
# slave of the last known master (or of the placeholder "no-such-master"
# while no master is available) and redis_monitor is polled once per second
# until it reports OCF_SUCCESS.
redis_demote() {
local master_host
local master_port
local tunnel_port
# client kill is only supported in Redis 2.8.12 or greater
# NOTE(review): presumably ocf_version_cmp returns 2 when the first version
# is greater than the second - confirm against ocf-shellfuncs.
version=$(redis_client -v | awk '{print $NF}')
ocf_version_cmp "$version" "2.8.11"
client_kill=$?
# From here on, monitor also verifies the replication link.
CHECK_SLAVE_STATE=1
redis_monitor
status=$?
if (( status == OCF_SUCCESS )); then
ocf_log info "demote: Already running as slave"
return $OCF_SUCCESS
elif (( status == OCF_NOT_RUNNING )); then
ocf_log err "demote: Failed to demote, redis not running."
return $OCF_NOT_RUNNING
fi
master_host="$(last_known_master)"
master_port="${REDIS_REPLICATION_PORT}"
# The elected master has to remain a slave during startup.
# During this period a placeholder master host is assigned.
if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then
CHECK_SLAVE_STATE=0
master_host="no-such-master"
elif ! master_is_active; then
# no master has been promoted yet. we'll be notified when the
# master starts.
CHECK_SLAVE_STATE=0
master_host="no-such-master"
fi
if [ -n "${OCF_RESKEY_tunnel_port_map}" ]; then
# master_host can be the special marker "no-such-master"
# while a master is being selected. In this case, no
# tunnel port is returned, but this is not fatal.
tunnel_port=$(redis_node_to_port "$master_host")
if [ -n "$tunnel_port" ]; then
ocf_log info "demote: Setting master to '$master_host' via local tunnel '${OCF_RESKEY_tunnel_host}' on port '$tunnel_port'"
master_host="${OCF_RESKEY_tunnel_host}"
master_port="$tunnel_port"
fi
else
ocf_log info "demote: Setting master to '$master_host'"
fi
redis_client slaveof "$master_host" "$master_port"
# Wait forever for the slave to connect to the master and finish the
# sync. Timeout is controlled by Pacemaker "op start timeout=XX".
#
# hint: redis master_link_status will only come "up" when
# the SYNC with the master has completed.
# This can take an arbitrary time (data) and should
# only be parametrized by the start operation timeout
# by the administrator, not by this resource agent code
while true; do
# Wait infinite if replication is syncing
# Then start/demote operation timeout determines timeout
if [ "$client_kill" -eq 2 ]; then
redis_client CLIENT PAUSE 2000
fi
redis_monitor
status=$?
if (( status == OCF_SUCCESS )); then
if [ "$client_kill" -eq 2 ]; then
redis_client CLIENT KILL type normal
fi
return $OCF_SUCCESS
fi
sleep 1
done
# NOTE(review): the loop above is only left through "return", so the two
# statements below appear to be unreachable.
ocf_log err "demote: Unexpected error setting slave mode (status=$status)"
return $OCF_ERR_GENERIC
}
redis_notify() {
    # Notification handler. After a promote or demote elsewhere in the
    # cluster, a node that currently is a slave re-runs redis_demote so
    # that it attaches to the newly appointed master.
    # Always returns OCF_SUCCESS.
    mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"

    if [[ "$mode" == "post-demote" || "$mode" == "post-promote" ]]; then
        # change the master
        redis_monitor
        status=$?
        if (( status == OCF_SUCCESS )); then # we're a slave
            # calling demote updates the slave's connection
            # to the newly appointed Master instance.
            redis_demote
        fi
    fi

    return $OCF_SUCCESS
}
redis_validate() {
    # Validate the agent configuration.
    #
    # Returns:
    #   OCF_ERR_INSTALLED  - server or client binary missing / not executable
    #   OCF_ERR_CONFIGURED - config file missing, or REDIS_USER unknown
    #   OCF_SUCCESS        - everything checks out
    #
    # Each check fails when the file is NOT usable; the tests are negated
    # so that they agree with the error messages they guard.
    if [[ ! -x "$REDIS_SERVER" ]]; then
        ocf_log err "validate: $REDIS_SERVER does not exist or is not executable"
        return $OCF_ERR_INSTALLED
    fi

    if [[ ! -x "$REDIS_CLIENT" ]]; then
        ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable"
        return $OCF_ERR_INSTALLED
    fi

    if [[ ! -f "$REDIS_CONFIG" ]]; then
        ocf_log err "validate: $REDIS_CONFIG does not exist"
        return $OCF_ERR_CONFIGURED
    fi

    if ! getent passwd "$REDIS_USER" &>/dev/null; then
        ocf_log err "validate: $REDIS_USER is not a valid user"
        return $OCF_ERR_CONFIGURED
    fi

    return $OCF_SUCCESS
}
-NODENAME=$(ocf_attribute_target)
+if [ "$__OCF_ACTION" != "meta-data" ]; then
+ NODENAME=$(ocf_attribute_target)
+fi
# Pick up the client password from the last "requirepass" line of the redis
# config, if the file is readable. The redirection target is quoted so that
# a config path containing whitespace does not produce an "ambiguous
# redirect" error.
if [ -r "$REDIS_CONFIG" ]; then
    clientpasswd="$(sed -n -e 's/^\s*requirepass\s*\(.*\)\s*$/\1/p' < "$REDIS_CONFIG" | tail -n 1)"
fi

ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}"

# Dispatch the requested action: the first command line argument wins,
# otherwise __OCF_ACTION is used. Unknown actions print the usage line and
# exit with OCF_ERR_UNIMPLEMENTED; every other action's status is logged
# and becomes the exit status.
case "${1:-$__OCF_ACTION}" in
    status|monitor)
        redis_monitor
        ;;
    start)
        redis_start
        ;;
    stop)
        redis_stop
        ;;
    restart)
        redis_stop && redis_start
        ;;
    promote)
        redis_promote
        ;;
    demote)
        redis_demote
        ;;
    notify)
        redis_notify
        ;;
    meta-data)
        redis_meta_data
        ;;
    validate-all)
        redis_validate
        ;;
    *)
        echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}"
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
status=$?
ocf_log debug "exit_status=$status"
exit $status

File Metadata

Mime Type
text/x-diff
Expires
Wed, Feb 26, 11:23 AM (20 h, 21 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1465295
Default Alt Text
(70 KB)

Event Timeline