Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/heartbeat/Stateful b/heartbeat/Stateful
index aa2284467..72dd5501e 100755
--- a/heartbeat/Stateful
+++ b/heartbeat/Stateful
@@ -1,194 +1,192 @@
#!/bin/sh
#
#
# Example of a stateful OCF Resource Agent.
#
# Copyright (c) 2006 Andrew Beekhof
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
# Default OCF_FUNCTIONS_DIR only when the caller has not set it, then load
# the shared OCF shell helper library from that directory.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Parameter defaults
# The state file defaults to a per-resource-instance file under HA_RSCTMP;
# an explicitly configured OCF_RESKEY_state takes precedence.
OCF_RESKEY_state_default="${HA_RSCTMP}/Stateful-${OCF_RESOURCE_INSTANCE}.state"
: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}}
-CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
-
#######################################################################
# Print the agent's OCF metadata XML on stdout and terminate the script with
# $OCF_SUCCESS. The heredoc delimiter is unquoted, so
# ${OCF_RESKEY_state_default} is expanded into the advertised default of the
# "state" parameter.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Stateful" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This is an example resource agent that implements two states
</longdesc>
<shortdesc lang="en">Example stateful resource agent</shortdesc>
<parameters>
<parameter name="state" unique="1">
<longdesc lang="en">
Location to store the resource state in
</longdesc>
<shortdesc lang="en">State file</shortdesc>
<content type="string" default="${OCF_RESKEY_state_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="promote" timeout="20s" />
<action name="demote" timeout="20s" />
<action name="monitor" depth="0" timeout="20s" interval="10s"/>
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
# This exits the whole script, not just the function.
exit $OCF_SUCCESS
}
#######################################################################
# Print the command-line synopsis on stdout and terminate the script.
#   $1 - exit status to terminate with
stateful_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|promote|demote|monitor|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
    exit $1
}
# Record a new state in the state file, replacing any previous content.
#   $1 - state name to store (the callers in this file pass "slave" or
#        "master")
# Both expansions are quoted so that a state path containing whitespace is
# written as one file instead of failing with an ambiguous redirect.
stateful_update() {
    echo "$1" > "${OCF_RESKEY_state}"
}
# Compare the recorded state with an expected one.
#   $1 - expected state; empty/omitted means "no state file expected"
# Returns $OCF_SUCCESS when the state file exists and holds exactly $1, or
# when no state file exists and $1 is empty; $OCF_ERR_GENERIC otherwise.
# Sets the (non-local) variables "target" and "state" as the original did.
stateful_check_state() {
    target=$1
    # The path is quoted so that whitespace in it cannot break the test.
    if [ -f "${OCF_RESKEY_state}" ]; then
        state=$(cat "${OCF_RESKEY_state}")
        if [ "x$target" = "x$state" ]; then
            return $OCF_SUCCESS
        fi
    else
        if [ "x$target" = "x" ]; then
            return $OCF_SUCCESS
        fi
    fi
    return $OCF_ERR_GENERIC
}
# Start action: record the "slave" state and set a promotion score of 5.
# Returns $OCF_RUNNING_MASTER if the state file already says "master",
# otherwise $OCF_SUCCESS.
stateful_start() {
stateful_check_state master
if [ $? = 0 ]; then
# CRM Error - Should never happen
return $OCF_RUNNING_MASTER
fi
stateful_update slave
# Diff hunk: the $CRM_MASTER invocation is replaced by ocf_promotion_score,
# called with the same "-v 5" arguments.
- $CRM_MASTER -v 5
+ ocf_promotion_score -v 5
return $OCF_SUCCESS
}
# Demote action: record the "slave" state and set a promotion score of 5.
# stateful_check_state with no argument succeeds only when no state file
# exists, in which case $OCF_NOT_RUNNING is returned; otherwise $OCF_SUCCESS.
stateful_demote() {
stateful_check_state
if [ $? = 0 ]; then
# CRM Error - Should never happen
return $OCF_NOT_RUNNING
fi
stateful_update slave
# Diff hunk: $CRM_MASTER is replaced by ocf_promotion_score ("-v 5" kept).
- $CRM_MASTER -v 5
+ ocf_promotion_score -v 5
return $OCF_SUCCESS
}
# Promote action: record the "master" state and set a promotion score of 10.
# Returns $OCF_NOT_RUNNING when no state file exists (the argument-less
# stateful_check_state succeeds only in that case), otherwise $OCF_SUCCESS.
stateful_promote() {
stateful_check_state
if [ $? = 0 ]; then
return $OCF_NOT_RUNNING
fi
stateful_update master
# Diff hunk: $CRM_MASTER is replaced by ocf_promotion_score ("-v 10" kept).
- $CRM_MASTER -v 10
+ ocf_promotion_score -v 10
return $OCF_SUCCESS
}
# Stop action: delete the promotion score, then remove the state file.
# Returns $OCF_RUNNING_MASTER (leaving the file in place) if the state file
# still says "master", otherwise $OCF_SUCCESS.
stateful_stop() {
# Diff hunk: $CRM_MASTER is replaced by ocf_promotion_score ("-D" kept).
- $CRM_MASTER -D
+ ocf_promotion_score -D
stateful_check_state master
if [ $? = 0 ]; then
# CRM Error - Should never happen
return $OCF_RUNNING_MASTER
fi
if [ -f ${OCF_RESKEY_state} ]; then
rm ${OCF_RESKEY_state}
fi
return $OCF_SUCCESS
}
# Monitor action: report the current role from the state file.
# Returns:
#   $OCF_RUNNING_MASTER - state file holds "master"
#   $OCF_SUCCESS        - state file holds "slave"
#   $OCF_ERR_GENERIC    - state file exists with any other content (a
#                         diagnostic and the file content go to stdout)
#   $OCF_NOT_RUNNING    - no state file
stateful_monitor() {
    stateful_check_state "master"
    if [ $? = 0 ]; then
        return $OCF_RUNNING_MASTER
    fi
    stateful_check_state "slave"
    if [ $? = 0 ]; then
        return $OCF_SUCCESS
    fi
    # The path is quoted so that a state file whose path contains whitespace
    # is still detected instead of being reported as not running.
    if [ -f "${OCF_RESKEY_state}" ]; then
        echo "File '${OCF_RESKEY_state}' exists but contains unexpected contents"
        cat "${OCF_RESKEY_state}"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_NOT_RUNNING
}
# validate-all action: nothing is checked; terminate the script with
# $OCF_SUCCESS.
stateful_validate() {
    exit ${OCF_SUCCESS}
}
# Dispatch the requested OCF action to its handler. Unknown actions print
# the usage text and exit with $OCF_ERR_UNIMPLEMENTED. For handlers that
# return instead of exiting, their status becomes the script's exit status.
case $__OCF_ACTION in
    meta-data)
        meta_data
        ;;
    start)
        stateful_start
        ;;
    promote)
        stateful_promote
        ;;
    demote)
        stateful_demote
        ;;
    stop)
        stateful_stop
        ;;
    monitor)
        stateful_monitor
        ;;
    validate-all)
        stateful_validate
        ;;
    usage|help)
        stateful_usage $OCF_SUCCESS
        ;;
    *)
        stateful_usage $OCF_ERR_UNIMPLEMENTED
        ;;
esac
exit $?
diff --git a/heartbeat/galera.in b/heartbeat/galera.in
index 546b1a853..cd2fee7c0 100755
--- a/heartbeat/galera.in
+++ b/heartbeat/galera.in
@@ -1,1094 +1,1094 @@
#!@BASH_SHELL@
#
# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
##
# README.
#
# This agent only supports being configured as a promotable (multistate)
# resource.
#
# Unpromoted vs Promoted role:
#
# During the 'Unpromoted' role, galera instances are in read-only mode and
# will not attempt to connect to the cluster. This role exists only as
# a means to determine which galera instance is the most up-to-date. The
# most up-to-date node will be used to bootstrap a galera cluster that
# has no current members.
#
# The galera instances will only begin to be promoted to the Promoted role
# once all the nodes in the 'wsrep_cluster_address' connection address
# have entered read-only mode. At that point the node containing the
# database that is most current will be promoted to Promoted. Once the first
# Promoted instance bootstraps the galera cluster, the other nodes will be
# promoted to Promoted as well.
#
# Example: Create a galera cluster using nodes rhel7-auto1 rhel7-auto2 rhel7-auto3
#
# pcs resource create db galera enable_creation=true \
# wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta promoted-max=3 --promoted
#
# By setting the 'enable_creation' option, the database will be automatically
# generated at startup. The meta attribute 'promoted-max=3' means that all 3
# nodes listed in the wsrep_cluster_address list will be allowed to connect
# to the galera cluster and perform replication.
#
# NOTE: If you have more nodes in the pacemaker cluster than you wish
# to have in the galera cluster, make sure to use location constraints to prevent
# pacemaker from attempting to place a galera instance on a node that is
# not in the 'wsrep_cluster_address' list.
#
##
#######################################################################
# Initialization:
# Default OCF_FUNCTIONS_DIR only when unset, then load the shared OCF shell
# helper library.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# The meta-data action does not load the MySQL helpers or resolve NODENAME.
if [ "$__OCF_ACTION" != "meta-data" ]; then
. ${OCF_FUNCTIONS_DIR}/mysql-common.sh
NODENAME=$(ocf_attribute_target)
fi
# Some galera installations keep the credentials of a check user (used to
# query status) in a clustercheck file; source the first one that exists.
if [ -f "/etc/sysconfig/clustercheck" ]; then
. /etc/sysconfig/clustercheck
elif [ -f "/etc/default/clustercheck" ]; then
. /etc/default/clustercheck
fi
# Parameter defaults
OCF_RESKEY_wsrep_cluster_address_default=""
OCF_RESKEY_cluster_host_map_default=""
OCF_RESKEY_check_user_default=""
OCF_RESKEY_check_passwd_default=""
OCF_RESKEY_two_node_mode_default="false"
# Apply each default only when the parameter is unset; a parameter that is
# set but empty is left as is.
: ${OCF_RESKEY_wsrep_cluster_address=${OCF_RESKEY_wsrep_cluster_address_default}}
: ${OCF_RESKEY_cluster_host_map=${OCF_RESKEY_cluster_host_map_default}}
: ${OCF_RESKEY_check_user=${OCF_RESKEY_check_user_default}}
: ${OCF_RESKEY_check_passwd=${OCF_RESKEY_check_passwd_default}}
: ${OCF_RESKEY_two_node_mode=${OCF_RESKEY_two_node_mode_default}}
#######################################################################
# Defaults:
OCF_RESKEY_check_passwd_use_empty_default=0
: ${OCF_RESKEY_check_passwd_use_empty=${OCF_RESKEY_check_passwd_use_empty_default}}
#######################################################################
# Print the command-line synopsis and a one-line summary of every supported
# operation on stdout. Unlike the metadata printer, this does not exit.
usage() {
    printf '%s\n' \
        "usage: $0 (start|stop|validate-all|meta-data|monitor|promote|demote)" \
        "$0 manages a galera Database as an HA resource." \
        "The 'start' operation starts the database." \
        "The 'stop' operation stops the database." \
        "The 'status' operation reports whether the database is running" \
        "The 'monitor' operation reports whether the database seems to be working" \
        "The 'promote' operation makes this mysql server run as promoted" \
        "The 'demote' operation makes this mysql server run as unpromoted" \
        "The 'validate-all' operation reports whether the parameters are valid"
}
# Print the agent's OCF metadata XML on stdout. The heredoc delimiter is
# unquoted, so every ${OCF_RESKEY_*_default} reference is expanded when the
# function runs.
# NOTE(review): the *_default variables for binary, client_binary, config,
# datadir, user, group, log, pid, socket, enable_creation and
# additional_parameters are not assigned in the visible part of this file;
# presumably they come from mysql-common.sh, which the initialization does
# not source for the meta-data action - confirm they are populated here.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="galera" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Resource script for managing galera database.
</longdesc>
<shortdesc lang="en">Manages a galera instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL server binary
</longdesc>
<shortdesc lang="en">MySQL server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MySQL config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">MySQL datadir</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MySQL daemon
</longdesc>
<shortdesc lang="en">MySQL user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MySQL daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MySQL group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="log" unique="0" required="0">
<longdesc lang="en">
The logfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_default}"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="enable_creation" unique="0" required="0">
<longdesc lang="en">
If the MySQL database does not exist, it will be created
</longdesc>
<shortdesc lang="en">Create the database if it does not exist</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="wsrep_cluster_address" unique="0" required="1">
<longdesc lang="en">
The galera cluster address. This takes the form of:
gcomm://node,node,node
Only nodes present in this node list will be allowed to start a galera instance.
The galera node names listed in this address are expected to match valid
pacemaker node names. If both names need to differ, you must provide a
mapping in option cluster_host_map.
</longdesc>
<shortdesc lang="en">Galera cluster address</shortdesc>
<content type="string" default="${OCF_RESKEY_wsrep_cluster_address_default}"/>
</parameter>
<parameter name="cluster_host_map" unique="0" required="0">
<longdesc lang="en">
A mapping of pacemaker node names to galera node names.
To be used when both pacemaker and galera names need to differ,
(e.g. when galera names map to IP from a specific network interface)
This takes the form of:
pcmk1:node.1.galera;pcmk2:node.2.galera;pcmk3:node.3.galera
where the galera resource started on node pcmk1 would be named
node.1.galera in the wsrep_cluster_address
</longdesc>
<shortdesc lang="en">Pacemaker to Galera name mapping</shortdesc>
<content type="string" default="${OCF_RESKEY_cluster_host_map_default}"/>
</parameter>
<parameter name="check_user" unique="0" required="0">
<longdesc lang="en">
Cluster check user.
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="${OCF_RESKEY_check_user_default}" />
</parameter>
<parameter name="check_passwd" unique="0" required="0">
<longdesc lang="en">
Cluster check user password. Empty passwords are ignored unless
the parameter "check_passwd_use_empty" is set to 1.
</longdesc>
<shortdesc lang="en">check password</shortdesc>
<content type="string" default="${OCF_RESKEY_check_passwd_default}" />
</parameter>
<parameter name="check_passwd_use_empty" unique="0" required="0">
<longdesc lang="en">
Use an empty "check_passwd" password. If this parameter is set to 1,
"check_passwd" will be ignored and an empty password is used
when calling the "mysql" client binary.
</longdesc>
<shortdesc lang="en">check password use empty</shortdesc>
<content type="boolean" default="${OCF_RESKEY_check_passwd_use_empty_default}"/>
</parameter>
<parameter name="two_node_mode" unique="0" required="0">
<longdesc lang="en">
If running in a 2-node pacemaker cluster, rely on pacemaker quorum
to allow automatic recovery even when the other node is unreachable.
Use it with caution! (and fencing)
</longdesc>
<shortdesc lang="en">Special recovery when running on a 2-node cluster</shortdesc>
<content type="boolean" default="${OCF_RESKEY_two_node_mode_default}"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="120s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="20s" />
<action name="monitor" role="Promoted" depth="0" timeout="30s" interval="10s" />
<action name="monitor" role="Unpromoted" depth="0" timeout="30s" interval="30s" />
<action name="promote" timeout="300s" />
<action name="demote" timeout="120s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}
# Query one server variable through the MySQL client and print the last
# line of the client's output.
#   $1 - variable name (used as a LIKE pattern)
get_option_variable()
{
    local var_name=$1
    local query="SHOW VARIABLES like '$var_name';"

    $MYSQL $MYSQL_OPTIONS_CHECK -e "$query" \
        | tail -1
}
# Query one server status counter through the MySQL client and print the
# last line of the client's output.
#   $1 - status variable name (used as a LIKE pattern)
get_status_variable()
{
    local status_name=$1
    local query="show status like '$status_name';"

    $MYSQL $MYSQL_OPTIONS_CHECK -e "$query" \
        | tail -1
}
# Flag a node as the bootstrap node by setting the reboot-lifetime node
# attribute "<INSTANCE_ATTR_NAME>-bootstrap" to "true".
#   $1 - node name, resolved through ocf_attribute_target
set_bootstrap_node()
{
    local target
    local attr="${INSTANCE_ATTR_NAME}-bootstrap"

    target=$(ocf_attribute_target $1)
    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr" -v "true"
}
# Delete the local node's "<INSTANCE_ATTR_NAME>-bootstrap" attribute.
clear_bootstrap_node()
{
    local attr="${INSTANCE_ATTR_NAME}-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Print the local node's "<INSTANCE_ATTR_NAME>-bootstrap" attribute value;
# stderr of crm_attribute is discarded.
is_bootstrap()
{
    local attr="${INSTANCE_ATTR_NAME}-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" --quiet 2>/dev/null
}
# Set the local node's "<INSTANCE_ATTR_NAME>-no-grastate" attribute to
# "true".
set_no_grastate()
{
    local attr="${INSTANCE_ATTR_NAME}-no-grastate"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -v "true"
}
# Delete the local node's "<INSTANCE_ATTR_NAME>-no-grastate" attribute.
clear_no_grastate()
{
    local attr="${INSTANCE_ATTR_NAME}-no-grastate"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Print the "<INSTANCE_ATTR_NAME>-no-grastate" attribute of a node; stderr
# of crm_attribute is discarded and its exit status is returned.
#   $1 - node name, resolved through ocf_attribute_target
is_no_grastate()
{
    local target
    local attr="${INSTANCE_ATTR_NAME}-no-grastate"

    target=$(ocf_attribute_target $1)
    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr" --quiet 2>/dev/null
}
# Delete the local node's "<INSTANCE_ATTR_NAME>-last-committed" attribute.
clear_last_commit()
{
    local attr="${INSTANCE_ATTR_NAME}-last-committed"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Store a value in the local node's "<INSTANCE_ATTR_NAME>-last-committed"
# attribute.
#   $1 - value to record
set_last_commit()
{
    local attr="${INSTANCE_ATTR_NAME}-last-committed"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -v $1
}
# Print the "<INSTANCE_ATTR_NAME>-last-committed" attribute of a node.
#   $1 - optional node name (resolved through ocf_attribute_target); the
#        local node ($NODENAME) is queried when it resolves to nothing
get_last_commit()
{
    local target
    local attr="${INSTANCE_ATTR_NAME}-last-committed"

    target=$(ocf_attribute_target $1)
    [ -n "$target" ] || target=$NODENAME
    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr" --quiet 2>/dev/null
}
# Delete the local node's "<INSTANCE_ATTR_NAME>-safe-to-bootstrap"
# attribute.
clear_safe_to_bootstrap()
{
    local attr="${INSTANCE_ATTR_NAME}-safe-to-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Store a value in the local node's "<INSTANCE_ATTR_NAME>-safe-to-bootstrap"
# attribute.
#   $1 - value to record
set_safe_to_bootstrap()
{
    local attr="${INSTANCE_ATTR_NAME}-safe-to-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -v $1
}
# Print the "<INSTANCE_ATTR_NAME>-safe-to-bootstrap" attribute of a node.
#   $1 - optional node name (resolved through ocf_attribute_target); the
#        local node ($NODENAME) is queried when it resolves to nothing
get_safe_to_bootstrap()
{
    local target
    local attr="${INSTANCE_ATTR_NAME}-safe-to-bootstrap"

    target=$(ocf_attribute_target $1)
    [ -n "$target" ] || target=$NODENAME
    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr" --quiet 2>/dev/null
}
# Block until get_status_variable reports wsrep_local_state as "4", polling
# once per second. There is no timeout inside this function.
wait_for_sync()
{
    local wsrep_state

    wsrep_state=$(get_status_variable "wsrep_local_state")
    ocf_log info "Waiting for database to sync with the cluster. "
    until [ "$wsrep_state" = "4" ]; do
        sleep 1
        wsrep_state=$(get_status_variable "wsrep_local_state")
    done
    ocf_log info "Database synced."
}
# Succeed (0) when wsrep_cluster_status is "Primary". Otherwise return 1
# after recording an exit reason (empty status) or logging the status seen.
# Sets the non-local variable cluster_status.
is_primary()
{
    cluster_status=$(get_status_variable "wsrep_cluster_status")

    case "$cluster_status" in
        Primary)
            return 0
            ;;
        "")
            ocf_exit_reason "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status"
            ;;
        *)
            ocf_log info "Galera instance wsrep_cluster_status=${cluster_status}"
            ;;
    esac
    return 1
}
# Succeed (0) only when the read_only option is true AND
# wsrep_cluster_status is "Disconnected"; return 1 otherwise.
# Sets the non-local variable cluster_status when the first check passes.
is_readonly()
{
    local ro_value

    ro_value=$(get_option_variable "read_only")
    ocf_is_true "$ro_value" || return 1

    cluster_status=$(get_status_variable "wsrep_cluster_status")
    [ "$cluster_status" = "Disconnected" ] || return 1

    return 0
}
# Succeed when two_node_mode is enabled and crm_mon reports exactly two
# nodes of type "member".
is_two_node_mode_active()
{
    # crm_node or corosync-quorumtool cannot access various corosync
    # flags when running inside a bundle, so only count the cluster
    # members
    ocf_is_true "$OCF_RESKEY_two_node_mode" || return

    crm_mon_no_validation -1X \
        | xmllint --xpath "count(//nodes/node[@type='member'])" - \
        | grep -q -w 2
}
# Succeed when this node is alone in its partition, the partition is
# quorate, and crm_mon still lists two member nodes that are not unclean.
is_last_node_in_quorate_partition()
{
    # when a network split occurs in a 2-node cluster, pacemaker
    # fences the other node and try to retain quorum. So until
    # the fencing is resolved (and the status of the peer node
    # is clean), we shouldn't consider ourself quorate.
    local member_count
    local has_quorum
    local clean_count

    member_count=$(${HA_SBIN_DIR}/crm_node -p | wc -w)
    has_quorum=$(${HA_SBIN_DIR}/crm_node -q)
    clean_count=$(crm_mon_no_validation -1X | xmllint --xpath 'count(//nodes/node[@type="member" and @unclean="false"])' -)

    if [ "$member_count" = 1 ]; then
        if [ "$has_quorum" = 1 ]; then
            [ "$clean_count" = 2 ]
            return
        fi
    fi
    return 1
}
# Report whether crm_mon lists an instance of this resource that is in the
# Promoted/Master role and is active, not orphaned and not failed.
# Returns 0 when such an instance is found, non-zero otherwise; always
# returns 1 during a demote action. Sets the non-local variables res and
# XMLOPT.
master_exists()
{
if [ "$__OCF_ACTION" = "demote" ]; then
# We don't want to detect master instances during demote.
# 1. we could be detecting ourselves as being master, which is no longer the case.
# 2. we could be detecting other master instances that are in the process of shutting down.
# by not detecting other master instances in "demote" we are deferring this check
# to the next recurring monitor operation which will be much more accurate
return 1
fi
# determine if a master instance is already up and is healthy
# Choose the crm_mon XML output flag from OCF_RESKEY_crm_feature_set.
# NOTE(review): this relies on the return codes of ocf_version_cmp, which is
# defined outside this file - presumably 2 means "first version is greater"
# and 1 means "equal"; confirm against ocf-shellfuncs.
ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.1.0"
res=$?
if [ -z "$OCF_RESKEY_crm_feature_set" ] || [ $res -eq 2 ]; then
XMLOPT="--output-as=xml"
ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.2.0"
if [ $? -eq 1 ]; then
# Probe whether crm_mon accepts --output-as=xml; fall back to --as-xml
# when the probe fails.
crm_mon_no_validation -1 $XMLOPT >/dev/null 2>&1
if [ $? -ne 0 ]; then
XMLOPT="--as-xml"
fi
fi
else
XMLOPT="--as-xml"
fi
# The match is case-insensitive and expects the attributes in this order on
# a single line of crm_mon output.
crm_mon_no_validation -1 $XMLOPT | grep -q -i -E "resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"(Promoted|Master)\".*active=\"true\".*orphaned=\"false\".*failed=\"false\""
return $?
}
# Delete the promotion score of a node.
#   $1 - optional node name (resolved through ocf_attribute_target); when it
#        resolves to nothing, no -N option is passed.
clear_master_score()
{
local node=$(ocf_attribute_target $1)
if [ -z "$node" ]; then
# Diff hunk: $CRM_MASTER is replaced by ocf_promotion_score ("-D" kept).
- $CRM_MASTER -D
+ ocf_promotion_score -D
else
# Diff hunk: same replacement, targeting the given node.
- $CRM_MASTER -D -N $node
+ ocf_promotion_score -D -N $node
fi
}
# Set the promotion score of a node to 100.
#   $1 - optional node name (resolved through ocf_attribute_target); when it
#        resolves to nothing, no -N option is passed.
set_master_score()
{
local node=$(ocf_attribute_target $1)
if [ -z "$node" ]; then
# Diff hunk: $CRM_MASTER is replaced by ocf_promotion_score ("-v 100" kept).
- $CRM_MASTER -v 100
+ ocf_promotion_score -v 100
else
# Diff hunk: same replacement, targeting the given node.
- $CRM_MASTER -N $node -v 100
+ ocf_promotion_score -N $node -v 100
fi
}
# Give a promotion score to every node listed in wsrep_cluster_address.
# Each galera name is mapped to its pacemaker name first; the loop stops at
# the first name that cannot be mapped (nodes before it keep their score).
# The loop variable "node" is intentionally not local, as in the original.
promote_everyone()
{
    local pcmk_node

    for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
        pcmk_node=$(galera_to_pcmk_name $node)
        if [ -z "$pcmk_node" ]; then
            ocf_log err "Could not determine pacemaker node from galera name <${node}>."
            return
        fi
        node=$pcmk_node
        set_master_score $node
    done
}
# Succeed (0) when $1 >= $2, return 1 otherwise. The comparison is done by
# awk rather than the shell.
greater_than_equal_long()
{
    # there are values we need to compare in this script
    # that are too large for shell -gt to process
    local verdict

    verdict=$(awk -v n1="$1" -v n2="$2" 'BEGIN { if (n1>=n2) print "true"; else print "false" }')
    [ "$verdict" = "true" ]
}
# Translate a galera node name into its pacemaker node name.
#   $1 - galera node name
# Without a cluster_host_map the name is echoed unchanged. With a map
# ("pcmk:galera;pcmk:galera;...") the pacemaker name of the first entry
# whose galera side equals $1 is printed, or nothing when none matches.
galera_to_pcmk_name()
{
    local galera_name=$1

    if [ -n "$OCF_RESKEY_cluster_host_map" ]; then
        echo "$OCF_RESKEY_cluster_host_map" \
            | tr ';' '\n' \
            | tr -d ' ' \
            | sed 's/:/ /' \
            | awk -F' ' '$2=="'"$galera_name"'" {print $1;exit}'
        return
    fi
    echo $galera_name
}
# Translate a pacemaker node name into its galera node name.
#   $1 - pacemaker node name
# Without a cluster_host_map the name is echoed unchanged. With a map
# ("pcmk:galera;pcmk:galera;...") the galera name of the first entry whose
# pacemaker side equals $1 is printed, or nothing when none matches.
pcmk_to_galera_name()
{
    local pcmk_name=$1

    if [ -n "$OCF_RESKEY_cluster_host_map" ]; then
        echo "$OCF_RESKEY_cluster_host_map" \
            | tr ';' '\n' \
            | tr -d ' ' \
            | sed 's/:/ /' \
            | awk -F' ' '$1=="'"$pcmk_name"'" {print $2;exit}'
        return
    fi
    echo $pcmk_name
}
# Pick the node that should bootstrap the galera cluster, then mark it as
# bootstrap node and give it a promotion score.
# Selection: the first node found with safe-to-bootstrap = 1 wins
# immediately; otherwise the node with the highest reported last commit
# wins, and nothing is promoted until every node has reported one.
# Returns early, without promoting anything, when a galera name cannot be
# mapped to a pacemaker name or when a node has not reported yet.
detect_first_master()
{
local best_commit=0
local last_commit=0
local missing_nodes=0
local nodes=""
local nodes_recovered=""
local all_nodes
local best_node_gcomm
local best_node
local safe_to_bootstrap
all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
# The initial candidate is the last node of the address list.
best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/')
best_node=$(galera_to_pcmk_name $best_node_gcomm)
if [ -z "$best_node" ]; then
ocf_log err "Could not determine initial best node from galera name <${best_node_gcomm}>."
return
fi
# avoid selecting a recovered node as bootstrap if possible
for node in $all_nodes; do
local pcmk_node=$(galera_to_pcmk_name $node)
if [ -z "$pcmk_node" ]; then
ocf_log err "Could not determine pacemaker node from galera name <${node}>."
return
else
node=$pcmk_node
fi
if is_no_grastate $node; then
nodes_recovered="$nodes_recovered $node"
else
nodes="$nodes $node"
fi
done
# Recovered nodes are visited first; because the commit comparison below is
# ">=", a later (non-recovered) node wins a tie.
for node in $nodes_recovered $nodes; do
# On clean shutdown, galera sets the last stopped node as 'safe to bootstrap',
# so use this hint when we can
safe_to_bootstrap=$(get_safe_to_bootstrap $node)
# Special case for 2-node clusters: during a network split, rely on
# pacemaker's quorum to check whether we can restart galera
if [ "$safe_to_bootstrap" != "1" ] && [ "$node" = "$NODENAME" ] && is_two_node_mode_active; then
is_last_node_in_quorate_partition
if [ $? -eq 0 ]; then
ocf_log warn "Survived a split in a 2-node cluster, considering ourselves safe to bootstrap"
safe_to_bootstrap=1
fi
fi
if [ "$safe_to_bootstrap" = "1" ]; then
# Galera marked the node as safe to boostrap during shutdown. Let's just
# pick it as our bootstrap node.
ocf_log info "Node <${node}> is marked as safe to bootstrap."
best_node=$node
# We don't need to wait for the other nodes to report state in this case
missing_nodes=0
break
fi
last_commit=$(get_last_commit $node)
if [ -z "$last_commit" ]; then
ocf_log info "Waiting on node <${node}> to report database status before Master instances can start."
missing_nodes=1
continue
fi
# this means -1, or that no commit has occured yet.
if [ "$last_commit" = "18446744073709551615" ]; then
last_commit="0"
fi
greater_than_equal_long "$last_commit" "$best_commit"
if [ $? -eq 0 ]; then
best_node=$(ocf_attribute_target $node)
best_commit=$last_commit
fi
done
# At least one node has not reported its last commit: decide later.
if [ $missing_nodes -eq 1 ]; then
return
fi
ocf_log info "Promoting $best_node to be our bootstrap node"
set_bootstrap_node $best_node
set_master_score $best_node
}
# Mirror the safe_to_bootstrap flag of grastate.dat into the node's
# safe-to-bootstrap attribute.
# The attribute is cleared when grastate.dat is missing, when its uuid is
# empty or all zeroes, when the flag is 1 but seqno is empty or -1, or when
# the flag is neither 0 nor 1. Otherwise the flag value is stored.
detect_safe_to_bootstrap()
{
    local flag=""
    local state_uuid=""
    local state_seqno=""
    local grastate=${OCF_RESKEY_datadir}/grastate.dat

    if [ -f $grastate ]; then
        ocf_log info "attempting to read safe_to_bootstrap flag from ${OCF_RESKEY_datadir}/grastate.dat"
        flag=$(sed -n 's/^safe_to_bootstrap:\s*\(.*\)$/\1/p' < $grastate)
        state_uuid=$(sed -n 's/^uuid:\s*\(.*\)$/\1/p' < $grastate)
        state_seqno=$(sed -n 's/^seqno:\s*\(.*\)$/\1/p' < $grastate)
    fi

    # No usable cluster state at all.
    case "$state_uuid" in
        ""|"00000000-0000-0000-0000-000000000000")
            clear_safe_to_bootstrap
            return
            ;;
    esac

    case "$flag" in
        1)
            # A flag of 1 is only trusted together with a real seqno.
            if [ -z "$state_seqno" ] || [ "$state_seqno" = "-1" ]; then
                clear_safe_to_bootstrap
                return
            fi
            set_safe_to_bootstrap 1
            ;;
        0)
            set_safe_to_bootstrap 0
            ;;
        *)
            clear_safe_to_bootstrap
            ;;
    esac
}
# Determine the last committed write sequence number of the local database
# and publish it with set_last_commit.
# The seqno is read from grastate.dat; when it is missing or -1, the server
# binary is run with --wsrep-recover and the recovered position is parsed
# from its error log. If that recovery is blocked by prepared transactions,
# it is retried with --tc-heuristic-recover=rollback.
# Returns $OCF_SUCCESS when a value was found, otherwise clears the
# attribute, records an exit reason and returns $OCF_ERR_GENERIC.
detect_last_commit()
{
    local last_commit
    local tmp
    local recovery_file
    local grastate="${OCF_RESKEY_datadir}/grastate.dat"
    local gvwstate="${OCF_RESKEY_datadir}/gvwstate.dat"
    local recover_args="--defaults-file=$OCF_RESKEY_config"
    recover_args="$recover_args --pid-file=$OCF_RESKEY_pid"
    recover_args="$recover_args --socket=$OCF_RESKEY_socket"
    recover_args="$recover_args --datadir=$OCF_RESKEY_datadir"
    local recovery_file_regex='s/.*WSREP\:.*position\s*recovery.*--log_error='\''\([^'\'']*\)'\''.*/\1/p'
    local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p'

    # codership/galera#354
    # Some ungraceful shutdowns can leave an empty gvwstate.dat on
    # disk. This will prevent galera to join the cluster if it is
    # configured to attempt PC recovery. Removing that file makes the
    # node fall back to the normal, unoptimized joining process.
    if [ -f "$gvwstate" ] && [ ! -s "$gvwstate" ]; then
        ocf_log warn "empty ${OCF_RESKEY_datadir}/gvwstate.dat detected, removing it to prevent PC recovery failure at next restart"
        rm -f "$gvwstate"
    fi

    ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
    last_commit="$(cat "$grastate" | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
    if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
        tmp=$(mktemp)
        chown "$OCF_RESKEY_user:$OCF_RESKEY_group" "$tmp"

        # if we pass here because grastate.dat doesn't exist,
        # try not to bootstrap from this node if possible
        if [ ! -f "$grastate" ]; then
            set_no_grastate
        fi

        ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"

        $SU - $OCF_RESKEY_user -s /bin/sh -c \
            "${OCF_RESKEY_binary} $recover_args --wsrep-recover --log-error=$tmp 2>/dev/null"

        # The sed scripts are quoted so the shell never glob-expands them.
        last_commit="$(sed -n "$recovered_position_regex" "$tmp" | tail -1)"
        if [ -z "$last_commit" ]; then
            # Galera uses InnoDB's 2pc transactions internally. If
            # server was stopped in the middle of a replication, the
            # recovery may find a "prepared" XA transaction in the
            # redo log, and mysql won't recover automatically
            recovery_file="$(sed -n "$recovery_file_regex" "$tmp")"
            # An empty name must not reach the test or grep: unquoted it
            # would make "[ -e ]" true and have the search read stdin.
            if [ -n "$recovery_file" ] && [ -e "$recovery_file" ]; then
                grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' "$recovery_file" 2>/dev/null
                if [ $? -eq 0 ]; then
                    # we can only rollback the transaction, but that's OK
                    # since the DB will get resynchronized anyway
                    ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover"
                    $SU - $OCF_RESKEY_user -s /bin/sh -c \
                        "${OCF_RESKEY_binary} $recover_args --wsrep-recover --tc-heuristic-recover=rollback --log-error=$tmp 2>/dev/null"

                    last_commit="$(sed -n "$recovered_position_regex" "$tmp" | tail -1)"
                    if [ ! -z "$last_commit" ]; then
                        ocf_log warn "State recovered. force SST at next restart for full resynchronization"
                        rm -f "$grastate"
                        # try not to bootstrap from this node if possible
                        set_no_grastate
                    fi
                fi
            fi
        fi
        rm -f "$tmp"
    fi

    if [ ! -z "$last_commit" ]; then
        ocf_log info "Last commit version found: $last_commit"
        set_last_commit $last_commit
        return $OCF_SUCCESS
    else
        ocf_exit_reason "Unable to detect last known write sequence number"
        clear_last_commit
        return $OCF_ERR_GENERIC
    fi
}
# For galera, promote is really start
# Promote action: start mysqld as a member of the galera cluster.
# - If a promoted instance already exists, join it using the configured
#   wsrep_cluster_address.
# - Otherwise, if this node carries the bootstrap attribute, bootstrap a new
#   cluster (gcomm://) and afterwards promote all other nodes.
# - Otherwise there is nothing to join: drop our promotion score and return
#   $OCF_SUCCESS so a bootstrap node can be elected later.
# Returns $OCF_SUCCESS on success, the failing helper's status when the
# server start or the initial monitor fails, or $OCF_ERR_GENERIC when the
# started instance is read-only or not in Primary state.
galera_promote()
{
    local rc
    local extra_opts
    local bootstrap
    local safe_to_bootstrap

    master_exists
    if [ $? -eq 0 ]; then
        # join without bootstrapping
        extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}"
    else
        bootstrap=$(is_bootstrap)

        if ocf_is_true $bootstrap; then
            # The best node for bootstrapping wasn't cleanly shutdown. Allow
            # bootstrapping anyways
            if [ "$(get_safe_to_bootstrap)" = "0" ]; then
                # "-i -e" rather than "-ie": the latter is parsed as -i with
                # backup suffix "e" and leaves a stray grastate.date behind.
                sed -i -e 's/^\(safe_to_bootstrap:\) 0/\1 1/' "${OCF_RESKEY_datadir}/grastate.dat"
                ocf_log info "safe_to_bootstrap in ${OCF_RESKEY_datadir}/grastate.dat set to 1 on node ${NODENAME}"
            fi
            ocf_log info "Node <${NODENAME}> is bootstrapping the cluster"
            extra_opts="--wsrep-cluster-address=gcomm://"
        else
            # We are being promoted without having the bootstrap
            # attribute in the CIB, which means we are supposed to
            # join a cluster; however if we end up here, there is no
            # Master remaining right now, which means there is no
            # cluster to join anymore. So force a demotion, and
            # let the RA decide later which node should be the next
            # bootstrap node.
            ocf_log warn "There is no running cluster to join, demoting ourself"
            clear_master_score
            return $OCF_SUCCESS
        fi
    fi

    # Already running as promoted: only the bookkeeping is left to do.
    galera_monitor
    if [ $? -eq $OCF_RUNNING_MASTER ]; then
        if ocf_is_true $bootstrap; then
            promote_everyone
            clear_bootstrap_node
            ocf_log info "boostrap node already up, promoting the rest of the galera instances."
        fi
        clear_safe_to_bootstrap
        clear_last_commit
        return $OCF_SUCCESS
    fi

    # last commit/safe_to_bootstrap flag are no longer relevant once promoted
    clear_last_commit
    clear_safe_to_bootstrap

    mysql_common_prepare_dirs
    mysql_common_start "$extra_opts"
    rc=$?
    if [ $rc != $OCF_SUCCESS ]; then
        return $rc
    fi

    galera_monitor
    rc=$?
    if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
        ocf_exit_reason "Failed initial monitor action"
        return $rc
    fi

    is_readonly
    if [ $? -eq 0 ]; then
        ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration."
        return $OCF_ERR_GENERIC
    fi

    is_primary
    if [ $? -ne 0 ]; then
        ocf_exit_reason "Failure. Master instance started, but is not in Primary mode."
        return $OCF_ERR_GENERIC
    fi

    if ocf_is_true $bootstrap; then
        promote_everyone
        clear_bootstrap_node
        # clear attribute no-grastate. if last shutdown was
        # not clean, we cannot be extra-cautious by requesting a SST
        # since this is the bootstrap node
        clear_no_grastate
        ocf_log info "Bootstrap complete, promoting the rest of the galera instances."
    else
        # if this is not the bootstrap node, make sure this instance
        # syncs with the rest of the cluster before promotion returns.
        wait_for_sync
        # sync is done, clear info about last startup
        clear_no_grastate
    fi

    ocf_log info "Galera started"
    return $OCF_SUCCESS
}
# Demote action: stop mysqld, drop every transient node attribute and the
# promotion score, then record the state needed for the next promotion.
# Returns the status of mysql_common_stop when the stop fails (anything
# other than $OCF_SUCCESS / $OCF_NOT_RUNNING), otherwise the status of
# detect_last_commit. Sets the non-local variable rc.
galera_demote()
{
    mysql_common_stop
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
        # A demote leaves the promoted role, so the message names the
        # Unpromoted role as the target (it used to say "to Master").
        ocf_exit_reason "Failed to stop Master galera instance during demotion to Unpromoted"
        return $rc
    fi

    # if this node was previously a bootstrap node, that is no longer the case.
    clear_bootstrap_node
    clear_last_commit
    clear_no_grastate
    clear_safe_to_bootstrap

    # Clear master score here rather than letting pacemaker do so once
    # demote finishes. This way a promote cannot take place right
    # after this demote even if pacemaker is requested to do so. It
    # will first have to run a start/monitor op, to reprobe the state
    # of the other galera nodes and act accordingly.
    clear_master_score

    # record last commit for next promotion
    detect_safe_to_bootstrap
    detect_last_commit
    rc=$?
    return $rc
}
galera_start()
{
    # Start action. As written here it does not launch the server: it checks
    # the configuration, records commit information and sets or clears the
    # master score.
    # Returns OCF_ERR_CONFIGURED when the node cannot be mapped into
    # wsrep_cluster_address, OCF_ERR_GENERIC when a master instance is
    # already running, the detect_last_commit status when that fails, and
    # OCF_SUCCESS otherwise.
    local rc
    local galera_node

    galera_node=$(pcmk_to_galera_name $NODENAME)
    if [ -z "$galera_node" ]; then
        ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>."
        return $OCF_ERR_CONFIGURED
    fi

    # The galera name must appear literally in the configured address list.
    if ! echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node; then
        ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}> to start this galera instance"
        return $OCF_ERR_CONFIGURED
    fi

    # A master that is already running was not started by the cluster.
    galera_monitor
    if [ $? -eq $OCF_RUNNING_MASTER ]; then
        ocf_exit_reason "master galera instance started outside of the cluster's control"
        return $OCF_ERR_GENERIC
    fi

    mysql_common_prepare_dirs
    detect_safe_to_bootstrap
    detect_last_commit
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    if master_exists; then
        ocf_log info "Master instances are already up, setting master score so this instance will join galera cluster."
        set_master_score $NODENAME
    else
        clear_master_score
        detect_first_master
    fi
    return $OCF_SUCCESS
}
galera_monitor()
{
    # Monitor action.
    # Returns:
    #   OCF_RUNNING_MASTER  server is running and is_primary succeeds
    #   OCF_SUCCESS         server is not running but a last commit is recorded
    #   OCF_NOT_RUNNING     server is not running and no last commit is recorded
    #   OCF_ERR_CONFIGURED  no galera name for this pacemaker node
    #   OCF_ERR_GENERIC     running but not listed in wsrep_cluster_address,
    #                       or running but not primary
    #   any other status reported by mysql_common_status is passed through
    local rc
    local galera_node
    local status_level="err"

    # A probe reports a stopped server at info level rather than err.
    if ocf_is_probe; then
        status_level="info"
    fi

    mysql_common_status $status_level
    rc=$?

    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        # NOTE(review): $node is never assigned in this function, so unless a
        # global of that name exists get_last_commit is called without an
        # argument - confirm that is intended.
        last_commit=$(get_last_commit $node)
        if [ -n "$last_commit" ]; then
            # if last commit is set, this instance is considered started in slave mode
            rc=$OCF_SUCCESS
            if master_exists; then
                # a master instance exists and is healthy, promote this
                # local read only instance so it can join the master
                # galera cluster.
                set_master_score
            else
                detect_first_master
            fi
        fi
        return $rc
    elif [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # From here on mysql is running; check the cluster status.
    galera_node=$(pcmk_to_galera_name $NODENAME)
    if [ -z "$galera_node" ]; then
        ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>."
        return $OCF_ERR_CONFIGURED
    fi

    if ! echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node; then
        ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
        return $OCF_ERR_GENERIC
    fi

    if is_primary; then
        if ocf_is_probe; then
            # restore master score during probe
            # if we detect this is a master instance
            set_master_score
        fi
        rc=$OCF_RUNNING_MASTER
    else
        ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
        rc=$OCF_ERR_GENERIC
    fi
    return $rc
}
galera_stop()
{
    # Stop action: stop the server, then clear every per-node attribute this
    # agent maintains.
    # Returns the status of mysql_common_stop, so a failed stop is reported
    # even though the cleanup below still runs.
    local rc

    # make sure the process is stopped
    mysql_common_stop
    # Capture the stop status. "$1" would be this function's first argument,
    # which is empty here, and an empty "return" silently reports the status
    # of the last cleanup call instead.
    rc=$?

    clear_safe_to_bootstrap
    clear_last_commit
    clear_master_score
    clear_bootstrap_node
    clear_no_grastate
    return $rc
}
galera_validate()
{
    # Validate the galera-specific configuration, then defer to the shared
    # mysql validation.
    # Returns OCF_ERR_CONFIGURED when the resource is not multistate or
    # wsrep_cluster_address is empty; otherwise the status of
    # mysql_common_validate.
    ocf_is_ms || {
        ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource."
        return $OCF_ERR_CONFIGURED
    }

    [ -n "$OCF_RESKEY_wsrep_cluster_address" ] || {
        ocf_exit_reason "Galera must be configured with a wsrep_cluster_address value."
        return $OCF_ERR_CONFIGURED
    }

    mysql_common_validate
}
# Actions that must work without a valid configuration: print the metadata
# or the usage text and exit before any validation runs.
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage|help) usage
exit $OCF_SUCCESS;;
esac
# Validate the configuration. When validation fails, the exit code depends
# on the requested action: stop exits with success, monitor exits with
# "not running", status exits with the LSB "stopped" code (3), and every
# other action exits with the validation error itself.
galera_validate
rc=$?
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
case "$1" in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
*) exit $rc;;
esac
fi
# Build MYSQL_OPTIONS_CHECK, the client options used for health checks.
# Credentials come from the check_user/check_passwd resource parameters and
# fall back to MYSQL_USERNAME/MYSQL_PASSWORD; host and port are only added
# when MYSQL_HOST/MYSQL_PORT are set.
if [ -z "${OCF_RESKEY_check_passwd}" ]; then
    # This value is automatically sourced from /etc/sysconfig/checkcluster if available
    OCF_RESKEY_check_passwd=${MYSQL_PASSWORD}
fi
if [ -z "${OCF_RESKEY_check_user}" ]; then
    # This value is automatically sourced from /etc/sysconfig/checkcluster if available
    OCF_RESKEY_check_user=${MYSQL_USERNAME}
fi
# Use ':=' rather than '=': the fallback above always leaves the variable
# set, possibly to an empty string, so a plain '=' default would never apply
# and the client would be called with an empty --user= value.
: ${OCF_RESKEY_check_user:="root"}

MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
if ocf_is_true "${OCF_RESKEY_check_passwd_use_empty}"; then
    # An explicitly empty password was requested.
    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password="
elif [ -n "${OCF_RESKEY_check_passwd}" ]; then
    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
fi

# This value is automatically sourced from /etc/sysconfig/checkcluster if available
if [ -n "${MYSQL_HOST}" ]; then
    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}"
fi

# This value is automatically sourced from /etc/sysconfig/checkcluster if available
if [ -n "${MYSQL_PORT}" ]; then
    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}"
fi
# What kind of method was invoked?
# Each action maps to its handler. No explicit exit follows a handler, so
# the script's exit status is the status of the handler that ran.
# validate-all exits with success directly, and an unknown action prints
# the usage text and exits with OCF_ERR_UNIMPLEMENTED.
case "$1" in
start) galera_start;;
stop) galera_stop;;
status) mysql_common_status err;;
monitor) galera_monitor;;
promote) galera_promote;;
demote) galera_demote;;
validate-all) exit $OCF_SUCCESS;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
# vi:sw=4:ts=4:et:
diff --git a/heartbeat/mariadb.in b/heartbeat/mariadb.in
index a8f06e4de..e0f1f3c9f 100644
--- a/heartbeat/mariadb.in
+++ b/heartbeat/mariadb.in
@@ -1,1040 +1,1040 @@
#!@BASH_SHELL@
#
#
# MariaDB
#
# Description: Manages a MariaDB Promotable database as Linux-HA resource
#
# Authors: Alan Robertson: DB2 Script
# Jakub Janczak: rewrite as MySQL
# Andrew Beekhof: cleanup and import
# Sebastian Reitenbach: add OpenBSD defaults, more cleanup
# Narayan Newton: add Gentoo/Debian defaults
# Marian Marinov, Florian Haas: add replication capability
# Yves Trudeau, Baron Schwartz: add VIP support and improve replication
# Nils Carlson: add GTID support and semi-sync support
#
# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# (c) 2002-2005 International Business Machines, Inc.
# 2005-2010 Linux-HA contributors
#
# See usage() function below for more details...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_client_binary
# OCF_RESKEY_config
# OCF_RESKEY_datadir
# OCF_RESKEY_user
# OCF_RESKEY_group
# OCF_RESKEY_node_list
# OCF_RESKEY_test_table
# OCF_RESKEY_test_user
# OCF_RESKEY_test_passwd
# OCF_RESKEY_enable_creation
# OCF_RESKEY_additional_parameters
# OCF_RESKEY_log
# OCF_RESKEY_pid
# OCF_RESKEY_socket
# OCF_RESKEY_replication_user
# OCF_RESKEY_replication_passwd
# OCF_RESKEY_replication_port
#######################################################################
# Initialization:
# Default for the node_list parameter; meta_data prints it as that
# parameter's default value.
OCF_RESKEY_node_list_default=""
# Use ${OCF_ROOT}/lib/heartbeat for the function libraries unless
# OCF_FUNCTIONS_DIR is already set, then source the generic OCF helpers and
# the shared mysql helpers.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/mysql-common.sh
#######################################################################
# Print the command-line help to stdout.
# The heredoc delimiter is unquoted, so $0 expands to the script name.
# The synopsis lists every action the dispatcher at the bottom of the file
# accepts, including 'status'.
usage() {
cat <<UEND
usage: $0 (start|stop|status|validate-all|meta-data|monitor|promote|demote|notify)
$0 manages a MariaDB Database as an HA resource.
The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'status' operation reports whether the database is running
The 'monitor' operation reports whether the database seems to be working
The 'promote' operation makes this mysql server run as promoted
The 'demote' operation makes this mysql server run as unpromoted
The 'notify' operation handles promote and demote notifications from the cluster
The 'validate-all' operation reports whether the parameters are valid
UEND
}
# Print the OCF resource-agent metadata (XML) for this agent to stdout.
# The heredoc delimiter is unquoted, so every ${OCF_RESKEY_*_default}
# reference is expanded when the function runs. \${INSTANCE_ATTR_NAME} is
# escaped and therefore printed literally.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mariadb" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Resource script for MariaDB.
Manages a complete promotable replication setup with GTID, for simpler
uses look at the mysql resource agent which supports older replication
forms which mysql and mariadb have in common.
The resource must be setup to use notifications. Set 'notify=true' in the metadata
attributes when defining a MariaDB promotable instance.
The default behavior is to use uname -n values in the change promoted to command.
Other IPs can be specified manually by adding a node attribute
\${INSTANCE_ATTR_NAME}_mysql_master_IP giving the IP to use for replication.
For example, if the mariadb primitive you are using is p_mariadb, the
attribute to set will be p_mariadb_mysql_master_IP.
</longdesc>
<shortdesc lang="en">Manages a MariaDB promotable instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MariaDB server binary
</longdesc>
<shortdesc lang="en">MariaDB server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MariaDB client binary
</longdesc>
<shortdesc lang="en">MariaDB client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MariaDB config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">MariaDB datadir</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MariaDB daemon
</longdesc>
<shortdesc lang="en">MariaDB user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MariaDB daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MariaDB group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="log" unique="0" required="0">
<longdesc lang="en">
The logfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MariaDB log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_default}"/>
</parameter>
<parameter name="node_list" unique="0" required="1">
<longdesc lang="en">
All node names of nodes that will execute mariadb.
Please separate each node name with a space.
This is required for the promoted selection to function.
</longdesc>
<shortdesc lang="en">node list</shortdesc>
<content type="string" default="${OCF_RESKEY_node_list_default}" />
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MariaDB pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MariaDB socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="test_table" unique="0" required="0">
<longdesc lang="en">
Table to be tested in monitor statement (in database.table notation)
</longdesc>
<shortdesc lang="en">MariaDB test table</shortdesc>
<content type="string" default="${OCF_RESKEY_test_table_default}" />
</parameter>
<parameter name="test_user" unique="0" required="0">
<longdesc lang="en">
MariaDB test user, must have select privilege on test_table
</longdesc>
<shortdesc lang="en">MariaDB test user</shortdesc>
<content type="string" default="${OCF_RESKEY_test_user_default}" />
</parameter>
<parameter name="test_passwd" unique="0" required="0">
<longdesc lang="en">
MariaDB test user password
</longdesc>
<shortdesc lang="en">MariaDB test user password</shortdesc>
<content type="string" default="${OCF_RESKEY_test_passwd_default}" />
</parameter>
<parameter name="enable_creation" unique="0" required="0">
<longdesc lang="en">
If the MariaDB database does not exist, it will be created
</longdesc>
<shortdesc lang="en">Create the database if it does not exist</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="replication_user" unique="0" required="0">
<longdesc lang="en">
MariaDB replication user. This user is used for starting and stopping
MariaDB replication, for setting and resetting the promoted host, and for
setting and unsetting read-only mode. Because of that, this user must
have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, PROCESS and RELOAD
privileges on all nodes within the cluster. Mandatory if you define a
promotable resource.
</longdesc>
<shortdesc lang="en">MariaDB replication user</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_user_default}" />
</parameter>
<parameter name="replication_passwd" unique="0" required="0">
<longdesc lang="en">
MariaDB replication password. Used for replication client and unpromoted.
Mandatory if you define a promotable resource.
</longdesc>
<shortdesc lang="en">MariaDB replication user password</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_passwd_default}" />
</parameter>
<parameter name="replication_port" unique="0" required="0">
<longdesc lang="en">
The port on which the Promoted MariaDB instance is listening.
</longdesc>
<shortdesc lang="en">MariaDB replication port</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_port_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="20s" />
<action name="monitor" role="Promoted" depth="0" timeout="30s" interval="10s" />
<action name="monitor" role="Unpromoted" depth="0" timeout="30s" interval="30s" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}
# Convenience functions
greater_than_equal_long()
{
    # Compare two integers that may be too large for the shell's -gt by
    # handing the comparison to bc.
    # Arguments: $1, $2 - the numbers to compare
    # Returns:   0 when $1 > $2, 1 otherwise.
    # Note: despite the function name the comparison is strict, so equal
    # values return 1.
    local verdict=$(echo "$1 > $2" | bc)

    [ "$verdict" -eq "1" ] && return 0
    return 1
}
greater_than_gtid()
{
    # Compare two GTIDs by their third dash-separated field.
    # Arguments: $1, $2 - GTID strings; a value without any dash is passed
    #            through cut unchanged and compared as-is
    # Returns:   the status of greater_than_equal_long for the two fields.
    local left_seq=$(echo $1 | cut -d - -f 3)
    local right_seq=$(echo $2 | cut -d - -f 3)

    greater_than_equal_long $left_seq $right_seq
}
set_gtid() {
    # Publish this node's gtid_current_pos as the attribute
    # ${OCF_RESOURCE_INSTANCE}-gtid through attrd_updater.
    # Returns OCF_ERR_GENERIC when the value read from MariaDB contains no
    # '-', otherwise the status of attrd_updater.
    local gtid=$($MYSQL $MYSQL_OPTIONS_REPL \
        -s -N -e "show global variables like 'gtid_current_pos'" | cut -f 2)

    # Anything that looks like a valid GTID contains at least one dash.
    if echo $gtid | grep -q '-'; then
        ${HA_SBIN_DIR}/attrd_updater -p -n ${OCF_RESOURCE_INSTANCE}-gtid -U $gtid
        return $?
    fi

    ocf_exit_reason "Unable to read GTID from MariaDB"
    ocf_log err "Unable to read GTID from MariaDB"
    return $OCF_ERR_GENERIC
}
read_gtid() {
    # Print the GTID stored in attrd for a node.
    # Arguments: $1 - node name to query
    # Outputs:   the stored value, or an empty line when the query fails
    # The attrd_updater reply has the form
    #   name="var-name" host="node2" value="val"
    # and is evaluated as shell assignments, so name, host and value are
    # declared local to keep them out of the caller's scope.
    local node=$1
    local reply name host value

    # We stored our own GTID earlier, so the attribute should exist by now.
    if reply=$(${HA_SBIN_DIR}/attrd_updater -p -N $node -n ${OCF_RESOURCE_INSTANCE}-gtid -Q); then
        eval ${reply}
        echo ${value}
    else
        ocf_exit_reason "Unable to read GTID from attrd"
        ocf_log err "Unable to read GTID from attrd"
        echo ""
        return
    fi
}
clear_all_gtid() {
    # Delete the ${OCF_RESOURCE_INSTANCE}-gtid attribute for every node
    # listed in OCF_RESKEY_node_list.
    # The loop variable "node" is not declared local, as in the rest of
    # this file.
    set -- $OCF_RESKEY_node_list
    for node; do
        ${HA_SBIN_DIR}/attrd_updater -n ${OCF_RESOURCE_INSTANCE}-gtid -N $node -D
    done
}
set_waiting_for_first_master() {
    # Set the attribute ${OCF_RESOURCE_INSTANCE}-waiting-for-first-master
    # to "true" through attrd_updater.
    local flag_attr=${OCF_RESOURCE_INSTANCE}-waiting-for-first-master

    ${HA_SBIN_DIR}/attrd_updater -p -n $flag_attr -U true
}
waiting_for_first_master() {
    # Report whether the waiting-for-first-master flag is set.
    # Returns 0 when attrd reports the value "true"; 1 when it reports
    # anything else or the query fails.
    local reply name host value

    if ! reply=$(${HA_SBIN_DIR}/attrd_updater -p -n ${OCF_RESOURCE_INSTANCE}-waiting-for-first-master -Q); then
        ocf_exit_reason "Unable to read waiting-for-first-master from attrd"
        ocf_log err "Unable to read waiting-for-first-master from attrd"
        return 1
    fi

    # The reply is evaluated as shell assignments; name, host and value are
    # local so they stay inside this function.
    eval ${reply}
    [ "$value" = "true" ]
}
clear_waiting_for_first_master() {
    # Delete the attribute ${OCF_RESOURCE_INSTANCE}-waiting-for-first-master.
    # attrd_updater is called through ${HA_SBIN_DIR}, like every other call
    # in this file, so the delete does not depend on the agent's PATH.
    # Returns the status of attrd_updater.
    ${HA_SBIN_DIR}/attrd_updater -n ${OCF_RESOURCE_INSTANCE}-waiting-for-first-master -D
}
have_master_with_priority() {
# Go through each node and validate that at least one has
# a set priority. Because we unset the priority on reboot
# a lack of priority indicates that we need to select a
# new master.
#
# Returns 0 as soon as the score query succeeds for one node of
# OCF_RESKEY_node_list, 1 when it fails for all of them.
# Neither "node" nor "rc" is declared local in this function.
# NOTE(review): the "-"/"+" pair below is unified-diff markup from the
# patch this text belongs to: the removed line used $CRM_MASTER, the added
# line uses ocf_promotion_score.
for node in $OCF_RESKEY_node_list; do
- $CRM_MASTER -G -N $node >/dev/null 2>&1
+ ocf_promotion_score -G -N $node >/dev/null 2>&1
rc=$?
if [ $rc -eq 0 ]; then
return 0
fi
done
return 1
}
attempt_to_set_master() {
# Try to elect a promotion candidate: publish this node's GTID, read the
# GTIDs stored for every node in OCF_RESKEY_node_list, and give a score of
# 100 to the node with the highest GTID once enough nodes have answered.
# When too few nodes have answered, only an info message is logged.
# NOTE(review): the "-"/"+" pair near the end is unified-diff markup from
# the patch this text belongs to ($CRM_MASTER replaced by
# ocf_promotion_score).
ocf_log info "Attempting to set master"
local expected_node_count
if waiting_for_first_master; then
# Wait for all nodes to come online
expected_node_count=$OCF_RESKEY_CRM_meta_clone_max
else
# We accept one node being down. This is not arbitrary,
# synchronous replication requires acknowledgement from
# at least one host, which means only two nodes must have
# the latest GTID. So a set of n - 1 ensures that we do
# not lose any writes.
expected_node_count=$(($OCF_RESKEY_CRM_meta_clone_max-1))
fi
# Set the gtid for this node, making it available to other nodes
# (the return status of set_gtid is not checked here)
set_gtid
local node_count=0
local highest_gtid=0
local master_candidate=""
for node in $OCF_RESKEY_node_list; do
local node_gtid=$(read_gtid $node)
# An empty value means no GTID could be read for this node; skip it
if [ -z "$node_gtid" ]; then
continue
fi
# Got a valid gtid, increment node count
node_count=$(($node_count+1))
# Check if this is a good master candidate
# (greater_than_gtid is a strict comparison, so on a tie the node seen
# first stays the candidate)
if greater_than_gtid $node_gtid $highest_gtid; then
master_candidate=$node
highest_gtid=$node_gtid
fi
done
# If we managed to query a sufficient number of nodes
# then set a master
if [ $node_count -ge $expected_node_count ]; then
ocf_log info "Promoting $master_candidate to master, highest gtid $highest_gtid, queried $node_count nodes."
- $CRM_MASTER -v 100 -N $master_candidate
+ ocf_promotion_score -v 100 -N $master_candidate
else
ocf_log info "Not enough nodes ($node_count) contributed to select a master, need $expected_node_count nodes."
fi
}
set_read_only() {
    # Sets or unsets read-only mode. Accepts one boolean as its
    # optional argument. If invoked without any arguments, defaults to
    # enabling read only mode. Should only be set in master/slave
    # setups.
    # Returns the status of ocf_run for the SET GLOBAL statement.
    local ro_val

    # The documented default applies only when no argument is passed at
    # all; an explicit argument, including an empty one, is still judged by
    # ocf_is_true.
    if [ $# -eq 0 ] || ocf_is_true "$1"; then
        ro_val="on"
    else
        ro_val="off"
    fi

    ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
        -e "SET GLOBAL read_only=${ro_val}"
}
get_read_only() {
    # Report whether the server is in read-only mode.
    # Returns 0 when the read_only variable reads "ON", 1 otherwise,
    # including when the query produces no output.
    local ro_state

    ro_state=$($MYSQL $MYSQL_OPTIONS_REPL \
        -e "SHOW VARIABLES" | grep -w read_only | awk '{print $2}')

    [ "$ro_state" = "ON" ]
}
is_slave() {
    # Decide whether this instance is currently acting as a replication
    # slave.
    # Returns 0 only when the server is read-only, get_slave_info succeeds
    # (SHOW SLAVE STATUS is not empty) and the slave SQL thread reports
    # 'Yes'; returns 1 in every other case.
    local rc

    # A writable server is not treated as a slave.
    get_read_only || return 1

    # "SHOW SLAVE STATUS" returns an empty set if the instance is not a
    # replication slave.
    get_slave_info || return 1

    # Is the slave sql thread running, then we are a slave!
    [ "$slave_sql" == 'Yes' ]
}
parse_slave_info() {
    # Print the value of one field from saved "SHOW SLAVE STATUS\G" output.
    # Arguments: $1 - field name, e.g. Master_Host
    #            $2 - file holding the status output
    # Only lines where the field name follows a space and is followed by
    # ": " match; everything after ": " is printed.
    local field=$1
    local status_file=$2

    sed -n -e "s/^.* ${field}: \(.*\)$/\1/p" < $status_file
}
get_slave_info() {
    # Load the replication status into global variables (master_host,
    # master_user, master_port, master_using_gtid, master_log_file,
    # slave_sql, slave_io, last_errno, last_error, secs_behind,
    # last_io_errno, last_io_error).
    # Returns OCF_SUCCESS when the variables are available, OCF_ERR_GENERIC
    # when "SHOW SLAVE STATUS" produced no output.

    # Both variables set means an earlier call already loaded the status.
    if [ "$master_log_file" -a "$master_host" ]; then
        return $OCF_SUCCESS
    fi

    local tmpfile=$(mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX)
    $MYSQL $MYSQL_OPTIONS_REPL \
        -e 'SHOW SLAVE STATUS\G' > $tmpfile

    # Instance produced an empty "SHOW SLAVE STATUS" output --
    # instance is not a slave
    [ -s $tmpfile ] || {
        rm "$tmpfile"
        return $OCF_ERR_GENERIC
    }

    master_host=$(parse_slave_info Master_Host $tmpfile)
    master_user=$(parse_slave_info Master_User $tmpfile)
    master_port=$(parse_slave_info Master_Port $tmpfile)
    master_using_gtid=$(parse_slave_info Using_Gtid $tmpfile)
    master_log_file=$(parse_slave_info Master_Log_File $tmpfile)
    slave_sql=$(parse_slave_info Slave_SQL_Running $tmpfile)
    slave_io=$(parse_slave_info Slave_IO_Running $tmpfile)
    last_errno=$(parse_slave_info Last_Errno $tmpfile)
    last_error=$(parse_slave_info Last_Error $tmpfile)
    secs_behind=$(parse_slave_info Seconds_Behind_Master $tmpfile)
    last_io_errno=$(parse_slave_info Last_IO_Errno $tmpfile)
    last_io_error=$(parse_slave_info Last_IO_Error $tmpfile)
    ocf_log debug "MariaDB instance running as a replication slave"
    rm "$tmpfile"
    return $OCF_SUCCESS
}
check_slave() {
    # Check the health of replication on a slave.
    # Most problem paths end the whole script with "exit" rather than
    # returning:
    #   exit OCF_ERR_GENERIC  replication error, IO error other than 2003,
    #                         or the instance is not a slave at all
    #   exit OCF_SUCCESS      IO thread stopped while pointing at the wrong
    #                         master, or SQL thread stopped (restart tried)
    # On a healthy slave the function returns the status of the final
    # debug log call.
    local rc new_master

    get_slave_info
    rc=$?
    if [ $rc -ne 0 ]; then
        # Instance produced an empty "SHOW SLAVE STATUS" output --
        # instance is not a slave
        # TODO: Needs to handle when get_slave_info will return too many connections error
        ocf_exit_reason "check_slave invoked on an instance that is not a replication slave."
        exit $OCF_ERR_GENERIC
    fi

    # Check normal errors
    if [ $last_errno -ne 0 ]; then
        ocf_exit_reason "MariaDB slave replication has failed ($last_errno): $last_error"
        exit $OCF_ERR_GENERIC
    fi

    # Check IO Errors, ignore 2003 which indicates a connection failure to the master
    if [ $last_io_errno -ne 0 ] && [ $last_io_errno -ne 2003 ]; then
        ocf_exit_reason "MariaDB slave io has failed ($last_io_errno): $last_io_error"
        exit $OCF_ERR_GENERIC
    fi
    if [ $last_io_errno -eq 2003 ]; then
        ocf_log warn "MariaDB master not reachable from slave"
    fi

    if [ "$slave_io" != 'Yes' ]; then
        # Not necessarily a bad thing. The master may have
        # temporarily shut down, and the slave may just be
        # reconnecting. A warning can't hurt, though.
        ocf_log warn "MariaDB Slave IO threads currently not running."

        # Sanity check, are we at least on the right master
        new_master=$($CRM_ATTR_REPL_INFO --query -q)
        if [ "$master_host" != "$new_master" ]; then
            # Not pointing to the right master, not good, removing the VIPs
            set_reader_attr 0
            exit $OCF_SUCCESS
        fi
    fi

    if [ "$slave_sql" != 'Yes' ]; then
        # No replication SQL thread is running. Try to recover by
        # restarting it and take the reader VIP away meanwhile.
        ocf_exit_reason "MariaDB Slave SQL threads currently not running."

        # Remove reader vip
        set_reader_attr 0

        # try to restart slave
        ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
            -e "START SLAVE"

        # Return success to prevent a restart
        exit $OCF_SUCCESS
    fi

    ocf_log debug "MariaDB instance running as a replication slave"
}
set_master() {
    # Point the local MariaDB server at the replication master recorded
    # under $CRM_ATTR_REPL_INFO. Takes no arguments; the host name is always
    # read from that attribute.
    # The master must either be unchanged from the last master the slave
    # replicated from, or freshly reset with RESET MASTER.
    # Returns the status of ocf_run for the CHANGE MASTER statement.
    local new_master=$($CRM_ATTR_REPL_INFO --query -q)
    local change_sql

    change_sql="CHANGE MASTER TO MASTER_HOST='$new_master', "
    change_sql="${change_sql}MASTER_PORT=$OCF_RESKEY_replication_port, "
    change_sql="${change_sql}MASTER_USER='$OCF_RESKEY_replication_user', "
    change_sql="${change_sql}MASTER_PASSWORD='$OCF_RESKEY_replication_passwd', "
    change_sql="${change_sql}MASTER_USE_GTID=current_pos"

    ocf_log info "Changing MariaDB configuration to replicate from $new_master."
    ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "$change_sql"
}
unset_master(){
    # Make the local MariaDB server stop replicating from its master.
    # Returns OCF_SUCCESS straight away, after a warning, when the instance
    # is not a slave. A failing STOP/RESET statement ends the script with
    # exit OCF_ERR_GENERIC.

    # Nothing to do when we are not replicating from anyone. A warning is
    # still logged because no-one but the CRM should be touching the
    # MariaDB master/slave configuration.
    if ! is_slave; then
        ocf_log warn "Attempted to unset the replication master on an instance that is not configured as a replication slave"
        return $OCF_SUCCESS
    fi

    # Stop the slave I/O thread and wait for relay log
    # processing to complete
    if ! ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "STOP SLAVE IO_THREAD"; then
        ocf_exit_reason "Error stopping slave IO thread"
        exit $OCF_ERR_GENERIC
    fi

    # Poll the process list once per second until the relay log has been
    # drained or no "system user" thread is left.
    local tmpfile=$(mktemp ${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX)
    while :; do
        $MYSQL $MYSQL_OPTIONS_REPL \
            -e 'SHOW PROCESSLIST\G' > $tmpfile
        if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
            ocf_log info "MariaDB slave has finished processing relay log"
            break
        fi
        if ! grep -q 'system user' $tmpfile; then
            ocf_log info "Slave not runnig - not waiting to finish"
            break
        fi
        ocf_log info "Waiting for MariaDB slave to finish processing relay log"
        sleep 1
    done
    rm -f $tmpfile

    # Now, stop all slave activity and unset the master host
    if ! ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "STOP SLAVE"; then
        ocf_exit_reason "Error stopping rest slave threads"
        exit $OCF_ERR_GENERIC
    fi

    if ! ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "RESET SLAVE /*!50516 ALL */;"; then
        ocf_exit_reason "Failed to reset slave"
        exit $OCF_ERR_GENERIC
    fi
}
# Start replication on this instance by issuing "START SLAVE".
# Returns the status of ocf_run.
start_slave() {
    ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "START SLAVE"
}
# Set the attribute controlling the readers VIP.
# Arguments: $1 - the numeric value the attribute should have
# The attribute is only written when the current value differs from $1.
set_reader_attr() {
    local current

    current=$(get_reader_attr)
    [ "$current" -ne "$1" ] || return 0

    $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v $1
}
# get the attribute controlling the readers VIP
get_reader_attr() {
local attr_value
local rc
attr_value=$($CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} --query -q)
rc=$?
if [ "$rc" -eq "0" ]; then
echo $attr_value
else
echo -1
fi
}
# Determines what IP address is attached to the current host. The output of the
# crm_attribute command looks like this:
# scope=nodes name=IP value=10.2.2.161
# If the ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP node attribute is not defined, fallback is to uname -n
# The ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP is the IP address that will be used for the
# change master to command.
get_local_ip() {
local IP
IP=$($CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_mysql_master_IP -q -G 2>/dev/null)
if [ ! $? -eq 0 ]; then
uname -n
else
echo $IP
fi
}
#######################################################################
# Functions invoked by resource manager actions
mysql_monitor() {
    # Monitor action.
    # Returns:
    #   OCF_RUNNING_MASTER  server is running and not read-only
    #   OCF_SUCCESS         server is running in read-only mode
    #   OCF_ERR_GENERIC     slave check or test-table query failed
    #   any non-success status of mysql_common_status is passed through
    local rc
    local status_loglevel="err"

    # Set loglevel to info during probe
    if ocf_is_probe; then
        status_loglevel="info"
    fi

    mysql_common_status $status_loglevel
    rc=$?

    # If status returned an error, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # Check if this instance is configured as a slave, and if so
    # check slave status
    if is_slave; then
        if ! check_slave; then
            return $OCF_ERR_GENERIC
        fi
    fi

    if [ -n "$OCF_RESKEY_test_table" ]; then
        # Check for test table
        ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \
            -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table"
        rc=$?
        if [ $rc -ne 0 ]; then
            # Name the table that was actually queried; no variable called
            # test_table is set in this function.
            ocf_exit_reason "Failed to select from $OCF_RESKEY_test_table";
            return $OCF_ERR_GENERIC;
        fi
    fi

    # Check if we are in read-only mode and there is no master
    # with priority then we attempt to select a master
    if get_read_only && ! have_master_with_priority; then
        attempt_to_set_master
    fi

    if ! get_read_only; then
        ocf_log debug "MariaDB monitor succeeded (master)";
        return $OCF_RUNNING_MASTER
    else
        ocf_log debug "MariaDB monitor succeeded";
        return $OCF_SUCCESS
    fi
}
mysql_start() {
# Start action: start the server read-only, enable semi-sync, join an
# existing master as a slave or flag that this node waits for the first
# master, then run an initial monitor.
# Returns OCF_SUCCESS when the server is already running or everything
# succeeded, OCF_ERR_GENERIC for a non-promotable configuration, a failed
# variable setup or a failed slave start, and otherwise the failing status
# of mysql_common_start or mysql_monitor.
# NOTE(review): the "-"/"+" pair below is unified-diff markup from the
# patch this text belongs to ($CRM_MASTER replaced by ocf_promotion_score).
local rc
if ! ocf_is_ms; then
ocf_exit_reason "Resource is not configured as master/slave"
return $OCF_ERR_GENERIC
fi
# Initialize the ReaderVIP attribute, monitor will enable it
set_reader_attr 0
mysql_common_status info
if [ $? = $OCF_SUCCESS ]; then
ocf_log info "MariaDB already running"
return $OCF_SUCCESS
fi
mysql_common_prepare_dirs
mysql_common_start --skip-slave-start --log-slave-updates
rc=$?
if [ $rc != $OCF_SUCCESS ]; then
return $rc
fi
# Enable semi-sync
ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \
-e "SET GLOBAL rpl_semi_sync_slave_enabled='ON', \
rpl_semi_sync_master_enabled='ON', \
rpl_semi_sync_master_wait_no_slave='OFF', \
rpl_semi_sync_master_wait_point='AFTER_SYNC', \
gtid_strict_mode='ON', \
sync_binlog=1, \
sync_master_info=1, \
innodb_flush_log_at_trx_commit=1;"
rc=$?
if [ $rc -ne 0 ]; then
ocf_exit_reason "Failed to enable semi-sync and set variables";
return $OCF_ERR_GENERIC;
fi
# We're configured as a stateful resource. We must start as
# slave by default. At this point we don't know if the CRM has
# already promoted a master. So, we simply start in read only
# mode and make sure our old score is invalidated.
set_read_only on
- $CRM_MASTER -D
+ ocf_promotion_score -D
# Now, let's see whether there is a master. We might be a new
# node that is just joining the cluster, and the CRM may have
# promoted a master before.
new_master_host=$(echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " ")
# A master other than this node exists: replicate from it
if [ "$new_master_host" -a "$new_master_host" != ${NODENAME} ]; then
set_master
start_slave
if [ $? -ne 0 ]; then
ocf_exit_reason "Failed to start slave"
return $OCF_ERR_GENERIC
fi
else
ocf_log info "No MariaDB master present - clearing replication state, setting gtid in attrd, waiting for first master"
unset_master
set_waiting_for_first_master
fi
# Initial monitor action
# OCF_CHECK_LEVEL is raised to 10 only when test table, user and password
# are all configured
if [ -n "$OCF_RESKEY_test_table" -a -n "$OCF_RESKEY_test_user" -a -n "$OCF_RESKEY_test_passwd" ]; then
OCF_CHECK_LEVEL=10
fi
mysql_monitor
rc=$?
if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
ocf_exit_reason "Failed initial monitor action"
return $rc
fi
ocf_log info "MariaDB started"
return $OCF_SUCCESS
}
mysql_stop() {
# Stop action: drop this node's promotion score, set the reader attribute
# to 0, then stop the server. Returns the status of mysql_common_stop.
# NOTE(review): the "-"/"+" pair below is unified-diff markup from the
# patch this text belongs to ($CRM_MASTER replaced by ocf_promotion_score).
# clear preference for becoming master
- $CRM_MASTER -D
+ ocf_promotion_score -D
# Remove VIP capability
set_reader_attr 0
mysql_common_stop
}
mysql_promote() {
    # Promote action: turn the local instance into the writable master.
    # Returns OCF_NOT_RUNNING when the server is not running,
    # OCF_ERR_GENERIC when read-only mode cannot be switched off, and
    # OCF_SUCCESS otherwise.

    # The status check runs in a subshell, as in the original.
    ( mysql_common_status err ) || return $OCF_NOT_RUNNING

    ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "STOP SLAVE"

    if ! set_read_only off; then
        return $OCF_ERR_GENERIC
    fi

    # Force the master to wait for timeout period on slave disconnect
    ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \
        -e "SET GLOBAL rpl_semi_sync_master_wait_no_slave='ON';"

    # Set Master Info in CIB, cluster level attribute
    ${CRM_ATTR_REPL_INFO} -v "$(get_local_ip)"

    # A master can accept reads
    set_reader_attr 1

    # Clear the gtids in attrd now that there is a master
    clear_all_gtid

    return $OCF_SUCCESS
}
mysql_demote() {
# Demote action: switch semi-sync back to not waiting for a slave and drop
# this node's promotion score.
# Returns OCF_NOT_RUNNING when the server is not running; otherwise the
# status of the last command in the function.
# NOTE(review): the "-"/"+" pair below is unified-diff markup from the
# patch this text belongs to ($CRM_MASTER replaced by ocf_promotion_score).
if ! mysql_common_status err; then
return $OCF_NOT_RUNNING
fi
# Return to default no wait setting.
ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \
-e "SET GLOBAL rpl_semi_sync_master_wait_no_slave='OFF';"
# Return master preference to default, so the cluster manager gets
# a chance to select a new master
- $CRM_MASTER -D
+ ocf_promotion_score -D
}
mysql_notify() {
    # Notify action: react to promote/demote notifications from the cluster.
    # The notification is identified by
    # "${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}".
    #   pre-promote   clear the waiting-for-first-master flag
    #   post-promote  on every node but the new master: re-point replication
    #   pre-demote    on the node being demoted: go read-only and kill the
    #                 remaining client threads
    #   post-demote   on every other node: stop replicating
    # Returns OCF_ERR_GENERIC when a replication or read-only step fails,
    # OCF_SUCCESS for the handled and ignored cases, and the status of the
    # last helper call for pre-promote and post-demote.
    local type_op
    type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"

    ocf_log debug "Received $type_op notification."

    case "$type_op" in
    'pre-promote')
        # A master is now being promoted, remove the waiting-for-first-master flag
        clear_waiting_for_first_master
        ;;
    'post-promote')
        # The master has completed its promotion. Now is a good
        # time to check whether our replication slave is working
        # correctly.
        new_master_host=$(echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " ")
        if [ "$new_master_host" = "${NODENAME}" ]; then
            ocf_log info "This will be the new master, ignoring post-promote notification."
        else
            ocf_log info "Resetting replication, uname of master: $new_master_host"
            unset_master
            if [ $? -ne 0 ]; then
                return $OCF_ERR_GENERIC
            fi

            set_master
            if [ $? -ne 0 ]; then
                return $OCF_ERR_GENERIC
            fi

            start_slave
            if [ $? -ne 0 ]; then
                ocf_exit_reason "Failed to start slave"
                return $OCF_ERR_GENERIC
            fi
        fi
        return $OCF_SUCCESS
        ;;
    'pre-demote')
        demote_host=$(echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " ")
        # Both sides are quoted so an empty demote uname compares as
        # "not this node" instead of making the test itself fail.
        if [ "$demote_host" = "${NODENAME}" ]; then
            ocf_log info "pre-demote notification for $demote_host"
            set_read_only on
            if [ $? -ne 0 ]; then
                ocf_exit_reason "Failed to set read-only";
                return $OCF_ERR_GENERIC;
            fi

            # Must kill all existing user threads because they are still Read/write
            # in order for the slaves to complete the read of binlogs
            local tmpfile=$(mktemp ${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX)
            $MYSQL $MYSQL_OPTIONS_REPL -e "SHOW PROCESSLIST" > "$tmpfile"
            # Replication, scheduler and our own PROCESSLIST threads are left alone.
            for thread in $(awk '$0 !~ /Binlog Dump|system user|event_scheduler|SHOW PROCESSLIST/ && $0 ~ /^[0-9]/ {print $1}' "$tmpfile")
            do
                ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
                    -e "KILL ${thread}"
            done
            rm -f "$tmpfile"
        else
            # This is the pre-demote handler, so the message names pre-demote.
            ocf_log info "Ignoring pre-demote notification except for my own demotion."
        fi
        return $OCF_SUCCESS
        ;;
    'post-demote')
        demote_host=$(echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " ")
        if [ "$demote_host" = "${NODENAME}" ]; then
            ocf_log info "Ignoring post-demote notification for my own demotion."
            return $OCF_SUCCESS
        fi
        ocf_log info "post-demote notification for $demote_host."
        # The former master has just been gracefully demoted.
        unset_master
        ;;
    *)
        return $OCF_SUCCESS
        ;;
    esac
}
mysql_validate() {
    # validate-all action: the only agent-specific check is that bc is
    # available, because greater_than_equal_long pipes its comparison
    # through it.
    # Returns the status of check_binary.
    check_binary bc
}
#######################################################################
# Actions that must work without a valid environment: print the metadata or
# the usage text and exit before any validation runs.
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage|help) usage
exit $OCF_SUCCESS;;
esac
# Validate the common mysql environment. When validation fails, the outcome
# depends on the requested action:
#   stop     falls through to the dispatcher below, so mysql_stop still runs
#   monitor  is an error when the server is found running, otherwise
#            "not running"
#   status   exits with the LSB "stopped" code (3)
#   others   exit with the validation error itself
mysql_common_validate
rc=$?
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
case "$1" in
stop) ;;
monitor)
mysql_common_status "info"
if [ $? -eq $OCF_SUCCESS ]; then
# if validatation fails and pid is active, always treat this as an error
ocf_exit_reason "environment validation failed, active pid is in unknown state."
exit $OCF_ERR_GENERIC
fi
# validation failed and pid is not active, it's safe to say this instance is inactive.
exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
*) exit $rc;;
esac
fi
# What kind of method was invoked?
# No explicit exit follows a handler, so the script's exit status is the
# status of the handler that ran. An unknown action prints the usage text
# and exits with OCF_ERR_UNIMPLEMENTED.
case "$1" in
start) mysql_start;;
stop) mysql_stop;;
status) mysql_common_status err;;
monitor) mysql_monitor;;
promote) mysql_promote;;
demote) mysql_demote;;
notify) mysql_notify;;
validate-all) mysql_validate;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
# vi:sw=4:ts=4:et:
diff --git a/heartbeat/mpathpersist.in b/heartbeat/mpathpersist.in
index e47fef4bd..0e2c2a4a0 100644
--- a/heartbeat/mpathpersist.in
+++ b/heartbeat/mpathpersist.in
@@ -1,682 +1,681 @@
#!@BASH_SHELL@
#
#
# OCF Resource Agent compliant PERSISTENT SCSI RESERVATION on multipath devices resource script.
# Testversion for a mpathpersist implementation for demo purposes by Andreas Thomas
#
# Copyright (c) 2017 Evgeny Nifontov, lwang@suse.com,
# Andreas Tomas<Andreas.Tomas@suse.com>,
# Zhu Lingshan<lszhu@suse.com>
# All Rights Reserved.
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#
# OCF instance parameters
# OCF_RESKEY_binary
# OCF_RESKEY_devs
# OCF_RESKEY_required_devs_no
# OCF_RESKEY_reservation_type
# OCF_RESKEY_master_score_base
# OCF_RESKEY_master_score_dev_factor
# OCF_RESKEY_master_score_delay
#
# TODO
#
# 1) PROBLEM: devices which were not accessible during the 'start' action will never be registered/reserved
# TODO: 'Master' and 'Slave' registers new devs in 'monitor' action
# TODO: 'Master' reserves new devs in 'monitor' action
# Defaults
# Built-in fallback values; each is applied below only when the matching
# OCF_RESKEY_* variable is not already set in the environment.
OCF_RESKEY_mpathpersist_binary_default="mpathpersist"
OCF_RESKEY_required_devs_no_default=1
OCF_RESKEY_reservation_type_default=1
OCF_RESKEY_master_score_base_default=0
OCF_RESKEY_master_score_dev_factor_default=100
OCF_RESKEY_master_score_delay_default=30
#######################################################################
# Initialization:
# Locate and source the shared OCF shell function library.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# set default values
# ("${VAR=default}" assigns only when VAR is unset; an explicitly empty value is kept)
# NOTE(review): meta_data advertises this parameter under the name "binary";
# confirm that the differing variable name here is intended.
: ${OCF_RESKEY_mpathpersist_binary=${OCF_RESKEY_mpathpersist_binary_default}} # binary name for the resource
: ${OCF_RESKEY_required_devs_no=${OCF_RESKEY_required_devs_no_default}} # number of required devices
: ${OCF_RESKEY_reservation_type=${OCF_RESKEY_reservation_type_default}} # reservation type
: ${OCF_RESKEY_master_score_base=${OCF_RESKEY_master_score_base_default}} # master score base
: ${OCF_RESKEY_master_score_dev_factor=${OCF_RESKEY_master_score_dev_factor_default}} # device factor for master score
: ${OCF_RESKEY_master_score_delay=${OCF_RESKEY_master_score_delay_default}} # delay for master score
#######################################################################
# Print the OCF resource-agent metadata (XML) on stdout, then terminate the
# script with $OCF_SUCCESS.
# The here-document is unquoted, so the ${..._default} references inside it
# are expanded to the default values defined at the top of the file.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mpathpersist" version="1.1">
<version>1.0</version>
<longdesc lang="en">
This resource agent manages SCSI persistent reservations on multipath devices.
"mpathpersist" from multipath-tools is used, please see its documentation.
Should be used as multistate (Promotable) resource
Unpromoted registers its node id ("crm_node -i") as reservation key ( --param-sark ) on each device in the params "devs" list.
Promoted reserves all devices from params "devs" list with reservation "--prout-type" value from "reservation_type" parameter.
Please see man sg_persist(8) and mpathpersist(8) for reservation_type details.
</longdesc>
<shortdesc lang="en">Manages SCSI persistent reservations on multipath devices</shortdesc>
<parameters>
<parameter name="binary" unique="0">
<longdesc lang="en">
The name of the binary that manages the resource.
</longdesc>
<shortdesc lang="en">the binary name of the resource</shortdesc>
<content type="string" default="${OCF_RESKEY_mpathpersist_binary_default}"/>
</parameter>
<parameter name="devs" unique="0" required="1">
<longdesc lang="en">
Device list. Multiple devices can be listed with blank space as separator.
Shell wildcards are allowed.
</longdesc>
<shortdesc lang="en">device list</shortdesc>
<content type="string"/>
</parameter>
<parameter name="required_devs_no" unique="0" required="0">
<longdesc lang="en">
Minimum number of "working" devices from device list
1) existing
2) "mpathpersist --in --read-keys &lt;device&gt;" works (Return code 0)
resource actions "start","monitor","promote" and "validate-all" return "OCF_ERR_INSTALLED"
if the actual number of "working" devices is less than "required_devs_no".
resource actions "stop" and "demote" tries to remove reservations and registration keys from
all working devices, but always return "OCF_SUCCESS"
</longdesc>
<shortdesc lang="en">minimum number of working devices</shortdesc>
<content type="string" default="${OCF_RESKEY_required_devs_no_default}"/>
</parameter>
<parameter name="reservation_type" unique="0" required="0">
<longdesc lang="en">
reservation type
</longdesc>
<shortdesc lang="en">reservation type</shortdesc>
<content type="string" default="${OCF_RESKEY_reservation_type_default}" />
</parameter>
<parameter name="master_score_base" unique="0" required="0">
<longdesc lang="en">
master_score_base value
"master_score_base" value is used in "master_score" calculation:
master_score = master_score_base + master_score_dev_factor * working_devs
if set to bigger value in mpathpersist resource configuration on some node, this node will be "preferred" for promoted role.
</longdesc>
<shortdesc lang="en">base master_score value</shortdesc>
<content type="string" default="${OCF_RESKEY_master_score_base_default}" />
</parameter>
<parameter name="master_score_dev_factor" unique="0" required="0">
<longdesc lang="en">
Working device factor in promoted calculation
each "working" device provides additional value to "master_score",
so the node that sees more devices will be preferred for the "Promoted"-role
Setting it to 0 will disable this behavior.
</longdesc>
<shortdesc lang="en">working device factor in master_score calculation</shortdesc>
<content type="string" default="${OCF_RESKEY_master_score_dev_factor_default}" />
</parameter>
<parameter name="master_score_delay" unique="0" required="0">
<longdesc lang="en">
promoted/unpromoted decreases/increases its master_score after delay of "master_score_delay" seconds
so if some device gets inaccessible, the unpromoted decreases its promoted first and the resource will no be watched
and after this device reappears again the promoted increases its master_score first
this can work only if the master_score_delay is bigger then monitor interval on both promoted and unpromoted
Setting it to 0 will disable this behavior.
</longdesc>
<shortdesc lang="en">master_score decrease/increase delay time</shortdesc>
<content type="string" default="${OCF_RESKEY_master_score_delay_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="30s" />
<action name="promote" timeout="30s" />
<action name="demote" timeout="30s" />
<action name="notify" timeout="30s" />
<action name="stop" timeout="30s" />
<action name="monitor" depth="0" timeout="20s" interval="29s" role="Unpromoted" />
<action name="monitor" depth="0" timeout="20s" interval="60s" role="Promoted" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="30s" />
</actions>
</resource-agent>
END
# meta-data is a terminal action: leave the script right away.
exit $OCF_SUCCESS
}
#######################################
# Common initialisation for the actions that talk to the devices.
#
# Verifies root privileges and the presence of the mpathpersist binary,
# derives the node identity from crm_node, builds the crm_attribute
# command lines, copies the instance parameters into working globals,
# expands the device list and finally collects the device state via
# mpathpersist_check_devs and mpathpersist_get_status.
#
# Globals written: MPATHPERSIST ROLE NOW RESOURCE MASTER_SCORE_VAR_NAME
#   PENDING_VAR_NAME CRM_NODE NODE_ID_DEC NODE_ID_HEX NODE
#   MASTER_SCORE_ATTRIBUTE PENDING_ATTRIBUTE DEVS REQUIRED_DEVS_NO
#   RESERVATION_TYPE MASTER_SCORE_BASE MASTER_SCORE_DEV_FACTOR
#   MASTER_SCORE_DELAY
# Exits: $OCF_ERR_PERM when not root, $OCF_ERR_INSTALLED when the node id
#   cannot be determined or no device is configured.
#######################################
mpathpersist_init() {
    if ! ocf_is_root ; then
        ocf_log err "You must be root to perform this operation."
        exit $OCF_ERR_PERM
    fi

    # The metadata advertises this parameter as "binary" (OCF_RESKEY_binary);
    # honour it and fall back to the historical OCF_RESKEY_mpathpersist_binary.
    MPATHPERSIST="${OCF_RESKEY_binary:-${OCF_RESKEY_mpathpersist_binary}}"
    check_binary $MPATHPERSIST

    ROLE=$OCF_RESKEY_CRM_meta_role
    NOW=$(date +%s)

    RESOURCE="${OCF_RESOURCE_INSTANCE}"
    MASTER_SCORE_VAR_NAME="master-${OCF_RESOURCE_INSTANCE}"
    PENDING_VAR_NAME="pending-$MASTER_SCORE_VAR_NAME"

    # only works with corosync
    CRM_NODE="${HA_SBIN_DIR}/crm_node"
    NODE_ID_DEC=$($CRM_NODE -i)

    # The id must be checked before converting it: printf '0x%x' turns an
    # empty argument into "0x0", so testing only the hex value can never fail.
    if [ -z "$NODE_ID_DEC" ] || ! NODE_ID_HEX=$(printf '0x%x' "$NODE_ID_DEC"); then
        ocf_log err "Couldn't get node id with \"$CRM_NODE\""
        exit $OCF_ERR_INSTALLED
    fi

    # "crm_node -l" lines start with the node id followed by the node name;
    # strip the leading id and the trailing field to keep only the name.
    NODE=$($CRM_NODE -l | $GREP -w "^$NODE_ID_DEC")
    NODE=${NODE#$NODE_ID_DEC }
    NODE=${NODE% *}

    MASTER_SCORE_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$MASTER_SCORE_VAR_NAME --node=$NODE"
    PENDING_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$PENDING_VAR_NAME --node=$NODE"

    ocf_log debug "$RESOURCE: NODE:$NODE, ROLE:$ROLE, NODE_ID DEC:$NODE_ID_DEC HEX:$NODE_ID_HEX"

    DEVS="${OCF_RESKEY_devs}"
    REQUIRED_DEVS_NO="${OCF_RESKEY_required_devs_no}"
    RESERVATION_TYPE="${OCF_RESKEY_reservation_type}"
    MASTER_SCORE_BASE="${OCF_RESKEY_master_score_base}"
    MASTER_SCORE_DEV_FACTOR="${OCF_RESKEY_master_score_dev_factor}"
    MASTER_SCORE_DELAY="${OCF_RESKEY_master_score_delay}"

    ocf_log debug "$RESOURCE: DEVS=$DEVS"
    ocf_log debug "$RESOURCE: REQUIRED_DEVS_NO=$REQUIRED_DEVS_NO"
    ocf_log debug "$RESOURCE: RESERVATION_TYPE=$RESERVATION_TYPE"
    ocf_log debug "$RESOURCE: MASTER_SCORE_BASE=$MASTER_SCORE_BASE"
    ocf_log debug "$RESOURCE: MASTER_SCORE_DEV_FACTOR=$MASTER_SCORE_DEV_FACTOR"
    ocf_log debug "$RESOURCE: MASTER_SCORE_DELAY=$MASTER_SCORE_DELAY"

    # expand path wildcards (deliberately unquoted so that globs are expanded)
    DEVS=$(echo $DEVS)

    if [ -z "$DEVS" ]; then
        ocf_log err "\"devs\" not defined"
        exit $OCF_ERR_INSTALLED
    fi

    mpathpersist_check_devs
    mpathpersist_get_status
}
# Print a two-line usage summary for this agent on stdout.
mpathpersist_action_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|monitor|validate-all|promote|demote|notify|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}
# Probe every device in EXISTING_DEVS with mpathpersist and classify it:
#   WORKING_DEVS          - reading the registered keys succeeded
#   REGISTERED_DEVS       - this node's key ($NODE_ID_HEX) is registered
#   RESERVED_DEVS         - the reservation output names this node's key
#   DEVS_WITH_RESERVATION - some reservation key is present on the device
#   RESERVATION_KEYS      - that key, index-aligned with DEVS_WITH_RESERVATION
# Also sets WORKING_DEVS_NO and MASTER_SCORE
# (MASTER_SCORE_BASE + MASTER_SCORE_DEV_FACTOR * WORKING_DEVS_NO).
# Only WORKING_DEVS is cleared on entry; the other arrays are appended to.
mpathpersist_get_status() {
    unset WORKING_DEVS[*]

    for dev in ${EXISTING_DEVS[*]}; do
        # Step 1: a device counts as working when its keys can be read.
        READ_KEYS=$($MPATHPERSIST --in --read-keys $dev 2>&1) || continue
        WORKING_DEVS+=($dev)

        # Step 2: is our own key among the registered keys?
        echo "$READ_KEYS" | $GREP -w $NODE_ID_HEX\$ >/dev/null || continue
        REGISTERED_DEVS+=($dev)

        # Step 3: look at the current reservation, if it can be read.
        READ_RESERVATION=$($MPATHPERSIST --in --read-reservation $dev 2>&1) || continue

        if echo "$READ_RESERVATION" | $GREP -w $NODE_ID_HEX\$ >/dev/null; then
            RESERVED_DEVS+=($dev)
        fi

        reservation_key=$(echo $READ_RESERVATION | $GREP -o 'Key = 0x[0-9a-f]*' | $GREP -o '0x[0-9a-f]*')
        if [ -n "$reservation_key" ]; then
            DEVS_WITH_RESERVATION+=($dev)
            RESERVATION_KEYS+=($reservation_key)
        fi
    done

    WORKING_DEVS_NO=${#WORKING_DEVS[*]}

    ocf_log debug "$RESOURCE: working devices: $(mpathpersist_echo_array ${WORKING_DEVS[*]})"
    ocf_log debug "$RESOURCE: number of working devices: $WORKING_DEVS_NO"
    ocf_log debug "$RESOURCE: registered devices: $(mpathpersist_echo_array ${REGISTERED_DEVS[*]})"
    ocf_log debug "$RESOURCE: reserved devices: $(mpathpersist_echo_array ${RESERVED_DEVS[*]})"
    ocf_log debug "$RESOURCE: devices with reservation: $(mpathpersist_echo_array ${DEVS_WITH_RESERVATION[*]})"
    ocf_log debug "$RESOURCE: reservation keys: $(mpathpersist_echo_array ${RESERVATION_KEYS[*]})"

    MASTER_SCORE=$(($MASTER_SCORE_BASE + $MASTER_SCORE_DEV_FACTOR*$WORKING_DEVS_NO))
    ocf_log debug "$RESOURCE: master_score: $MASTER_SCORE_BASE + $MASTER_SCORE_DEV_FACTOR*$WORKING_DEVS_NO = $MASTER_SCORE"
}
# Append every path from $DEVS that exists in the file system to
# EXISTING_DEVS and store the resulting count in EXISTING_DEVS_NO.
# Exits with $OCF_ERR_INSTALLED when fewer than $REQUIRED_DEVS_NO exist.
mpathpersist_check_devs() {
    for dev in $DEVS; do
        [ -e "$dev" ] || continue
        EXISTING_DEVS+=($dev)
    done

    EXISTING_DEVS_NO=${#EXISTING_DEVS[*]}

    # Enough devices present: nothing more to do.
    [ $EXISTING_DEVS_NO -lt $REQUIRED_DEVS_NO ] || return 0

    ocf_log err "Number of existing devices=$EXISTING_DEVS_NO less then required_devs_no=$REQUIRED_DEVS_NO"
    exit $OCF_ERR_INSTALLED
}
# Report whether a device is listed in REGISTERED_DEVS.
# Arguments: $1 - device path to look up
# Returns:   0 when $1 equals one of the entries, 1 otherwise
mpathpersist_is_registered() {
    local candidate

    # Quoted "[@]" keeps every array entry intact (no word splitting or
    # pathname expansion of the stored paths).
    for candidate in "${REGISTERED_DEVS[@]}"; do
        if [ "$candidate" = "$1" ]; then
            return 0
        fi
    done
    return 1
}
# Print the reservation key recorded for a device.
# DEVS_WITH_RESERVATION and RESERVATION_KEYS are index-aligned arrays;
# the key stored at the index of the matching device is written to stdout.
# Arguments: $1 - device path
# Outputs:   the key, or an empty line when the device has no recorded key
# Returns:   0 in both cases
mpathpersist_get_reservation_key() {
    local idx

    for idx in "${!DEVS_WITH_RESERVATION[@]}"; do
        if [ "${DEVS_WITH_RESERVATION[$idx]}" = "$1" ]; then
            printf '%s\n' "${RESERVATION_KEYS[$idx]}"
            return 0
        fi
    done
    echo ""
}
# Format the arguments as "[0]:first [1]:second ..." on a single line.
# Arguments: the values to print (typically the expanded elements of an array)
# Outputs:   the formatted list followed by a newline (an empty line when
#            called without arguments)
mpathpersist_echo_array() {
    local idx=0
    local item
    local out=""

    for item in "$@"; do
        # ${out:+...} inserts the separating blank only between entries.
        out="${out:+$out }[$idx]:$item"
        idx=$((idx + 1))
    done

    # printf with a quoted argument: the "[n]:..." text must not be subject
    # to pathname expansion, which an unquoted echo would apply.
    printf '%s\n' "$out"
}
# Split $ACT_PENDING ("<timestamp>_<score>") into ACT_PENDING_TS and
# ACT_PENDING_SCORE. Both are 0 when ACT_PENDING is empty.
mpathpersist_parse_act_pending() {
    ACT_PENDING_TS=0
    ACT_PENDING_SCORE=0

    [ -n "$ACT_PENDING" ] || return 0

    # text before the first "_" / text after the last "_"
    ACT_PENDING_TS="${ACT_PENDING%%_*}"
    ACT_PENDING_SCORE="${ACT_PENDING##*_}"
}
# Schedule clearing of the pending attribute, but only when one is
# currently stored ($ACT_PENDING non-empty).
mpathpersist_clear_pending() {
    [ -n "$ACT_PENDING" ] || return 0

    NEW_PENDING=""
    DO_PENDING_UPDATE="YES"
}
# Schedule an update of the master score attribute.
# Arguments: $1 - score to store
mpathpersist_new_master_score() {
    NEW_MASTER_SCORE="$1"
    DO_MASTER_SCORE_UPDATE="YES"
}
# Schedule an update of the pending attribute.
# Arguments: $1 - value to store (the monitor action passes "<timestamp>_<score>")
mpathpersist_new_pending() {
    NEW_PENDING="$1"
    DO_PENDING_UPDATE="YES"
}
# Functions invoked by resource manager actions
# "start" action: publish the current master score, reset the pending
# attribute and register this node's key on every working device that
# does not carry it yet.
# Exits with $OCF_ERR_GENERIC when fewer than $REQUIRED_DEVS_NO devices work.
# Returns $OCF_ERR_GENERIC on the first failed registration, else $OCF_SUCCESS.
mpathpersist_action_start() {
    ocf_run $MASTER_SCORE_ATTRIBUTE --update=$MASTER_SCORE
    ocf_run $PENDING_ATTRIBUTE --update=""

    if [ $WORKING_DEVS_NO -lt $REQUIRED_DEVS_NO ]; then
        ocf_log err "$RESOURCE: Number of working devices=$WORKING_DEVS_NO less then required_devs_no=$REQUIRED_DEVS_NO"
        exit $OCF_ERR_GENERIC
    fi

    for dev in ${WORKING_DEVS[*]}; do
        # Already registered devices need no further work.
        mpathpersist_is_registered $dev && continue

        ocf_run $MPATHPERSIST --out --register --param-sark=$NODE_ID_HEX $dev \
            || return $OCF_ERR_GENERIC
    done

    return $OCF_SUCCESS
}
# "stop" action: when this node still has registrations, delete the master
# score and pending attributes and issue a register command with only
# --param-rk (this node's key) on every registered device; the results of
# those commands are ignored.
# Always returns $OCF_SUCCESS.
mpathpersist_action_stop() {
    if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then
        ocf_log debug "$RESOURCE stop: already no registrations"
        return $OCF_SUCCESS
    fi

    # Clear preference for becoming master
    ocf_run $MASTER_SCORE_ATTRIBUTE --delete
    ocf_run $PENDING_ATTRIBUTE --delete

    for dev in ${REGISTERED_DEVS[*]}; do
        ocf_run $MPATHPERSIST --out --register --param-rk=$NODE_ID_HEX $dev
    done

    return $OCF_SUCCESS
}
# "monitor" action.
#
# Part 1 - score maintenance: compare the stored master score with the
# freshly computed $MASTER_SCORE. A change in the "held back" direction
# (a decrease while ROLE is Master, an increase while ROLE is Slave) is
# only applied after it has been pending for $MASTER_SCORE_DELAY seconds;
# any other change is applied immediately. Other ROLE values leave the
# stored score untouched.
#
# Part 2 - state report: $OCF_NOT_RUNNING without registrations,
# $OCF_RUNNING_MASTER when all working devices are reserved by this node
# (or, for reservation types 7/8, when all are registered and any
# reservation exists), $OCF_SUCCESS when all working devices are merely
# registered, $OCF_ERR_GENERIC otherwise.
mpathpersist_action_monitor() {
    local hold_op
    local apply_now

    ACT_MASTER_SCORE=$($MASTER_SCORE_ATTRIBUTE --query --quiet 2>&1)
    ocf_log debug "$RESOURCE monitor: ACT_MASTER_SCORE=$ACT_MASTER_SCORE"

    ACT_PENDING=$($PENDING_ATTRIBUTE --query --quiet 2>&1)
    ocf_log debug "$RESOURCE monitor: ACT_PENDING=$ACT_PENDING"

    mpathpersist_parse_act_pending
    ocf_log debug "$RESOURCE monitor: ACT_PENDING_TS=$ACT_PENDING_TS"
    ocf_log debug "$RESOURCE monitor: ACT_PENDING_VAL=$ACT_PENDING_SCORE"
    ocf_log debug "$MASTER_SCORE, $ACT_MASTER_SCORE, $ROLE"

    DO_MASTER_SCORE_UPDATE="NO"
    DO_PENDING_UPDATE="NO"

    if [ -n "$ACT_MASTER_SCORE" ]; then
        if [ $ACT_MASTER_SCORE -eq $MASTER_SCORE ]; then
            mpathpersist_clear_pending
        else
            # Comparison that identifies the direction which must be delayed.
            case $ROLE in
                Master) hold_op="-lt" ;;
                Slave)  hold_op="-gt" ;;
                *)      hold_op="" ;;
            esac

            if [ -n "$hold_op" ]; then
                apply_now="yes"
                if [ $MASTER_SCORE $hold_op $ACT_MASTER_SCORE ]; then
                    if [ -n "$ACT_PENDING" ]; then
                        # A change is already pending: apply it once the delay is over.
                        [ $(($NOW-$ACT_PENDING_TS-$MASTER_SCORE_DELAY)) -ge 0 ] || apply_now="no"
                    elif [ $MASTER_SCORE_DELAY -eq 0 ]; then
                        # Delay disabled: apply right away.
                        :
                    else
                        # First sighting of the change: start the delay timer.
                        apply_now="no"
                        mpathpersist_new_pending "${NOW}_${MASTER_SCORE}"
                    fi
                fi

                if [ "$apply_now" = "yes" ]; then
                    mpathpersist_new_master_score $MASTER_SCORE
                    mpathpersist_clear_pending
                fi
            fi
        fi
    fi

    if [ $DO_MASTER_SCORE_UPDATE == "YES" ]; then
        ocf_run $MASTER_SCORE_ATTRIBUTE --update=$NEW_MASTER_SCORE
    fi
    if [ $DO_PENDING_UPDATE == "YES" ]; then
        ocf_run $PENDING_ATTRIBUTE --update=$NEW_PENDING
    fi

    if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then
        ocf_log debug "$RESOURCE monitor: no registrations"
        return $OCF_NOT_RUNNING
    fi

    if [ ${#RESERVED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then
        return $OCF_RUNNING_MASTER
    fi

    if [ ${#REGISTERED_DEVS[*]} -ne ${#WORKING_DEVS[*]} ]; then
        ocf_log err "$RESOURCE monitor: unexpected state"
        return $OCF_ERR_GENERIC
    fi

    # All working devices are registered. With reservation types 7 and 8
    # any existing reservation is reported as the master state.
    if { [ $RESERVATION_TYPE -eq 7 ] || [ $RESERVATION_TYPE -eq 8 ]; } \
        && [ ${#DEVS_WITH_RESERVATION[*]} -gt 0 ]; then
        return $OCF_RUNNING_MASTER
    fi
    return $OCF_SUCCESS
}
# "promote" action: take the reservation on every working device.
#   - no reservation on the device: reserve it with this node's key;
#   - foreign reservation, types 1/3/5/6: preempt the holder's key;
#   - foreign reservation, types 7/8: nothing to do, stop successfully.
# Returns $OCF_SUCCESS, $OCF_ERR_GENERIC when an mpathpersist call fails,
# or $OCF_ERR_ARGS for any other reservation type.
mpathpersist_action_promote() {
    if [ ${#RESERVED_DEVS[*]} -gt 0 ]; then
        ocf_log info "$RESOURCE promote: already master"
        return $OCF_SUCCESS
    fi

    for dev in ${WORKING_DEVS[*]}; do
        reservation_key=$(mpathpersist_get_reservation_key $dev)

        # Reject unsupported reservation types up front.
        case $RESERVATION_TYPE in
            1|3|5|6|7|8) ;;
            *) return $OCF_ERR_ARGS ;;
        esac

        if [ -z "$reservation_key" ]; then
            ocf_run $MPATHPERSIST --out --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev \
                || return $OCF_ERR_GENERIC
            continue
        fi

        # Somebody else holds a reservation on this device.
        case $RESERVATION_TYPE in
            7|8)
                ocf_log info "$RESOURCE promote: there already exist an reservation holder, all registrants become reservation holders"
                return $OCF_SUCCESS
                ;;
        esac

        ocf_run $MPATHPERSIST --out --preempt --param-sark=$reservation_key --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev \
            || return $OCF_ERR_GENERIC
    done

    return $OCF_SUCCESS
}
# "demote" action: give up the reservation.
#   - types 1/3/5/6: release the reservation on every reserved device;
#   - types 7/8: --release won't release the reservation unless the key is
#     unregistered, so re-register with --param-sark=0 on every registered
#     device instead.
# Returns $OCF_SUCCESS (also when there is nothing to do), $OCF_ERR_GENERIC
# when an mpathpersist call fails, $OCF_ERR_ARGS for other reservation types.
mpathpersist_action_demote() {
    local mode
    local held

    case $RESERVATION_TYPE in
        1|3|5|6)
            mode="release"
            held=${#RESERVED_DEVS[*]}
            ;;
        7|8)
            mode="unregister"
            held=${#REGISTERED_DEVS[*]}
            ;;
        *)
            return $OCF_ERR_ARGS
            ;;
    esac

    if [ $held -eq 0 ]; then
        ocf_log info "$RESOURCE demote: already slave"
        return $OCF_SUCCESS
    fi

    if [ "$mode" = "release" ]; then
        for dev in ${RESERVED_DEVS[*]}; do
            ocf_run $MPATHPERSIST --out --release --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev \
                || return $OCF_ERR_GENERIC
        done
    else
        for dev in ${REGISTERED_DEVS[*]}; do
            ocf_run $MPATHPERSIST --out --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev \
                || return $OCF_ERR_GENERIC
        done
    fi

    return $OCF_SUCCESS
}
# "notify" action: only logs (at debug level) the notification type and
# operation together with the number of active, starting and stopping
# resource instances taken from the OCF_RESKEY_CRM_meta_notify_* variables.
# Always returns $OCF_SUCCESS.
mpathpersist_action_notify() {
    local n_type="$OCF_RESKEY_CRM_meta_notify_type"
    local n_op="$OCF_RESKEY_CRM_meta_notify_operation"

    # The lists are whitespace separated; unquoted expansion into arrays
    # yields one element per listed instance.
    local -a active_list=( $OCF_RESKEY_CRM_meta_notify_active_resource )
    local -a stop_list=( $OCF_RESKEY_CRM_meta_notify_stop_resource )
    local -a start_list=( $OCF_RESKEY_CRM_meta_notify_start_resource )

    local n_active="${#active_list[@]}"
    local n_stop="${#stop_list[@]}"
    local n_start="${#start_list[@]}"

    ocf_log debug "$RESOURCE notify: $n_type for $n_op - counts: active $n_active - starting $n_start - stopping $n_stop"
    return $OCF_SUCCESS
}
# "validate-all" action: more than one promoted instance is only accepted
# together with reservation type 7 or 8.
# Exits with $OCF_ERR_CONFIGURED on a mismatch, returns $OCF_SUCCESS otherwise.
mpathpersist_action_validate_all () {
    case "$RESERVATION_TYPE" in
        7|8) return $OCF_SUCCESS ;;
    esac

    if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ]; then
        ocf_log err "Master options misconfigured."
        exit $OCF_ERR_CONFIGURED
    fi

    return $OCF_SUCCESS
}
# Main entry point: exactly one argument (the action name) is expected.
if [ $# -ne 1 ]; then
    echo "Incorrect parameter count."
    mpathpersist_action_usage
    exit $OCF_ERR_ARGS
fi

ACTION=$1

# RESOURCE is otherwise only assigned inside mpathpersist_init, which runs
# after the "starting action" log line below and is never called for
# "notify"; set it here so those log messages carry the instance name.
RESOURCE="${OCF_RESOURCE_INSTANCE}"

case $ACTION in
    meta-data)
        # meta_data terminates the script itself.
        meta_data
        ;;
    validate-all)
        # No explicit exit: the script ends with the status of the last command.
        mpathpersist_init
        mpathpersist_action_validate_all
        ;;
    start|promote|monitor|stop|demote)
        ocf_log debug "$RESOURCE: starting action \"$ACTION\""
        mpathpersist_init
        mpathpersist_action_$ACTION
        exit $?
        ;;
    notify)
        mpathpersist_action_notify
        exit $?
        ;;
    usage|help)
        mpathpersist_action_usage
        exit $OCF_SUCCESS
        ;;
    *)
        mpathpersist_action_usage
        exit $OCF_ERR_ARGS
        ;;
esac
diff --git a/heartbeat/mysql b/heartbeat/mysql
index aec44fe5e..9ab49ab20 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -1,1078 +1,1078 @@
#!/bin/sh
#
#
# MySQL
#
# Description: Manages a MySQL database as Linux-HA resource
#
# Authors: Alan Robertson: DB2 Script
# Jakub Janczak: rewrite as MySQL
# Andrew Beekhof: cleanup and import
# Sebastian Reitenbach: add OpenBSD defaults, more cleanup
# Narayan Newton: add Gentoo/Debian defaults
# Marian Marinov, Florian Haas: add replication capability
# Yves Trudeau, Baron Schwartz: add VIP support and improve replication
#
# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# (c) 2002-2005 International Business Machines, Inc.
# 2005-2010 Linux-HA contributors
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 mysql
#
# See usage() function below for more details...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_client_binary
# OCF_RESKEY_config
# OCF_RESKEY_datadir
# OCF_RESKEY_user
# OCF_RESKEY_group
# OCF_RESKEY_test_table
# OCF_RESKEY_test_user
# OCF_RESKEY_test_passwd
# OCF_RESKEY_enable_creation
# OCF_RESKEY_additional_parameters
# OCF_RESKEY_log
# OCF_RESKEY_pid
# OCF_RESKEY_socket
# OCF_RESKEY_replication_user
# OCF_RESKEY_replication_passwd
# OCF_RESKEY_replication_port
# OCF_RESKEY_max_slave_lag
# OCF_RESKEY_evict_outdated_slaves
# OCF_RESKEY_reader_attribute
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/mysql-common.sh
#######################################################################
# Print the usage text of this agent on stdout.
usage() {
    printf '%s\n' \
        "usage: $0 (start|stop|validate-all|meta-data|monitor|promote|demote|notify)" \
        "$0 manages a MySQL Database as an HA resource." \
        "The 'start' operation starts the database." \
        "The 'stop' operation stops the database." \
        "The 'status' operation reports whether the database is running" \
        "The 'monitor' operation reports whether the database seems to be working" \
        "The 'promote' operation makes this mysql server run as master" \
        "The 'demote' operation makes this mysql server run as slave" \
        "The 'validate-all' operation reports whether the parameters are valid"
}
# Print the OCF resource-agent metadata (XML) for the mysql agent on stdout.
# The here-document is unquoted: the ${OCF_RESKEY_*_default} references are
# expanded when the text is printed, while \${INSTANCE_ATTR_NAME} is escaped
# and therefore emitted literally.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mysql" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Resource script for MySQL.
May manage a standalone MySQL database, a clone set with externally
managed replication, or a complete master/slave replication setup.
Note, when master/slave replication is in use, the resource must
be setup to use notifications. Set 'notify=true' in the metadata
attributes when defining a MySQL master/slave instance.
While managing replication, the default behavior is to use uname -n
values in the change master to command. Other IPs can be specified
manually by adding a node attribute \${INSTANCE_ATTR_NAME}_mysql_master_IP
giving the IP to use for replication. For example, if the mysql primitive
you are using is p_mysql, the attribute to set will be
p_mysql_mysql_master_IP.
</longdesc>
<shortdesc lang="en">Manages a MySQL database instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL server binary
</longdesc>
<shortdesc lang="en">MySQL server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MySQL config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">MySQL datadir</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MySQL daemon
</longdesc>
<shortdesc lang="en">MySQL user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MySQL daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MySQL group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="log" unique="0" required="0">
<longdesc lang="en">
The logfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_default}"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="test_table" unique="0" required="0">
<longdesc lang="en">
Table to be tested in monitor statement (in database.table notation)
</longdesc>
<shortdesc lang="en">MySQL test table</shortdesc>
<content type="string" default="${OCF_RESKEY_test_table_default}" />
</parameter>
<parameter name="test_user" unique="0" required="0">
<longdesc lang="en">
MySQL test user, must have select privilege on test_table
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="${OCF_RESKEY_test_user_default}" />
</parameter>
<parameter name="test_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL test user password
</longdesc>
<shortdesc lang="en">MySQL test user password</shortdesc>
<content type="string" default="${OCF_RESKEY_test_passwd_default}" />
</parameter>
<parameter name="enable_creation" unique="0" required="0">
<longdesc lang="en">
If the MySQL database does not exist, it will be created
</longdesc>
<shortdesc lang="en">Create the database if it does not exist</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="replication_user" unique="0" required="0">
<longdesc lang="en">
MySQL replication user. This user is used for starting and stopping
MySQL replication, for setting and resetting the master host, and for
setting and unsetting read-only mode. Because of that, this user must
have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, PROCESS and RELOAD
privileges on all nodes within the cluster. Mandatory if you define a
master-slave resource.
</longdesc>
<shortdesc lang="en">MySQL replication user</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_user_default}" />
</parameter>
<parameter name="replication_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL replication password. Used for replication client and slave.
Mandatory if you define a master-slave resource.
</longdesc>
<shortdesc lang="en">MySQL replication user password</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_passwd_default}" />
</parameter>
<parameter name="replication_port" unique="0" required="0">
<longdesc lang="en">
The port on which the Master MySQL instance is listening.
</longdesc>
<shortdesc lang="en">MySQL replication port</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_port_default}" />
</parameter>
<parameter name="replication_require_ssl" unique="0" required="0">
<longdesc lang="en">
Enables SSL connection to local MySQL service for replication user.
i.e. if REQUIRE SSL for replication user in MySQL set, this should be set to "true".
</longdesc>
<shortdesc lang="en">MySQL replication require ssl</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_require_ssl_default}" />
</parameter>
<parameter name="replication_master_ssl_ca" unique="0" required="0">
<longdesc lang="en">
The SSL CA certificate to be used for replication over SSL.
</longdesc>
<shortdesc lang="en">MySQL replication SSL CA certificate</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_master_ssl_ca_default}" />
</parameter>
<parameter name="replication_master_ssl_cert" unique="0" required="0">
<longdesc lang="en">
The SSL certificate to be used for replication over SSL.
</longdesc>
<shortdesc lang="en">MySQL replication SSL certificate</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_master_ssl_cert_default}" />
</parameter>
<parameter name="replication_master_ssl_key" unique="0" required="0">
<longdesc lang="en">
The SSL certificate key to be used for replication over SSL.
</longdesc>
<shortdesc lang="en">MySQL replication SSL certificate key</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_master_ssl_key_default}" />
</parameter>
<parameter name="max_slave_lag" unique="0" required="0">
<longdesc lang="en">
The maximum number of seconds a replication slave is allowed to lag
behind its master. Do not set this to zero. What the cluster manager
does in case a slave exceeds this maximum lag is determined by the
evict_outdated_slaves parameter.
</longdesc>
<shortdesc lang="en">Maximum time (seconds) a MySQL slave is allowed
to lag behind a master</shortdesc>
<content type="integer" default="${OCF_RESKEY_max_slave_lag_default}"/>
</parameter>
<parameter name="evict_outdated_slaves" unique="0" required="0">
<longdesc lang="en">
If set to true, any slave which is more than max_slave_lag seconds
behind the master has its MySQL instance shut down. If this parameter
is set to false in a primitive or clone resource, it is simply
ignored. If set to false in a master/slave resource, then exceeding
the maximum slave lag will merely push down the master preference so
the lagging slave is never promoted to the new master.
</longdesc>
<shortdesc lang="en">Determines whether to shut down badly lagging
slaves</shortdesc>
<content type="boolean" default="${OCF_RESKEY_evict_outdated_slaves_default}" />
</parameter>
<parameter name="reader_attribute" unique="1" required="0">
<longdesc lang="en">
An attribute that the RA can manage to specify whether a node
can be read from. This node attribute will be 1 if it's fine to
read from the node, and 0 otherwise (for example, when a slave
has lagged too far behind the master).
A typical example for the use of this attribute would be to tie
a set of IP addresses to MySQL slaves that can be read from.
This parameter is only meaningful in master/slave set configurations.
</longdesc>
<shortdesc lang="en">Sets the node attribute that determines
whether a node is usable for clients to read from.</shortdesc>
<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="20s" />
<action name="monitor" role="Promoted" depth="0" timeout="30s" interval="10s" />
<action name="monitor" role="Unpromoted" depth="0" timeout="30s" interval="30s" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}
# Convenience functions
# Sets or unsets read-only mode. Accepts one boolean as its optional
# argument. If invoked without any arguments, defaults to enabling
# read-only mode. Should only be set in master/slave setups.
# Arguments: $1 - (optional) boolean; true-ish enables, anything else disables
# Returns:   the status of the "SET GLOBAL read_only" statement as run
#            through ocf_run.
set_read_only() {
    local ro_val

    # "${1-on}" substitutes "on" only when no argument was passed at all,
    # which implements the documented default; an explicitly empty
    # argument is still evaluated as given.
    if ocf_is_true "${1-on}"; then
        ro_val="on"
    else
        ro_val="off"
    fi

    # MYSQL and MYSQL_OPTIONS_REPL hold a command line and are expanded
    # unquoted on purpose.
    ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
        -e "SET GLOBAL read_only=${ro_val}"
}
# Check if read-only is set.
# Queries the server for the read_only variable and inspects the second
# column of the answer.
# Returns: 0 when the value is "ON", 1 otherwise
get_read_only() {
    local state

    state=$($MYSQL $MYSQL_OPTIONS_REPL \
        --skip-column-names -e "SHOW VARIABLES LIKE 'read_only'" | awk '{print $2}')

    [ "$state" = "ON" ]
}
# Determine whether the machine is currently running as a MySQL slave.
# Returns 0 only when all of the following hold:
#   - the resource is a master/slave resource (ocf_is_ms),
#   - the instance is read-only (get_read_only),
#   - get_slave_info succeeds, and
#   - a master_log_file is defined (it is deleted by "reset slave").
# Returns 1 in every other case. The temporary file left behind by
# get_slave_info is removed here.
is_slave() {
    local rc
    local tmpfile

    # Check whether this machine should be slave
    ocf_is_ms && get_read_only || return 1

    get_slave_info
    rc=$?
    rm -f $tmpfile

    # "SHOW SLAVE STATUS" returns an empty set if instance is not a
    # replication slave
    [ $rc -eq 0 ] || return 1

    # show slave status is not empty; is there a master_log_file defined?
    if [ "$master_log_file" ]; then
        return 0
    fi
    return 1
}
parse_slave_info() {
    # Extracts field $1 from result of "SHOW SLAVE STATUS\G" from file $2.
    # Arguments:
    #   $1 - field name exactly as printed by MySQL (e.g. Master_Host)
    #   $2 - path of the file holding the "\G" formatted output
    # Outputs the text following "<field>: " on stdout; prints nothing
    # when the field does not occur in the file.
    # The path is quoted so that a scratch directory containing
    # whitespace does not break the redirection.
    sed -ne "s/^.* $1: \(.*\)$/\1/p" < "$2"
}
get_slave_info() {
    # Loads the output of "SHOW SLAVE STATUS\G" into global variables:
    #   master_host, master_user, master_port, master_log_file,
    #   master_log_pos, slave_sql, slave_io, last_errno, secs_behind
    # The query is skipped when $master_log_file and $master_host are
    # already set from an earlier call.
    # Warning: this sets $tmpfile and LEAVES the file in place. The caller
    # must delete it after use.
    # Returns $OCF_SUCCESS when slave information is available, and
    # $OCF_ERR_GENERIC when the status output is empty or the scratch
    # file could not be created.
    if [ -n "$master_log_file" ] && [ -n "$master_host" ]; then
        # variables are already defined, get_slave_info has been run before
        return $OCF_SUCCESS
    fi

    tmpfile=$(mktemp "${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX")
    # Without this check an empty $tmpfile would make the "-s" test
    # below degenerate into a non-empty-string test and report success.
    if [ $? -ne 0 ] || [ -z "$tmpfile" ]; then
        ocf_exit_reason "Failed to create a temporary file for the slave status"
        return $OCF_ERR_GENERIC
    fi

    $MYSQL $MYSQL_OPTIONS_REPL \
        -e 'SHOW SLAVE STATUS\G' > "$tmpfile"

    if [ ! -s "$tmpfile" ]; then
        # Instance produced an empty "SHOW SLAVE STATUS" output --
        # instance is not a slave
        ocf_exit_reason "check_slave invoked on an instance that is not a replication slave."
        return $OCF_ERR_GENERIC
    fi

    master_host=$(parse_slave_info Master_Host "$tmpfile")
    master_user=$(parse_slave_info Master_User "$tmpfile")
    master_port=$(parse_slave_info Master_Port "$tmpfile")
    master_log_file=$(parse_slave_info Master_Log_File "$tmpfile")
    master_log_pos=$(parse_slave_info Read_Master_Log_Pos "$tmpfile")
    slave_sql=$(parse_slave_info Slave_SQL_Running "$tmpfile")
    slave_io=$(parse_slave_info Slave_IO_Running "$tmpfile")
    last_errno=$(parse_slave_info Last_Errno "$tmpfile")
    secs_behind=$(parse_slave_info Seconds_Behind_Master "$tmpfile")
    ocf_log debug "MySQL instance running as a replication slave"
    return $OCF_SUCCESS
}
check_slave() {
# Checks slave status
#
# Loads the replication state through get_slave_info and then inspects
# $last_errno, $slave_io, $slave_sql and $secs_behind, in that order.
# Depending on what it finds it sets the reader attribute through
# set_reader_attr (0 = not readable, 1 = readable) and, when eviction of
# outdated slaves is disabled and the resource runs in master/slave mode,
# publishes a promotion score derived from the replication lag.
#
# NOTE: the failure branches below call "exit", not "return", so they end
# the whole agent process with the given status instead of handing control
# back to the caller.
local rc new_master
get_slave_info
rc=$?
if [ $rc -eq 0 ]; then
# Did we receive an error other than max_connections?
if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
# Whoa. Replication ran into an error. This slave has
# diverged from its master. Make sure this resource
# doesn't restart in place.
ocf_exit_reason "MySQL instance configured for replication, but replication has failed."
ocf_log err "See $tmpfile for details"
# Just pull the reader VIP away, killing MySQL here would be pretty evil
# on a loaded server
set_reader_attr 0
exit $OCF_SUCCESS
fi
# If we got max_connections, let's remove the vip
if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
set_reader_attr 0
exit $OCF_SUCCESS
fi
if [ "$slave_io" != 'Yes' ]; then
# Not necessarily a bad thing. The master may have
# temporarily shut down, and the slave may just be
# reconnecting. A warning can't hurt, though.
ocf_log warn "MySQL Slave IO threads currently not running."
# Sanity check, are we at least on the right master
# (first "|"-separated field of the stored replication info)
new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
if [ "$master_host" != "$new_master" ]; then
# Not pointing to the right master, not good, removing the VIPs
set_reader_attr 0
exit $OCF_SUCCESS
fi
fi
if [ "$slave_sql" != 'Yes' ]; then
# We don't have a replication SQL thread running. Not a
# good thing. Try to recover by restarting the SQL thread
# and remove reader vip. Prevent MySQL restart.
ocf_exit_reason "MySQL Slave SQL threads currently not running."
ocf_log err "See $tmpfile for details"
# Remove reader vip
set_reader_attr 0
# try to restart slave
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "START SLAVE"
# Return success to prevent a restart
exit $OCF_SUCCESS
fi
if ocf_is_true $OCF_RESKEY_evict_outdated_slaves; then
# We're supposed to bail out if we lag too far
# behind. Let's check our lag.
# A literal "NULL" lag value is treated like exceeding the limit.
if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
ocf_exit_reason "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)."
ocf_log err "See $tmpfile for details"
# Remove reader vip
set_reader_attr 0
# NOTE(review): exits with OCF_ERR_INSTALLED rather than a generic
# error; presumably chosen to influence where the cluster manager
# recovers the resource -- confirm.
exit $OCF_ERR_INSTALLED
fi
elif ocf_is_ms; then
# Even if we're not set to evict lagging slaves, we can
# still use the seconds behind master value to set our
# master preference.
local master_pref
# NOTE(review): $secs_behind is used in arithmetic here without the
# "NULL" check applied in the other branches of this function.
master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind}))
if [ $master_pref -lt 0 ]; then
# Sanitize a below-zero preference to just zero
master_pref=0
fi
- $CRM_MASTER -v $master_pref
+ ocf_promotion_score -v $master_pref
fi
# is the slave ok to have a VIP on it
if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
set_reader_attr 0
else
set_reader_attr 1
fi
ocf_log debug "MySQL instance running as a replication slave"
# Only the healthy path and the "not a slave" path remove the scratch
# file; the failure branches above leave it behind and log its path.
rm -f $tmpfile
else
# Instance produced an empty "SHOW SLAVE STATUS" output --
# instance is not a slave
# TODO: Needs to handle when get_slave_info will return too many connections error
rm -f $tmpfile
ocf_exit_reason "check_slave invoked on an instance that is not a replication slave."
exit $OCF_ERR_GENERIC
fi
}
set_master() {
    # Informs the MySQL server of the master to replicate from.
    # Takes no arguments: the master host and, if needed, the binary log
    # file and position are read from the "host|file|pos" value stored
    # via $CRM_ATTR_REPL_INFO.
    # When the instance already replicates from that host (as reported by
    # get_slave_info) its current position is kept and no statement is
    # issued. Otherwise CHANGE MASTER TO is executed, with the stored
    # file/position when both are available and with the MASTER_SSL_*
    # options when CA, certificate and key are all configured.
    # Returns the status of the CHANGE MASTER TO statement, so callers
    # checking $? see a failed reconfiguration; returns the status of the
    # scratch-file cleanup when the position was kept.
    local new_master master_log_file master_log_pos
    local master_params master_ssl_params
    local change_master_rc

    new_master=$($CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1)

    # Keep replication position
    get_slave_info

    if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
        ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
        rm -f $tmpfile
        return
    fi

    master_log_file=$($CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2)
    master_log_pos=$($CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3)
    if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
        master_params=", MASTER_LOG_FILE='$master_log_file', MASTER_LOG_POS=$master_log_pos"
        ocf_log info "Restored master pos for $new_master : $master_log_file:$master_log_pos"
    fi

    if [ -n "$OCF_RESKEY_replication_master_ssl_ca" ] && [ -n "$OCF_RESKEY_replication_master_ssl_cert" ] && [ -n "$OCF_RESKEY_replication_master_ssl_key" ]; then
        master_ssl_params=", MASTER_SSL=1, MASTER_SSL_CA='$OCF_RESKEY_replication_master_ssl_ca', MASTER_SSL_CERT='$OCF_RESKEY_replication_master_ssl_cert', MASTER_SSL_KEY='$OCF_RESKEY_replication_master_ssl_key'"
    fi

    ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
        -e "CHANGE MASTER TO MASTER_HOST='$new_master', MASTER_PORT=$OCF_RESKEY_replication_port, MASTER_USER='$OCF_RESKEY_replication_user', MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params $master_ssl_params"
    # Remember the statement's status: the cleanup below must not mask a
    # failed CHANGE MASTER TO.
    change_master_rc=$?

    rm -f $tmpfile
    return $change_master_rc
}
unset_master(){
    # Instructs the MySQL server to stop replicating from a master
    # host.
    # Sequence: wait until the I/O thread has nothing more to read, stop
    # the I/O thread, wait until the relay log has been processed, stop
    # all slave threads and finally reset the slave configuration.
    # Returns $OCF_SUCCESS when the instance is not a slave (nothing to
    # do) and 0 after a successful reset. Any failing step terminates the
    # agent with "exit $OCF_ERR_GENERIC"; the scratch file is removed on
    # those paths as well.

    # If we're currently not configured to be replicating from any
    # host, then there's nothing to do. But we do log a warning as
    # no-one but the CRM should be touching the MySQL master/slave
    # configuration.
    if ! is_slave; then
        ocf_log warn "Attempted to unset the replication master on an instance that is not configured as a replication slave"
        return $OCF_SUCCESS
    fi

    local tmpfile
    tmpfile=$(mktemp "${HA_RSCTMP}/unset_master.${OCF_RESOURCE_INSTANCE}.XXXXXX")
    # An empty path would make the grep calls below read from stdin.
    if [ $? -ne 0 ] || [ -z "$tmpfile" ]; then
        ocf_exit_reason "Failed to create a temporary file for the process list"
        exit $OCF_ERR_GENERIC
    fi

    # At this point, the master is read only so there should not be much binlogs to transfer
    # Let's wait for the last bits
    while true; do
        $MYSQL $MYSQL_OPTIONS_REPL \
            -e 'SHOW PROCESSLIST\G' > "$tmpfile"
        if grep -i 'Waiting for master to send event' "$tmpfile" >/dev/null; then
            ocf_log info "MySQL slave has finished reading master binary log"
            break
        fi
        if grep -i 'Reconnecting after a failed master event read' "$tmpfile" >/dev/null; then
            ocf_log info "Master is down, no more binary logs to come"
            break
        fi
        if grep -i 'Connecting to master' "$tmpfile" >/dev/null; then
            ocf_log info "Master is down, no more binary logs to come"
            break
        fi
        if ! grep 'system user' "$tmpfile" >/dev/null; then
            ocf_log info "Slave is not running - not waiting to finish"
            break
        fi
        sleep 1
    done

    # Now, stop the slave I/O thread and wait for relay log
    # processing to complete
    if ! ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "STOP SLAVE IO_THREAD"; then
        rm -f "$tmpfile"
        ocf_exit_reason "Error stopping slave IO thread"
        exit $OCF_ERR_GENERIC
    fi

    while true; do
        $MYSQL $MYSQL_OPTIONS_REPL \
            -e 'SHOW PROCESSLIST\G' > "$tmpfile"
        if grep -i 'Has read all relay log' "$tmpfile" >/dev/null; then
            ocf_log info "MySQL slave has finished processing relay log"
            break
        fi
        if ! grep -q 'system user' "$tmpfile"; then
            ocf_log info "Slave not runnig - not waiting to finish"
            break
        fi
        ocf_log info "Waiting for MySQL slave to finish processing relay log"
        sleep 1
    done
    rm -f "$tmpfile"

    # Now, stop all slave activity and unset the master host
    if ! ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "STOP SLAVE"; then
        ocf_exit_reason "Error stopping rest slave threads"
        exit $OCF_ERR_GENERIC
    fi

    if ! ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "RESET SLAVE /*!50516 ALL */;"; then
        ocf_exit_reason "Failed to reset slave"
        exit $OCF_ERR_GENERIC
    fi
}
# Start replication as slave
start_slave() {
    # Issues START SLAVE through ocf_run and returns ocf_run's status.
    ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "START SLAVE"
}
# Set the attribute controlling the readers VIP
set_reader_attr() {
    # Writes the reboot-lifetime node attribute named by
    # $OCF_RESKEY_reader_attribute, but only when its current value (as
    # reported by get_reader_attr) differs numerically from $1.
    # Arguments:
    #   $1 - new attribute value
    local curr_attr_value

    curr_attr_value=$(get_reader_attr)
    if [ "$curr_attr_value" -ne "$1" ]
    then
        $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v $1
    fi
}
# get the attribute controlling the readers VIP
get_reader_attr() {
    # Prints the current value of the reboot-lifetime node attribute
    # named by $OCF_RESKEY_reader_attribute, or -1 when the query
    # command exits non-zero.
    local attr_value

    if attr_value=$($CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} --query -q); then
        echo $attr_value
    else
        echo -1
    fi
}
# Stores data for MASTER STATUS from MySQL
update_data_master_status() {
    # Stores the output of "SHOW MASTER STATUS\G" in a per-resource file
    # below $HA_RSCTMP and records its path in the global
    # $master_status_file, where get_master_status reads it.
    # Returns the exit status of the mysql client invocation.
    master_status_file="${HA_RSCTMP}/master_status.${OCF_RESOURCE_INSTANCE}"
    # Quoted so that a scratch directory containing whitespace does not
    # turn the redirection into an "ambiguous redirect" error.
    $MYSQL $MYSQL_OPTIONS_REPL -e "SHOW MASTER STATUS\G" > "$master_status_file"
}
# Returns the specified value from the stored copy of SHOW MASTER STATUS.
# Must be called after update_data_master_status, which writes that copy.
# Arguments:
# $1 The value to get.
get_master_status() {
    # Prints the value of field $1 from the file named by
    # $master_status_file. A line matches when its first
    # whitespace-separated word is "<field>:"; the output is everything
    # that follows the first colon and one separator character.
    awk -v key="$1:" '
        $1 == key {
            colon = index($0, ":")
            print substr($0, colon + 2)
        }
    ' "$master_status_file"
}
# Determines what IP address is attached to the current host. The output of the
# crm_attribute command looks like this:
# scope=nodes name=IP value=10.2.2.161
# If the ${INSTANCE_ATTR_NAME}_mysql_master_IP node attribute is not defined, the fallback is `uname -n`.
# The ${INSTANCE_ATTR_NAME}_mysql_master_IP attribute holds the IP address that will be used for the
# CHANGE MASTER TO command.
get_local_ip() {
    # Prints the value of the permanent node attribute
    # ${INSTANCE_ATTR_NAME}_mysql_master_IP; when the query command
    # fails, prints the output of "uname -n" instead.
    local IP

    if IP=$($CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_mysql_master_IP -q -G); then
        echo $IP
    else
        uname -n
    fi
}
#######################################################################
# Functions invoked by resource manager actions
mysql_monitor() {
# Monitor action.
# First checks that the server process is alive (mysql_common_status).
# With OCF_CHECK_LEVEL > 0 and a configured test table it additionally
# runs check_slave on slaves and a SELECT COUNT(*) against the test table.
# Returns:
#   the status from mysql_common_status when the process check fails,
#   $OCF_ERR_GENERIC      when the test query fails,
#   $OCF_RUNNING_MASTER   in master/slave mode when the server is writable,
#   $OCF_SUCCESS          otherwise.
local rc
local status_loglevel="err"
# Set loglevel to info during probe
if ocf_is_probe; then
status_loglevel="info"
fi
mysql_common_status $status_loglevel
rc=$?
# TODO: check max connections error
# If status returned an error, return that immediately
if [ $rc -ne $OCF_SUCCESS ]; then
if ocf_is_ms ; then
# This is a master slave setup but monitored host returned some errors.
# Immediately remove it from the pool of possible masters by erasing its master-mysql key
# When new mysql master election is started and node got no or negative master-mysql attribute the following is logged
# nodename.com pengine: debug: master_color: mysql:0 master score: -1
# If there are NO nodes with positive value election of mysql master will fail with
# nodename.com pengine: info: master_color: ms_mysql: Promoted 0 instances of a possible 1 to master
- $CRM_MASTER -D
+ ocf_promotion_score -D
fi
return $rc
fi
if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then
# Check if this instance is configured as a slave, and if so
# check slave status
if is_slave; then
check_slave
fi
# Check for test table
ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \
-e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table"
rc=$?
if [ $rc -ne 0 ]; then
# We are master/slave and test failed. Delete master score for this node as it is considered unhealthy because of this particular failed check.
- ocf_is_ms && $CRM_MASTER -D
+ ocf_is_ms && ocf_promotion_score -D
# Report the table that was actually queried; no variable named
# test_table is set anywhere in this function.
ocf_exit_reason "Failed to select from $OCF_RESKEY_test_table";
return $OCF_ERR_GENERIC;
fi
else
# In case no extended tests are enabled and we are in master/slave mode _always_ set the master score to 1 if we reached this point
- ocf_is_ms && $CRM_MASTER -v 1
+ ocf_is_ms && ocf_promotion_score -v 1
fi
if ocf_is_ms && ! get_read_only; then
ocf_log debug "MySQL monitor succeeded (master)";
# Always set master score for the master
- $CRM_MASTER -v 2
+ ocf_promotion_score -v 2
return $OCF_RUNNING_MASTER
else
ocf_log debug "MySQL monitor succeeded";
return $OCF_SUCCESS
fi
}
mysql_start() {
# Start action.
# Returns $OCF_SUCCESS when the server is already running or was started
# and passed the initial monitor; otherwise returns the failing status
# from mysql_common_start or mysql_monitor, or $OCF_ERR_GENERIC when the
# slave threads could not be started.
local rc
if ocf_is_ms; then
# Initialize the ReaderVIP attribute, monitor will enable it
set_reader_attr 0
fi
mysql_common_status info
if [ $? = $OCF_SUCCESS ]; then
ocf_log info "MySQL already running"
return $OCF_SUCCESS
fi
mysql_common_prepare_dirs
# Uncomment to perform permission cleansing
# - not convinced this should be enabled by default
#
#chmod 0755 $OCF_RESKEY_datadir
#chown -R $OCF_RESKEY_user $OCF_RESKEY_datadir
#chgrp -R $OCF_RESKEY_group $OCF_RESKEY_datadir
mysql_extra_params=
if ocf_is_ms; then
# Replication is configured explicitly further down, so the server
# is started with --skip-slave-start.
mysql_extra_params="--skip-slave-start"
fi
mysql_common_start $mysql_extra_params
rc=$?
if [ $rc != $OCF_SUCCESS ]; then
return $rc
fi
if ocf_is_ms; then
# We're configured as a stateful resource. We must start as
# slave by default. At this point we don't know if the CRM has
# already promoted a master. So, we simply start in read only
# mode.
set_read_only on
# Now, let's see whether there is a master. We might be a new
# node that is just joining the cluster, and the CRM may have
# promoted a master before.
master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "`
if [ "$master_host" -a "$master_host" != ${NODENAME} ]; then
ocf_log info "Changing MySQL configuration to replicate from $master_host."
# Only the status of start_slave is checked below; the status of
# set_master is not inspected here.
set_master
start_slave
if [ $? -ne 0 ]; then
ocf_exit_reason "Failed to start slave"
return $OCF_ERR_GENERIC
fi
else
ocf_log info "No MySQL master present - clearing replication state"
unset_master
fi
# We also need to set a master preference, otherwise Pacemaker
# won't ever promote us in the absence of any explicit
# preference set by the administrator. We choose a low
# greater-than-zero preference.
- $CRM_MASTER -v 1
+ ocf_promotion_score -v 1
fi
# Initial monitor action
# Raise the check level so that mysql_monitor also runs its test-table
# query when table, user and password are all configured.
if [ -n "$OCF_RESKEY_test_table" -a -n "$OCF_RESKEY_test_user" -a -n "$OCF_RESKEY_test_passwd" ]; then
OCF_CHECK_LEVEL=10
fi
mysql_monitor
rc=$?
if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
ocf_exit_reason "Failed initial monitor action"
return $rc
fi
ocf_log info "MySQL started"
return $OCF_SUCCESS
}
mysql_stop() {
# Stop action. In master/slave mode the promotion score is deleted and
# the reader attribute is set to 0 before the server is stopped.
# Returns the status of mysql_common_stop.
if ocf_is_ms; then
# clear preference for becoming master
- $CRM_MASTER -D
+ ocf_promotion_score -D
# Remove VIP capability
set_reader_attr 0
fi
mysql_common_stop
}
mysql_promote() {
# Promote action: stops replication, publishes "ip|binlog file|position"
# of this node through $CRM_ATTR_REPL_INFO, makes the server writable and
# raises its promotion score.
# Returns $OCF_NOT_RUNNING when the server is not running,
# $OCF_ERR_GENERIC when read-only mode cannot be switched off, and
# $OCF_SUCCESS otherwise.
local master_info
# The status check runs in a subshell, so anything it changes in the
# shell environment does not affect this function.
if ( ! mysql_common_status err ); then
return $OCF_NOT_RUNNING
fi
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "STOP SLAVE"
# Set Master Info in CIB, cluster level attribute
update_data_master_status
master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
${CRM_ATTR_REPL_INFO} -v "$master_info"
# NOTE(review): update_data_master_status writes $master_status_file, and
# nothing in this function assigns $tmpfile -- confirm whether this
# cleanup still removes anything.
rm -f $tmpfile
set_read_only off || return $OCF_ERR_GENERIC
# Existing master gets a higher-than-default master preference, so
# the cluster manager does not shuffle the master role around
# unnecessarily
- $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1))
+ ocf_promotion_score -v $((${OCF_RESKEY_max_slave_lag}+1))
# A master can accept reads
set_reader_attr 1
return $OCF_SUCCESS
}
mysql_demote() {
# Demote action. Returns $OCF_NOT_RUNNING when the server is not running;
# otherwise returns the status of the command that resets the promotion
# score. No SQL statement is issued here.
if ! mysql_common_status err; then
return $OCF_NOT_RUNNING
fi
# Return master preference to default, so the cluster manager gets
# a chance to select a new master
- $CRM_MASTER -v 1
+ ocf_promotion_score -v 1
}
mysql_notify() {
    # Notify action. Dispatches on "<notify_type>-<notify_operation>":
    #   pre-promote   nothing to do
    #   post-promote  every node except the new master resets replication
    #                 and re-points itself at the new master
    #   pre-demote    the node being demoted becomes read-only and kills
    #                 the remaining client threads
    #   post-demote   every node except the demoted one clears its
    #                 replication configuration
    # Returns $OCF_SUCCESS, $OCF_ERR_GENERIC when a step fails, or the
    # status of unset_master for post-demote.

    # If not configured as a Stateful resource, we make no sense of
    # notifications.
    if ! ocf_is_ms; then
        ocf_log info "This agent makes no use of notifications unless running in master/slave mode."
        return $OCF_SUCCESS
    fi

    local type_op
    type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
    ocf_log debug "Received $type_op notification."

    case "$type_op" in
    'pre-promote')
        # Nothing to do now here, new replication info not yet published
        ;;
    'post-promote')
        # The master has completed its promotion. Now is a good
        # time to check whether our replication slave is working
        # correctly.
        master_host=$(echo $OCF_RESKEY_CRM_meta_notify_promote_uname | tr -d " ")
        if [ "$master_host" = "${NODENAME}" ]; then
            ocf_log info "This will be the new master, ignoring post-promote notification."
        else
            ocf_log info "Resetting replication"
            unset_master
            if [ $? -ne 0 ]; then
                return $OCF_ERR_GENERIC
            fi
            ocf_log info "Changing MySQL configuration to replicate from $master_host"
            set_master
            if [ $? -ne 0 ]; then
                return $OCF_ERR_GENERIC
            fi
            start_slave
            if [ $? -ne 0 ]; then
                ocf_exit_reason "Failed to start slave"
                return $OCF_ERR_GENERIC
            fi
        fi
        return $OCF_SUCCESS
        ;;
    'pre-demote')
        demote_host=$(echo $OCF_RESKEY_CRM_meta_notify_demote_uname | tr -d " ")
        # Quoted: an empty host name must compare as "different", not
        # break the test expression.
        if [ "$demote_host" = "${NODENAME}" ]; then
            ocf_log info "pre-demote notification for $demote_host"
            set_read_only on
            if [ $? -ne 0 ]; then
                ocf_exit_reason "Failed to set read-only";
                return $OCF_ERR_GENERIC;
            fi
            # Must kill all existing user threads because they are still Read/write
            # in order for the slaves to complete the read of binlogs
            local tmpfile thread
            tmpfile=$(mktemp "${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX")
            if [ $? -ne 0 ] || [ -z "$tmpfile" ]; then
                ocf_exit_reason "Failed to create a temporary file for the process list"
                return $OCF_ERR_GENERIC
            fi
            $MYSQL $MYSQL_OPTIONS_REPL \
                -e "SHOW PROCESSLIST" > "$tmpfile"
            # Keep only rows that start with a thread id and are neither
            # replication/system threads nor this very query.
            for thread in $(awk '$0 !~ /Binlog Dump|system user|event_scheduler|SHOW PROCESSLIST/ && $0 ~ /^[0-9]/ {print $1}' "$tmpfile")
            do
                ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
                    -e "KILL ${thread}"
            done
            # The process list is no longer needed; do not leave it behind.
            rm -f "$tmpfile"
        else
            ocf_log info "Ignoring pre-demote notification except for my own demotion."
        fi
        return $OCF_SUCCESS
        ;;
    'post-demote')
        demote_host=$(echo $OCF_RESKEY_CRM_meta_notify_demote_uname | tr -d " ")
        if [ "$demote_host" = "${NODENAME}" ]; then
            ocf_log info "Ignoring post-demote notification for my own demotion."
            return $OCF_SUCCESS
        fi
        ocf_log info "post-demote notification for $demote_host."
        # The former master has just been gracefully demoted.
        unset_master
        ;;
    *)
        return $OCF_SUCCESS
        ;;
    esac
}
#######################################################################
# Purely informational actions are answered before any validation.
if [ "$1" = "meta-data" ]; then
    meta_data
    exit $OCF_SUCCESS
fi
if [ "$1" = "usage" ] || [ "$1" = "help" ]; then
    usage
    exit $OCF_SUCCESS
fi

mysql_common_validate
rc=$?
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
    # The environment is not usable. "stop" still falls through to the
    # dispatcher below; every other action ends here.
    if [ "$1" = "monitor" ]; then
        mysql_common_status "info"
        if [ $? -eq $OCF_SUCCESS ]; then
            # if validation fails and pid is active, always treat this as an error
            ocf_exit_reason "environment validation failed, active pid is in unknown state."
            exit $OCF_ERR_GENERIC
        fi
        # validation failed and pid is not active, it's safe to say this instance is inactive.
        exit $OCF_NOT_RUNNING
    elif [ "$1" = "status" ]; then
        exit $LSB_STATUS_STOPPED
    elif [ "$1" != "stop" ]; then
        exit $rc
    fi
fi

# What kind of method was invoked?
case "$1" in
    start)
        mysql_start
        ;;
    stop)
        mysql_stop
        ;;
    status)
        mysql_common_status err
        ;;
    monitor)
        mysql_monitor
        ;;
    promote)
        mysql_promote
        ;;
    demote)
        mysql_demote
        ;;
    notify)
        mysql_notify
        ;;
    validate-all)
        exit $OCF_SUCCESS
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
# vi:sw=4:ts=4:et:
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index de8763544..34e1c6748 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -1,329 +1,328 @@
#!/bin/sh
#######################################################################
# Switch users with runuser when /sbin/runuser is executable (needed for
# SELinux); fall back to su otherwise.
SU=su
if [ -x /sbin/runuser ]; then
    SU=runuser
fi
# Attempt to detect a default binary. "command -v" is the POSIX way to
# look a program up in PATH and does not depend on the external "which"
# utility being installed.
OCF_RESKEY_binary_default=$(command -v mysqld_safe 2> /dev/null)
if [ -z "$OCF_RESKEY_binary_default" ]; then
    OCF_RESKEY_binary_default=$(command -v safe_mysqld 2> /dev/null)
fi

# Fill in some defaults if no values are specified.
# These two are the same on every supported platform.
OCF_RESKEY_config_default="/etc/my.cnf"
OCF_RESKEY_log_default="/var/log/mysqld.log"

HOSTOS=$(uname)
if [ "X${HOSTOS}" = "XOpenBSD" ]; then
    if [ -z "$OCF_RESKEY_binary_default" ]; then
        OCF_RESKEY_binary_default="/usr/local/bin/mysqld_safe"
    fi
    OCF_RESKEY_datadir_default="/var/mysql"
    OCF_RESKEY_user_default="_mysql"
    OCF_RESKEY_group_default="_mysql"
    OCF_RESKEY_pid_default="/var/mysql/mysqld.pid"
    OCF_RESKEY_socket_default="/var/run/mysql/mysql.sock"
else
    if [ -z "$OCF_RESKEY_binary_default" ]; then
        OCF_RESKEY_binary_default="/usr/bin/safe_mysqld"
    fi
    OCF_RESKEY_datadir_default="/var/lib/mysql"
    OCF_RESKEY_user_default="mysql"
    OCF_RESKEY_group_default="mysql"
    OCF_RESKEY_pid_default="/var/run/mysql/mysqld.pid"
    OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock"
fi
# Platform-independent parameter defaults.
OCF_RESKEY_client_binary_default="mysql"

OCF_RESKEY_test_user_default="root"
OCF_RESKEY_test_table_default="mysql.user"
OCF_RESKEY_test_passwd_default=""

OCF_RESKEY_enable_creation_default=0
OCF_RESKEY_additional_parameters_default=""

OCF_RESKEY_replication_user_default="root"
OCF_RESKEY_replication_passwd_default=""
OCF_RESKEY_replication_port_default="3306"
OCF_RESKEY_replication_require_ssl_default="false"
OCF_RESKEY_replication_master_ssl_ca_default=""
OCF_RESKEY_replication_master_ssl_cert_default=""
OCF_RESKEY_replication_master_ssl_key_default=""

OCF_RESKEY_max_slave_lag_default="3600"
OCF_RESKEY_evict_outdated_slaves_default="false"
OCF_RESKEY_reader_attribute_default="readable"

# Apply the defaults. "${VAR=default}" assigns only when VAR is unset, so
# a parameter that was explicitly set to the empty string stays empty.
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
MYSQL_BINDIR=$(dirname ${OCF_RESKEY_binary})

: "${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}}"

: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_datadir=${OCF_RESKEY_datadir_default}}"

: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_group=${OCF_RESKEY_group_default}}"

: "${OCF_RESKEY_log=${OCF_RESKEY_log_default}}"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_socket=${OCF_RESKEY_socket_default}}"

: "${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}}"
: "${OCF_RESKEY_test_table=${OCF_RESKEY_test_table_default}}"
: "${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}}"

: "${OCF_RESKEY_enable_creation=${OCF_RESKEY_enable_creation_default}}"
: "${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}}"

: "${OCF_RESKEY_replication_user=${OCF_RESKEY_replication_user_default}}"
: "${OCF_RESKEY_replication_passwd=${OCF_RESKEY_replication_passwd_default}}"
: "${OCF_RESKEY_replication_port=${OCF_RESKEY_replication_port_default}}"
: "${OCF_RESKEY_replication_require_ssl=${OCF_RESKEY_replication_require_ssl_default}}"
: "${OCF_RESKEY_replication_master_ssl_ca=${OCF_RESKEY_replication_master_ssl_ca_default}}"
: "${OCF_RESKEY_replication_master_ssl_cert=${OCF_RESKEY_replication_master_ssl_cert_default}}"
: "${OCF_RESKEY_replication_master_ssl_key=${OCF_RESKEY_replication_master_ssl_key_default}}"

: "${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}"
: "${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}"
: "${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}"
#######################################################################
# Convenience variables
# MYSQL is the client binary; the MYSQL_OPTIONS_* strings are expanded
# unquoted by their users, so each one is a space-separated option list.
MYSQL=$OCF_RESKEY_client_binary
# --ssl-mode=REQUIRED is only added to the replication options below, and
# only when replication_require_ssl is true.
if ocf_is_true "$OCF_RESKEY_replication_require_ssl"; then
MYSQL_OPTIONS_LOCAL_SSL_OPTIONS="--ssl-mode=REQUIRED"
else
MYSQL_OPTIONS_LOCAL_SSL_OPTIONS=""
fi
# All client connections go through the local UNIX socket.
MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket"
# NOTE(review): the passwords are passed as --password=... command-line
# arguments; presumably visible in the process list while the client
# runs -- confirm whether that is acceptable.
MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL_SSL_OPTIONS $MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
# Error number compared against Last_Errno to recognise the
# "too many connections" condition.
MYSQL_TOO_MANY_CONN_ERR=1040
-CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
NODENAME=$(ocf_local_nodename)
# crm_attribute bound to the local node.
CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $NODENAME "
# Resource name without the ":<n>" clone instance suffix.
INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
# Cluster-wide (crm_config) attribute <resource>_REPL_INFO in the
# "mysql_replication" set; it is read and written as "host|file|pos".
CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
#######################################################################
mysql_common_validate()
{
    # Verifies that the configured environment is usable: server and
    # client binaries, configuration file, data directory, and the
    # user and group the server runs as.
    # Returns $OCF_SUCCESS when every check passes and
    # $OCF_ERR_INSTALLED (with an exit reason) on the first failure.
    # All values are quoted so that an empty parameter fails its check
    # instead of collapsing the test into a one-argument expression that
    # silently passes.
    if ! have_binary "$OCF_RESKEY_binary"; then
        ocf_exit_reason "Setup problem: couldn't find command: $OCF_RESKEY_binary"
        return $OCF_ERR_INSTALLED;
    fi

    if ! have_binary "$OCF_RESKEY_client_binary"; then
        ocf_exit_reason "Setup problem: couldn't find command: $OCF_RESKEY_client_binary"
        return $OCF_ERR_INSTALLED;
    fi

    if [ ! -f "$OCF_RESKEY_config" ]; then
        ocf_exit_reason "Config $OCF_RESKEY_config doesn't exist";
        return $OCF_ERR_INSTALLED;
    fi

    if [ ! -d "$OCF_RESKEY_datadir" ]; then
        ocf_exit_reason "Datadir $OCF_RESKEY_datadir doesn't exist";
        return $OCF_ERR_INSTALLED;
    fi

    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
        ocf_exit_reason "User $OCF_RESKEY_user doesn't exist";
        return $OCF_ERR_INSTALLED;
    fi

    if ! getent group "$OCF_RESKEY_group" >/dev/null 2>&1; then
        ocf_exit_reason "Group $OCF_RESKEY_group doesn't exist";
        return $OCF_ERR_INSTALLED;
    fi

    return $OCF_SUCCESS
}
mysql_common_check_pid() {
    # Tests whether a process with the given id exists.
    # Arguments:
    #   $1 - process id (may be empty)
    # When /proc and /proc/1 are directories the check looks for
    # /proc/<pid>; otherwise it sends signal 0 to the process.
    # Returns 0 when the process exists, non-zero otherwise.
    local pid=$1

    if [ -d /proc ] && [ -d /proc/1 ]; then
        [ -n "$pid" ] && [ -d /proc/$pid ]
        return $?
    fi

    kill -s 0 $pid >/dev/null 2>&1
    return $?
}
mysql_common_status() {
    # Reports whether the MySQL server process is running.
    # Arguments:
    #   $1 - log level used for the "not running" messages
    #   $2 - optional process id; when omitted it is read from the PID
    #        file $OCF_RESKEY_pid
    # Returns $OCF_SUCCESS when the process exists and $OCF_NOT_RUNNING
    # otherwise. A PID file that refers to a dead process is removed.
    local loglevel=$1
    local pid=$2

    if [ -z "$pid" ] && [ ! -e $OCF_RESKEY_pid ]; then
        ocf_log $loglevel "MySQL is not running"
        return $OCF_NOT_RUNNING
    fi
    if [ -z "$pid" ]; then
        pid=$(cat $OCF_RESKEY_pid)
    fi

    if mysql_common_check_pid $pid; then
        return $OCF_SUCCESS
    fi

    # The process is gone: drop a stale PID file if one is still there.
    if [ -e $OCF_RESKEY_pid ]; then
        ocf_log $loglevel "MySQL not running: removing old PID file"
        rm -f $OCF_RESKEY_pid
    fi
    return $OCF_NOT_RUNNING
}
mysql_common_prepare_dirs()
{
    # Prepares the files and directories the server needs before start:
    #   - creates the log file and sets its owner and mode,
    #   - optionally initializes the data directory (enable_creation),
    #   - creates the PID and socket directories when missing,
    #   - checks that PID dir, socket dir and data dir are writable by
    #     $OCF_RESKEY_user.
    # Sets the globals pid_dir and socket_dir.
    # Exits the agent with $OCF_ERR_GENERIC when initialization fails and
    # with $OCF_ERR_PERM when a directory is not writable.
    local rc

    touch $OCF_RESKEY_log
    chown $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_log
    chmod 0640 $OCF_RESKEY_log
    if [ -x /sbin/restorecon ]; then
        /sbin/restorecon $OCF_RESKEY_log
    fi

    if ocf_is_true "$OCF_RESKEY_enable_creation" && [ ! -d $OCF_RESKEY_datadir/mysql ] ; then
        ocf_log info "Initializing MySQL database: "
        $MYSQL_BINDIR/mysql_install_db --datadir=$OCF_RESKEY_datadir
        rc=$?
        [ $rc -eq 0 ] || {
            ocf_exit_reason "Initialization failed: $rc";
            exit $OCF_ERR_GENERIC
        }
        chown -R $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_datadir
    fi

    pid_dir=$(dirname $OCF_RESKEY_pid)
    [ -d $pid_dir ] || {
        ocf_log info "Creating PID dir: $pid_dir"
        mkdir -p $pid_dir
        chown $OCF_RESKEY_user:$OCF_RESKEY_group $pid_dir
    }

    socket_dir=$(dirname $OCF_RESKEY_socket)
    [ -d $socket_dir ] || {
        ocf_log info "Creating socket dir: $socket_dir"
        mkdir -p $socket_dir
        chown $OCF_RESKEY_user:$OCF_RESKEY_group $socket_dir
    }

    # Whether the directories were just created or already existed, the
    # configured user must be able to write to each of them.
    for dir in $pid_dir $socket_dir $OCF_RESKEY_datadir
    do
        if ! $SU -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then
            ocf_exit_reason "Directory $dir is not writable by $OCF_RESKEY_user"
            exit $OCF_ERR_PERM;
        fi
    done
}
mysql_common_start()
{
# Launches the server binary in the background as $OCF_RESKEY_user and
# waits until mysql_common_status reports it as running.
# Arguments:
#   $1 - extra command-line parameters appended to the server command
# Returns $OCF_SUCCESS once the server is up, $OCF_ERR_GENERIC when the
# launcher process ends before the server is up, or the unexpected status
# from mysql_common_status.
# There is no timeout of its own here (see the comment before the loop).
# Note: start_wait and rc are not declared local in this function.
local mysql_extra_params="$1"
local pid
$SU - $OCF_RESKEY_user -s /bin/sh -c \
"${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
--pid-file=$OCF_RESKEY_pid \
--socket=$OCF_RESKEY_socket \
--datadir=$OCF_RESKEY_datadir \
--log-error=$OCF_RESKEY_log \
$OCF_RESKEY_additional_parameters \
$mysql_extra_params >/dev/null 2>&1" &
# PID of the background $SU launcher, not the PID from the server's PID file.
pid=$!
# Spin waiting for the server to come up.
# Let the CRM/LRM time us out if required.
start_wait=1
while [ $start_wait = 1 ]; do
if ! ps $pid > /dev/null 2>&1; then
# The launcher is gone; "wait" collects its exit status, which the
# message below reports through $?.
wait $pid
ocf_exit_reason "MySQL server failed to start (pid=$pid) (rc=$?), please check your installation"
return $OCF_ERR_GENERIC
fi
mysql_common_status info
rc=$?
if [ $rc = $OCF_SUCCESS ]; then
start_wait=0
elif [ $rc != $OCF_NOT_RUNNING ]; then
ocf_log info "MySQL start failed: $rc"
return $rc
fi
# The sleep also runs once more after the server was found running.
sleep 2
done
return $OCF_SUCCESS
}
mysql_common_stop()
{
    # Stops the server whose process id is stored in $OCF_RESKEY_pid.
    # Sends SIGTERM, polls once per second until the process is gone or
    # the shutdown timeout is reached, then falls back to SIGKILL.
    # The timeout is 15 seconds, or the operation timeout
    # ($OCF_RESKEY_CRM_meta_timeout, milliseconds) minus 5 seconds when
    # that is set.
    # Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when SIGTERM could not be
    # delivered.
    local pid
    local rc

    if [ ! -f $OCF_RESKEY_pid ]; then
        ocf_log info "MySQL is not running"
        return $OCF_SUCCESS
    fi

    pid=$(cat $OCF_RESKEY_pid 2> /dev/null )
    if ! mysql_common_check_pid $pid; then
        rm -f $OCF_RESKEY_pid
        ocf_log info "MySQL is already stopped"
        return $OCF_SUCCESS;
    fi

    /bin/kill $pid > /dev/null
    rc=$?
    if [ $rc != 0 ]; then
        ocf_exit_reason "MySQL couldn't be stopped"
        return $OCF_ERR_GENERIC
    fi

    # stop waiting
    shutdown_timeout=15
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
    fi

    count=0
    while [ $count -lt $shutdown_timeout ]; do
        mysql_common_status info $pid
        rc=$?
        if [ $rc = $OCF_NOT_RUNNING ]; then
            break
        fi
        count=$((count + 1))
        sleep 1
        ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
    done

    mysql_common_status info $pid
    if [ $? != $OCF_NOT_RUNNING ]; then
        ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
        /bin/kill -KILL $pid > /dev/null
    fi

    ocf_log info "MySQL stopped";
    rm -f /var/lock/subsys/mysqld
    rm -f $OCF_RESKEY_socket
    return $OCF_SUCCESS
}
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index e93d66855..aa8a13a84 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -1,2253 +1,2252 @@
#!/bin/sh
#
# Description: Manages a PostgreSQL Server as an OCF High-Availability
# resource
#
# Authors: Serge Dubrouski (sergeyfd@gmail.com) -- original RA
# Florian Haas (florian@linbit.com) -- makeover
# Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication
# David Corlette (dcorlette@netiq.com) -- add support for non-standard library locations and non-standard port
#
# Copyright: 2006-2012 Serge Dubrouski <sergeyfd@gmail.com>
# and other Linux-HA contributors
# License: GNU General Public License (GPL)
#
###############################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Command used to switch to the database owner: default to su, but prefer
# runuser when it is installed (for SELinux).
SU=su
if [ -x /sbin/runuser ]; then
    SU=runuser
fi
#
# Get PostgreSQL Configuration parameter
#
# Prints (without a trailing newline) the value assigned to parameter $1 in
# the file named by $OCF_RESKEY_config.  A trailing "# comment" is stripped
# and a value wrapped in single quotes is unquoted.  Prints nothing when the
# parameter is not present.
#
# Arguments: $1 - parameter name (interpolated into a Perl regex)
# Globals:   OCF_RESKEY_config (read)
#
get_pgsql_param() {
    local param_name
    local perl_code

    param_name=$1
    # The Perl snippet is kept local so it does not leak into the caller's
    # namespace.
    perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) {
\$dir=\$1;
\$dir =~ s/\s*\#.*//;
\$dir =~ s/^'(\S*)'/\$1/;
print \$dir;}"
    # Quote the redirect target: bash reports "ambiguous redirect" for an
    # unquoted path that contains whitespace.
    perl -ne "$perl_code" < "$OCF_RESKEY_config"
}
# Default values for the agent parameters.  Each *_default is also what
# meta_data advertises as the parameter's default.
OCF_RESKEY_pgctl_default="/usr/bin/pg_ctl"
OCF_RESKEY_psql_default="/usr/bin/psql"
OCF_RESKEY_pgdata_default="/var/lib/pgsql/data"
OCF_RESKEY_pgdba_default="postgres"
OCF_RESKEY_pghost_default=""
OCF_RESKEY_pgport_default="5432"
OCF_RESKEY_pglibs_default="/usr/lib"
OCF_RESKEY_start_opt_default=""
OCF_RESKEY_ctl_opt_default=""
OCF_RESKEY_pgdb_default="template1"
OCF_RESKEY_logfile_default="/dev/null"
OCF_RESKEY_socketdir_default=""
OCF_RESKEY_stop_escalate_default="90"
OCF_RESKEY_monitor_user_default=""
OCF_RESKEY_monitor_password_default=""
OCF_RESKEY_monitor_sql_default="select now();"
OCF_RESKEY_check_wal_receiver_default="false"

# Default values for the replication-related parameters.
OCF_RESKEY_rep_mode_default="none"
OCF_RESKEY_node_list_default=""
OCF_RESKEY_restore_command_default=""
OCF_RESKEY_archive_cleanup_command_default=""
OCF_RESKEY_recovery_end_command_default=""
OCF_RESKEY_master_ip_default=""
OCF_RESKEY_repuser_default="postgres"
OCF_RESKEY_primary_conninfo_opt_default=""
OCF_RESKEY_restart_on_promote_default="false"
OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp"
OCF_RESKEY_xlog_check_count_default="3"
OCF_RESKEY_crm_attr_timeout_default="5"
OCF_RESKEY_stop_escalate_in_slave_default="90"
OCF_RESKEY_replication_slot_name_default=""

# Apply a default only where the parameter is unset; a parameter that is set
# (even to the empty string) is left alone.
: "${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}}"
: "${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}}"
: "${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}}"
: "${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}}"
: "${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}}"
: "${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}}"
: "${OCF_RESKEY_pglibs=${OCF_RESKEY_pglibs_default}}"
# The configuration file default is derived from the effective data directory.
: "${OCF_RESKEY_config=${OCF_RESKEY_pgdata}/postgresql.conf}"
: "${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}}"
: "${OCF_RESKEY_ctl_opt=${OCF_RESKEY_ctl_opt_default}}"
: "${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}}"
: "${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}}"
: "${OCF_RESKEY_socketdir=${OCF_RESKEY_socketdir_default}}"
: "${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}}"
: "${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}}"
: "${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}}"
: "${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}}"
: "${OCF_RESKEY_check_wal_receiver=${OCF_RESKEY_check_wal_receiver_default}}"

# for replication
: "${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}}"
: "${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}}"
: "${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}}"
: "${OCF_RESKEY_archive_cleanup_command=${OCF_RESKEY_archive_cleanup_command_default}}"
: "${OCF_RESKEY_recovery_end_command=${OCF_RESKEY_recovery_end_command_default}}"
: "${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}}"
: "${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}}"
: "${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}}"
: "${OCF_RESKEY_restart_on_promote=${OCF_RESKEY_restart_on_promote_default}}"
: "${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}}"
: "${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}}"
: "${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}}"
: "${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}}"
: "${OCF_RESKEY_replication_slot_name=${OCF_RESKEY_replication_slot_name_default}}"
# usage: print the list of supported actions with a one-line description of
# each to stdout.
# Returns: $OCF_ERR_ARGS
usage() {
    cat <<USAGE_TEXT
usage: $0 start|stop|status|monitor|promote|demote|notify|meta-data|validate-all|methods
$0 manages a PostgreSQL Server as an HA resource.
The 'start' operation starts the PostgreSQL server.
The 'stop' operation stops the PostgreSQL server.
The 'status' operation reports whether the PostgreSQL is up.
The 'monitor' operation reports whether the PostgreSQL is running.
The 'promote' operation promotes the PostgreSQL server.
The 'demote' operation demotes the PostgreSQL server.
The 'validate-all' operation reports whether the parameters are valid.
The 'methods' operation reports on the methods $0 supports.
USAGE_TEXT
    return $OCF_ERR_ARGS
}
# meta_data: print the OCF resource-agent metadata (XML) for pgsql to stdout.
# The advertised parameter defaults are interpolated from the
# OCF_RESKEY_*_default variables (and from OCF_RESKEY_pgdata for "config").
meta_data() {
cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="pgsql" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource.
</longdesc>
<shortdesc lang="en">Manages a PostgreSQL database instance</shortdesc>
<parameters>
<parameter name="pgctl" unique="0" required="0">
<longdesc lang="en">
Path to pg_ctl command.
</longdesc>
<shortdesc lang="en">pgctl</shortdesc>
<content type="string" default="${OCF_RESKEY_pgctl_default}" />
</parameter>
<parameter name="start_opt" unique="0" required="0">
<longdesc lang="en">
Start options (-o start_opt in pg_ctl). "-i -p 5432" for example.
</longdesc>
<shortdesc lang="en">start_opt</shortdesc>
<content type="string" default="${OCF_RESKEY_start_opt_default}" />
</parameter>
<parameter name="ctl_opt" unique="0" required="0">
<longdesc lang="en">
Additional pg_ctl options (-w, -W etc..).
</longdesc>
<shortdesc lang="en">ctl_opt</shortdesc>
<content type="string" default="${OCF_RESKEY_ctl_opt_default}" />
</parameter>
<parameter name="psql" unique="0" required="0">
<longdesc lang="en">
Path to psql command.
</longdesc>
<shortdesc lang="en">psql</shortdesc>
<content type="string" default="${OCF_RESKEY_psql_default}" />
</parameter>
<parameter name="pgdata" unique="0" required="0">
<longdesc lang="en">
Path to PostgreSQL data directory.
</longdesc>
<shortdesc lang="en">pgdata</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdata_default}" />
</parameter>
<parameter name="pgdba" unique="0" required="0">
<longdesc lang="en">
User that owns PostgreSQL.
</longdesc>
<shortdesc lang="en">pgdba</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdba_default}" />
</parameter>
<parameter name="pghost" unique="0" required="0">
<longdesc lang="en">
Hostname/IP address where PostgreSQL is listening
</longdesc>
<shortdesc lang="en">pghost</shortdesc>
<content type="string" default="${OCF_RESKEY_pghost_default}" />
</parameter>
<parameter name="pgport" unique="0" required="0">
<longdesc lang="en">
Port where PostgreSQL is listening
</longdesc>
<shortdesc lang="en">pgport</shortdesc>
<content type="integer" default="${OCF_RESKEY_pgport_default}" />
</parameter>
<parameter name="pglibs" unique="0" required="0">
<longdesc lang="en">
Custom location of the Postgres libraries. If not set, the standard location
will be used.
</longdesc>
<shortdesc lang="en">pglibs</shortdesc>
<content type="string" default="${OCF_RESKEY_pglibs_default}" />
</parameter>
<parameter name="monitor_user" unique="0" required="0">
<longdesc lang="en">
PostgreSQL user that pgsql RA will use for monitor operations. If it's not set
pgdba user will be used.
</longdesc>
<shortdesc lang="en">monitor_user</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_user_default}" />
</parameter>
<parameter name="monitor_password" unique="0" required="0">
<longdesc lang="en">
Password for monitor user.
</longdesc>
<shortdesc lang="en">monitor_password</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_password_default}" />
</parameter>
<parameter name="monitor_sql" unique="0" required="0">
<longdesc lang="en">
SQL script that will be used for monitor operations.
</longdesc>
<shortdesc lang="en">monitor_sql</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_sql_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Path to the PostgreSQL configuration file for the instance.
</longdesc>
<shortdesc lang="en">Configuration file</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdata}/postgresql.conf" />
</parameter>
<parameter name="pgdb" unique="0" required="0">
<longdesc lang="en">
Database that will be used for monitoring.
</longdesc>
<shortdesc lang="en">pgdb</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdb_default}" />
</parameter>
<parameter name="logfile" unique="0" required="0">
<longdesc lang="en">
Path to PostgreSQL server log output file.
</longdesc>
<shortdesc lang="en">logfile</shortdesc>
<content type="string" default="${OCF_RESKEY_logfile_default}" />
</parameter>
<parameter name="socketdir" unique="0" required="0">
<longdesc lang="en">
Unix socket directory for PostgreSQL.
If you use PostgreSQL 9.3 or higher and define unix_socket_directories in the postgresql.conf, then you must set socketdir to determine which directory is used for psql command.
</longdesc>
<shortdesc lang="en">socketdir</shortdesc>
<content type="string" default="${OCF_RESKEY_socketdir_default}" />
</parameter>
<parameter name="stop_escalate" unique="0" required="0">
<longdesc lang="en">
Number of seconds to wait for stop (using -m fast) before resorting to -m immediate
</longdesc>
<shortdesc lang="en">stop escalation</shortdesc>
<content type="integer" default="${OCF_RESKEY_stop_escalate_default}" />
</parameter>
<parameter name="rep_mode" unique="0" required="0">
<longdesc lang="en">
Replication mode may be set to "async" or "sync" or "slave".
They require PostgreSQL 9.1 or later.
Once set, "async" and "sync" require node_list, master_ip, and
restore_command parameters, as well as configuring PostgreSQL
for replication (in postgresql.conf and pg_hba.conf).
"slave" means that RA only makes recovery.conf before starting
to connect to primary which is running somewhere.
It doesn't need master/slave setting.
It requires master_ip restore_command parameters.
</longdesc>
<shortdesc lang="en">rep_mode</shortdesc>
<content type="string" default="${OCF_RESKEY_rep_mode_default}" />
</parameter>
<parameter name="node_list" unique="0" required="0">
<longdesc lang="en">
All node names. Please separate each node name with a space.
This is optional for replication. Defaults to all nodes in the cluster
</longdesc>
<shortdesc lang="en">node list</shortdesc>
<content type="string" default="${OCF_RESKEY_node_list_default}" />
</parameter>
<parameter name="restore_command" unique="0" required="0">
<longdesc lang="en">
restore_command for recovery.conf.
This is required for replication.
</longdesc>
<shortdesc lang="en">restore_command</shortdesc>
<content type="string" default="${OCF_RESKEY_restore_command_default}" />
</parameter>
<parameter name="archive_cleanup_command" unique="0" required="0">
<longdesc lang="en">
archive_cleanup_command for recovery.conf.
This is used for replication and is optional.
</longdesc>
<shortdesc lang="en">archive_cleanup_command</shortdesc>
<content type="string" default="${OCF_RESKEY_archive_cleanup_command_default}" />
</parameter>
<parameter name="recovery_end_command" unique="0" required="0">
<longdesc lang="en">
recovery_end_command for recovery.conf.
This is used for replication and is optional.
</longdesc>
<shortdesc lang="en">recovery_end_command</shortdesc>
<content type="string" default="${OCF_RESKEY_recovery_end_command_default}" />
</parameter>
<parameter name="master_ip" unique="0" required="0">
<longdesc lang="en">
Master's floating IP address to be connected from hot standby.
This parameter is used for "primary_conninfo" in recovery.conf.
This is required for replication.
</longdesc>
<shortdesc lang="en">master ip</shortdesc>
<content type="string" default="${OCF_RESKEY_master_ip_default}" />
</parameter>
<parameter name="repuser" unique="0" required="0">
<longdesc lang="en">
User used to connect to the master server.
This parameter is used for "primary_conninfo" in recovery.conf.
This is required for replication.
</longdesc>
<shortdesc lang="en">repuser</shortdesc>
<content type="string" default="${OCF_RESKEY_repuser_default}" />
</parameter>
<parameter name="primary_conninfo_opt" unique="0" required="0">
<longdesc lang="en">
primary_conninfo options of recovery.conf except host, port, user and application_name.
This is optional for replication.
</longdesc>
<shortdesc lang="en">primary_conninfo_opt</shortdesc>
<content type="string" default="${OCF_RESKEY_primary_conninfo_opt_default}" />
</parameter>
<parameter name="restart_on_promote" unique="0" required="0">
<longdesc lang="en">
If this is true, RA deletes recovery.conf and restarts PostgreSQL
on promote to keep Timeline ID. It probably makes fail-over slower.
It's recommended to set on-fail of promote up as fence.
This is optional for replication.
</longdesc>
<shortdesc lang="en">restart_on_promote</shortdesc>
<content type="boolean" default="${OCF_RESKEY_restart_on_promote_default}" />
</parameter>
<parameter name="replication_slot_name" unique="0" required="0">
<longdesc lang="en">
Set this option when using replication slots.
Can only use lower case letters, numbers and underscore for replication_slot_name.
The replication slots would be created for each node, with the name adding the node name as postfix.
For example, replication_slot_name is "sample" and 2 slaves which are "node1" and "node2" connect to
their slots, the slots names are "sample_node1" and "sample_node2".
If the node name contains a upper case letter, hyphen and dot, those characters will be converted to a lower case letter or an underscore.
For example, Node-1.example.com to node_1_example_com.
pgsql RA doesn't monitor and delete the replication slot.
When the slave node has been disconnected in failure or the like, execute one of the following manually.
Otherwise it may eventually cause a disk full because the master node will continue to accumulate the unsent WAL.
1. recover and reconnect the slave node to the master node as soon as possible.
2. delete the slot on the master node by following psql command.
$ select pg_drop_replication_slot('replication_slot_name');
</longdesc>
<shortdesc lang="en">replication_slot_name</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_slot_name_default}" />
</parameter>
<parameter name="tmpdir" unique="0" required="0">
<longdesc lang="en">
Path to temporary directory.
This is optional for replication.
</longdesc>
<shortdesc lang="en">tmpdir</shortdesc>
<content type="string" default="${OCF_RESKEY_tmpdir_default}" />
</parameter>
<parameter name="xlog_check_count" unique="0" required="0">
<longdesc lang="en">
Number of checks of xlog on monitor before promote.
This is optional for replication.
Note: For backward compatibility, the terms are unified with PostgreSQL 9.
If you are using PostgreSQL 10 or later, replace "xlog" with "wal".
Likewise, replacing "location" with "lsn".
</longdesc>
<shortdesc lang="en">xlog check count</shortdesc>
<content type="integer" default="${OCF_RESKEY_xlog_check_count_default}" />
</parameter>
<parameter name="crm_attr_timeout" unique="0" required="0">
<longdesc lang="en">
The timeout of crm_attribute forever update command.
Default value is 5 seconds.
This is optional for replication.
</longdesc>
<shortdesc lang="en">The timeout of crm_attribute forever update command.</shortdesc>
<content type="integer" default="${OCF_RESKEY_crm_attr_timeout_default}" />
</parameter>
<parameter name="stop_escalate_in_slave" unique="0" required="0">
<longdesc lang="en">
Number of seconds to wait for stop (using -m fast) before resorting to -m immediate
in slave state.
This is optional for replication.
</longdesc>
<shortdesc lang="en">stop escalation_in_slave</shortdesc>
<content type="integer" default="${OCF_RESKEY_stop_escalate_in_slave_default}" />
</parameter>
<parameter name="check_wal_receiver" unique="0" required="0">
<longdesc lang="en">
If this is true, RA checks wal_receiver process on monitor
and notifies its status using "(resource name)-receiver-status" attribute.
It's useful for checking whether PostgreSQL (hot standby) connects to primary.
The attribute shows status as "normal" or "normal (master)" or "ERROR".
Note that if you configure PostgreSQL as master/slave resource, then
wal receiver is not running in the master and the attribute shows status as
"normal (master)" consistently because it is normal status.
</longdesc>
<shortdesc lang="en">check_wal_receiver</shortdesc>
<content type="boolean" default="${OCF_RESKEY_check_wal_receiver_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="30s"/>
<action name="monitor" depth="0" timeout="30s" interval="29s" role="Promoted" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="5s" />
<action name="methods" timeout="5s" />
</actions>
</resource-agent>
EOF
}
#
# Run the given command in the Resource owner environment...
#
# Up to two leading arguments are consumed as options, in any order:
#   -q              passed through to ocf_run as -q
#   info|warn|err   log level passed to ocf_run (default: err)
# The remaining arguments are joined into one command line that is executed
# as $OCF_RESKEY_pgdba (through $SU -c) after changing into
# $OCF_RESKEY_pgdata.  The exit status is that of ocf_run.
#
runasowner() {
    local quietrun=""
    local loglevel="-err"
    local remaining=2

    # Look at the first argument at most twice; an argument that is not an
    # option is left in place for the command line.
    while [ "$remaining" -gt 0 ]; do
        remaining=$((remaining - 1))
        case "$1" in
            "-q")
                quietrun="-q"
                shift 1
                ;;
            "info"|"warn"|"err")
                loglevel="-$1"
                shift 1
                ;;
        esac
    done

    ocf_run $quietrun $loglevel $SU $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*"
}
#
# Shell escape
#
# Prints the arguments (joined with a space) with every single quote
# replaced by '\'' so the result can be embedded inside a single-quoted
# shell string.
#
escape_string() {
    # printf instead of echo: echo would swallow an argument such as "-n"
    # and, in some shells, interpret backslash sequences in the data.
    printf '%s\n' "$*" | sed -e "s/'/'\\\\''/g"
}
#
# methods: What methods/operations do we support?
#
# Prints one supported action name per line.
#
pgsql_methods() {
    printf '%s\n' \
        start \
        stop \
        status \
        monitor \
        promote \
        demote \
        notify \
        methods \
        meta-data \
        validate-all
}
# Execute SQL and return the result.
# Runs $1 with psql (-Atc) as $OCF_RESKEY_pgdba from within
# $OCF_RESKEY_pgdata.  The captured output is echoed unquoted, so runs of
# whitespace (including newlines) collapse to single spaces.
# Returns: the exit status of the $SU invocation.
exec_sql() {
    local sql="$1"
    local output
    local rc
    local psql_cmd

    psql_cmd="cd $OCF_RESKEY_pgdata; $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba -Atc \"$sql\""
    output=$($SU $OCF_RESKEY_pgdba -c "$psql_cmd")
    rc=$?

    echo $output
    return $rc
}
#pgsql_real_start: Starts PostgreSQL
# Issues "pg_ctl start" as the database owner and then polls
# pgsql_real_monitor once per second until it reports $OCF_SUCCESS or
# $OCF_RUNNING_MASTER.  The loop has no timeout of its own.
# Returns: the final monitor status on success; $OCF_ERR_PERM when the log
# file is not usable; $OCF_ERR_GENERIC on other failures, including an
# already running server while in replication mode.
pgsql_real_start() {
    local pgctl_options
    local postgres_options
    local rc

    pgsql_real_monitor info
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ] || [ $rc -eq $OCF_RUNNING_MASTER ]; then
        ocf_log info "PostgreSQL is already running. PID=$(cat $PIDFILE)"
        # An already running server counts as a failure in replication mode.
        is_replication && return $OCF_ERR_GENERIC
        return $OCF_SUCCESS
    fi

    # Drop a stale postmaster.pid.
    rm -f $PIDFILE

    # Drop a leftover backup_label (never in replication mode).
    if [ -f $BACKUPLABEL ]; then
        if ! is_replication; then
            ocf_log info "Removing $BACKUPLABEL. The previous backup might have failed."
            rm -f $BACKUPLABEL
        fi
    fi

    # The log file must be usable before the server is started.
    check_log_file $OCF_RESKEY_logfile
    if [ $? -ne 0 ]; then
        ocf_exit_reason "PostgreSQL can't write to the log file: $OCF_RESKEY_logfile"
        return $OCF_ERR_PERM
    fi

    # The socket directory is only checked when one is configured.
    [ -n "$OCF_RESKEY_socketdir" ] && check_socket_dir

    check_stat_temp_directory

    if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
        rm -f $RECOVERY_CONF
        make_recovery_conf || return $OCF_ERR_GENERIC
    fi

    # Options handed through to the PostgreSQL server process (pg_ctl -o).
    postgres_options="-c config_file=${OCF_RESKEY_config}"
    [ -n "$OCF_RESKEY_pghost" ] && postgres_options="$postgres_options -h $OCF_RESKEY_pghost"
    [ -n "$OCF_RESKEY_start_opt" ] && postgres_options="$postgres_options $OCF_RESKEY_start_opt"

    # Options for pg_ctl itself, with the pass-through options appended.
    pgctl_options="$OCF_RESKEY_ctl_opt -D $OCF_RESKEY_pgdata -l $OCF_RESKEY_logfile -o '$postgres_options'"

    # Invoke pg_ctl
    if ! runasowner "unset PGUSER; unset PGPASSWORD; $OCF_RESKEY_pgctl $pgctl_options -W start"; then
        ocf_exit_reason "Can't start PostgreSQL."
        return $OCF_ERR_GENERIC
    fi
    # Probably started.....
    ocf_log info "PostgreSQL start command sent."

    # Wait until the monitor reports the server as running.
    until
        pgsql_real_monitor warn
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] || [ $rc -eq $OCF_RUNNING_MASTER ]
    do
        sleep 1
        ocf_log debug "PostgreSQL still hasn't started yet. Waiting..."
    done

    # Replication slots are deleted on every node at start; on the master
    # they are created again during promotion.
    if use_replication_slot; then
        delete_replication_slots
        if [ $? -eq $OCF_ERR_GENERIC ]; then
            ocf_exit_reason "PostgreSQL can't clean up replication_slot."
            return $OCF_ERR_GENERIC
        fi
    fi

    ocf_log info "PostgreSQL is started."
    return $rc
}
# pgsql_replication_start: start PostgreSQL when replication is in use.
# Marks this node "STOP", sets its promotion score to $CAN_NOT_PROMOTE,
# removes leftover state files, calls make_recovery_conf, and then starts the
# server through pgsql_real_start.  On success the node is marked "HS:alone".
# Returns: $OCF_SUCCESS; $OCF_ERR_GENERIC when preparation or the start
# fails or $PGSQL_LOCK exists; $OCF_ERR_CONFIGURED when
# synchronous_standby_names is non-empty after the start.
pgsql_replication_start() {
local rc
local synchronous_standby_names
# initializing for replication
change_pgsql_status "$NODENAME" "STOP"
delete_master_baseline
- exec_with_retry 0 $CRM_MASTER -v $CAN_NOT_PROMOTE
+ exec_with_retry 0 ocf_promotion_score -v $CAN_NOT_PROMOTE
# Remove leftover xlog note files and the generated configuration files.
rm -f ${XLOG_NOTE_FILE}.* $REP_MODE_CONF $RECOVERY_CONF
if ! make_recovery_conf || ! delete_xlog_location || ! set_async_mode_all; then
return $OCF_ERR_GENERIC
fi
# Refuse to start while the lock file exists; it has to be removed by hand.
if [ -f $PGSQL_LOCK ]; then
ocf_exit_reason "My data may be inconsistent. You have to remove $PGSQL_LOCK file to force start."
return $OCF_ERR_GENERIC
fi
# start
pgsql_real_start
# Any status other than $OCF_SUCCESS is reported as a generic error.
if [ $? -ne $OCF_SUCCESS ]; then
return $OCF_ERR_GENERIC
fi
# A non-empty result of this query is treated as a configuration error.
synchronous_standby_names=$(exec_sql "${CHECK_SYNCHRONOUS_STANDBY_NAMES_SQL}")
if [ -n "${synchronous_standby_names}" ]; then
ocf_exit_reason "Invalid synchronous_standby_names is set in postgresql.conf."
return $OCF_ERR_CONFIGURED
fi
change_pgsql_status "$NODENAME" "HS:alone"
return $OCF_SUCCESS
}
#pgsql_start: pgsql_real_start() wrapper for replication
# Dispatches to pgsql_replication_start when replication is configured and
# to pgsql_real_start otherwise; returns the status of whichever ran.
pgsql_start() {
    if is_replication; then
        pgsql_replication_start
    else
        pgsql_real_start
    fi
}
#pgsql_promote: Promote PostgreSQL
# Only valid in replication mode.  Marks every other node in $NODE_LIST as
# "DISCONNECT" with score $CAN_NOT_PROMOTE, creates $PGSQL_LOCK, and then
# either restarts the server without its standby configuration
# (restart_on_promote) or runs "pg_ctl promote" and waits for the monitor to
# report $OCF_RUNNING_MASTER.
# Returns: $OCF_SUCCESS, $OCF_ERR_CONFIGURED (not in replication mode), or
# $OCF_ERR_GENERIC.
pgsql_promote() {
local output
local target
local rc
if ! is_replication; then
ocf_exit_reason "Not in a replication mode."
return $OCF_ERR_CONFIGURED
fi
output=`exec_sql "${CHECK_MS_SQL}"`
if [ $? -ne 0 ]; then
# NOTE(review): $rc has not been assigned yet at this point and $loglevel
# is not set in this function, so both look empty here unless provided by
# a caller's scope -- confirm what report_psql_error receives.
report_psql_error $rc $loglevel "Can't get PostgreSQL recovery status on promote."
return $OCF_ERR_GENERIC
fi
# "f" from $CHECK_MS_SQL is handled as "already running as master".
if [ "$output" = "f" ]; then
ocf_log info "PostgreSQL is already Master. Don't execute promote."
return $OCF_SUCCESS
fi
rm -f ${XLOG_NOTE_FILE}.*
# Mark every node except this one as disconnected and not promotable.
for target in $NODE_LIST; do
[ "$target" = "$NODENAME" ] && continue
change_data_status "$target" "DISCONNECT"
change_master_score "$target" "$CAN_NOT_PROMOTE"
done
ocf_log info "Creating $PGSQL_LOCK."
touch $PGSQL_LOCK
show_master_baseline
if ocf_is_true ${OCF_RESKEY_restart_on_promote}; then
ocf_log info "Restarting PostgreSQL instead of promote."
#stop : this function returns $OCF_SUCCESS only.
pgsql_real_stop slave
# $USE_STANDBY_SIGNAL is executed as a command, so it is expected to hold
# "true" or "false"; it selects which standby marker file is removed.
if "${USE_STANDBY_SIGNAL}"; then
rm -f ${OCF_RESKEY_pgdata}/standby.signal
else
rm -f $RECOVERY_CONF
fi
pgsql_real_start
rc=$?
# Anything other than "running as master" is a failed promotion.
if [ $rc -ne $OCF_RUNNING_MASTER ]; then
ocf_exit_reason "Can't start PostgreSQL as primary on promote."
# The status is only reset to STOP when the start did not report
# $OCF_SUCCESS either.
if [ $rc -ne $OCF_SUCCESS ]; then
change_pgsql_status "$NODENAME" "STOP"
fi
return $OCF_ERR_GENERIC
fi
else
runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata -W promote"
if [ $? -eq 0 ]; then
ocf_log info "PostgreSQL promote command sent."
else
ocf_exit_reason "Can't promote PostgreSQL."
return $OCF_ERR_GENERIC
fi
# Poll once per second until the monitor reports master; only
# $OCF_ERR_GENERIC ends the wait early.  The loop has no timeout of its own.
while :
do
pgsql_real_monitor warn
rc=$?
if [ $rc -eq $OCF_RUNNING_MASTER ]; then
break;
elif [ $rc -eq $OCF_ERR_GENERIC ]; then
ocf_exit_reason "Can't promote PostgreSQL."
return $rc
fi
sleep 1
ocf_log debug "PostgreSQL still hasn't promoted yet. Waiting..."
done
ocf_log info "PostgreSQL is promoted."
fi
# create replication slots on master after promotion
if use_replication_slot; then
create_replication_slots
if [ $? -eq $OCF_ERR_GENERIC ]; then
ocf_exit_reason "PostgreSQL can't create replication_slot."
return $OCF_ERR_GENERIC
fi
fi
# Record this node as holding the latest data and as the primary.
change_data_status "$NODENAME" "LATEST"
- exec_with_retry 0 $CRM_MASTER -v $PROMOTE_ME
+ exec_with_retry 0 ocf_promotion_score -v $PROMOTE_ME
change_pgsql_status "$NODENAME" "PRI"
return $OCF_SUCCESS
}
#pgsql_demote: Demote PostgreSQL
# Only valid in replication mode.  Sets the promotion score to
# $CAN_NOT_PROMOTE and stops the server with "pgsql_real_stop master" if it
# is running.
# Returns: $OCF_SUCCESS, $OCF_ERR_CONFIGURED (not in replication mode), or
# the status of pgsql_real_stop when that is not $OCF_SUCCESS.
pgsql_demote() {
local rc
if ! is_replication; then
ocf_exit_reason "Not in a replication mode."
return $OCF_ERR_CONFIGURED
fi
- exec_with_retry 0 $CRM_MASTER -v $CAN_NOT_PROMOTE
+ exec_with_retry 0 ocf_promotion_score -v $CAN_NOT_PROMOTE
delete_master_baseline
if ! pgsql_status; then
ocf_log info "PostgreSQL is already stopped on demote."
else
ocf_log info "Stopping PostgreSQL on demote."
pgsql_real_stop master
rc=$?
# A failed stop leaves the node status as "UNKNOWN".
if [ "$rc" -ne "$OCF_SUCCESS" ]; then
change_pgsql_status "$NODENAME" "UNKNOWN"
return $rc
fi
fi
change_pgsql_status "$NODENAME" "STOP"
return $OCF_SUCCESS
}
#pgsql_real_stop: Stop PostgreSQL
# Arguments: $1 - optional role; "slave" selects
#                 $OCF_RESKEY_stop_escalate_in_slave as the escalation time,
#                 "master" additionally allows $PGSQL_LOCK to be removed.
# Tries "pg_ctl stop -m fast" first, escalates to "-m immediate" when the
# server is still up after the escalation time, and then waits until the
# monitor reports $OCF_NOT_RUNNING.
# Returns: $OCF_SUCCESS
pgsql_real_stop() {
    local rc
    local count
    local stop_escalate

    if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
        attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -D
    fi

    # Already stopped
    pgsql_status || return $OCF_SUCCESS

    case "$1" in
        slave) stop_escalate="$OCF_RESKEY_stop_escalate_in_slave" ;;
        *)     stop_escalate=$OCF_RESKEY_stop_escalate ;;
    esac

    # Shorten the escalation time when it would not fit into the operation
    # timeout (which is given in milliseconds).
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        if [ "$stop_escalate" -ge $((OCF_RESKEY_CRM_meta_timeout/1000)) ]; then
            stop_escalate=$(((OCF_RESKEY_CRM_meta_timeout/1000) - 10))
            ocf_log info "stop_escalate(or stop_escalate_in_slave) time is adjusted to ${stop_escalate} based on the configured timeout."
        fi
    fi

    # Stop PostgreSQL, do not wait for clients to disconnect
    [ $stop_escalate -gt 0 ] && runasowner "$OCF_RESKEY_pgctl -W -D $OCF_RESKEY_pgdata stop -m fast"

    # Give the fast shutdown up to stop_escalate seconds.
    count=0
    while [ $count -lt $stop_escalate ] && pgsql_status; do
        count=$((count + 1))
        sleep 1
    done

    if pgsql_status; then
        #PostgreSQL is still up. Use another shutdown mode.
        ocf_log info "PostgreSQL failed to stop after ${stop_escalate}s using -m fast. Trying -m immediate..."
        runasowner "$OCF_RESKEY_pgctl -W -D $OCF_RESKEY_pgdata stop -m immediate"
    fi

    # Wait for the monitor to confirm the stop.  The check comes before the
    # log call so that no debug message is written once it is down.
    until
        pgsql_real_monitor
        rc=$?
        [ $rc -eq $OCF_NOT_RUNNING ]
    do
        sleep 1
        ocf_log debug "PostgreSQL still hasn't stopped yet. Waiting..."
    done

    # Remove postmaster.pid if it exists
    rm -f $PIDFILE

    if [ "$1" = "master" ] && [ "$OCF_RESKEY_CRM_meta_notify_slave_uname" = " " ]; then
        ocf_log info "Removing $PGSQL_LOCK."
        rm -f $PGSQL_LOCK
    fi
    return $OCF_SUCCESS
}
# pgsql_replication_stop: stop PostgreSQL when replication is in use.
# Sets the promotion score to $CAN_NOT_PROMOTE, stops the server with
# "pgsql_real_stop slave" if it is running, and marks this node "STOP".
# Returns: $OCF_SUCCESS, or the status of pgsql_real_stop when that is not
# $OCF_SUCCESS (the node is then marked "UNKNOWN").
pgsql_replication_stop() {
local rc
- exec_with_retry 5 $CRM_MASTER -v $CAN_NOT_PROMOTE
+ exec_with_retry 5 ocf_promotion_score -v $CAN_NOT_PROMOTE
delete_xlog_location
# Nothing to stop: only record the state.
if ! pgsql_status
then
ocf_log info "PostgreSQL is already stopped."
change_pgsql_status "$NODENAME" "STOP"
return $OCF_SUCCESS
fi
pgsql_real_stop slave
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
change_pgsql_status "$NODENAME" "UNKNOWN"
return $rc
fi
change_pgsql_status "$NODENAME" "STOP"
set_async_mode_all
delete_master_baseline
return $OCF_SUCCESS
}
#pgsql_stop: pgsql_real_stop() wrapper for replication
# Dispatches to pgsql_replication_stop when replication is configured and to
# pgsql_real_stop (without arguments) otherwise; returns the status of
# whichever ran.
pgsql_stop() {
    if is_replication; then
        pgsql_replication_stop
    else
        pgsql_real_stop
    fi
}
#
# pgsql_status: is PostgreSQL up?
#
# Reads the PID from the first line of $PIDFILE into the global PID and
# probes it with "kill -s 0" as the database owner.
# Returns: the status of that probe, or 1 when $PIDFILE is not a file.
#
pgsql_status() {
    # No PID file
    [ -f $PIDFILE ] || return 1

    PID=$(head -n 1 $PIDFILE)
    runasowner "kill -s 0 $PID >/dev/null 2>&1"
}
# pgsql_wal_receiver_status: publish the wal receiver state as the node
# attribute $PGSQL_WAL_RECEIVER_STATUS_ATTR.
# Arguments: $1 - status previously returned by pgsql_real_monitor
# The attribute becomes "normal" when a wal receiver process whose parent PID
# is the PID from $PIDFILE shows up in "ps -ef", "normal (master)" when $1
# is $OCF_RUNNING_MASTER, and "ERROR" otherwise.
# Returns: 0 for the two normal states, 1 for "ERROR".
pgsql_wal_receiver_status() {
    local PID
    local receiver_parent_pids
    local pgsql_real_monitor_status=$1
    local attr_value

    PID=$(head -n 1 $PIDFILE)
    # Third column of "ps -ef" (the parent PID) of each wal receiver process.
    receiver_parent_pids=$(ps -ef | tr -s " " | grep "[w]al\s*receiver" | cut -d " " -f 3)

    if echo "$receiver_parent_pids" | grep -q -w "$PID" ; then
        attr_value="normal"
    elif [ $pgsql_real_monitor_status -eq "$OCF_RUNNING_MASTER" ]; then
        attr_value="normal (master)"
    else
        attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR"
        ocf_log warn "wal receiver process is not running"
        return 1
    fi

    attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "$attr_value"
    return 0
}
#
# pgsql_real_monitor
#
# Arguments: $1 - log level used when reporting psql errors (default: err)
# Returns: $OCF_NOT_RUNNING when pgsql_status fails; $OCF_ERR_GENERIC when a
# query fails or $CHECK_MS_SQL yields an unexpected value; in replication
# mode $OCF_RUNNING_MASTER for a primary and $OCF_SUCCESS for a hot standby;
# otherwise $OCF_SUCCESS when the monitor SQL succeeds.
#
pgsql_real_monitor() {
local loglevel
local rc
local output
# Set the log level of the error message
loglevel=${1:-err}
if ! pgsql_status
then
ocf_log info "PostgreSQL is down"
return $OCF_NOT_RUNNING
fi
if is_replication; then
#Check replication state
output=`exec_sql "${CHECK_MS_SQL}"`
rc=$?
if [ $rc -ne 0 ]; then
report_psql_error $rc $loglevel "Can't get PostgreSQL recovery status."
return $OCF_ERR_GENERIC
fi
case "$output" in
f) ocf_log debug "PostgreSQL is running as a primary."
# With the default monitor SQL no further query is run for a primary.
# With a custom monitor_sql this arm falls through to the query below.
if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then
if ocf_is_probe; then
# Set initial score for primary.
- exec_with_retry 0 $CRM_MASTER -v $PROMOTE_ME
+ exec_with_retry 0 ocf_promotion_score -v $PROMOTE_ME
fi
return $OCF_RUNNING_MASTER
fi
;;
t) ocf_log debug "PostgreSQL is running as a hot standby."
if ocf_is_probe; then
# Set initial score for hot standby.
- exec_with_retry 0 $CRM_MASTER -v $CAN_NOT_PROMOTE
+ exec_with_retry 0 ocf_promotion_score -v $CAN_NOT_PROMOTE
fi
return $OCF_SUCCESS;;
*) ocf_exit_reason "$CHECK_MS_SQL output is $output"
return $OCF_ERR_GENERIC;;
esac
fi
# Reached when replication is off, or for a primary with a custom
# monitor_sql.  The SQL is escaped because it is embedded in single quotes.
# Note that the escaped text is written back to the global
# OCF_RESKEY_monitor_sql.
OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"`
runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \
-c '$OCF_RESKEY_monitor_sql'"
rc=$?
if [ $rc -ne 0 ]; then
report_psql_error $rc $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running."
return $OCF_ERR_GENERIC
fi
# In replication mode only the primary case gets this far.
if is_replication; then
return $OCF_RUNNING_MASTER
fi
return $OCF_SUCCESS
}
# pgsql_replication_monitor: replication-specific part of the monitor action.
# Arguments: $1 - status returned by pgsql_real_monitor
# On a master the node attributes are refreshed and control_slave_status is
# run.  On a slave the node is marked "HS:alone" when crm_mon shows no
# promoted instance of $RESOURCE_NAME, or when this node's data status is
# "DISCONNECT".
# Returns: $1, or $OCF_ERR_GENERIC when control_slave_status fails.
pgsql_replication_monitor() {
    local rc

    rc=$1

    # Statuses other than "running" and "running as master" pass through.
    if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne "$OCF_RUNNING_MASTER" ]; then
        return $rc
    fi

    # If I am Master
    if [ $rc -eq $OCF_RUNNING_MASTER ]; then
        change_data_status "$NODENAME" "LATEST"
        change_pgsql_status "$NODENAME" "PRI"
        control_slave_status || return $OCF_ERR_GENERIC
        if [ "$RE_CONTROL_SLAVE" = "true" ]; then
            sleep 2
            ocf_log info "re-controlling slave status."
            RE_CONTROL_SLAVE="none"
            control_slave_status || return $OCF_ERR_GENERIC
        fi
        return $rc
    fi

    # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor,
    # so I will get master node name using crm_mon -n
    if print_crm_mon | grep -q -i -E "<resource id=\"${RESOURCE_NAME}\" .* role=\"(Promoted|Master)\""; then
        # A master exists: check whether this node is marked disconnected.
        output=$(exec_with_retry 0 $CRM_ATTR_FOREVER -N "$NODENAME" \
            -n "$PGSQL_DATA_STATUS_ATTR" -G -q)
        if [ "$output" = "DISCONNECT" ]; then
            change_pgsql_status "$NODENAME" "HS:alone"
        fi
    else
        # If I am Slave and Master is not exist
        ocf_log info "Master does not exist."
        change_pgsql_status "$NODENAME" "HS:alone"
        if have_master_right; then
            rm -f ${XLOG_NOTE_FILE}.*
        fi
    fi
    return $rc
}
# pgsql_monitor: monitor entry point.
# Runs pgsql_real_monitor, optionally reports the WAL receiver status
# (check_wal_receiver parameter), and in replication mode hands the result
# to pgsql_replication_monitor, whose return code is then used.
pgsql_monitor() {
    local rc

    pgsql_real_monitor
    rc=$?

    if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
        pgsql_wal_receiver_status $rc
    fi

    if is_replication; then
        pgsql_replication_monitor $rc
        return $?
    fi
    return $rc
}
# pgsql_post_demote: handler for the "post demote" notification.
# Sets the global DEMOTE_NODE to the (lower-cased) first node of
# $OCF_RESKEY_CRM_meta_notify_demote_uname. When another node was demoted and
# this node is not listed among the masters, the current location is stored
# as master baseline and the status attribute is set to "HS:alone".
# Always returns $OCF_SUCCESS.
pgsql_post_demote() {
    local node
    local i_am_master

    # First entry of the demote list; plain word splitting avoids relying on
    # sed interpreting "\n" in the replacement text.
    DEMOTE_NODE=""
    for node in $OCF_RESKEY_CRM_meta_notify_demote_uname; do
        DEMOTE_NODE=`printf '%s\n' "$node" | tr 'A-Z' 'a-z'`
        break
    done
    ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE"

    if [ "$DEMOTE_NODE" != "$NODENAME" ]; then
        # Compare node names exactly: a substring/regex match would treat
        # "pg1" as a master when only "pg10" is one, and grep without -q
        # would also leak the matched line to stdout.
        i_am_master=false
        for node in `printf '%s\n' "$OCF_RESKEY_CRM_meta_notify_master_uname" | tr 'A-Z' 'a-z'`; do
            if [ "$node" = "$NODENAME" ]; then
                i_am_master=true
                break
            fi
        done
        if [ "$i_am_master" = "false" ]; then
            show_master_baseline
            change_pgsql_status "$NODENAME" "HS:alone"
        fi
    fi
    return $OCF_SUCCESS
}
# pgsql_pre_promote: handler for the "pre promote" notification.
# Only acts with three or more nodes in sync mode when another node is about
# to be promoted: if this node's master baseline is newer than the one of the
# node being promoted, the resource is banned from this node and
# $OCF_ERR_GENERIC is returned. Returns $OCF_SUCCESS otherwise.
# Sets the global PROMOTE_NODE.
pgsql_pre_promote() {
    local master_baseline
    local my_master_baseline
    local cmp_location
    local number_of_nodes

    # If my data is newer than new master's one, I fail my resource.
    PROMOTE_NODE=$(echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \
                   sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]')
    number_of_nodes=$(echo $NODE_LIST | wc -w)

    if [ $number_of_nodes -ge 3 -a \
         "$OCF_RESKEY_rep_mode" = "sync" -a \
         "$PROMOTE_NODE" != "$NODENAME" ]; then
        :
    else
        return $OCF_SUCCESS
    fi

    # No baseline recorded for the new master: nothing to compare against.
    master_baseline=$($CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n \
                      "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null) || return $OCF_SUCCESS
    my_master_baseline=$($CRM_ATTR_REBOOT -N "$NODENAME" -n \
                         "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null)

    # get older location
    cmp_location=$(printf "$master_baseline\n$my_master_baseline\n" |\
                   sort | head -1)
    if [ "$cmp_location" = "$my_master_baseline" ]; then
        return $OCF_SUCCESS
    fi

    # We used to set the failcount to INF for the resource here in
    # order to move the master to the other node. However, setting
    # the failcount should be done only by the CRM and so this use
    # got deprecated in pacemaker version 1.1.17. Now we do the
    # "ban resource from the node".
    ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline"
    exec_with_retry 0 $CRM_RESOURCE -B -r $OCF_RESOURCE_INSTANCE -N $NODENAME -Q
    return $OCF_ERR_GENERIC
}
# pgsql_notify: dispatcher for the "notify" action.
# Does nothing outside replication mode. Otherwise the handler is selected
# from the notification type (pre/post) and operation:
#   pre  promote    -> pgsql_pre_promote (its return code is returned)
#   post promote    -> drop xlog location; drop master baseline unless this
#                      node is the promoted one
#   post demote     -> pgsql_post_demote (its return code is returned)
#   post start|stop -> on the master, re-evaluate the slaves
# Everything else returns $OCF_SUCCESS.
pgsql_notify() {
    local type="${OCF_RESKEY_CRM_meta_notify_type}"
    local op="${OCF_RESKEY_CRM_meta_notify_operation}"
    local rc

    is_replication || return $OCF_SUCCESS

    ocf_log debug "notify: ${type} for ${op}"
    case "${type}:${op}" in
        pre:promote)
            pgsql_pre_promote
            return $?
            ;;
        post:promote)
            delete_xlog_location
            PROMOTE_NODE=$(echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \
                           sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]')
            if [ "$PROMOTE_NODE" != "$NODENAME" ]; then
                delete_master_baseline
            fi
            return $OCF_SUCCESS
            ;;
        post:demote)
            pgsql_post_demote
            return $?
            ;;
        post:start|post:stop)
            MASTER_NODE=$(echo $OCF_RESKEY_CRM_meta_notify_master_uname | \
                          sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]')
            if [ "$NODENAME" = "$MASTER_NODE" ]; then
                control_slave_status
            fi
            return $OCF_SUCCESS
            ;;
    esac
    return $OCF_SUCCESS
}
# control_slave_status: run on the master to publish the state of every other
# node in $NODE_LIST.
# The replication state is read with $CHECK_REPLICATION_STATE_SQL (rows of the
# form "node|STATE|SYNC_STATE"); nodes without a row count as "DISCONNECT".
# For each node the data-status attribute, the master score and (except for
# DISCONNECT) the status attribute are updated; in sync mode the
# synchronous/asynchronous configuration is switched as needed.
# Returns 0, or 1 when the replication state cannot be queried.
control_slave_status() {
    local rc
    local data_status
    local target
    local all_data_status
    local tmp_data_status
    local number_of_nodes

    all_data_status=$(exec_sql "${CHECK_REPLICATION_STATE_SQL}")
    rc=$?
    if [ $rc -ne 0 ]; then
        report_psql_error $rc err "Can't get PostgreSQL replication status."
        return 1
    fi
    if [ -n "$all_data_status" ]; then
        all_data_status=$(echo $all_data_status | sed "s/\n/ /g")
    fi

    number_of_nodes=$(echo $NODE_LIST | wc -w)

    for target in $NODE_LIST; do
        [ "$target" = "$NODENAME" ] && continue

        # Look up this node's row; keep DISCONNECT when there is none.
        data_status="DISCONNECT"
        for tmp_data_status in $all_data_status; do
            if echo $tmp_data_status | grep -q "^${target}|"; then
                data_status=$(echo $tmp_data_status | cut -d "|" -f 2,3)
                ocf_log debug "node_name and data_status is $tmp_data_status"
                break
            fi
        done

        # Every state publishes the data status first.
        change_data_status "$target" "$data_status"

        case "$data_status" in
            "STREAMING|SYNC")
                change_master_score "$target" "$CAN_PROMOTE"
                change_pgsql_status "$target" "HS:sync"
                ;;
            "STREAMING|ASYNC")
                if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
                    change_master_score "$target" "$CAN_NOT_PROMOTE"
                    set_sync_mode "$target"
                elif [ $number_of_nodes -le 2 ]; then
                    change_master_score "$target" "$CAN_PROMOTE"
                else
                    # I can't determine which slave's data is newest in async mode.
                    change_master_score "$target" "$CAN_NOT_PROMOTE"
                fi
                change_pgsql_status "$target" "HS:async"
                ;;
            "STREAMING|POTENTIAL")
                change_master_score "$target" "$CAN_NOT_PROMOTE"
                change_pgsql_status "$target" "HS:potential"
                ;;
            *)
                # DISCONNECT and every other (not yet streaming) state.
                change_master_score "$target" "$CAN_NOT_PROMOTE"
                if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
                    set_async_mode "$target"
                fi
                # A disconnected node keeps its current status attribute.
                if [ "$data_status" != "DISCONNECT" ]; then
                    change_pgsql_status "$target" "HS:connected"
                fi
                ;;
        esac
    done
    return 0
}
# have_master_right: decide whether this node may become master when no
# master exists.
# Returns 0 (after requesting promotion) when this node holds the newest
# xlog location and the locations of all nodes were unchanged for
# $OCF_RESKEY_xlog_check_count consecutive checks; returns 1 otherwise.
# Exits with $OCF_ERR_GENERIC when the own xlog location cannot be published.
# NOTE: this span is part of a diff; the lines starting with "-" / "+" are the
# removed / added variants of the same statement.
have_master_right() {
local old
local new
local output
local data_status
local node
local mylocation
local count
local newestXlog
local oldfile
local newfile
ocf_log debug "Checking if I have a master right."
# A node whose recorded data status is out-of-date must not be promoted.
# An empty (never set) data status is accepted.
data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \
"$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null`
if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \
"$data_status" != "LATEST" ]; then
ocf_log warn "My data is out-of-date. status=$data_status"
return 1
fi
else
if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \
"$data_status" != "STREAMING|ASYNC" -a \
"$data_status" != "LATEST" ]; then
ocf_log warn "My data is out-of-date. status=$data_status"
return 1
fi
fi
ocf_log info "My data status=$data_status."
# Publish my own location so that the other nodes can compare against it.
show_xlog_location
if [ $? -ne 0 ]; then
ocf_exit_reason "Failed to show my xlog location."
exit $OCF_ERR_GENERIC
fi
# Find the highest consecutive note file number written so far
# (0 when ${XLOG_NOTE_FILE}.1 does not exist).
old=0
for count in `seq $OCF_RESKEY_xlog_check_count`; do
if [ -f ${XLOG_NOTE_FILE}.$count ]; then
old=$count
continue
fi
break
done
new=`expr $old + 1`
# get xlog locations of all nodes
for node in ${NODE_LIST}; do
output=`$CRM_ATTR_REBOOT -N "$node" -n \
"$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null`
if [ $? -ne 0 ]; then
ocf_log warn "Can't get $node xlog location."
continue
else
ocf_log info "$node xlog location : $output"
echo "$node $output" >> ${XLOG_NOTE_FILE}.${new}
if [ "$node" = "$NODENAME" ]; then
mylocation=$output
fi
fi
done
# Compare the previous snapshot with the one just written.
oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null`
newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null`
if [ "$oldfile" != "$newfile" ]; then
# reset counter
rm -f ${XLOG_NOTE_FILE}.*
printf "$newfile\n" > ${XLOG_NOTE_FILE}.0
return 1
fi
# The snapshot was identical often enough: pick the newest location.
if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then
newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \
head -1 | cut -d " " -f 2`
if [ "$newestXlog" = "$mylocation" ]; then
ocf_log info "I have a master right."
- exec_with_retry 5 $CRM_MASTER -v $PROMOTE_ME
+ exec_with_retry 5 ocf_promotion_score -v $PROMOTE_ME
return 0
fi
change_data_status "$NODENAME" "DISCONNECT"
ocf_log info "I don't have correct master data."
# reset counter
rm -f ${XLOG_NOTE_FILE}.*
printf "$newfile\n" > ${XLOG_NOTE_FILE}.0
fi
return 1
}
# is_replication: succeed (0) unless rep_mode is "none" or "slave".
is_replication() {
    case "$OCF_RESKEY_rep_mode" in
        none|slave)
            return 1
            ;;
    esac
    return 0
}
# use_replication_slot: succeed (0) when replication_slot_name is set.
use_replication_slot() {
    [ -z "$OCF_RESKEY_replication_slot_name" ] && return 1
    return 0
}
# create_replication_slot_name: print the space separated slot names for all
# nodes in $NODE_LIST except this one.
# The Master node should have some slots equal to the number of Slaves, and
# the Slave nodes connect to their dedicated slot on the Master.
# To keep the slot names unique, "_<node>" is appended to
# $OCF_RESKEY_replication_slot_name.
create_replication_slot_name() {
    local number_of_nodes=0
    local target
    local replication_slot_name
    local replication_slot_name_list_tmp
    local replication_slot_name_list

    if [ -n "$NODE_LIST" ]; then
        number_of_nodes=$(echo $NODE_LIST | wc -w)
    fi

    replication_slot_name_list=""
    if [ $number_of_nodes -gt 0 ]; then
        for target in $NODE_LIST; do
            [ "$target" = "$NODENAME" ] && continue
            # Upper case letters, "-" and "." are not allowed in a slot name:
            # convert upper case to lower case, and "-" and "." to "_".
            target=$(echo "$target" | tr 'A-Z.-' 'a-z__')
            replication_slot_name="$OCF_RESKEY_replication_slot_name"_"$target"
            replication_slot_name_list="$replication_slot_name_list $replication_slot_name"
        done
    fi

    echo $replication_slot_name_list
}
# delete_replication_slot: drop the replication slot named by arg1.
# The SQL output is left in the variable "output" (not declared local here,
# so callers can report it) and the exit status of exec_sql is returned.
delete_replication_slot(){
    DELETE_REPLICATION_SLOT_sql="SELECT pg_drop_replication_slot('$1');"
    output=$(exec_sql "$DELETE_REPLICATION_SLOT_sql")
}
# delete_replication_slots: drop every replication slot that belongs to the
# other nodes (names from create_replication_slot_name) and currently exists.
# Returns 0 on success and $OCF_ERR_GENERIC as soon as a slot cannot be
# dropped (the SQL output is reported as exit reason).
delete_replication_slots() {
    local replication_slot_name_list
    local replication_slot_name

    replication_slot_name_list=`create_replication_slot_name`
    ocf_log debug "replication slot names are $replication_slot_name_list."

    for replication_slot_name in $replication_slot_name_list
    do
        # Quote the substitution: when the check prints nothing (e.g. the
        # query failed) an unquoted expansion makes "[" fail with a syntax
        # error instead of simply comparing unequal.
        if [ "`check_replication_slot $replication_slot_name`" = "1" ]; then
            delete_replication_slot $replication_slot_name
            if [ $? -eq 0 ]; then
                ocf_log info "PostgreSQL delete the replication slot($replication_slot_name)."
            else
                # "output" is set by delete_replication_slot.
                ocf_exit_reason "$output"
                return $OCF_ERR_GENERIC
            fi
        fi
    done
    return 0
}
# create_replication_slots: (re)create one physical replication slot per
# other node (names from create_replication_slot_name).
# A slot that already exists is dropped first so that it starts out fresh.
# Returns 0 on success and $OCF_ERR_GENERIC as soon as dropping or creating a
# slot fails (the SQL output is reported as exit reason).
create_replication_slots() {
    local replication_slot_name
    local replication_slot_name_list
    local output
    local rc
    local CREATE_REPLICATION_SLOT_sql
    # delete_replication_slot assigns this variable (and "output") without
    # declaring them; the locals here keep those assignments in this scope.
    local DELETE_REPLICATION_SLOT_sql

    replication_slot_name_list=`create_replication_slot_name`
    ocf_log debug "replication slot names are $replication_slot_name_list."

    for replication_slot_name in $replication_slot_name_list
    do
        # If the same name slot is already exists, initialize(delete and create) the slot.
        # The substitution is quoted so that an empty result (failed query)
        # compares unequal instead of making "[" fail with a syntax error.
        if [ "`check_replication_slot $replication_slot_name`" = "1" ]; then
            delete_replication_slot $replication_slot_name
            if [ $? -eq 0 ]; then
                ocf_log info "PostgreSQL delete the replication slot($replication_slot_name)."
            else
                ocf_exit_reason "$output"
                return $OCF_ERR_GENERIC
            fi
        fi

        CREATE_REPLICATION_SLOT_sql="SELECT pg_create_physical_replication_slot('$replication_slot_name');"
        output=`exec_sql "$CREATE_REPLICATION_SLOT_sql"`
        rc=$?
        if [ $rc -eq 0 ]; then
            ocf_log info "PostgreSQL creates the replication slot($replication_slot_name)."
        else
            ocf_exit_reason "$output"
            return $OCF_ERR_GENERIC
        fi
    done
    return 0
}
# check_replication_slot: print how many replication slots are named arg1.
# The count comes from pg_replication_slots; callers compare the printed
# value with "1" to find out whether the slot exists.
check_replication_slot(){
    local replication_slot_name
    local output
    local CHECK_REPLICATION_SLOT_sql

    replication_slot_name=$1
    CHECK_REPLICATION_SLOT_sql="SELECT count(*) FROM pg_replication_slots WHERE slot_name = '$replication_slot_name'"
    output=$(exec_sql "$CHECK_REPLICATION_SLOT_sql")
    echo "$output"
}
# get_my_location: print the newer of this node's replay and receive
# locations as a 16 character string (both halves of "X/Y" padded with zeros
# to 8 characters, so that the values can be compared as text).
# On postgreSQL 10 or later, "location" means "lsn".
# Returns 1 when the locations cannot be queried, 0 otherwise.
get_my_location() {
    local rc
    local output
    local replay_loc
    local receive_loc
    local output1
    local output2
    local log1
    local log2
    local newer_location

    output=$(exec_sql "$CHECK_XLOG_LOC_SQL")
    rc=$?
    if [ $rc -ne 0 ]; then
        report_psql_error $rc err "Can't get my xlog location."
        return 1
    fi

    # The query result has the form "<replay>|<receive>".
    replay_loc=$(echo $output | cut -d "|" -f 1)
    receive_loc=$(echo $output | cut -d "|" -f 2)

    # Normalize the replay location.
    output1=$(echo "$replay_loc" | cut -d "/" -f 1)
    output2=$(echo "$replay_loc" | cut -d "/" -f 2)
    log1=$(printf "%08s\n" $output1 | sed "s/ /0/g")
    log2=$(printf "%08s\n" $output2 | sed "s/ /0/g")
    replay_loc="${log1}${log2}"

    # Normalize the receive location the same way.
    output1=$(echo "$receive_loc" | cut -d "/" -f 1)
    output2=$(echo "$receive_loc" | cut -d "/" -f 2)
    log1=$(printf "%08s\n" $output1 | sed "s/ /0/g")
    log2=$(printf "%08s\n" $output2 | sed "s/ /0/g")
    receive_loc="${log1}${log2}"

    newer_location=$(printf "$replay_loc\n$receive_loc" | sort -r | head -1)
    echo "$newer_location"
    return 0
}
# show_xlog_location: store this node's current location in the
# $PGSQL_XLOG_LOC_NAME node attribute.
# On postgreSQL 10 or later, "xlog_location" means "wal_lsn".
# Returns 1 when the location cannot be determined, otherwise the result of
# writing the attribute.
show_xlog_location() {
    local location

    if ! location=$(get_my_location); then
        return 1
    fi
    exec_with_retry 0 $CRM_ATTR_REBOOT \
        -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location"
}
# delete_xlog_location: delete this node's $PGSQL_XLOG_LOC_NAME attribute
# (up to 5 attempts).
# On postgreSQL 10 or later, "xlog_location" means "wal_lsn".
delete_xlog_location() {
    exec_with_retry 5 $CRM_ATTR_REBOOT \
        -N "$NODENAME" \
        -n "$PGSQL_XLOG_LOC_NAME" \
        -D
}
# show_master_baseline: log this node's current location and store it in the
# $PGSQL_MASTER_BASELINE node attribute.
# The return code is the result of writing the attribute.
show_master_baseline() {
    local rc
    local location

    location=$(get_my_location)
    ocf_log info "My master baseline : $location."
    exec_with_retry 0 $CRM_ATTR_REBOOT \
        -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -v "$location"
}
# delete_master_baseline: delete this node's $PGSQL_MASTER_BASELINE attribute
# (up to 5 attempts).
delete_master_baseline() {
    exec_with_retry 5 $CRM_ATTR_REBOOT \
        -N "$NODENAME" \
        -n "$PGSQL_MASTER_BASELINE" \
        -D
}
# set_async_mode_all: in sync mode, rewrite $REP_MODE_CONF with an empty
# synchronous_standby_names setting.
# Does nothing (returns 0) unless rep_mode is "sync"; returns 1 when the
# file cannot be written.
set_async_mode_all() {
    if [ "$OCF_RESKEY_rep_mode" != "sync" ]; then
        return 0
    fi

    ocf_log info "Set all nodes into async mode."
    if runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\""; then
        return 0
    fi
    ocf_exit_reason "Can't set all nodes into async mode."
    return 1
}
# set_async_mode: take node arg1 out of synchronous replication.
# When arg1 is mentioned in $REP_MODE_CONF the file is rewritten with an
# empty synchronous_standby_names setting and the configuration is reloaded
# (the reload result is returned); otherwise nothing is changed and 0 is
# returned.
set_async_mode() {
    if ! cat $REP_MODE_CONF | grep -q -E "(\"$1\")|([,' ]$1[,' ])"; then
        ocf_log debug "$1 is already in async mode."
        return 0
    fi

    ocf_log info "Setup $1 into async mode."
    runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\""
    exec_with_retry 0 reload_conf
}
# set_sync_mode: make node arg1 the synchronous standby.
# Nothing is changed when $REP_MODE_CONF already names a synchronous node.
# Otherwise the file is rewritten with arg1, RE_CONTROL_SLAVE is switched
# from "false" to "true" and the configuration is reloaded.
set_sync_mode() {
    local sync_node_in_conf

    # Text between the first pair of single quotes of the current setting.
    sync_node_in_conf=$(cat $REP_MODE_CONF | cut -d "'" -f 2)
    if [ -n "$sync_node_in_conf" ]; then
        ocf_log debug "$sync_node_in_conf is already sync mode."
        return
    fi

    ocf_log info "Setup $1 into sync mode."
    runasowner -q err "echo \"synchronous_standby_names = '\\\"$1\\\"'\" > \"$REP_MODE_CONF\""
    [ "$RE_CONTROL_SLAVE" = "false" ] && RE_CONTROL_SLAVE="true"
    exec_with_retry 0 reload_conf
}
# reload_conf: ask PostgreSQL to re-read its configuration ("pg_ctl reload").
# Returns 0 on success, 1 on failure.
reload_conf() {
    # Invoke pg_ctl
    if runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload"; then
        ocf_log info "Reload configuration file."
        return 0
    fi
    ocf_exit_reason "Can't reload configuration file."
    return 1
}
# user_recovery_conf: print the optional recovery settings.
# archive_cleanup_command and recovery_end_command are printed only when
# defined by the user; primary_slot_name is printed when replication slots
# are used (node name lower-cased, "." and "-" replaced by "_").
user_recovery_conf() {
    local nodename_tmp

    [ -z "$OCF_RESKEY_archive_cleanup_command" ] ||
        echo "archive_cleanup_command = '${OCF_RESKEY_archive_cleanup_command}'"

    [ -z "$OCF_RESKEY_recovery_end_command" ] ||
        echo "recovery_end_command = '${OCF_RESKEY_recovery_end_command}'"

    if use_replication_slot; then
        nodename_tmp=$(echo "$NODENAME" | tr 'A-Z.-' 'a-z__')
        echo "primary_slot_name = '${OCF_RESKEY_replication_slot_name}_$nodename_tmp'"
    fi
}
# make_recovery_conf: write the recovery settings to $RECOVERY_CONF.
# The file is created as the database owner and then filled with
# primary_conninfo, restore_command and recovery_target_timeline. Depending
# on $USE_STANDBY_SIGNAL either a standby.signal file is created in the data
# directory or "standby_mode = 'on'" is appended. The optional settings from
# user_recovery_conf come last.
# Returns 0 on success, 1 when one of the files cannot be created.
make_recovery_conf() {
    if ! runasowner "touch $RECOVERY_CONF"; then
        ocf_exit_reason "Can't create recovery.conf."
        return 1
    fi

    {
        printf '%s\n' "primary_conninfo = 'host=${OCF_RESKEY_master_ip} port=${OCF_RESKEY_pgport} user=${OCF_RESKEY_repuser} application_name=${NODENAME} ${OCF_RESKEY_primary_conninfo_opt}'"
        printf '%s\n' "restore_command = '${OCF_RESKEY_restore_command}'"
        printf '%s\n' "recovery_target_timeline = 'latest'"
    } > $RECOVERY_CONF

    # USE_STANDBY_SIGNAL holds "true" or "false" and is run as a command.
    if "${USE_STANDBY_SIGNAL}"; then
        # create a standby.signal to start standby server.
        if ! runasowner "touch ${OCF_RESKEY_pgdata}/standby.signal"; then
            ocf_exit_reason "Can't create ${OCF_RESKEY_pgdata}/standby.signal."
            return 1
        fi
    else
        printf '%s\n' "standby_mode = 'on'" >> $RECOVERY_CONF
    fi

    user_recovery_conf >> $RECOVERY_CONF
    ocf_log debug "Created recovery.conf. host=${OCF_RESKEY_master_ip}, user=${OCF_RESKEY_repuser}"
    return 0
}
# change pgsql-status.
# arg1:node, arg2: value
# The $PGSQL_STATUS_ATTR attribute of an online node is updated when it
# differs from arg2. A status of "STOP" or "UNKNOWN" may only be overwritten
# by the node itself. Always returns 0.
change_pgsql_status() {
    local output

    is_node_online $1 || return 0

    output=$($CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -G -q 2>/dev/null)
    # Nothing to do when the attribute already has the requested value.
    [ "$output" = "$2" ] && return 0

    case "$output" in
        STOP|UNKNOWN)
            # If slave's disk is broken, RA cannot read PID file
            # and misjudges the PostgreSQL as down while it is running.
            # It causes overwriting of pgsql-status by Master because replication is still connected.
            if [ "$1" != "$NODENAME" ]; then
                ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited."
                return 0
            fi
            ;;
    esac

    ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2."
    exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2"
    return 0
}
# change pgsql-data-status.
# arg1:node, arg2: value
# For a node known to the cluster the $PGSQL_DATA_STATUS_ATTR attribute is
# written and read back until it holds arg2. Always returns 0.
change_data_status() {
    local output

    node_exist $1 || return 0

    while :
    do
        output=$($CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null)
        # Stop as soon as the attribute shows the requested value.
        [ "$output" = "$2" ] && break
        ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2."
        exec_with_retry 0 exec_with_timeout 0 "$CRM_ATTR_FOREVER" -N $1 -n $PGSQL_DATA_STATUS_ATTR -v "$2"
    done
    return 0
}
# set master-score
# arg1:node, arg2: score, arg3: resource
# The "master-<resource>" attribute of the node is rewritten only when it
# already has a value and that value differs from arg2. Always returns 0.
set_master_score() {
    local current_score

    current_score=$($CRM_ATTR_REBOOT -N "$1" -n "master-$3" -G -q 2>/dev/null)

    # No score recorded yet: leave it alone.
    [ -z "$current_score" ] && return 0
    # Score already as requested.
    [ "$current_score" = "$2" ] && return 0

    ocf_log info "Changing $3 master score on $1 : $current_score->$2."
    exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "master-$3" -v "$2"
    return 0
}
# change master-score
# arg1:node, arg2: score
# For an online node the score is set through set_master_score: for every
# clone instance except our own when the resource instance carries an
# instance number ("name:N"), otherwise for the plain resource name.
# Returns 0, or 1 when set_master_score fails.
change_master_score() {
    local instance

    is_node_online $1 || return 0

    case "$OCF_RESOURCE_INSTANCE" in
        *:*)
            # If Pacemaker version is 1.0.x
            instance=0
            until [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; do
                # Our own instance is skipped.
                if [ "${RESOURCE_NAME}:${instance}" != "$OCF_RESOURCE_INSTANCE" ]; then
                    set_master_score $1 $2 "${RESOURCE_NAME}:${instance}" || return 1
                fi
                instance=$((instance + 1))
            done
            ;;
        *)
            # If globally-unique=false and Pacemaker version is 1.1.8 or higher
            # Master/Slave resource has no instance number
            set_master_score $1 $2 ${RESOURCE_NAME} || return 1
            ;;
    esac
    return 0
}
# report_psql_error: log a failed psql call.
# arg1: psql exit code, arg2: log level (default "err"), arg3: message
# The message is logged together with the exit code; for the exit codes 1, 2
# and 3 an explanation is added (as exit reason for 1 and 3, as log message
# for 2).
report_psql_error()
{
    local rc
    local loglevel
    local message

    rc=$1
    loglevel=${2:-err}
    message="$3"

    ocf_log $loglevel "$message rc=$rc"

    if [ $rc -eq 1 ]; then
        ocf_exit_reason "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command."
        return
    fi
    if [ $rc -eq 2 ]; then
        ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command."
        return
    fi
    if [ $rc -eq 3 ]; then
        ocf_exit_reason "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command."
    fi
}
#
# timeout management function
# arg1 timeout >= 0 (if arg1 is 0, OCF_RESKEY_crm_attr_timeout is used.)
# arg2 : command
# arg3 : command's args
# The command runs in the background and is polled once per second. When the
# timeout is used up it is killed (signal 9) and 1 is returned; otherwise the
# command's own exit status is returned.
exec_with_timeout() {
    local func_pid
    local count=$OCF_RESKEY_crm_attr_timeout
    local rc

    [ "$1" -ne 0 ] && count=$1
    shift

    # $* is unquoted on purpose: arguments are split into words again.
    $* &
    func_pid=$!

    # Give short-lived commands a chance to finish before polling.
    sleep .1
    while kill -s 0 $func_pid >/dev/null 2>&1
    do
        sleep 1
        count=`expr $count - 1`
        if [ $count -le 0 ]; then
            ocf_exit_reason "\"$*\" (pid=$func_pid) timed out."
            kill -s 9 $func_pid >/dev/null 2>&1
            return 1
        fi
        ocf_log info "Waiting($count). \"$*\" (pid=$func_pid)."
    done

    wait $func_pid
}
# retry command when command doesn't return 0
# arg1 : count >= 0 (if arg1 is 0, it retries command in infinitum(1day))
# arg2..argN : command and args
# The command is run until it succeeds, sleeping one second after every
# failure. On success its output is printed verbatim (trailing newlines are
# removed) and 0 is returned; when all attempts failed the exit status of the
# last attempt is returned.
exec_with_retry() {
    local count="86400"
    local output
    local rc

    if [ "$1" -ne 0 ]; then
        count=$1
    fi
    shift

    while [ $count -gt 0 ]; do
        # $* is unquoted on purpose: some callers pass a command together
        # with its options as a single argument, which must be split again.
        output=`$*`
        rc=$?
        if [ $rc -ne 0 ]; then
            ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"."
            count=`expr $count - 1`
            sleep 1
        else
            # Never use the command output as printf format string: "%" and
            # backslash sequences in the output would be interpreted.
            printf '%s' "${output}"
            return 0
        fi
    done

    ocf_exit_reason "giving up executing \"$*\""
    return $rc
}
# is_node_online: succeed when the crm_mon output lists node arg1 with
# online="true" (case-insensitive match).
is_node_online() {
    local node_pattern

    node_pattern="<node name=\"$1\" .* online=\"true\""
    print_crm_mon | grep -q -i "$node_pattern"
}
# node_exist: succeed when the crm_mon output lists node arg1 at all,
# whatever its online state (case-insensitive match).
node_exist() {
    local node_pattern

    node_pattern="<node name=\"$1\" .* online"
    print_crm_mon | grep -q -i "$node_pattern"
}
# check_binary2: check that command arg1 is available.
# Returns 0 when it is, otherwise sets an exit reason and returns 1.
check_binary2() {
    have_binary "$1" && return 0

    ocf_exit_reason "Setup problem: couldn't find command: $1"
    return 1
}
# check_config: check that configuration file arg1 exists.
# Returns 0 when it is a regular file, 1 when it is missing during a probe
# (only logged) and 2 when it is missing otherwise (exit reason set).
check_config() {
    local rc=0

    [ -f "$1" ] && return $rc

    if ocf_is_probe; then
        ocf_log info "Configuration file is $1 not readable during probe."
        rc=1
    else
        ocf_exit_reason "Configuration file $1 doesn't exist"
        rc=2
    fi
    return $rc
}
# Validate most critical parameters
# Besides validating, this function initializes the globals used by the
# replication code (attribute names, helper commands, SQL statements, file
# locations) and may add/remove include directives in $OCF_RESKEY_config.
# Returns $OCF_SUCCESS or one of the OCF_ERR_* codes.
# ocf_version_cmp results as used below: 0 = older, 1 = equal, 2 = newer,
# 3 = not comparable.
# NOTE: this span is part of a diff; the line starting with "-" is a removed
# line.
pgsql_validate_all() {
local version
local check_config_rc
local rep_mode_string
local recovery_conf_string
local socket_directories
local rc
version=`cat $OCF_RESKEY_pgdata/PG_VERSION`
if ! check_binary2 "$OCF_RESKEY_pgctl" ||
! check_binary2 "$OCF_RESKEY_psql"; then
return $OCF_ERR_INSTALLED
fi
# check_config: 0 = file exists, 1 = missing during probe, 2 = missing.
check_config "$OCF_RESKEY_config"
check_config_rc=$?
[ $check_config_rc -eq 2 ] && return $OCF_ERR_INSTALLED
if [ $check_config_rc -eq 0 ]; then
ocf_version_cmp "$version" "9.3"
if [ $? -eq 0 ]; then
# Older than 9.3: default socketdir to unix_socket_directory.
: ${OCF_RESKEY_socketdir=`get_pgsql_param unix_socket_directory`}
else
# unix_socket_directories is used by PostgreSQL 9.3 or higher.
socket_directories=`get_pgsql_param unix_socket_directories`
if [ -n "$socket_directories" ]; then
# unix_socket_directories may have multiple socket directories and the pgsql RA can not know which directory is used for psql command.
# Therefore, the user must set OCF_RESKEY_socketdir explicitly.
if [ -z "$OCF_RESKEY_socketdir" ]; then
ocf_exit_reason "In PostgreSQL 9.3 or higher, socketdir can't be empty if you define unix_socket_directories in the postgresql.conf."
return $OCF_ERR_CONFIGURED
fi
fi
fi
fi
# The database owner must be a known user.
getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1
if [ ! $? -eq 0 ]; then
ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist";
return $OCF_ERR_INSTALLED;
fi
if ocf_is_probe; then
ocf_log info "Don't check $OCF_RESKEY_pgdata during probe"
else
if ! runasowner "test -w $OCF_RESKEY_pgdata"; then
ocf_exit_reason "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba"
return $OCF_ERR_PERM;
fi
fi
# monitor_user and monitor_password must be given together.
if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ]
then
ocf_exit_reason "monitor password can't be empty"
return $OCF_ERR_CONFIGURED
fi
if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ]
then
ocf_exit_reason "monitor_user has to be set if monitor_password is set"
return $OCF_ERR_CONFIGURED
fi
# Checks shared by every mode that uses replication (sync, async, slave).
if is_replication || [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then
ocf_exit_reason "Replication mode needs PostgreSQL 9.1 or higher."
return $OCF_ERR_INSTALLED
fi
ocf_version_cmp "$version" "12"
rc=$?
if [ $rc -eq 1 ]||[ $rc -eq 2 ]; then
# change the standby method for PostgreSQL 12 or later.
USE_STANDBY_SIGNAL=true
# change the path to recovery.conf because it cause PostgreSQL start error.
RECOVERY_CONF=${OCF_RESKEY_tmpdir}/recovery.conf
if [ $check_config_rc -eq 0 ]; then
# adding recovery parameters to postgresql.conf.
recovery_conf_string="include '$RECOVERY_CONF' # added by pgsql RA"
if ! grep -q "^[[:space:]]*$recovery_conf_string" $OCF_RESKEY_config; then
ocf_log info "adding include directive $recovery_conf_string into $OCF_RESKEY_config"
echo "$recovery_conf_string" >> $OCF_RESKEY_config
fi
fi
fi
if [ ! -n "$OCF_RESKEY_master_ip" ]; then
ocf_exit_reason "master_ip can't be empty."
return $OCF_ERR_CONFIGURED
fi
fi
# Setup and checks for the Master/Slave modes (rep_mode sync or async).
if is_replication; then
REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf
PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock
XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note
- CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot"
CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever"
CRM_RESOURCE="${HA_SBIN_DIR}/crm_resource"
# Promotion scores used by the replication code.
CAN_NOT_PROMOTE="-INFINITY"
CAN_PROMOTE="100"
PROMOTE_ME="1000"
CHECK_MS_SQL="select pg_is_in_recovery()"
CHECK_SYNCHRONOUS_STANDBY_NAMES_SQL="show synchronous_standby_names"
# The location functions were renamed in PostgreSQL 10.
ocf_version_cmp "$version" "10"
rc=$?
if [ $rc -eq 1 ]||[ $rc -eq 2 ]; then
CHECK_XLOG_LOC_SQL="select pg_last_wal_replay_lsn(),pg_last_wal_receive_lsn()"
else
CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()"
fi
CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication"
PGSQL_STATUS_ATTR="${RESOURCE_NAME}-status"
PGSQL_DATA_STATUS_ATTR="${RESOURCE_NAME}-data-status"
PGSQL_XLOG_LOC_NAME="${RESOURCE_NAME}-xlog-loc"
PGSQL_MASTER_BASELINE="${RESOURCE_NAME}-master-baseline"
# Node names are compared in lower case throughout.
NODE_LIST=`echo $OCF_RESKEY_node_list | tr '[A-Z]' '[a-z]'`
RE_CONTROL_SLAVE="false"
if ! ocf_is_ms; then
ocf_exit_reason "Replication(rep_mode=async or sync) requires Master/Slave configuration."
return $OCF_ERR_CONFIGURED
fi
if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! "$OCF_RESKEY_rep_mode" = "async" ]; then
ocf_exit_reason "Invalid rep_mode : $OCF_RESKEY_rep_mode"
return $OCF_ERR_CONFIGURED
fi
if [ ! -n "$NODE_LIST" ]; then
ocf_exit_reason "node_list can't be empty."
return $OCF_ERR_CONFIGURED
fi
if [ $check_config_rc -eq 0 ]; then
# Sync mode needs the rep_mode.conf include directive in the
# configuration file; in async mode the directive is removed again.
rep_mode_string="include '$REP_MODE_CONF' # added by pgsql RA"
if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
if ! grep -q "^[[:space:]]*$rep_mode_string" $OCF_RESKEY_config; then
ocf_log info "adding include directive into $OCF_RESKEY_config"
echo "$rep_mode_string" >> $OCF_RESKEY_config
fi
else
if grep -q "$rep_mode_string" $OCF_RESKEY_config; then
ocf_log info "deleting include directive from $OCF_RESKEY_config"
# Escape the slashes so that the string can be used as sed address.
rep_mode_string=`echo $rep_mode_string | sed -e 's|/|\\\\/|g'`
sed -i "/$rep_mode_string/d" $OCF_RESKEY_config
fi
fi
fi
# The temporary directory must exist and belong to the database owner.
if ! mkdir -p $OCF_RESKEY_tmpdir || ! chown $OCF_RESKEY_pgdba $OCF_RESKEY_tmpdir || ! chmod 700 $OCF_RESKEY_tmpdir; then
ocf_exit_reason "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba"
return $OCF_ERR_PERM
fi
fi
if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
if ocf_is_ms; then
ocf_exit_reason "Replication(rep_mode=slave) does not support Master/Slave configuration."
return $OCF_ERR_CONFIGURED
fi
fi
if use_replication_slot; then
ocf_version_cmp "$version" "9.4"
rc=$?
if [ $rc -eq 0 ]||[ $rc -eq 3 ]; then
ocf_exit_reason "Replication slot needs PostgreSQL 9.4 or higher."
return $OCF_ERR_CONFIGURED
fi
# Reject every character other than a-z, 0-9 and "_".
echo "$OCF_RESKEY_replication_slot_name" | grep -q -e '[^a-z0-9_]'
if [ $? -eq 0 ]; then
ocf_exit_reason "Invalid replication_slot_name($OCF_RESKEY_replication_slot_name). only use lower case letters, numbers, and the underscore character."
return $OCF_ERR_CONFIGURED
fi
fi
return $OCF_SUCCESS
}
#
# Check if we need to create a log file
# arg1: path of the log file
# A missing file is created and handed to $OCF_RESKEY_pgdba (with that
# user's group). Returns 0 when the user can write to the file, else 1.
#
check_log_file() {
    if [ ! -e "$1" ]; then
        touch $1 > /dev/null 2>&1
        chown $OCF_RESKEY_pgdba:$(getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4) $1
    fi

    #Check if $OCF_RESKEY_pgdba can write to the log file
    runasowner "test -w $1" || return 1
    return 0
}
#
# Check if we need to create stats temp directory in tmpfs
# Nothing happens when stats_temp_directory is not configured or the
# directory already exists (a relative path is taken relative to the data
# directory). Otherwise the directory is created, handed to
# $OCF_RESKEY_pgdba and restricted to mode 700; any failure ends the agent
# with $OCF_ERR_PERM.
#
check_stat_temp_directory() {
    local stats_temp

    stats_temp=$(get_pgsql_param stats_temp_directory)
    [ -z "$stats_temp" ] && return

    # Relative path: prefix it with the data directory.
    case "$stats_temp" in
        /*) ;;
        *)  stats_temp="$OCF_RESKEY_pgdata/$stats_temp" ;;
    esac

    [ -d "$stats_temp" ] && return

    mkdir -p "$stats_temp" || {
        ocf_exit_reason "Can't create directory $stats_temp"
        exit $OCF_ERR_PERM
    }
    chown $OCF_RESKEY_pgdba: "$stats_temp" || {
        ocf_exit_reason "Can't change ownership for $stats_temp"
        exit $OCF_ERR_PERM
    }
    chmod 700 "$stats_temp" || {
        ocf_exit_reason "Can't change permissions for $stats_temp"
        exit $OCF_ERR_PERM
    }
}
#
# Check socket directory
# An existing directory is probed by letting $OCF_RESKEY_pgdba create (and
# then removing) a test file in it. A missing directory is created, handed to
# $OCF_RESKEY_pgdba (with that user's group) and set to mode 2775.
# Any failure ends the agent with $OCF_ERR_PERM.
#
check_socket_dir() {
    if [ -d "$OCF_RESKEY_socketdir" ]; then
        if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then
            ocf_exit_reason "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir"
            exit $OCF_ERR_PERM
        fi
        rm $OCF_RESKEY_socketdir/test.$$
        return
    fi

    if ! mkdir "$OCF_RESKEY_socketdir"; then
        ocf_exit_reason "Can't create directory $OCF_RESKEY_socketdir"
        exit $OCF_ERR_PERM
    fi

    if ! chown $OCF_RESKEY_pgdba:$(getent passwd \
        $OCF_RESKEY_pgdba | cut -d ":" -f 4) "$OCF_RESKEY_socketdir"
    then
        ocf_exit_reason "Can't change ownership for $OCF_RESKEY_socketdir"
        exit $OCF_ERR_PERM
    fi

    if ! chmod 2775 "$OCF_RESKEY_socketdir"; then
        ocf_exit_reason "Can't change permissions for $OCF_RESKEY_socketdir"
        exit $OCF_ERR_PERM
    fi
}
# print_crm_mon: print the cluster status as XML.
# crm_mon is run only once per agent invocation; the result is kept in
# $CRM_MON_OUTPUT and printed from there on later calls.
# The XML option is chosen from $OCF_RESKEY_crm_feature_set
# (ocf_version_cmp result 1 = equal, 2 = newer): "--output-as=xml" when the
# feature set is unknown or newer than 3.1.0 (for exactly 3.2.0 the option is
# probed first), "--as-xml" otherwise.
print_crm_mon() {
    if [ -z "$CRM_MON_OUTPUT" ]; then
        ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.1.0"
        res=$?
        if [ -z "$OCF_RESKEY_crm_feature_set" ] || [ $res -eq 2 ]; then
            XMLOPT="--output-as=xml"
            ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.2.0"
            if [ $? -eq 1 ]; then
                # Fall back to the old option when the new one is rejected.
                crm_mon -1 $XMLOPT >/dev/null 2>&1
                if [ $? -ne 0 ]; then
                    XMLOPT="--as-xml"
                fi
            fi
        else
            XMLOPT="--as-xml"
        fi
        CRM_MON_OUTPUT=`exec_with_retry 0 crm_mon -1 $XMLOPT`
    fi
    # The XML must not be used as printf format string: "%" and backslash
    # sequences inside attribute values would be interpreted.
    printf '%s\n' "${CRM_MON_OUTPUT}"
}
#
# 'main' starts here...
#
# Exactly one argument (the action to perform) is expected.
if [ $# -ne 1 ]
then
usage
exit $OCF_ERR_GENERIC
fi
# Defaults for the globals used by the functions above;
# pgsql_validate_all may replace RECOVERY_CONF and USE_STANDBY_SIGNAL.
PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid
BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label
# Resource name without a clone instance suffix (":N").
RESOURCE_NAME=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1`
PGSQL_WAL_RECEIVER_STATUS_ATTR="${RESOURCE_NAME}-receiver-status"
RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf
# Node names are compared in lower case throughout.
NODENAME=$(ocf_local_nodename | tr '[A-Z]' '[a-z]')
USE_STANDBY_SIGNAL=false
# These actions are answered before any validation takes place.
case "$1" in
methods) pgsql_methods
exit $?;;
meta-data) meta_data
exit $OCF_SUCCESS;;
esac
pgsql_validate_all
rc=$?
[ "$1" = "validate-all" ] && exit $rc
# With an invalid setup, stop reports success and monitor/status report
# "not running"; every other action fails with the validation result.
if [ $rc -ne 0 ]
then
case "$1" in
stop) if is_replication; then
change_pgsql_status "$NODENAME" "UNKNOWN"
fi
exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $OCF_NOT_RUNNING;;
*) exit $rc;;
esac
fi
# Only root and the database owner may run the remaining actions.
US=`id -u -n`
if [ $US != root -a $US != $OCF_RESKEY_pgdba ]
then
ocf_exit_reason "$0 must be run as root or $OCF_RESKEY_pgdba"
exit $OCF_ERR_GENERIC
fi
# make psql command options
if [ -n "$OCF_RESKEY_monitor_user" ]; then
# The monitor user's credentials are handed over through the environment.
PGUSER=$OCF_RESKEY_monitor_user; export PGUSER
PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD
psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb"
else
psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb"
fi
# An explicit host wins over the socket directory.
if [ -n "$OCF_RESKEY_pghost" ]; then
psql_options="$psql_options -h $OCF_RESKEY_pghost"
else
if [ -n "$OCF_RESKEY_socketdir" ]; then
psql_options="$psql_options -h $OCF_RESKEY_socketdir"
fi
fi
if [ -n "$OCF_RESKEY_pgport" ]; then
export PGPORT=$OCF_RESKEY_pgport
fi
# Append the additional library directory to the library search path.
if [ -n "$OCF_RESKEY_pglibs" ]; then
if [ -n "$LD_LIBRARY_PATH" ]; then
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$OCF_RESKEY_pglibs
else
export LD_LIBRARY_PATH=$OCF_RESKEY_pglibs
fi
fi
# What kind of method was invoked?
case "$1" in
status) if pgsql_status
then
ocf_log info "PostgreSQL is up"
exit $OCF_SUCCESS
else
ocf_log info "PostgreSQL is down"
exit $OCF_NOT_RUNNING
fi;;
monitor) pgsql_monitor
exit $?;;
start) pgsql_start
exit $?;;
promote) pgsql_promote
exit $?;;
demote) pgsql_demote
exit $?;;
notify) pgsql_notify
exit $?;;
stop) pgsql_stop
exit $?;;
*)
exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/sg_persist.in b/heartbeat/sg_persist.in
index 0497cc469..16048ea6f 100644
--- a/heartbeat/sg_persist.in
+++ b/heartbeat/sg_persist.in
@@ -1,695 +1,694 @@
#!@BASH_SHELL@
#
#
# OCF Resource Agent compliant PERSISTENT SCSI RESERVATION resource script.
#
#
# Copyright (c) 2011 Evgeny Nifontov and lwang@suse.com All Rights Reserved.
#
# "Heartbeat drbd OCF Resource Agent: 2007, Lars Marowsky-Bree" was used
# as example of multistate OCF Resource Agent.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#
# OCF instance parameters
# OCF_RESKEY_binary
# OCF_RESKEY_devs
# OCF_RESKEY_required_devs_nof
# OCF_RESKEY_reservation_type
# OCF_RESKEY_master_score_base
# OCF_RESKEY_master_score_dev_factor
# OCF_RESKEY_master_score_delay
#
# TODO
#
# 1) PROBLEM: devices which were not accessible during the 'start' action will never be registered/reserved
# TODO: 'Master' and 'Slave' register new devs in 'monitor' action
# TODO: 'Master' reserves new devs in 'monitor' action
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Parameter defaults
OCF_RESKEY_binary_default="sg_persist"              # binary name for the resource
OCF_RESKEY_devs_default=""                          # device list
OCF_RESKEY_required_devs_nof_default="1"            # number of required devices
OCF_RESKEY_reservation_type_default="1"             # reservation type
OCF_RESKEY_master_score_base_default="0"            # master score base
OCF_RESKEY_master_score_dev_factor_default="100"    # device factor for master score
OCF_RESKEY_master_score_delay_default="30"          # delay for master score

# A default is applied only when the parameter is unset; a parameter that
# was explicitly set to the empty string keeps its empty value.
OCF_RESKEY_binary=${OCF_RESKEY_binary-${OCF_RESKEY_binary_default}}
OCF_RESKEY_devs=${OCF_RESKEY_devs-${OCF_RESKEY_devs_default}}
OCF_RESKEY_required_devs_nof=${OCF_RESKEY_required_devs_nof-${OCF_RESKEY_required_devs_nof_default}}
OCF_RESKEY_reservation_type=${OCF_RESKEY_reservation_type-${OCF_RESKEY_reservation_type_default}}
OCF_RESKEY_master_score_base=${OCF_RESKEY_master_score_base-${OCF_RESKEY_master_score_base_default}}
OCF_RESKEY_master_score_dev_factor=${OCF_RESKEY_master_score_dev_factor-${OCF_RESKEY_master_score_dev_factor_default}}
OCF_RESKEY_master_score_delay=${OCF_RESKEY_master_score_delay-${OCF_RESKEY_master_score_delay_default}}
#######################################################################
# Print the resource agent metadata (XML) on stdout and terminate the
# script with $OCF_SUCCESS.
# The here-document delimiter is unquoted, so the ${..._default} references
# are expanded when the text is printed, while the \$ sequences come out as
# a literal '$'.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="sg_persist" version="1.1">
<version>1.0</version>
<longdesc lang="en">
This resource agent manages SCSI PERSISTENT RESERVATIONS.
"sg_persist" from sg3_utils is used, please see its documentation.
Should be used as multistate (Master/Slave) resource
Slave registers its node id ("crm_node -i") as reservation key ( --param-rk ) on each device in the "devs" list.
Master reserves all devices from "devs" list with reservation "--prout-type" value from "reservation_type" parameter.
</longdesc>
<shortdesc lang="en">Manages SCSI PERSISTENT RESERVATIONS</shortdesc>
<parameters>
<parameter name="binary" unique="0">
<longdesc lang="en">
The name of the binary that manages the resource.
</longdesc>
<shortdesc lang="en">the binary name of the resource</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}"/>
</parameter>
<parameter name="devs" unique="0" required="1">
<longdesc lang="en">
Device list. Multiple devices can be listed with blank space as separator.
Shell wildcards are allowed.
</longdesc>
<shortdesc lang="en">device list</shortdesc>
<content type="string"/>
</parameter>
<parameter name="required_devs_nof" unique="0" required="0">
<longdesc lang="en">
Minimum number of "working" devices from device list
1) existing
2) "sg_persist --read-keys \$device" works (Return code 0)
resource actions "start","monitor","promote" and "validate-all" return "\$OCF_ERR_INSTALLED"
if the actual number of "working" devices is less then "required_devs_nof".
resource actions "stop" and "demote" tries to remove reservations and registration keys from
all working devices, but always return "\$OCF_SUCCESS"
</longdesc>
<shortdesc lang="en">minimum number of working devices</shortdesc>
<content type="string" default="${OCF_RESKEY_required_devs_nof_default}"/>
</parameter>
<parameter name="reservation_type" unique="0" required="0">
<longdesc lang="en">
reservation type
</longdesc>
<shortdesc lang="en">reservation type</shortdesc>
<content type="string" default="${OCF_RESKEY_reservation_type_default}" />
</parameter>
<parameter name="master_score_base" unique="0" required="0">
<longdesc lang="en">
master_score_base value
"master_score_base" value is used in "master_score" calculation:
master_score = \$master_score_base + \$master_score_dev_factor * \$working_devs
if set to bigger value in sg_persist resource configuration on some node, this node will be "preferred" for master role.
</longdesc>
<shortdesc lang="en">base master_score value</shortdesc>
<content type="string" default="${OCF_RESKEY_master_score_base_default}" />
</parameter>
<parameter name="master_score_dev_factor" unique="0" required="0">
<longdesc lang="en">
Working device factor in master_score calculation
each "working" device provides additional value to "master_score",
so the node that sees more devices will be preferred for the "Master"-role
Setting it to 0 will disable this behavior.
</longdesc>
<shortdesc lang="en">working device factor in master_score calculation</shortdesc>
<content type="string" default="${OCF_RESKEY_master_score_dev_factor_default}" />
</parameter>
<parameter name="master_score_delay" unique="0" required="0">
<longdesc lang="en">
master/slave decreases/increases its master_score after delay of \$master_score_delay seconds
so if some device gets inaccessible, the slave decreases its master_score first and the resource will no be watched
and after this device reappears again the master increases its master_score first
this can work only if the master_score_delay is bigger then monitor interval on both master and slave
Setting it to 0 will disable this behavior.
</longdesc>
<shortdesc lang="en">master_score decrease/increase delay time</shortdesc>
<content type="string" default="${OCF_RESKEY_master_score_delay_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="30s" />
<action name="promote" timeout="30s" />
<action name="demote" timeout="30s" />
<action name="notify" timeout="30s" />
<action name="stop" timeout="30s" />
<action name="monitor" depth="0" timeout="20s" interval="29s" role="Unpromoted" />
<action name="monitor" depth="0" timeout="20s" interval="60s" role="Promoted" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="30s" />
</actions>
</resource-agent>
END
# Exits the whole script, so control never returns to the caller.
exit $OCF_SUCCESS
}
# Prepare the global state shared by all action handlers: checks for root,
# resolves the sg_persist binary, derives the node identity and attribute
# commands, copies the OCF_RESKEY_* parameters into working variables and
# finally probes the devices (sg_persist_check_devs, sg_persist_get_status).
# Exits the script with OCF_ERR_PERM when not root and with
# OCF_ERR_INSTALLED when the node id or the device list is missing.
sg_persist_init() {
if ! ocf_is_root ; then
ocf_log err "You must be root to perform this operation."
exit $OCF_ERR_PERM
fi
SG_PERSIST=${OCF_RESKEY_binary}
# presumably aborts when the binary cannot be found; verify against ocf-shellfuncs
check_binary $SG_PERSIST
ROLE=$OCF_RESKEY_CRM_meta_role
# Seconds since the epoch; used by the monitor action to time the score delay.
NOW=$(date +%s)
RESOURCE="${OCF_RESOURCE_INSTANCE}"
# Attribute names: every ':' in the instance name is replaced by '-'.
MASTER_SCORE_VAR_NAME="master-${OCF_RESOURCE_INSTANCE//:/-}"
PENDING_VAR_NAME="pending-$MASTER_SCORE_VAR_NAME"
# only works with corosync
CRM_NODE="${HA_SBIN_DIR}/crm_node"
NODE_ID_DEC=$($CRM_NODE -i)
# Pick the "crm_node -l" line that starts with our id, then strip the
# leading "<id> " and the last space-separated field from it.
NODE=$($CRM_NODE -l | $GREP -w ^$NODE_ID_DEC)
NODE=${NODE#$NODE_ID_DEC }
NODE=${NODE% *}
MASTER_SCORE_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$MASTER_SCORE_VAR_NAME --node=$NODE"
# NOTE(review): the next line begins with a "- " diff removal marker; it
# looks like patch residue rather than shell code -- confirm.
- CRM_MASTER="${HA_SBIN_DIR}/crm_master --lifetime=reboot"
PENDING_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$PENDING_VAR_NAME --node=$NODE"
# The node id in hexadecimal is what gets used as the reservation key.
# NOTE(review): with an empty NODE_ID_DEC, printf still prints "0x0", so the
# -z check below looks unlikely to ever trigger -- confirm.
NODE_ID_HEX=$(printf '0x%x' $NODE_ID_DEC)
if [ -z "$NODE_ID_HEX" ]; then
ocf_log err "Couldn't get node id with \"$CRM_NODE\""
exit $OCF_ERR_INSTALLED
fi
ocf_log debug "$RESOURCE: NODE:$NODE, ROLE:$ROLE, NODE_ID DEC:$NODE_ID_DEC HEX:$NODE_ID_HEX"
DEVS=${OCF_RESKEY_devs}
REQUIRED_DEVS_NOF=${OCF_RESKEY_required_devs_nof}
RESERVATION_TYPE=${OCF_RESKEY_reservation_type}
MASTER_SCORE_BASE=${OCF_RESKEY_master_score_base}
MASTER_SCORE_DEV_FACTOR=${OCF_RESKEY_master_score_dev_factor}
MASTER_SCORE_DELAY=${OCF_RESKEY_master_score_delay}
ocf_log debug "$RESOURCE: DEVS=$DEVS"
ocf_log debug "$RESOURCE: REQUIRED_DEVS_NOF=$REQUIRED_DEVS_NOF"
ocf_log debug "$RESOURCE: RESERVATION_TYPE=$RESERVATION_TYPE"
ocf_log debug "$RESOURCE: MASTER_SCORE_BASE=$MASTER_SCORE_BASE"
ocf_log debug "$RESOURCE: MASTER_SCORE_DEV_FACTOR=$MASTER_SCORE_DEV_FACTOR"
ocf_log debug "$RESOURCE: MASTER_SCORE_DELAY=$MASTER_SCORE_DELAY"
#expand path wildcards
# (the unquoted echo lets the shell perform pathname expansion on DEVS)
DEVS=$(echo $DEVS)
if [ -z "$DEVS" ]; then
ocf_log err "\"devs\" not defined"
exit $OCF_ERR_INSTALLED
fi
# Fill EXISTING_DEVS first; sg_persist_get_status iterates over it.
sg_persist_check_devs
sg_persist_get_status
}
# Print a two-line usage summary for this agent on stdout.
sg_persist_action_usage() {
  printf '%s\n' \
    "usage: $0 {start|stop|monitor|validate-all|promote|demote|notify|meta-data}" \
    "Expects to have a fully populated OCF RA-compliant environment set."
}
#######################################
# Probe every existing device and classify it.
# Globals read:    EXISTING_DEVS, SG_PERSIST, GREP, NODE_ID_HEX, RESOURCE,
#                  MASTER_SCORE_BASE, MASTER_SCORE_DEV_FACTOR
# Globals written: WORKING_DEVS          - devices whose keys could be read
#                  REGISTERED_DEVS       - working devices carrying our key
#                  RESERVED_DEVS         - devices whose reservation shows our key
#                  DEVS_WITH_RESERVATION - devices reporting any reservation key
#                  RESERVATION_KEYS      - that key, same index as above
#                  WORKING_DEVS_NOF, MASTER_SCORE
#                  (READ_KEYS, READ_RESERVATION, dev and reservation_key are
#                  left behind as scratch globals, as before)
#######################################
sg_persist_get_status() {
  # Reset every result array, not just WORKING_DEVS, so that a repeated
  # call cannot accumulate duplicate entries from the previous run.
  WORKING_DEVS=()
  REGISTERED_DEVS=()
  RESERVED_DEVS=()
  DEVS_WITH_RESERVATION=()
  RESERVATION_KEYS=()

  for dev in "${EXISTING_DEVS[@]}"; do
    # A device counts as "working" when its registration keys can be read.
    READ_KEYS=$($SG_PERSIST --in --read-keys "$dev" 2>&1) || continue
    WORKING_DEVS+=("$dev")

    # Our key is the node id in hex; it has to end a line as a whole word.
    printf '%s\n' "$READ_KEYS" | $GREP -qw "${NODE_ID_HEX}\$" || continue
    REGISTERED_DEVS+=("$dev")

    READ_RESERVATION=$($SG_PERSIST --in --read-reservation "$dev" 2>&1) || continue
    if printf '%s\n' "$READ_RESERVATION" | $GREP -qw "${NODE_ID_HEX}\$"; then
      RESERVED_DEVS+=("$dev")
    fi

    # Remember the reported reservation key, whoever holds it, so that
    # promote can decide between a plain reserve and a preempt.
    reservation_key=$(printf '%s\n' "$READ_RESERVATION" \
      | $GREP -o 'Key=0x[0-9a-f]*' \
      | $GREP -o '0x[0-9a-f]*')
    if [ -n "$reservation_key" ]; then
      DEVS_WITH_RESERVATION+=("$dev")
      RESERVATION_KEYS+=("$reservation_key")
    fi
  done

  WORKING_DEVS_NOF=${#WORKING_DEVS[@]}
  ocf_log debug "$RESOURCE: working devices: $(sg_persist_echo_array "${WORKING_DEVS[@]}")"
  ocf_log debug "$RESOURCE: number of working devices: $WORKING_DEVS_NOF"
  ocf_log debug "$RESOURCE: registered devices: $(sg_persist_echo_array "${REGISTERED_DEVS[@]}")"
  ocf_log debug "$RESOURCE: reserved devices: $(sg_persist_echo_array "${RESERVED_DEVS[@]}")"
  ocf_log debug "$RESOURCE: devices with reservation: $(sg_persist_echo_array "${DEVS_WITH_RESERVATION[@]}")"
  ocf_log debug "$RESOURCE: reservation keys: $(sg_persist_echo_array "${RESERVATION_KEYS[@]}")"

  MASTER_SCORE=$(($MASTER_SCORE_BASE + $MASTER_SCORE_DEV_FACTOR*$WORKING_DEVS_NOF))
  ocf_log debug "$RESOURCE: master_score: $MASTER_SCORE_BASE + $MASTER_SCORE_DEV_FACTOR*$WORKING_DEVS_NOF = $MASTER_SCORE"
}
# Append every entry of DEVS that exists on disk to EXISTING_DEVS, record
# the resulting count in EXISTING_DEVS_NOF and exit the script with
# OCF_ERR_INSTALLED when fewer than REQUIRED_DEVS_NOF devices were found.
sg_persist_check_devs() {
  for dev in $DEVS; do
    [ -e "$dev" ] || continue
    EXISTING_DEVS+=($dev)
  done
  EXISTING_DEVS_NOF=${#EXISTING_DEVS[@]}

  if test $EXISTING_DEVS_NOF -lt $REQUIRED_DEVS_NOF; then
    ocf_log err "Number of existing devices=$EXISTING_DEVS_NOF less then required_devs_nof=$REQUIRED_DEVS_NOF"
    exit $OCF_ERR_INSTALLED
  fi
}
# Succeed (0) when device $1 is listed in REGISTERED_DEVS, fail (1) otherwise.
sg_persist_is_registered() {
  for registered_dev in ${REGISTERED_DEVS[*]}; do
    [[ "$registered_dev" == "$1" ]] && return 0
  done
  return 1
}
# Print the reservation key recorded for device $1, or an empty line when
# the device is not listed in DEVS_WITH_RESERVATION.
sg_persist_get_reservation_key() {
  for array_index in "${!DEVS_WITH_RESERVATION[@]}"; do
    [ "${DEVS_WITH_RESERVATION[$array_index]}" == "$1" ] || continue
    # RESERVATION_KEYS is indexed in parallel with DEVS_WITH_RESERVATION.
    echo ${RESERVATION_KEYS[$array_index]}
    return 0
  done
  echo ""
}
# Print the arguments as "[0]:first [1]:second ..." on a single line.
# The final echo is deliberately unquoted: it drops the trailing blank and
# collapses runs of whitespace.
sg_persist_echo_array() {
  arr_str=""
  str_count=0
  for str; do
    arr_str+="[$str_count]:$str "
    str_count=$((str_count + 1))
  done
  echo $arr_str
}
# Split ACT_PENDING ("<timestamp>_<score>") into ACT_PENDING_TS and
# ACT_PENDING_SCORE; both are 0 when ACT_PENDING is empty.
sg_persist_parse_act_pending() {
  ACT_PENDING_SCORE=0
  ACT_PENDING_TS=0
  [ -z "$ACT_PENDING" ] && return 0

  ACT_PENDING_TS="${ACT_PENDING%%_*}"      # text before the first '_'
  ACT_PENDING_SCORE="${ACT_PENDING##*_}"   # text after the last '_'
}
# Schedule the pending attribute to be emptied, but only when a pending
# value is currently stored (ACT_PENDING non-empty).
sg_persist_clear_pending() {
  [ -n "$ACT_PENDING" ] || return 0
  NEW_PENDING=""
  DO_PENDING_UPDATE="YES"
}
# Schedule $1 to be written as the new master score.
sg_persist_new_master_score() {
  NEW_MASTER_SCORE="$1"
  DO_MASTER_SCORE_UPDATE="YES"
}
# Schedule $1 to be written as the new pending value.
sg_persist_new_pending() {
  NEW_PENDING="$1"
  DO_PENDING_UPDATE="YES"
}
# Functions invoked by resource manager actions
# start: publish the master score, clear the pending marker, then register
# our key on every working device that does not carry it yet.
# Exits with OCF_ERR_GENERIC when too few devices work; returns
# OCF_ERR_GENERIC as soon as one registration fails, OCF_SUCCESS otherwise.
sg_persist_action_start() {
  ocf_run $MASTER_SCORE_ATTRIBUTE --update=$MASTER_SCORE
  ocf_run $PENDING_ATTRIBUTE --update=""

  if [ $WORKING_DEVS_NOF -lt $REQUIRED_DEVS_NOF ]; then
    ocf_log err "$RESOURCE: Number of working devices=$WORKING_DEVS_NOF less then required_devs_nof=$REQUIRED_DEVS_NOF"
    exit $OCF_ERR_GENERIC
  fi

  for dev in ${WORKING_DEVS[*]}; do
    # Nothing to do for devices that already hold our registration.
    sg_persist_is_registered $dev && continue

    ocf_run $SG_PERSIST --out --no-inquiry --register --param-rk=0 --param-sark=$NODE_ID_HEX $dev
    [ $? -ne $OCF_SUCCESS ] && return $OCF_ERR_GENERIC
  done

  return $OCF_SUCCESS
}
# stop: drop the score attributes and unregister our key from every device
# that carries it. Always returns OCF_SUCCESS, whatever the individual
# commands report.
sg_persist_action_stop() {
  if [ ${#REGISTERED_DEVS[*]} -ne 0 ]; then
    # Clear preference for becoming master
    ocf_run $MASTER_SCORE_ATTRIBUTE --delete
    ocf_run $PENDING_ATTRIBUTE --delete

    # Registering a zero service-action key removes our registration.
    for dev in ${REGISTERED_DEVS[*]}; do
      ocf_run $SG_PERSIST --out --no-inquiry --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev
    done
  else
    ocf_log debug "$RESOURCE stop: already no registrations"
  fi

  return $OCF_SUCCESS
}
#######################################
# Helper for the monitor action: decide whether the freshly computed
# MASTER_SCORE may replace the stored one right now, honouring
# MASTER_SCORE_DELAY.
#   - a change is already pending: apply it once the delay has elapsed
#   - nothing pending, delay is 0: apply immediately
#   - nothing pending, delay > 0:  start the delay by storing
#                                  "<now>_<score>" as pending value
# Globals read:    ACT_PENDING, ACT_PENDING_TS, NOW, MASTER_SCORE,
#                  MASTER_SCORE_DELAY
# Globals written: via sg_persist_new_master_score, sg_persist_new_pending
#                  and sg_persist_clear_pending
#######################################
sg_persist_delayed_score_update() {
  if [ -n "$ACT_PENDING" ]; then
    if [ $(($NOW-$ACT_PENDING_TS-$MASTER_SCORE_DELAY)) -ge 0 ]; then
      sg_persist_new_master_score $MASTER_SCORE
      sg_persist_clear_pending
    fi
  elif [ $MASTER_SCORE_DELAY -eq 0 ]; then
    sg_persist_new_master_score $MASTER_SCORE
    sg_persist_clear_pending
  else
    sg_persist_new_pending "${NOW}_${MASTER_SCORE}"
  fi
}

#######################################
# monitor: reconcile the stored master score with the current device
# status and report the resource state.
#
# Score handling (only when a score attribute is currently stored):
#   - the promoted instance lowers its score only after the delay,
#     but raises it immediately
#   - the unpromoted instance raises its score only after the delay,
#     but lowers it immediately
# The role is accepted under both the legacy names (Master/Slave) and the
# names advertised in this agent's metadata (Promoted/Unpromoted); any
# other role value leaves the score untouched.
#
# Returns:
#   OCF_NOT_RUNNING     no device carries our registration
#   OCF_RUNNING_MASTER  every working device is reserved by us, or (types
#                       7/8) all are registered and a reservation exists
#   OCF_SUCCESS         every working device is registered
#   OCF_ERR_GENERIC     anything else
#######################################
sg_persist_action_monitor() {
  ACT_MASTER_SCORE=$($MASTER_SCORE_ATTRIBUTE --query --quiet 2>/dev/null)
  ocf_log debug "$RESOURCE monitor: ACT_MASTER_SCORE=$ACT_MASTER_SCORE"
  ACT_PENDING=$($PENDING_ATTRIBUTE --query --quiet 2>/dev/null)
  ocf_log debug "$RESOURCE monitor: ACT_PENDING=$ACT_PENDING"
  sg_persist_parse_act_pending
  ocf_log debug "$RESOURCE monitor: ACT_PENDING_TS=$ACT_PENDING_TS"
  ocf_log debug "$RESOURCE monitor: ACT_PENDING_VAL=$ACT_PENDING_SCORE"
  ocf_log debug "$MASTER_SCORE, $ACT_MASTER_SCORE, $ROLE"

  DO_MASTER_SCORE_UPDATE="NO"
  DO_PENDING_UPDATE="NO"

  if [ -n "$ACT_MASTER_SCORE" ]; then
    if [ $ACT_MASTER_SCORE -eq $MASTER_SCORE ]; then
      # Stored and computed score agree: any pending change is obsolete.
      sg_persist_clear_pending
    else
      case $ROLE in
        Master|Promoted)
          if [ $MASTER_SCORE -lt $ACT_MASTER_SCORE ]; then
            sg_persist_delayed_score_update
          else
            sg_persist_new_master_score $MASTER_SCORE
            sg_persist_clear_pending
          fi
          ;;
        Slave|Unpromoted)
          if [ $MASTER_SCORE -gt $ACT_MASTER_SCORE ]; then
            sg_persist_delayed_score_update
          else
            sg_persist_new_master_score $MASTER_SCORE
            sg_persist_clear_pending
          fi
          ;;
        *)
          ;;
      esac
    fi
  fi

  if [ $DO_MASTER_SCORE_UPDATE == "YES" ]; then
    ocf_run $MASTER_SCORE_ATTRIBUTE --update=$NEW_MASTER_SCORE
  fi
  if [ $DO_PENDING_UPDATE == "YES" ]; then
    ocf_run $PENDING_ATTRIBUTE --update=$NEW_PENDING
  fi

  if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then
    ocf_log debug "$RESOURCE monitor: no registrations"
    if [ -n "$ACT_MASTER_SCORE" ]; then
      ocf_run $MASTER_SCORE_ATTRIBUTE --delete
      ocf_run $PENDING_ATTRIBUTE --delete
    fi
    return $OCF_NOT_RUNNING
  fi

  if [ ${#RESERVED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then
    if [ -z "$ACT_MASTER_SCORE" ]; then
      ocf_run $MASTER_SCORE_ATTRIBUTE --update=$MASTER_SCORE
      ocf_run $PENDING_ATTRIBUTE --update=""
    fi
    return $OCF_RUNNING_MASTER
  fi

  if [ ${#REGISTERED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then
    if [ -z "$ACT_MASTER_SCORE" ]; then
      ocf_run $MASTER_SCORE_ATTRIBUTE --update=$MASTER_SCORE
      ocf_run $PENDING_ATTRIBUTE --update=""
    fi
    # Types 7 and 8: an existing reservation on any device is reported as
    # the promoted state for a fully registered node.
    if [ $RESERVATION_TYPE -eq 7 ] || [ $RESERVATION_TYPE -eq 8 ]; then
      if [ ${#DEVS_WITH_RESERVATION[*]} -gt 0 ]; then
        return $OCF_RUNNING_MASTER
      else
        return $OCF_SUCCESS
      fi
    else
      return $OCF_SUCCESS
    fi
  fi

  ocf_log err "$RESOURCE monitor: unexpected state"
  return $OCF_ERR_GENERIC
}
# promote: take the reservation on every working device.
#   - no reservation on the device: reserve it with our key
#   - reservation held under another key:
#       types 1/3/5/6  preempt the current holder
#       types 7/8      nothing to do, every registrant already is a holder;
#                      return OCF_SUCCESS right away
# Returns OCF_SUCCESS when we already hold a reservation or all devices were
# handled, OCF_ERR_GENERIC on the first failing command and OCF_ERR_ARGS for
# an unsupported reservation type.
sg_persist_action_promote() {
  if [ ${#RESERVED_DEVS[*]} -gt 0 ]; then
    ocf_log info "$RESOURCE promote: already master"
    return $OCF_SUCCESS
  fi

  for dev in ${WORKING_DEVS[*]}; do
    reservation_key=$(sg_persist_get_reservation_key $dev)

    # Reject unsupported reservation types before touching the device.
    case $RESERVATION_TYPE in
      1|3|5|6|7|8) ;;
      *) return $OCF_ERR_ARGS ;;
    esac

    if [ -z "$reservation_key" ]; then
      ocf_run $SG_PERSIST --out --no-inquiry --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
      [ $? -ne $OCF_SUCCESS ] && return $OCF_ERR_GENERIC
      continue
    fi

    case $RESERVATION_TYPE in
      7|8)
        ocf_log info "$RESOURCE promote: there already exist an reservation holder, all registrants become reservation holders"
        return $OCF_SUCCESS
        ;;
    esac

    ocf_run $SG_PERSIST --out --no-inquiry --preempt --param-sark=$reservation_key --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
    [ $? -ne $OCF_SUCCESS ] && return $OCF_ERR_GENERIC
  done

  return $OCF_SUCCESS
}
# demote: give up the reservation.
#   types 1/3/5/6  release the reservation on every device we reserved
#   types 7/8      unregister our key from every registered device
# Returns OCF_SUCCESS when there is nothing to give up or everything went
# fine, OCF_ERR_GENERIC on the first failing command and OCF_ERR_ARGS for an
# unsupported reservation type.
sg_persist_action_demote() {
  case $RESERVATION_TYPE in
    1|3|5|6)
      if [ ${#RESERVED_DEVS[*]} -eq 0 ]; then
        ocf_log info "$RESOURCE demote: already slave"
        return $OCF_SUCCESS
      fi
      for dev in ${RESERVED_DEVS[*]}; do
        ocf_run $SG_PERSIST --out --no-inquiry --release --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
        [ $? -ne $OCF_SUCCESS ] && return $OCF_ERR_GENERIC
      done
      ;;
    7|8)
      # With types 7/8 a --release does not drop the reservation; the key
      # itself has to be unregistered.
      if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then
        ocf_log info "$RESOURCE demote: already slave"
        return $OCF_SUCCESS
      fi
      for dev in ${REGISTERED_DEVS[*]}; do
        ocf_run $SG_PERSIST --out --no-inquiry --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev
        [ $? -ne $OCF_SUCCESS ] && return $OCF_ERR_GENERIC
      done
      ;;
    *)
      return $OCF_ERR_ARGS
      ;;
  esac

  return $OCF_SUCCESS
}
# notify: log the notification type and operation together with the number
# of active, starting and stopping instances. Always returns OCF_SUCCESS.
sg_persist_action_notify() {
  local n_type="$OCF_RESKEY_CRM_meta_notify_type"
  local n_op="$OCF_RESKEY_CRM_meta_notify_operation"

  # The notify variables are whitespace-separated lists; splitting them
  # into arrays yields the instance counts.
  local -a active_rscs=($OCF_RESKEY_CRM_meta_notify_active_resource)
  local -a stop_rscs=($OCF_RESKEY_CRM_meta_notify_stop_resource)
  local -a start_rscs=($OCF_RESKEY_CRM_meta_notify_start_resource)

  ocf_log debug "$RESOURCE notify: $n_type for $n_op - counts: active ${#active_rscs[@]} - starting ${#start_rscs[@]} - stopping ${#stop_rscs[@]}"
  return $OCF_SUCCESS
}
#######################################
# validate-all: check the promotable clone configuration.
# Unless the reservation type is 7 or 8, at most one instance may be
# promoted at a time.
# The limit is taken from OCF_RESKEY_CRM_meta_promoted_max when that is
# set, and from the legacy OCF_RESKEY_CRM_meta_master_max otherwise, so the
# check does not fail merely because only the newer variable is supplied.
# Exits with OCF_ERR_CONFIGURED on misconfiguration, returns OCF_SUCCESS
# otherwise.
#######################################
sg_persist_action_validate_all () {
  local promoted_max="${OCF_RESKEY_CRM_meta_promoted_max:-$OCF_RESKEY_CRM_meta_master_max}"

  if [ "$promoted_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then
    ocf_log err "Master options misconfigured."
    exit $OCF_ERR_CONFIGURED
  fi
  return $OCF_SUCCESS
}
# Entry point: exactly one argument, the action name, is expected.
if [ $# -ne 1 ]; then
  echo "Incorrect parameter count."
  sg_persist_action_usage
  exit $OCF_ERR_ARGS
fi

ACTION=$1

# sg_persist_init assigns RESOURCE as well, but the "starting action" log
# line below runs before init and the notify action never calls init.
# Setting it here keeps those log messages from carrying an empty name.
RESOURCE="${OCF_RESOURCE_INSTANCE}"

case $ACTION in
  meta-data)
    meta_data
    ;;
  validate-all)
    # No explicit exit: the script ends with the status of the validation.
    sg_persist_init
    sg_persist_action_validate_all
    ;;
  start|promote|monitor|stop|demote)
    ocf_log debug "$RESOURCE: starting action \"$ACTION\""
    sg_persist_init
    sg_persist_action_$ACTION
    exit $?
    ;;
  notify)
    sg_persist_action_notify
    exit $?
    ;;
  usage|help)
    sg_persist_action_usage
    exit $OCF_SUCCESS
    ;;
  *)
    sg_persist_action_usage
    exit $OCF_ERR_ARGS
    ;;
esac

File Metadata

Mime Type
text/x-diff
Expires
Mon, Apr 21, 7:19 PM (4 h, 8 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665491
Default Alt Text
(242 KB)

Event Timeline