
diff --git a/heartbeat/clvm b/heartbeat/clvm
index 23e6f9faf..94a43927a 100755
--- a/heartbeat/clvm
+++ b/heartbeat/clvm
@@ -1,428 +1,428 @@
#!/bin/bash
#
-# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/ocf-directories
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="clvm" version="0.9">
<version>1.0</version>
<longdesc lang="en">
This agent manages the clvmd daemon.
</longdesc>
<shortdesc lang="en">clvmd</shortdesc>
<parameters>
<parameter name="with_cmirrord" unique="0" required="0">
<longdesc lang="en">
Start with cmirrord (cluster mirror log daemon).
</longdesc>
<shortdesc lang="en">activate cmirrord</shortdesc>
<content type="boolean" default="false" />
</parameter>
<parameter name="daemon_options" unique="0">
<longdesc lang="en">
Options to clvmd. Refer to clvmd.8 for detailed descriptions.
</longdesc>
<shortdesc lang="en">Daemon Options</shortdesc>
<content type="string" default="-d0"/>
</parameter>
<parameter name="activate_vgs" unique="0">
<longdesc lang="en">
Whether or not to activate all cluster volume groups after starting
the clvmd. Note that clustered volume groups will always be
deactivated before the clvmd stops, regardless of what this option
is set to.
</longdesc>
<shortdesc lang="en">Activate volume groups</shortdesc>
<content type="boolean" default="true"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="90" />
<action name="monitor" timeout="90" interval="30" depth="0" />
<action name="reload" timeout="90" />
<action name="meta-data" timeout="10" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END
}
#######################################################################
: ${OCF_RESKEY_daemon_options:="-d0"}
: ${OCF_RESKEY_activate_vgs:="true"}
sbindir=$HA_SBIN_DIR
if [ -z "$sbindir" ]; then
sbindir=/usr/sbin
fi
DAEMON="clvmd"
CMIRROR="cmirrord"
DAEMON_PATH="${sbindir}/clvmd"
CMIRROR_PATH="${sbindir}/cmirrord"
LVMCONF="${sbindir}/lvmconf"
LOCK_FILE="/var/lock/subsys/$DAEMON"
# attempt to detect where the vg tools are located
# for some reason this isn't consistent with sbindir
# in some distros.
vgtoolsdir=$(dirname $(which vgchange 2> /dev/null) 2> /dev/null)
if [ -z "$vgtoolsdir" ]; then
vgtoolsdir="$sbindir"
fi
LVM_VGCHANGE=${vgtoolsdir}/vgchange
LVM_VGDISPLAY=${vgtoolsdir}/vgdisplay
LVM_VGSCAN=${vgtoolsdir}/vgscan
# Leaving this in for legacy support. We do not want to advertise
# the ability to set options in the sysconfig file; we want
# to expand the OCF style options as necessary instead.
[ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster
[ -f /etc/sysconfig/$DAEMON ] && . /etc/sysconfig/$DAEMON
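# Use the operation timeout handed down by the cluster manager
# (OCF_RESKEY_CRM_meta_timeout, in milliseconds) as the clvmd timeout,
# defaulting to 90 seconds; clvmd_start passes it to clvmd as -T<seconds>.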
CLVMD_TIMEOUT="90"
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
CLVMD_TIMEOUT=$(($OCF_RESKEY_CRM_meta_timeout/1000))
fi
clvmd_usage()
{
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
clvmd_validate()
{
# check_binary will exit with OCF_ERR_INSTALLED
# when binary is missing
check_binary "pgrep"
check_binary $DAEMON_PATH
if ocf_is_true $OCF_RESKEY_with_cmirrord; then
check_binary $CMIRROR_PATH
fi
if [ "$__OCF_ACTION" != "monitor" ]; then
check_binary "killall"
check_binary $LVM_VGCHANGE
check_binary $LVM_VGDISPLAY
check_binary $LVM_VGSCAN
fi
# Future validation checks here.
return $OCF_SUCCESS
}
check_process()
{
local binary=$1
local pidfile="${HA_RSCTMP}/${binary}-${OCF_RESOURCE_INSTANCE}.pid"
local pid
ocf_log debug "Checking status for ${binary}."
if [ -e "$pidfile" ]; then
cat /proc/$(cat $pidfile)/cmdline 2>/dev/null | grep -a "${binary}" > /dev/null 2>&1
if [ $? -eq 0 ];then
# shortcut without requiring pgrep to search through all procs
return $OCF_SUCCESS
fi
fi
pid=$(pgrep ${binary})
case $? in
0)
ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}."
echo "$pid" > $pidfile
return $OCF_SUCCESS;;
1)
rm -f "$pidfile" > /dev/null 2>&1
ocf_log info "$binary is not running"
return $OCF_NOT_RUNNING;;
*)
rm -f "$pidfile" > /dev/null 2>&1
ocf_exit_reason "Error encountered detecting pid status of $binary"
return $OCF_ERR_GENERIC;;
esac
}
clvmd_status()
{
local rc
local mirror_rc
clvmd_validate
if [ $? -ne $OCF_SUCCESS ]; then
ocf_exit_reason "Unable to monitor, Environment validation failed."
return $?
fi
check_process $DAEMON
rc=$?
mirror_rc=$rc
if ocf_is_true $OCF_RESKEY_with_cmirrord; then
check_process $CMIRROR
mirror_rc=$?
fi
# If these ever don't match, return error to force recovery
if [ $mirror_rc -ne $rc ]; then
return $OCF_ERR_GENERIC
fi
return $rc
}
# NOTE: replace this with vgs, once display filter per attr is implemented.
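# Setting RS="VG Name" makes awk treat each vgdisplay stanza as one record,
# so $1 of a record is the VG name that followed "VG Name"; only records
# containing "Clustered" are printed.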
clustered_vgs() {
${LVM_VGDISPLAY} 2>/dev/null | awk 'BEGIN {RS="VG Name"} {if (/Clustered/) print $1;}'
}
wait_for_process()
{
local binary=$1
local timeout=$2
local count=0
ocf_log info "Waiting for $binary to exit"
while [ $count -le $timeout ]; do
check_process $binary
if [ $? -eq $OCF_NOT_RUNNING ]; then
ocf_log info "$binary terminated"
return $OCF_SUCCESS
fi
sleep 1
count=$((count+1))
done
return $OCF_ERR_GENERIC
}
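# time_left: report, via the return status, the number of seconds remaining
# until the deadline given in $1, but never less than the floor given in $2.
# Used below to bound how long we wait for cmirrord to exit during stop.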
time_left()
{
local end=$1
local default=$2
local now=$SECONDS
local result=0
result=$(( $end - $now ))
if [ $result -lt $default ]; then
return $default
fi
return $result
}
clvmd_stop()
{
local LVM_VGS
local rc=$OCF_SUCCESS
local end=$(( $SECONDS + $CLVMD_TIMEOUT ))
clvmd_status
if [ $? -eq $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS
fi
check_process $DAEMON
if [ $? -ne $OCF_NOT_RUNNING ]; then
LVM_VGS="$(clustered_vgs)"
if [ -n "$LVM_VGS" ]; then
ocf_log info "Deactivating clustered VG(s):"
ocf_run ${LVM_VGCHANGE} -anl $LVM_VGS
if [ $? -ne 0 ]; then
ocf_exit_reason "Failed to deactivate volume groups, cluster vglist = $LVM_VGS"
return $OCF_ERR_GENERIC
fi
fi
ocf_log info "Signaling $DAEMON to exit"
killall -TERM $DAEMON
if [ $? != 0 ]; then
ocf_exit_reason "Failed to signal -TERM to $DAEMON"
return $OCF_ERR_GENERIC
fi
wait_for_process $DAEMON $CLVMD_TIMEOUT
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
ocf_exit_reason "$DAEMON failed to exit"
return $rc
fi
rm -f $LOCK_FILE
fi
check_process $CMIRROR
if [ $? -ne $OCF_NOT_RUNNING ] && ocf_is_true $OCF_RESKEY_with_cmirrord; then
local timeout
ocf_log info "Signaling $CMIRROR to exit"
killall -INT $CMIRROR
time_left $end 10; timeout=$?
wait_for_process $CMIRROR $timeout
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
killall -KILL $CMIRROR
time_left $end 10; timeout=$?
wait_for_process $CMIRROR $(time_left $end 10)
rc=$?
fi
fi
return $rc
}
start_process()
{
local binary_path=$1
local opts=$2
check_process "$(basename $binary_path)"
if [ $? -ne $OCF_SUCCESS ]; then
ocf_log info "Starting $binary_path: "
ocf_run $binary_path $opts
rc=$?
if [ $rc -ne 0 ]; then
ocf_exit_reason "Failed to launch $binary_path, exit code $rc"
exit $OCF_ERR_GENERIC
fi
fi
return $OCF_SUCCESS
}
clvmd_activate_all()
{
if ! ocf_is_true "$OCF_RESKEY_activate_vgs"; then
ocf_log info "skipping vg activation, activate_vgs is set to $OCF_RESKEY_activate_vgs"
return $OCF_SUCCESS
fi
# Activate all volume groups by leaving the
# "volume group name" parameter empty
ocf_run ${LVM_VGCHANGE} -aay
if [ $? -ne 0 ]; then
ocf_log info "Failed to activate VG(s):"
clvmd_stop
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
clvmd_start()
{
local rc=0
local CLVMDOPTS="-T${CLVMD_TIMEOUT} $OCF_RESKEY_daemon_options"
clvmd_validate
if [ $? -ne $OCF_SUCCESS ]; then
ocf_exit_reason "Unable to start, Environment validation failed."
return $?
fi
clvmd_status
if [ $? -eq $OCF_SUCCESS ]; then
ocf_log debug "$DAEMON already started"
clvmd_activate_all
return $?;
fi
# automatically set the locking type to clustered when the lvmconf tool is available
if [ -x "$LVMCONF" ]; then
$LVMCONF --enable-cluster > /dev/null 2>&1
fi
# if either of these fail, script will exit OCF_ERR_GENERIC
if ocf_is_true $OCF_RESKEY_with_cmirrord; then
start_process $CMIRROR_PATH
fi
start_process $DAEMON_PATH $CLVMDOPTS
# Refresh local cache.
#
# It's possible that new PVs were added to this, or other VGs
# while this node was down. So we run vgscan here to avoid
# any potential "Missing UUID" messages with subsequent
# LVM commands.
# The following step would be better and more informative to the user:
# 'action "Refreshing VG(s) local cache:" ${LVM_VGSCAN}'
# but it could show warnings such as:
# 'clvmd not running on node x-y-z Unable to obtain global lock.'
# and the action would be shown as FAILED when in reality it did not fail.
# Ideally vgscan should have a startup mode that would not print
# unnecessary warnings.
${LVM_VGSCAN} > /dev/null 2>&1
touch $LOCK_FILE
clvmd_activate_all
clvmd_status
return $?
}
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS;;
start) clvmd_start;;
stop) clvmd_stop;;
monitor) clvmd_status;;
validate-all) clvmd_validate;;
usage|help) clvmd_usage;;
*) clvmd_usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/docker b/heartbeat/docker
index 7cf10b253..00ef0da16 100755
--- a/heartbeat/docker
+++ b/heartbeat/docker
@@ -1,436 +1,436 @@
#!/bin/sh
#
# The docker HA resource agent creates and launches a docker container
# based off a supplied docker image. Containers managed by this agent
# are both created and removed upon the agent's start and stop actions.
#
-# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
meta_data()
{
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="docker" version="0.9">
<version>1.0</version>
<longdesc lang="en">
The docker HA resource agent creates and launches a docker container
based off a supplied docker image. Containers managed by this agent
are both created and removed upon the agent's start and stop actions.
</longdesc>
<shortdesc lang="en">Docker container resource agent.</shortdesc>
<parameters>
<parameter name="image" required="1" unique="0">
<longdesc lang="en">
The docker image to base this container off of.
</longdesc>
<shortdesc lang="en">docker image</shortdesc>
<content type="string"/>
</parameter>
<parameter name="name" required="0" unique="0">
<longdesc lang="en">
The name to give the created container. By default this will
be that resource's instance name.
</longdesc>
<shortdesc lang="en">docker container name</shortdesc>
<content type="string"/>
</parameter>
<parameter name="allow_pull" unique="0">
<longdesc lang="en">
Allow the image to be pulled from the configured docker registry when
the image does not exist locally. NOTE, this can drastically increase
the time required to start the container if the image repository is
pulled over the network.
</longdesc>
<shortdesc lang="en">Allow pulling non-local images</shortdesc>
<content type="boolean"/>
</parameter>
<parameter name="run_opts" required="0" unique="0">
<longdesc lang="en">
Add options to be appended to the 'docker run' command which is used
when creating the container during the start action. This option allows
users to do things such as setting a custom entry point and injecting
environment variables into the newly created container. Note the '-d'
option is supplied regardless of this value to force containers to run
in the background.
NOTE: Do not explicitly specify the --name argument in the run_opts. This
agent will set --name using either the resource's instance or the name
provided in the 'name' argument of this agent.
</longdesc>
<shortdesc lang="en">run options</shortdesc>
<content type="string"/>
</parameter>
<parameter name="run_cmd" required="0" unique="0">
<longdesc lang="en">
Specify a command to launch within the container once
it has initialized.
</longdesc>
<shortdesc lang="en">run command</shortdesc>
<content type="string"/>
</parameter>
<parameter name="monitor_cmd" required="0" unique="0">
<longdesc lang="en">
Specify the full path of a command to launch within the container to check
the health of the container. This command must return 0 to indicate that
the container is healthy. A non-zero return code will indicate that the
container has failed and should be recovered.
The command is executed using nsenter. In the future 'docker exec' will
be used once it is more widely supported.
</longdesc>
<shortdesc lang="en">monitor command</shortdesc>
<content type="string"/>
</parameter>
<parameter name="force_kill" required="0" unique="0">
<longdesc lang="en">
Kill a container immediately rather than waiting for it to gracefully
shut down.
</longdesc>
<shortdesc lang="en">force kill</shortdesc>
<content type="boolean"/>
</parameter>
<parameter name="reuse" required="0" unique="0">
<longdesc lang="en">
Allow the container to be reused once it has stopped. By default,
containers are removed after stop. With the reuse option, the
container will persist after it stops.
</longdesc>
<shortdesc lang="en">reuse container</shortdesc>
<content type="boolean"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="90" />
<action name="monitor" timeout="30" interval="30" depth="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
REQUIRE_IMAGE_PULL=0
docker_usage()
{
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
monitor_cmd_exec()
{
local rc=$OCF_SUCCESS
local out
if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
return $rc
fi
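# Look up the container's main process PID with 'docker inspect' and use
# nsenter to run monitor_cmd inside that process's mount/uts/ipc/net/pid
# namespaces, capturing combined stdout/stderr for logging.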
out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
rc=$?
if [ $rc -ne 0 ]; then
ocf_log info "monitor cmd exit code = $rc"
ocf_log info "stdout/stderr: $out"
if [ $rc -eq 127 ]; then
ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container."
# there is no recovering from this, exit immediately
exit $OCF_ERR_ARGS
fi
rc=$OCF_ERR_GENERIC
else
ocf_log info "monitor cmd passed: exit code = $rc"
fi
return $rc
}
container_exists()
{
docker inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1
}
remove_container()
{
if ocf_is_true "$OCF_RESKEY_reuse"; then
# never remove the container if we have reuse enabled.
return 0
fi
container_exists
if [ $? -ne 0 ]; then
# don't attempt to remove a container that doesn't exist
return 0
fi
ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
ocf_run docker rm $CONTAINER
}
docker_simple_status()
{
local val
container_exists
if [ $? -ne 0 ]; then
return $OCF_NOT_RUNNING
fi
# retrieve the 'Running' attribute for the container
val=$(docker inspect --format {{.State.Running}} $CONTAINER 2>/dev/null)
if [ $? -ne 0 ]; then
#not running as a result of container not being found
return $OCF_NOT_RUNNING
fi
if ocf_is_true "$val"; then
# container exists and is running
return $OCF_SUCCESS
fi
return $OCF_NOT_RUNNING
}
docker_monitor()
{
local rc=0
docker_simple_status
rc=$?
if [ $rc -ne 0 ]; then
return $rc
fi
monitor_cmd_exec
}
docker_start()
{
local run_opts="-d --name=${CONTAINER}"
# check to see if the container has already started
docker_simple_status
if [ $? -eq $OCF_SUCCESS ]; then
return $OCF_SUCCESS
fi
if [ -n "$OCF_RESKEY_run_opts" ]; then
run_opts="$run_opts $OCF_RESKEY_run_opts"
fi
if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then
ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}"
docker pull "${OCF_RESKEY_image}"
if [ $? -ne 0 ]; then
ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}"
return $OCF_ERR_GENERIC
fi
fi
if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then
ocf_log info "starting existing container $CONTAINER."
ocf_run docker start $CONTAINER
else
# make sure any previous container matching our container name is cleaned up first.
# we already know at this point it wouldn't be running
remove_container
ocf_log info "running container $CONTAINER for the first time"
ocf_run docker run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
fi
if [ $? -ne 0 ]; then
ocf_exit_reason "docker failed to launch container"
return $OCF_ERR_GENERIC
fi
# wait for monitor to pass before declaring that the container is started
while true; do
docker_simple_status
if [ $? -ne $OCF_SUCCESS ]; then
ocf_exit_reason "Newly created docker container exited after start"
return $OCF_ERR_GENERIC
fi
monitor_cmd_exec
if [ $? -eq $OCF_SUCCESS ]; then
ocf_log notice "Container $CONTAINER started successfully"
return $OCF_SUCCESS
fi
ocf_exit_reason "waiting on monitor_cmd to pass after start"
sleep 1
done
}
docker_stop()
{
local timeout=60
docker_simple_status
if [ $? -eq $OCF_NOT_RUNNING ]; then
remove_container
return $OCF_SUCCESS
fi
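# Derive the graceful-stop timeout from the CRM operation timeout (given in
# milliseconds), keeping roughly 10 seconds of headroom for the container
# removal below, but never waiting less than 10 seconds.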
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000) -10 ))
if [ $timeout -lt 10 ]; then
timeout=10
fi
fi
if ocf_is_true "$OCF_RESKEY_force_kill"; then
ocf_run docker kill $CONTAINER
else
ocf_log debug "waiting $timeout second[s] before killing container"
ocf_run docker stop -t=$timeout $CONTAINER
fi
if [ $? -ne 0 ]; then
ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
return $OCF_ERR_GENERIC
fi
remove_container
if [ $? -ne 0 ]; then
ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
image_exists()
{
# assume that OCF_RESKEY_name has been validated
local IMAGE_NAME="$(echo ${OCF_RESKEY_name} | awk -F':' '{print $1}')"
# if no tag was specified, use default "latest"
local COLON_FOUND=0
local IMAGE_TAG="latest"
COLON_FOUND="$(echo "${OCF_RESKEY_name}" | grep -o ':' | grep -c .)"
if [ ${COLON_FOUND} -ne 0 ]; then
IMAGE_TAG="$(echo ${OCF_RESKEY_name} | awk -F':' '{print $NF}')"
fi
# IMAGE_NAME might be following formats:
# - image
# - repository/image
# - docker.io/image (some distro will display "docker.io/" as prefix)
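# match "name:tag" in the local image list, tolerating the optional
# "docker.io/" prefix described above.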
docker images | awk '{print $1 ":" $2}' | egrep -q -s "^(docker.io\/)?${IMAGE_NAME}:${IMAGE_TAG}\$"
if [ $? -eq 0 ]; then
# image found
return 0
fi
if ocf_is_true "$OCF_RESKEY_allow_pull"; then
REQUIRE_IMAGE_PULL=1
ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start"
return 0
fi
# image not found.
return 1
}
docker_validate()
{
check_binary docker
if [ -z "$OCF_RESKEY_image" ]; then
ocf_exit_reason "'image' option is required"
exit $OCF_ERR_CONFIGURED
fi
if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified"
check_binary nsenter
fi
image_exists
if [ $? -ne 0 ]; then
ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
exit $OCF_ERR_CONFIGURED
fi
return $OCF_SUCCESS
}
: ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}}
if [ -n "$OCF_RESKEY_container" ]; then
# we'll keep the container attribute around for a bit in order not to break
# any existing deployments. The 'name' attribute is preferred now though.
CONTAINER=$OCF_RESKEY_container
ocf_log warn "The 'container' attribute is deprecated"
else
CONTAINER=$OCF_RESKEY_name
fi
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS;;
start)
docker_validate
docker_start;;
stop) docker_stop;;
monitor) docker_monitor;;
validate-all) docker_validate;;
usage|help) docker_usage
exit $OCF_SUCCESS
;;
*) docker_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/galera b/heartbeat/galera
index d74a70daa..920507bc0 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -1,721 +1,721 @@
#!/bin/sh
#
-# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
##
# README.
#
# This agent only supports being configured as a multistate Master
# resource.
#
# Slave vs Master role:
#
# During the 'Slave' role, galera instances are in read-only mode and
# will not attempt to connect to the cluster. This role exists only as
# a means to determine which galera instance is the most up-to-date. The
# most up-to-date node will be used to bootstrap a galera cluster that
# has no current members.
#
# The galera instances will only begin to be promoted to the Master role
# once all the nodes in the 'wsrep_cluster_address' connection address
# have entered read-only mode. At that point the node containing the
# database that is most current will be promoted to Master. Once the first
# Master instance bootstraps the galera cluster, the other nodes will be
# promoted to Master as well.
#
# Example: Create a galera cluster using nodes rhel7-node1 rhel7-node2 rhel7-node3
#
# pcs resource create db galera enable_creation=true \
# wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master
#
# By setting the 'enable_creation' option, the database will be automatically
# generated at startup. The meta attribute 'master-max=3' means that all 3
# nodes listed in the wsrep_cluster_address list will be allowed to connect
# to the galera cluster and perform replication.
#
# NOTE: If you have more nodes in the pacemaker cluster than you wish
# to have in the galera cluster, make sure to use location constraints to prevent
# pacemaker from attempting to place a galera instance on a node that is
# not in the 'wsrep_cluster_address' list.
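#
# For example (the master resource and node names below are only
# illustrative; adjust them to your configuration), a location constraint
# such as:
#
#    pcs constraint location db-master avoids rhel7-node4
#
# keeps the galera instance off a node that is not listed in
# 'wsrep_cluster_address'.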
#
##
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/mysql-common.sh
# It is common for some galera installations to store the
# check user that can be used to query status
# in this file
if [ -f "/etc/sysconfig/clustercheck" ]; then
. /etc/sysconfig/clustercheck
fi
#######################################################################
usage() {
cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|monitor|promote|demote)
$0 manages a galera Database as an HA resource.
The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'status' operation reports whether the database is running
The 'monitor' operation reports whether the database seems to be working
The 'promote' operation makes this mysql server run as master
The 'demote' operation makes this mysql server run as slave
The 'validate-all' operation reports whether the parameters are valid
UEND
}
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="galera">
<version>1.0</version>
<longdesc lang="en">
Resource script for managing the galera database.
</longdesc>
<shortdesc lang="en">Manages a galera instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL server binary
</longdesc>
<shortdesc lang="en">MySQL server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MySQL config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">MySQL datadir</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MySQL daemon
</longdesc>
<shortdesc lang="en">MySQL user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MySQL daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MySQL group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="log" unique="0" required="0">
<longdesc lang="en">
The logfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_default}"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="enable_creation" unique="0" required="0">
<longdesc lang="en">
If the MySQL database does not exist, it will be created
</longdesc>
<shortdesc lang="en">Create the database if it does not exist</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="wsrep_cluster_address" unique="0" required="1">
<longdesc lang="en">
The galera cluster address. This takes the form of:
gcomm://node,node,node
Only nodes present in this node list will be allowed to start a galera instance.
It is expected that the galera node names listed in this address match valid
pacemaker node names.
</longdesc>
<shortdesc lang="en">Galera cluster address</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="check_user" unique="0" required="0">
<longdesc lang="en">
Cluster check user.
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="root" />
</parameter>
<parameter name="check_passwd" unique="0" required="0">
<longdesc lang="en">
Cluster check user password
</longdesc>
<shortdesc lang="en">check password</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30" interval="20" />
<action name="monitor" role="Master" depth="0" timeout="30" interval="10" />
<action name="monitor" role="Slave" depth="0" timeout="30" interval="30" />
<action name="promote" timeout="300" />
<action name="demote" timeout="120" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
get_option_variable()
{
local key=$1
$MYSQL $MYSQL_OPTIONS_CHECK -e "SHOW VARIABLES like '$key';" | tail -1
}
get_status_variable()
{
local key=$1
$MYSQL $MYSQL_OPTIONS_CHECK -e "show status like '$key';" | tail -1
}
set_bootstrap_node()
{
local node=$1
${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -v "true"
}
clear_bootstrap_node()
{
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -D
}
is_bootstrap()
{
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -Q 2>/dev/null
}
clear_last_commit()
{
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D
}
set_last_commit()
{
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -v $1
}
get_last_commit()
{
local node=$1
if [ -z "$node" ]; then
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -Q 2>/dev/null
else
${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -Q 2>/dev/null
fi
}
wait_for_sync()
{
local state=$(get_status_variable "wsrep_local_state")
ocf_log info "Waiting for database to sync with the cluster. "
while [ "$state" != "4" ]; do
sleep 1
state=$(get_status_variable "wsrep_local_state")
done
ocf_log info "Database synced."
}
is_primary()
{
cluster_status=$(get_status_variable "wsrep_cluster_status")
if [ "$cluster_status" = "Primary" ]; then
return 0
fi
if [ -z "$cluster_status" ]; then
ocf_exit_reason "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status"
else
ocf_log info "Galera instance wsrep_cluster_status=${cluster_status}"
fi
return 1
}
is_readonly()
{
local res=$(get_option_variable "read_only")
if ! ocf_is_true "$res"; then
return 1
fi
cluster_status=$(get_status_variable "wsrep_cluster_status")
if ! [ "$cluster_status" = "Disconnected" ]; then
return 1
fi
return 0
}
master_exists()
{
if [ "$__OCF_ACTION" = "demote" ]; then
# We don't want to detect master instances during demote.
# 1. we could be detecting ourselves as being master, which is no longer the case.
# 2. we could be detecting other master instances that are in the process of shutting down.
# by not detecting other master instances in "demote" we are deferring this check
# to the next recurring monitor operation which will be much more accurate
return 1
fi
# determine if a master instance is already up and is healthy
crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
return $?
}
clear_master_score()
{
local node=$1
if [ -z "$node" ]; then
$CRM_MASTER -D
else
$CRM_MASTER -D -N $node
fi
}
set_master_score()
{
local node=$1
if [ -z "$node" ]; then
$CRM_MASTER -v 100
else
$CRM_MASTER -N $node -v 100
fi
}
promote_everyone()
{
for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
set_master_score $node
done
}
greater_than_equal_long()
{
# there are values we need to compare in this script
# that are too large for shell -gt to process
echo | awk -v n1="$1" -v n2="$2" '{if (n1>=n2) printf ("true"); else printf ("false");}' | grep -q "true"
}
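# detect_first_master: elect the bootstrap node by comparing the
# last-committed sequence numbers reported by every node in
# wsrep_cluster_address. If any node has not reported yet, do nothing and
# defer to a later monitor pass; otherwise give the most up-to-date node
# the master score and mark it as the bootstrap node.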
detect_first_master()
{
local best_commit=0
local best_node="$NODENAME"
local last_commit=0
local missing_nodes=0
for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
last_commit=$(get_last_commit $node)
if [ -z "$last_commit" ]; then
ocf_log info "Waiting on node <${node}> to report database status before Master instances can start."
missing_nodes=1
continue
fi
# this means -1, or that no commit has occurred yet.
if [ "$last_commit" = "18446744073709551615" ]; then
last_commit="0"
fi
greater_than_equal_long "$last_commit" "$best_commit"
if [ $? -eq 0 ]; then
best_node=$node
best_commit=$last_commit
fi
done
if [ $missing_nodes -eq 1 ]; then
return
fi
ocf_log info "Promoting $best_node to be our bootstrap node"
set_master_score $best_node
set_bootstrap_node $best_node
}
# For galera, promote is really start
galera_promote()
{
local rc
local extra_opts
local bootstrap
master_exists
if [ $? -eq 0 ]; then
# join without bootstrapping
extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}"
else
bootstrap=$(is_bootstrap)
if ocf_is_true $bootstrap; then
ocf_log info "Node <${NODENAME}> is bootstrapping the cluster"
extra_opts="--wsrep-cluster-address=gcomm://"
else
ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected."
clear_last_commit
return $OCF_ERR_GENERIC
fi
fi
galera_monitor
if [ $? -eq $OCF_RUNNING_MASTER ]; then
if ocf_is_true $bootstrap; then
promote_everyone
clear_bootstrap_node
ocf_log info "boostrap node already up, promoting the rest of the galera instances."
fi
clear_last_commit
return $OCF_SUCCESS
fi
# last commit is no longer relevant once promoted
clear_last_commit
mysql_common_prepare_dirs
mysql_common_start "$extra_opts"
rc=$?
if [ $rc != $OCF_SUCCESS ]; then
return $rc
fi
galera_monitor
rc=$?
if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
ocf_exit_reason "Failed initial monitor action"
return $rc
fi
is_readonly
if [ $? -eq 0 ]; then
ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration."
return $OCF_ERR_GENERIC
fi
is_primary
if [ $? -ne 0 ]; then
ocf_exit_reason "Failure. Master instance started, but is not in Primary mode."
return $OCF_ERR_GENERIC
fi
if ocf_is_true $bootstrap; then
promote_everyone
clear_bootstrap_node
ocf_log info "Bootstrap complete, promoting the rest of the galera instances."
else
# if this is not the bootstrap node, make sure this instance
# syncs with the rest of the cluster before promotion returns.
wait_for_sync
fi
ocf_log info "Galera started"
return $OCF_SUCCESS
}
galera_demote()
{
mysql_common_stop
rc=$?
if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
ocf_exit_reason "Failed to stop Master galera instance during demotion to Master"
return $rc
fi
# if this node was previously a bootstrap node, that is no longer the case.
clear_bootstrap_node
clear_last_commit
# record last commit by "starting" galera. start is just detection of the last sequence number
galera_start
}
galera_start()
{
local last_commit
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
if [ $? -ne 0 ]; then
ocf_exit_reason "local node <${NODENAME}> must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>to start this galera instance"
return $OCF_ERR_CONFIGURED
fi
galera_monitor
if [ $? -eq $OCF_RUNNING_MASTER ]; then
ocf_exit_reason "master galera instance started outside of the cluster's control"
return $OCF_ERR_GENERIC
fi
mysql_common_prepare_dirs
ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
local tmp=$(mktemp)
${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
--pid-file=$OCF_RESKEY_pid \
--socket=$OCF_RESKEY_socket \
--datadir=$OCF_RESKEY_datadir \
--user=$OCF_RESKEY_user \
--wsrep-recover > $tmp 2>&1
last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')"
rm -f $tmp
if [ "$last_commit" = "-1" ]; then
last_commit="0"
fi
fi
if [ -z "$last_commit" ]; then
ocf_exit_reason "Unable to detect last known write sequence number"
clear_last_commit
return $OCF_ERR_GENERIC
fi
ocf_log info "Last commit version found: $last_commit"
set_last_commit $last_commit
master_exists
if [ $? -eq 0 ]; then
ocf_log info "Master instances are already up, setting master score so this instance will join galera cluster."
set_master_score $NODENAME
else
clear_master_score
detect_first_master
fi
return $OCF_SUCCESS
}
galera_monitor()
{
local rc
local status_loglevel="err"
# Set loglevel to info during probe
if ocf_is_probe; then
status_loglevel="info"
fi
mysql_common_status $status_loglevel
rc=$?
if [ $rc -eq $OCF_NOT_RUNNING ]; then
last_commit=$(get_last_commit $node)
if [ -n "$last_commit" ]; then
# if last commit is set, this instance is considered started in slave mode
rc=$OCF_SUCCESS
master_exists
if [ $? -ne 0 ]; then
detect_first_master
else
# a master instance exists and is healthy, promote this
# local read only instance
# so it can join the master galera cluster.
set_master_score
fi
fi
return $rc
elif [ $rc -ne $OCF_SUCCESS ]; then
return $rc
fi
# if we make it here, mysql is running. Check cluster status now.
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
if [ $? -ne 0 ]; then
ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
return $OCF_ERR_GENERIC
fi
is_primary
if [ $? -eq 0 ]; then
if ocf_is_probe; then
# restore master score during probe
# if we detect this is a master instance
set_master_score
fi
rc=$OCF_RUNNING_MASTER
else
ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
rc=$OCF_ERR_GENERIC
fi
return $rc
}
galera_stop()
{
local rc
# make sure the process is stopped
mysql_common_stop
rc=$?
clear_last_commit
clear_master_score
clear_bootstrap_node
return $rc
}
galera_validate()
{
if ! ocf_is_ms; then
ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource."
return $OCF_ERR_CONFIGURED
fi
if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then
ocf_exit_reason "Galera must be configured with a wsrep_cluster_address value."
return $OCF_ERR_CONFIGURED
fi
mysql_common_validate
}
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage|help) usage
exit $OCF_SUCCESS;;
esac
galera_validate
rc=$?
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
case "$1" in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
*) exit $rc;;
esac
fi
if [ -z "${OCF_RESKEY_check_passwd}" ]; then
# This value is automatically sourced from /etc/sysconfig/clustercheck if available
OCF_RESKEY_check_passwd=${MYSQL_PASSWORD}
fi
if [ -z "${OCF_RESKEY_check_user}" ]; then
# This value is automatically sourced from /etc/sysconfig/clustercheck if available
OCF_RESKEY_check_user=${MYSQL_USERNAME}
fi
: ${OCF_RESKEY_check_user="root"}
MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
if [ -n "${OCF_RESKEY_check_passwd}" ]; then
MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
fi
# What kind of method was invoked?
case "$1" in
start) galera_start;;
stop) galera_stop;;
status) mysql_common_status err;;
monitor) galera_monitor;;
promote) galera_promote;;
demote) galera_demote;;
validate-all) exit $OCF_SUCCESS;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
# vi:sw=4:ts=4:et:
diff --git a/heartbeat/nfsnotify b/heartbeat/nfsnotify
index 5f72d586a..b8dc1e408 100755
--- a/heartbeat/nfsnotify
+++ b/heartbeat/nfsnotify
@@ -1,315 +1,315 @@
#!/bin/bash
#
-# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/ocf-directories
#######################################################################
sbindir=$HA_SBIN_DIR
if [ -z "$sbindir" ]; then
sbindir=/usr/sbin
fi
SELINUX_ENABLED=-1
NFSNOTIFY_TMP_DIR="${HA_RSCTMP}/nfsnotify_${OCF_RESOURCE_INSTANCE}/"
HA_STATD_PIDFILE="$NFSNOTIFY_TMP_DIR/rpc.statd_${OCF_RESOURCE_INSTANCE}.pid"
HA_STATD_PIDFILE_PREV="$NFSNOTIFY_TMP_DIR/rpc.statd_${OCF_RESOURCE_INSTANCE}.pid.prev"
STATD_PATH="/var/lib/nfs/statd"
SM_NOTIFY_BINARY="${sbindir}/sm-notify"
IS_RENOTIFY=0
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="nfsnotify" version="0.9">
<version>1.0</version>
<longdesc lang="en">
This agent sends NFSv3 reboot notifications to clients, which tells the clients to reclaim their locks.
</longdesc>
<shortdesc lang="en">sm-notify reboot notifications</shortdesc>
<parameters>
<parameter name="source_host" unique="0" required="0">
<longdesc lang="en">
Comma separated list of floating IP addresses or host names that clients use
to access the nfs service. This will be used to set the source address and
mon_name of the SM_NOTIFY reboot notifications.
</longdesc>
<shortdesc lang="en">source IP addresses</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="notify_args" unique="0" required="0">
<longdesc lang="en">
Additional arguments to send to the sm-notify command. By default
this agent will always set sm-notify's '-f' option. When the
source_host option is set, the '-v' option will be used automatically
to set the proper source address. Any additional sm-notify arguments
set with this option will be used in addition to the previous default
arguments.
</longdesc>
<shortdesc lang="en">sm-notify arguments</shortdesc>
<content type="string" default="false" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="90" />
<action name="monitor" timeout="90" interval="30" depth="0" />
<action name="reload" timeout="90" />
<action name="meta-data" timeout="10" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END
}
v3notify_usage()
{
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
v3notify_validate()
{
# check_binary will exit with OCF_ERR_INSTALLED when binary is missing
check_binary "$SM_NOTIFY_BINARY"
check_binary "pgrep"
check_binary "killall"
return $OCF_SUCCESS
}
killall_smnotify()
{
# killall sm-notify
killall -TERM $SM_NOTIFY_BINARY > /dev/null 2>&1
if [ $? -eq 0 ]; then
# it is useful to know if sm-notify processes were actually left around
# or not during the stop/start operation. Whether this condition is true
# or false does not indicate a failure. It does indicate that
# there are probably some unresponsive nfs clients out there that are keeping
# the sm-notify processes retrying.
ocf_log info "previous sm-notify processes terminated before $__OCF_ACTION action."
fi
}
v3notify_stop()
{
killall_smnotify
rm -f $HA_STATD_PIDFILE_PREV > /dev/null 2>&1
mv $HA_STATD_PIDFILE $HA_STATD_PIDFILE_PREV > /dev/null 2>&1
return $OCF_SUCCESS
}
check_statd_pidfile()
{
local binary="rpc.statd"
local pidfile="$HA_STATD_PIDFILE"
ocf_log debug "Checking status for ${binary}."
if [ -e "$pidfile" ]; then
cat /proc/$(cat $pidfile)/cmdline 2>/dev/null | grep -a "${binary}" > /dev/null 2>&1
if [ $? -eq 0 ]; then
return $OCF_SUCCESS
fi
ocf_exit_reason "$(cat $pidfile) for $binary is no longer running, sm-notify needs to re-notify clients"
return $OCF_ERR_GENERIC
fi
# if we don't have a pid file for rpc.statd, we have not yet sent the notifications
return $OCF_NOT_RUNNING
}
write_statd_pid()
{
local binary="rpc.statd"
local pidfile="$HA_STATD_PIDFILE"
local pid
pid=$(pgrep ${binary})
case $? in
0)
ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}."
mkdir -p $(dirname $pidfile)
echo "$pid" > $pidfile
return $OCF_SUCCESS;;
1)
rm -f "$pidfile" > /dev/null 2>&1
ocf_log info "$binary is not running"
return $OCF_NOT_RUNNING;;
*)
rm -f "$pidfile" > /dev/null 2>&1
ocf_exit_reason "Error encountered detecting pid status of $binary"
return $OCF_ERR_GENERIC;;
esac
}
copy_statd()
{
local src=$1
local dest=$2
if ! [ -d "$dest" ]; then
mkdir -p "$dest"
fi
cp -rpfn $src/sm $src/sm.bak $src/state $dest > /dev/null 2>&1
# make sure folder ownership and selinux labels stay consistent
[ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$dest"
[ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$dest"
}
v3notify_start()
{
local rc=$OCF_SUCCESS
local cur_statd
local statd_backup
local is_renotify=0
# monitor, see if we need to notify or not
v3notify_monitor
if [ $? -eq 0 ]; then
return $OCF_SUCCESS
fi
# kill off any other sm-notify processes that might already be running.
killall_smnotify
# record the pid of rpc.statd. if this pid ever changes, we have to re-notify
write_statd_pid
rc=$?
if [ $rc -ne 0 ]; then
return $rc
fi
# if the last time we ran nfs-notify, it was with the same statd process,
# consider this a re-notification. During re-notifications we do not let the
# sm-notify binary have access to the real statd directory.
if [ "$(cat $HA_STATD_PIDFILE)" = "$(cat $HA_STATD_PIDFILE_PREV 2>/dev/null)" ]; then
ocf_log info "Renotifying clients"
is_renotify=1
fi
statd_backup="$STATD_PATH/nfsnotify.bu"
copy_statd "$STATD_PATH" "$statd_backup"
if [ -z "$OCF_RESKEY_source_host" ]; then
if [ "$is_renotify" -eq 0 ]; then
cur_statd="$STATD_PATH"
else
cur_statd="$statd_backup"
fi
ocf_log info "sending notifications on default source address."
$SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -P $cur_statd
if [ $? -ne 0 ]; then
ocf_exit_reason "sm-notify execution failed, view syslog for more information"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
fi
# do sm-notify for each ip
for ip in `echo ${OCF_RESKEY_source_host} | sed 's/,/ /g'`; do
# have the first sm-notify use the actual statd directory so the
# notify list can be managed properly.
if [ "$is_renotify" -eq 0 ]; then
cur_statd="$STATD_PATH"
# everything after the first notify we are considering a renotification
# which means we don't use the real statd directory.
is_renotify=1
else
# use our copied statd directory for the remaining ip addresses
cur_statd="$STATD_PATH/nfsnotify_${OCF_RESOURCE_INSTANCE}_${ip}"
copy_statd "$statd_backup" "$cur_statd"
fi
ocf_log info "sending notifications with source address $ip"
$SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -v $ip -P "$cur_statd"
if [ $? -ne 0 ]; then
ocf_exit_reason "sm-notify with source host set to [ $ip ] failed. view syslog for more information"
return $OCF_ERR_GENERIC
fi
done
return $OCF_SUCCESS
}
v3notify_monitor()
{
# verify rpc.statd is up, and that the rpc.statd pid is the same one we
# found during the start. otherwise rpc.statd recovered and we need to notify
# again.
check_statd_pidfile
}
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage|help) v3notify_usage
exit $OCF_SUCCESS;;
*)
;;
esac
which restorecon > /dev/null 2>&1 && selinuxenabled
SELINUX_ENABLED=$?
if [ $SELINUX_ENABLED -eq 0 ]; then
export SELINUX_LABEL="$(ls -ldZ $STATD_PATH | cut -f4 -d' ')"
fi
case $__OCF_ACTION in
start) v3notify_start;;
stop) v3notify_stop;;
monitor) v3notify_monitor;;
validate-all) v3notify_validate;;
*) v3notify_usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 5b55f690b..cc45f09ad 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -1,370 +1,370 @@
#!/bin/sh
#
-# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
RMQ_SERVER=/usr/sbin/rabbitmq-server
RMQ_CTL=/usr/sbin/rabbitmqctl
RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia"
RMQ_PID_DIR="/var/run/rabbitmq"
RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid"
RMQ_LOG_DIR="/var/log/rabbitmq"
NODENAME=$(ocf_local_nodename)
RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}"
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="rabbitmq-cluster" version="0.9">
<version>1.0</version>
<longdesc lang="en">
Starts cloned rabbitmq cluster instance
</longdesc>
<shortdesc lang="en">rabbitmq clustered</shortdesc>
<parameters>
<parameter name="set_policy" unique="1">
<longdesc lang="en">
Policy string to pass to 'rabbitmqctl set_policy' right after bootstrapping the first rabbitmq instance.
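For example, a classic mirrored-queue policy (the policy name here is only illustrative) could be passed as: ha-all "" '{"ha-mode":"all"}'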
</longdesc>
<shortdesc lang="en">rabbitmqctl set_policy args</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="100" />
<action name="stop" timeout="90" />
<action name="monitor" timeout="40" interval="10" depth="0" />
<action name="meta-data" timeout="10" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END
}
#######################################################################
rmq_usage() {
cat <<END
usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
rmq_wipe_data()
{
rm -rf $RMQ_DATA_DIR > /dev/null 2>&1
}
rmq_local_node()
{
local node_name=$(rabbitmqctl status 2>&1 | sed -n -e "s/^.*[S|s]tatus of node \(.*\)\s.*$/\1/p" | tr -d "'")
if [ -z "$node_name" ]; then
node_name=$(cat /etc/rabbitmq/rabbitmq-env.conf 2>/dev/null | grep "\s*RABBITMQ_NODENAME=" | awk -F= '{print $2}')
fi
echo "$node_name"
}
rmq_join_list()
{
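# Query the CIB for the rabbitmq node-name attribute (written by
# rmq_write_nodename) on every pacemaker node whose crmd is online; the
# resulting names are the running peers this node can try to join.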
cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p"
}
rmq_write_nodename()
{
local node_name=$(rmq_local_node)
if [ -z "$node_name" ]; then
ocf_log err "Failed to determine rabbitmq node name, exiting"
exit $OCF_ERR_GENERIC
fi
# store the pcmknode to rmq node mapping as an attribute
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$node_name"
}
rmq_delete_nodename()
{
# remove node-name
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -D
}
prepare_dir () {
if [ ! -d ${1} ] ; then
mkdir -p ${1}
chown -R rabbitmq:rabbitmq ${1}
chmod 755 ${1}
fi
}
remove_pid () {
rm -f ${RMQ_PID_FILE} > /dev/null 2>&1
}
rmq_monitor() {
local rc
$RMQ_CTL cluster_status > /dev/null 2>&1
rc=$?
case "$rc" in
0)
ocf_log debug "RabbitMQ server is running normally"
rmq_write_nodename
return $OCF_SUCCESS
;;
2)
ocf_log info "RabbitMQ server is not running"
rmq_delete_nodename
return $OCF_NOT_RUNNING
;;
*)
ocf_log err "Unexpected return code from '$RMQ_CTL cluster status' exit code: $rc"
rmq_delete_nodename
return $OCF_ERR_GENERIC
;;
esac
}
rmq_init_and_wait()
{
local rc
prepare_dir $RMQ_PID_DIR
prepare_dir $RMQ_LOG_DIR
remove_pid
# the server startup script uses this environment variable
export RABBITMQ_PID_FILE="$RMQ_PID_FILE"
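# Launch the server detached from our session; 'rabbitmqctl wait' below
# blocks until the pid file exists and the rabbit application reports it
# is running.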
setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" &
ocf_log info "Waiting for server to start"
$RMQ_CTL wait $RMQ_PID_FILE
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
remove_pid
ocf_log info "rabbitmq-server start failed: $rc"
return $OCF_ERR_GENERIC
fi
rmq_monitor
return $?
}
rmq_set_policy()
{
$RMQ_CTL set_policy $@ > /dev/null 2>&1
}
rmq_start_first()
{
local rc
ocf_log info "Bootstrapping rabbitmq cluster"
rmq_wipe_data
rmq_init_and_wait
rc=$?
if [ $rc -eq 0 ]; then
rc=$OCF_SUCCESS
ocf_log info "cluster bootstrapped"
if [ -n "$OCF_RESKEY_set_policy" ]; then
# do not quote set_policy, we are passing in arguments
rmq_set_policy $OCF_RESKEY_set_policy > /dev/null 2>&1
if [ $? -ne 0 ]; then
ocf_log err "Failed to set policy: $OCF_RESKEY_set_policy"
rc=$OCF_ERR_GENERIC
else
ocf_log info "Policy set: $OCF_RESKEY_set_policy"
fi
fi
else
ocf_log info "failed to bootstrap cluster. Check SELINUX policy"
rc=$OCF_ERR_GENERIC
fi
return $rc
}
rmq_join_existing()
{
local join_list="$1"
local rc=$OCF_ERR_GENERIC
ocf_log info "Joining existing cluster with [ $(echo $join_list | tr '\n' ' ') ] nodes."
rmq_init_and_wait
if [ $? -ne 0 ]; then
return $OCF_ERR_GENERIC
fi
# unconditionally join the cluster
$RMQ_CTL stop_app > /dev/null 2>&1
for node in $(echo "$join_list"); do
ocf_log info "Attempting to join cluster with target node $node"
$RMQ_CTL join_cluster $node
if [ $? -eq 0 ]; then
ocf_log info "Joined cluster by connecting to node $node, starting app"
$RMQ_CTL start_app
rc=$?
if [ $rc -ne 0 ]; then
ocf_log err "'$RMQ_CTL start_app' failed"
fi
break;
fi
done
if [ "$rc" -ne 0 ]; then
ocf_log info "Join process incomplete, shutting down."
return $OCF_ERR_GENERIC
fi
ocf_log info "Successfully joined existing rabbitmq cluster"
return $OCF_SUCCESS
}
rmq_start() {
local join_list=""
local rc
rmq_monitor
if [ $? -eq $OCF_SUCCESS ]; then
return $OCF_SUCCESS
fi
join_list=$(rmq_join_list)
# No join list means no active instances are up. This instance
# is the first, so it needs to bootstrap the rest
if [ -z "$join_list" ]; then
rmq_start_first
rc=$?
return $rc
fi
# first try to join without wiping mnesia data
rmq_join_existing "$join_list"
if [ $? -ne 0 ]; then
ocf_log info "node failed to join, wiping data directory and trying again"
# if the graceful join fails, use the hammer and reset all the data.
rmq_stop
rmq_wipe_data
rmq_join_existing "$join_list"
if [ $? -ne 0 ]; then
ocf_log info "node failed to join even after reseting local data. Check SELINUX policy"
return $OCF_ERR_GENERIC
fi
fi
return $OCF_SUCCESS
}
rmq_stop() {
rmq_monitor
if [ $? -eq $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS
fi
$RMQ_CTL stop
rc=$?
if [ $rc -ne 0 ]; then
ocf_log err "rabbitmq-server stop command failed: $RMQ_CTL stop, $rc"
return $rc
fi
#TODO add kill logic
stop_wait=1
while [ $stop_wait = 1 ]; do
rmq_monitor
rc=$?
if [ "$rc" -eq $OCF_NOT_RUNNING ]; then
stop_wait=0
break
elif [ "$rc" -ne $OCF_SUCCESS ]; then
ocf_log info "rabbitmq-server stop failed: $rc"
exit $OCF_ERR_GENERIC
fi
sleep 1
done
remove_pid
return $OCF_SUCCESS
}
rmq_validate() {
check_binary $RMQ_SERVER
check_binary $RMQ_CTL
# This resource only makes sense as a clone right now. At some point
# we may want to verify the following.
#TODO verify cloned
#TODO verify ordered=true
# Given that this resource does the cluster join explicitly,
# having a cluster_nodes list in the static config file will
# likely conflict with this agent.
#TODO verify no cluster list in rabbitmq conf
#cat /etc/rabbitmq/rabbitmq.config | grep "cluster_nodes"
return $OCF_SUCCESS
}
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
start) rmq_start;;
stop) rmq_stop;;
monitor) rmq_monitor;;
validate-all) rmq_validate;;
usage|help) rmq_usage
exit $OCF_SUCCESS
;;
*) rmq_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/rgmanager/src/resources/db2.sh b/rgmanager/src/resources/db2.sh
index f396ff62f..57991f926 100755
--- a/rgmanager/src/resources/db2.sh
+++ b/rgmanager/src/resources/db2.sh
@@ -1,133 +1,133 @@
#!/bin/bash
#
# Copyright (c) 2011 Holger Teutsch <holger.teutsch@web.de>
-# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# NOTE:
#
# This agent is a wrapper around the heartbeat/db2 agent which limits the heartbeat
# db2 agent to Standard role support. This allows cluster managers such as rgmanager
# which do not have multi-state resource support to manage db2 instances with
# a limited feature set.
#
export LC_ALL=C
export LANG=C
export PATH=/bin:/sbin:/usr/bin:/usr/sbin
. $(dirname $0)/ocf-shellfuncs
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="db2.sh">
<version>1.0</version>
<longdesc lang="en">
Resource Agent that manages IBM DB2 LUW databases in Standard role. Multiple partitions are supported.
When partitions are in use, each partition must be configured as a separate primitive resource.
</longdesc>
<shortdesc lang="en">Resource Agent that manages IBM DB2 LUW databases in Standard role with multiple partition support.</shortdesc>
<parameters>
<parameter name="instance" unique="1" required="1">
<longdesc lang="en">
The instance of the database(s).
</longdesc>
<shortdesc lang="en">instance</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="dblist" unique="0" required="0">
<longdesc lang="en">
List of databases to be managed, e.g. "db1 db2".
Defaults to all databases in the instance.
</longdesc>
<shortdesc lang="en">List of databases to be managed</shortdesc>
<content type="string"/>
</parameter>
<parameter name="dbpartitionnum" unique="0" required="0">
<longdesc lang="en">
The number of the partition (DBPARTITIONNUM) to be managed.
</longdesc>
<shortdesc lang="en">database partition number (DBPARTITIONNUM)</shortdesc>
<content type="string" default="0" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120"/>
<action name="stop" timeout="120"/>
<action name="monitor" depth="0" timeout="60" interval="20"/>
<action name="monitor" depth="0" timeout="60" role="Master" interval="22"/>
<action name="validate-all" timeout="5"/>
<action name="meta-data" timeout="5"/>
</actions>
</resource-agent>
END
}
heartbeat_db2_wrapper()
{
# default heartbeat agent ocf root.
export OCF_ROOT=/usr/lib/ocf
heartbeat_db2="${OCF_ROOT}/resource.d/heartbeat/db2"
if ! [ -a $heartbeat_db2 ]; then
echo "heartbeat db2 agent not found at '${heartbeat_db2}'"
exit $OCF_ERR_INSTALLED
fi
$heartbeat_db2 $1
}
case $1 in
meta-data)
meta_data
exit 0
;;
validate-all)
heartbeat_db2_wrapper $1
exit $?
;;
start)
heartbeat_db2_wrapper $1
exit $?
;;
stop)
heartbeat_db2_wrapper $1
exit $?
;;
status|monitor)
heartbeat_db2_wrapper "monitor"
exit $?
;;
restart)
heartbeat_db2_wrapper "stop"
rc=$?
if [ $rc -ne 0 ]; then
exit $rc
fi
heartbeat_db2_wrapper "start"
exit $?
;;
*)
echo "Usage: db2.sh {start|stop|monitor|validate-all|meta-data}"
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
