Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/configure.ac b/configure.ac
index 951a05430..bb2bbaded 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,1110 +1,1110 @@
dnl
dnl autoconf for Agents
dnl
dnl License: GNU General Public License (GPL)
dnl ===============================================
dnl Bootstrap
dnl ===============================================
AC_PREREQ(2.63)
dnl Suggested structure:
dnl information on the package
dnl checks for programs
dnl checks for libraries
dnl checks for header files
dnl checks for types
dnl checks for structures
dnl checks for compiler characteristics
dnl checks for library functions
dnl checks for system services
AC_INIT([resource-agents],
m4_esyscmd([make/git-version-gen .tarball-version]),
[developers@clusterlabs.org])
AC_USE_SYSTEM_EXTENSIONS
CRM_DTD_VERSION="1.0"
AC_CONFIG_AUX_DIR(.)
AC_CONFIG_MACRO_DIR([m4])
AC_CANONICAL_HOST
dnl Where #defines go (e.g. `AC_CHECK_HEADERS' below)
dnl
dnl Internal header: include/config.h
dnl - Contains ALL defines
dnl - include/config.h.in is generated automatically by autoheader
dnl - NOT to be included in any header files except lha_internal.h
dnl (which is also not to be included in any other header files)
dnl
dnl External header: include/agent_config.h
dnl - Contains a subset of defines checked here
dnl - Manually edit include/agent_config.h.in to have configure include new defines
dnl - Should not include HAVE_* defines
dnl - Safe to include anywhere
AM_CONFIG_HEADER(include/config.h include/agent_config.h)
ALL_LINGUAS="en fr"
AC_ARG_WITH(version,
[ --with-version=version Override package version (if you're a packager needing to pretend) ],
[ PACKAGE_VERSION="$withval" ])
AC_ARG_WITH(pkg-name,
[ --with-pkg-name=name Override package name (if you're a packager needing to pretend) ],
[ PACKAGE_NAME="$withval" ])
dnl
dnl AM_INIT_AUTOMAKE([1.11.1 foreign dist-bzip2 dist-xz])
dnl
AM_INIT_AUTOMAKE([1.10.1 foreign dist-bzip2])
AC_DEFINE_UNQUOTED(AGENTS_VERSION, "$PACKAGE_VERSION", Current agents version)
CC_IN_CONFIGURE=yes
export CC_IN_CONFIGURE
LDD=ldd
dnl ========================================================================
dnl Compiler characteristics
dnl ========================================================================
# check stolen from gnulib/m4/gnu-make.m4
if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then
AC_MSG_ERROR([you don't seem to have GNU make; it is required])
fi
AC_PROG_CC dnl Can force other with environment variable "CC".
AM_PROG_CC_C_O
AC_PROG_CC_STDC
AC_PROG_CPP
AC_PROG_AWK
AC_PROG_LN_S
AC_PROG_INSTALL
AC_PROG_MAKE_SET
AC_C_STRINGIZE
AC_C_INLINE
AC_TYPE_SIZE_T
AC_TYPE_SSIZE_T
AC_TYPE_UID_T
AC_TYPE_UINT16_T
AC_TYPE_UINT8_T
AC_TYPE_UINT32_T
AC_CHECK_SIZEOF(char)
AC_CHECK_SIZEOF(short)
AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(long)
AC_CHECK_SIZEOF(long long)
AC_STRUCT_TIMEZONE
dnl ===============================================
dnl Helpers
dnl ===============================================
# Check whether the compiler front end accepts the given flag(s).
# Arguments: the flag(s) to try, for example -Wall.
# Returns 0 when the probe succeeds and 1 otherwise; the same status is also
# left behind in the shell variable RC, which is not declared local.
cc_supports_flag() {
# Shadow CPPFLAGS for the duration of this call so that the probe below is
# run with the candidate flag(s) in place of the configured preprocessor flags.
local CPPFLAGS="$@"
AC_MSG_CHECKING(whether $CC supports "$@")
# The probe only preprocesses an empty program; nothing is compiled or linked.
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([])],
[RC=0; AC_MSG_RESULT([yes])],
[RC=1; AC_MSG_RESULT([no])])
return $RC
}
# Print the value of a string-valued preprocessor define found in a header.
#   first argument:  header to include, written as it would appear between
#                    angle brackets
#   second argument: name of the define; it is printed with a string format,
#                    so it has to expand to a C string
# A throw-away C program is generated next to the sources, compiled with
# $CC $CFLAGS, executed, and removed again.  The value is written to stdout
# without a trailing newline so callers can capture it.
extract_header_define() {
	AC_MSG_CHECKING(for $2 in $1)
	# The shell PID keeps concurrent configure runs from sharing a file name.
	Cfile=$srcdir/extract_define.$2.${$}
	printf "#include <stdio.h>\n" > "${Cfile}.c"
	printf "#include <%s>\n" "$1" >> "${Cfile}.c"
	printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" "$2" >> "${Cfile}.c"
	$CC $CFLAGS "${Cfile}.c" -o "${Cfile}"
	value=`"${Cfile}"`
	AC_MSG_RESULT($value)
	# Emit the value as data, never as a format string: a value containing
	# percent signs or backslashes would otherwise be mangled.
	printf '%s' "$value"
	rm -f "${Cfile}.c" "${Cfile}"
}
AC_MSG_NOTICE(Sanitizing prefix: ${prefix})
case $prefix in
NONE)
prefix=/usr
dnl Fix default variables - "prefix" variable if not specified
if test "$localstatedir" = "\${prefix}/var"; then
localstatedir="/var"
fi
if test "$sysconfdir" = "\${prefix}/etc"; then
sysconfdir="/etc"
fi
;;
esac
# ordering is important, PKG_PROG_PKG_CONFIG is to be invoked before any other PKG_* related stuff
PKG_PROG_PKG_CONFIG(0.18)
# PKG_CHECK_MODULES will fail if systemd is not found by default, so make sure
# we set the proper vars and deal with it
PKG_CHECK_MODULES([systemd], [systemd], [HAS_SYSTEMD=yes], [HAS_SYSTEMD=no])
if test "x$HAS_SYSTEMD" = "xyes"; then
PKG_CHECK_VAR([SYSTEMD_UNIT_DIR], [systemd], [systemdsystemunitdir])
if test "x$SYSTEMD_UNIT_DIR" = "x"; then
AC_MSG_ERROR([Unable to detect systemd unit dir automatically])
fi
PKG_CHECK_VAR([SYSTEMD_TMPFILES_DIR], [systemd], [tmpfilesdir])
if test "x$SYSTEMD_TMPFILES_DIR" = "x"; then
AC_MSG_ERROR([Unable to detect systemd tmpfiles directory automatically])
fi
# sanitize systemd vars when using a non-standard prefix
if test "$prefix" != "/usr"; then
SYSTEMD_UNIT_DIR="$prefix/$SYSTEMD_UNIT_DIR"
AC_SUBST([SYSTEMD_UNIT_DIR])
SYSTEMD_TMPFILES_DIR="$prefix/$SYSTEMD_TMPFILES_DIR"
AC_SUBST([SYSTEMD_TMPFILES_DIR])
fi
fi
AM_CONDITIONAL(HAVE_SYSTEMD, [test "x$HAS_SYSTEMD" = xyes ])
dnl ===============================================
dnl Configure Options
dnl ===============================================
dnl Some systems, like Solaris require a custom package name
AC_ARG_WITH(pkgname,
[ --with-pkgname=name name for pkg (typically for Solaris) ],
[ PKGNAME="$withval" ],
[ PKGNAME="LXHAhb" ],
)
AC_SUBST(PKGNAME)
AC_ARG_ENABLE([ansi],
[ --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers.
[default=no]])
AC_ARG_ENABLE([fatal-warnings],
[ --enable-fatal-warnings very pedantic and fatal warnings for gcc
[default=yes]])
INITDIR=""
AC_ARG_WITH(initdir,
[ --with-initdir=DIR directory for init (rc) scripts [${INITDIR}]],
[ INITDIR="$withval" ])
OCF_ROOT_DIR="${prefix}/lib/ocf"
AC_ARG_WITH(ocf-root,
[ --with-ocf-root=DIR directory for OCF scripts [${OCF_ROOT_DIR}]],
[ OCF_ROOT_DIR="$withval" ])
HA_RSCTMPDIR=${localstatedir}/run/resource-agents
AC_ARG_WITH(rsctmpdir,
[ --with-rsctmpdir=DIR directory for resource agents state files [${HA_RSCTMPDIR}]],
[ HA_RSCTMPDIR="$withval" ])
AC_ARG_ENABLE([libnet],
[ --enable-libnet Use libnet for ARP based functionality, [default=try]],
[enable_libnet="$enableval"], [enable_libnet=try])
BUILD_RGMANAGER=0
BUILD_LINUX_HA=0
RASSET=linux-ha
AC_ARG_WITH(ras-set,
[ --with-ras-set=SET build/install only linux-ha, rgmanager or all resource-agents [default: linux-ha]],
[ RASSET="$withval" ])
# Translate the requested resource-agent set into the two build switches.
# A bare --with-ras-set yields the value "yes" and is treated like "all".
case "$RASSET" in
	yes|all)
		BUILD_RGMANAGER=1
		BUILD_LINUX_HA=1
		;;
	linux-ha)
		BUILD_LINUX_HA=1
		;;
	rgmanager)
		BUILD_RGMANAGER=1
		;;
esac
# Any other value selects neither set, which leaves nothing to build.
# The error below terminates configure by itself, so no explicit exit follows.
if test $BUILD_LINUX_HA -eq 0 && test $BUILD_RGMANAGER -eq 0; then
	AC_MSG_ERROR([Are you really sure you want this package?])
fi
AM_CONDITIONAL(BUILD_LINUX_HA, test $BUILD_LINUX_HA -eq 1)
AM_CONDITIONAL(BUILD_RGMANAGER, test $BUILD_RGMANAGER -eq 1)
AC_ARG_WITH(compat-habindir,
[ --with-compat-habindir use HA_BIN directory with compatibility for the Heartbeat stack [${libexecdir}]],
[],
[with_compat_habindir=no])
AM_CONDITIONAL(WITH_COMPAT_HABINDIR, test "x$with_compat_habindir" != "xno")
dnl ===============================================
dnl General Processing
dnl ===============================================
echo Our Host OS: $host_os/$host
AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix})
case $exec_prefix in
dnl For consistency with Heartbeat, map NONE->$prefix
NONE) exec_prefix=$prefix;;
prefix) exec_prefix=$prefix;;
esac
AC_MSG_NOTICE(Sanitizing INITDIR: ${INITDIR})
case $INITDIR in
prefix) INITDIR=$prefix;;
"")
AC_MSG_CHECKING(which init (rc) directory to use)
for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \
/usr/local/etc/rc.d /etc/rc.d
do
if
test -d $initdir
then
INITDIR=$initdir
break
fi
done
if
test -z $INITDIR
then
INITDIR=${sysconfdir}/init.d
fi
AC_MSG_RESULT($INITDIR);;
esac
AC_SUBST(INITDIR)
# Init scripts are relocated beneath the prefix, except for the stock /usr
# prefix where the init directory is used exactly as detected.
INITDIRPREFIX="${prefix}/$INITDIR"
case "${prefix}" in
	/usr) INITDIRPREFIX="$INITDIR" ;;
esac
AC_SUBST(INITDIRPREFIX)
AC_MSG_NOTICE(Sanitizing libdir: ${libdir})
case $libdir in
dnl For consistency with Heartbeat, map NONE->$prefix
*prefix*|NONE)
AC_MSG_CHECKING(which lib directory to use)
for aDir in lib64 lib
do
trydir="${exec_prefix}/${aDir}"
if
test -d ${trydir}
then
libdir=${trydir}
break
fi
done
AC_MSG_RESULT($libdir);
;;
esac
if test "x$with_compat_habindir" != "xno" ; then
libexecdir=${libdir}
fi
dnl Expand autoconf variables so that we don't end up with '${prefix}'
dnl in #defines and python scripts
dnl NOTE: Autoconf deliberately leaves them unexpanded to allow
dnl make exec_prefix=/foo install
dnl No longer being able to do this seems like no great loss to me...
eval prefix="`eval echo ${prefix}`"
eval exec_prefix="`eval echo ${exec_prefix}`"
eval bindir="`eval echo ${bindir}`"
eval sbindir="`eval echo ${sbindir}`"
eval libexecdir="`eval echo ${libexecdir}`"
eval datadir="`eval echo ${datadir}`"
eval sysconfdir="`eval echo ${sysconfdir}`"
eval sharedstatedir="`eval echo ${sharedstatedir}`"
eval localstatedir="`eval echo ${localstatedir}`"
eval libdir="`eval echo ${libdir}`"
eval includedir="`eval echo ${includedir}`"
eval oldincludedir="`eval echo ${oldincludedir}`"
eval infodir="`eval echo ${infodir}`"
eval mandir="`eval echo ${mandir}`"
dnl docdir is a recent addition to autotools
eval docdir="`eval echo ${docdir}`"
if test "x$docdir" = "x"; then
docdir="`eval echo ${datadir}/doc`"
fi
AC_SUBST(docdir)
dnl Home-grown variables
eval INITDIR="${INITDIR}"
for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \
sharedstatedir localstatedir libdir includedir oldincludedir infodir \
mandir INITDIR docdir
do
dirname=`eval echo '${'${j}'}'`
if
test ! -d "$dirname"
then
AC_MSG_WARN([$j directory ($dirname) does not exist!])
fi
done
dnl This OS-based decision-making is poor autotools practice;
dnl feature-based mechanisms are strongly preferred.
dnl
dnl So keep this section to a bare minimum; regard as a "necessary evil".
REBOOT_OPTIONS="-f"
POWEROFF_OPTIONS="-f"
# Per-platform adjustments, selected on the canonical host OS string.
case "$host_os" in
	*bsd*)
		# NOTE(review): this replaces LIBS instead of appending to it, unlike
		# the darwin branch below -- confirm that dropping earlier LIBS is intended.
		LIBS="-L/usr/local/lib"
		CPPFLAGS="$CPPFLAGS -I/usr/local/include"
		;;
	*solaris*)
		REBOOT_OPTIONS="-n"
		POWEROFF_OPTIONS="-n"
		# Spelled out as a plain assignment: configure runs under /bin/sh and
		# the "+=" append operator is not available in every Bourne-style shell.
		LDFLAGS="$LDFLAGS -lssp -lssp_nonshared"
		;;
	*linux*)
		AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform)
		POWEROFF_OPTIONS="-nf"
		REBOOT_OPTIONS="-nf"
		;;
	darwin*)
		AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform)
		LIBS="$LIBS -L${prefix}/lib"
		CFLAGS="$CFLAGS -I${prefix}/include"
		;;
esac
AC_DEFINE_UNQUOTED(HA_LOG_FACILITY, LOG_DAEMON, Default logging facility)
AC_MSG_NOTICE(Host CPU: $host_cpu)
case "$host_cpu" in
ppc64|powerpc64)
case $CFLAGS in
*powerpc64*) ;;
*) if test "$GCC" = yes; then
CFLAGS="$CFLAGS -m64"
fi ;;
esac
esac
AC_MSG_CHECKING(which format is needed to print uint64_t)
case "$host_cpu" in
s390x)U64T="%lu";;
*64*) U64T="%lu";;
*) U64T="%llu";;
esac
AC_MSG_RESULT($U64T)
AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t)
dnl Variables needed for substitution
AC_CHECK_HEADERS(heartbeat/glue_config.h)
if test "$ac_cv_header_heartbeat_glue_config_h" != "yes"; then
enable_libnet=no
fi
AC_DEFINE_UNQUOTED(OCF_ROOT_DIR,"$OCF_ROOT_DIR", OCF root directory - specified by the OCF standard)
AC_SUBST(OCF_ROOT_DIR)
GLUE_STATE_DIR=${localstatedir}/run
AC_DEFINE_UNQUOTED(GLUE_STATE_DIR,"$GLUE_STATE_DIR", Where to keep state files and sockets)
AC_SUBST(GLUE_STATE_DIR)
AC_DEFINE_UNQUOTED(HA_VARRUNDIR,"$GLUE_STATE_DIR", Where Heartbeat keeps state files and sockets - old name)
HA_VARRUNDIR="$GLUE_STATE_DIR"
AC_SUBST(HA_VARRUNDIR)
# Expand $prefix
eval HA_RSCTMPDIR="`eval echo ${HA_RSCTMPDIR}`"
AC_DEFINE_UNQUOTED(HA_RSCTMPDIR,"$HA_RSCTMPDIR", Where Resource agents keep state files)
AC_SUBST(HA_RSCTMPDIR)
dnl Eventually move out of the heartbeat dir tree and create symlinks when needed
HA_VARLIBHBDIR=${localstatedir}/lib/heartbeat
AC_DEFINE_UNQUOTED(HA_VARLIBHBDIR,"$HA_VARLIBHBDIR", Whatever this used to mean)
AC_SUBST(HA_VARLIBHBDIR)
OCF_RA_DIR="${OCF_ROOT_DIR}/resource.d"
AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs)
AC_SUBST(OCF_RA_DIR)
OCF_RA_DIR_PREFIX="$OCF_RA_DIR"
AC_SUBST(OCF_RA_DIR_PREFIX)
OCF_LIB_DIR="${OCF_ROOT_DIR}/lib"
AC_DEFINE_UNQUOTED(OCF_LIB_DIR,"$OCF_LIB_DIR", Location for shared code for OCF RAs)
AC_SUBST(OCF_LIB_DIR)
OCF_LIB_DIR_PREFIX="$OCF_LIB_DIR"
AC_SUBST(OCF_LIB_DIR_PREFIX)
dnl ===============================================
dnl rgmanager ras bits
dnl ===============================================
LOGDIR=${localstatedir}/log/cluster
CLUSTERDATA=${datadir}/cluster
AC_SUBST([LOGDIR])
AC_SUBST([CLUSTERDATA])
dnl ===============================================
dnl Program Paths
dnl ===============================================
PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin"
export PATH
AC_CHECK_PROGS(MAKE, gmake make)
AC_CHECK_PROGS(SHELLCHECK, shellcheck)
AM_CONDITIONAL(CI_CHECKS, test "x$SHELLCHECK" != "x" )
AC_PATH_PROGS(BASH_SHELL, bash)
if test x"${BASH_SHELL}" = x""; then
AC_MSG_ERROR(You need bash installed in order to build ${PACKAGE})
fi
AC_PATH_PROGS(XSLTPROC, xsltproc)
AM_CONDITIONAL(BUILD_DOC, test "x$XSLTPROC" != "x" )
if test "x$XSLTPROC" = "x"; then
AC_MSG_WARN([xsltproc not installed, unable to (re-)build manual pages])
fi
AC_SUBST(XSLTPROC)
AC_PATH_PROGS(XMLCATALOG, xmlcatalog)
AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh)
AC_PATH_PROGS(SCP, scp, /usr/bin/scp)
AC_PATH_PROGS(TAR, tar)
AC_PATH_PROGS(MD5, md5)
AC_PATH_PROGS(TEST, test)
AC_PATH_PROGS(PING, ping, /bin/ping)
AC_PATH_PROGS(IFCONFIG, ifconfig, /sbin/ifconfig)
AC_PATH_PROGS(MAILCMD, mailx mail, mail)
-AC_PATH_PROGS(EGREP, egrep)
AC_PATH_PROGS(RM, rm)
+AC_PROG_EGREP
+AC_PROG_FGREP
AC_SUBST(BASH_SHELL)
AC_SUBST(MAILCMD)
-AC_SUBST(EGREP)
AC_SUBST(SHELL)
AC_SUBST(PING)
AC_SUBST(RM)
AC_SUBST(TEST)
AM_PATH_PYTHON([3.6])
if test -z "$PYTHON"; then
echo "*** Essential program python not found" 1>&2
exit 1
fi
dnl Ensure PYTHON is an absolute path
AC_PATH_PROG([PYTHON], [$PYTHON])
AM_PATH_PYTHON
if test -z "$PYTHON"; then
echo "*** Essential program python not found" 1>&2
fi
AC_PYTHON_MODULE(json)
AC_PYTHON_MODULE(pyroute2)
AC_PYTHON_MODULE(requests)
AC_PYTHON_MODULE(urllib3)
AC_PYTHON_MODULE(ibm_cloud_fail_over)
AS_VERSION_COMPARE([$PYTHON_VERSION], [3.6], [BUILD_OCF_PY=0], [BUILD_OCF_PY=1], [BUILD_OCF_PY=1])
BUILD_AZURE_EVENTS=1
if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then
BUILD_AZURE_EVENTS=0
AC_MSG_WARN("Not building azure-events")
fi
AM_CONDITIONAL(BUILD_AZURE_EVENTS, test $BUILD_AZURE_EVENTS -eq 1)
BUILD_AZURE_EVENTS_AZ=1
if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then
BUILD_AZURE_EVENTS_AZ=0
AC_MSG_WARN("Not building azure-events-az")
fi
AM_CONDITIONAL(BUILD_AZURE_EVENTS_AZ, test $BUILD_AZURE_EVENTS_AZ -eq 1)
BUILD_GCP_PD_MOVE=1
if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then
BUILD_GCP_PD_MOVE=0
AC_MSG_WARN("Not building gcp-pd-move")
fi
AM_CONDITIONAL(BUILD_GCP_PD_MOVE, test $BUILD_GCP_PD_MOVE -eq 1)
BUILD_GCP_VPC_MOVE_ROUTE=1
if test -z "$PYTHON" || test "x${HAVE_PYMOD_PYROUTE2}" != xyes || test $BUILD_OCF_PY -eq 0; then
BUILD_GCP_VPC_MOVE_ROUTE=0
AC_MSG_WARN("Not building gcp-vpc-move-route")
fi
AM_CONDITIONAL(BUILD_GCP_VPC_MOVE_ROUTE, test $BUILD_GCP_VPC_MOVE_ROUTE -eq 1)
BUILD_GCP_VPC_MOVE_VIP=1
if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then
BUILD_GCP_VPC_MOVE_VIP=0
AC_MSG_WARN("Not building gcp-vpc-move-vip")
fi
AM_CONDITIONAL(BUILD_GCP_VPC_MOVE_VIP, test $BUILD_GCP_VPC_MOVE_VIP -eq 1)
BUILD_POWERVS_SUBNET=1
if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0 || test "x${HAVE_PYMOD_REQUESTS}" != xyes || test "x${HAVE_PYMOD_URLLIB3}" != xyes; then
BUILD_POWERVS_SUBNET=0
AC_MSG_WARN("Not building powervs-subnet")
fi
AM_CONDITIONAL(BUILD_POWERVS_SUBNET, test $BUILD_POWERVS_SUBNET -eq 1)
BUILD_IBM_CLOUD_VPC_MOVE_ROUTE=1
if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0 || test "x${HAVE_PYMOD_IBM_CLOUD_FAIL_OVER}" != xyes; then
BUILD_IBM_CLOUD_VPC_MOVE_ROUTE=0
AC_MSG_WARN("Not building ibm-cloud-vpc-cr-vip")
fi
AM_CONDITIONAL(BUILD_IBM_CLOUD_VPC_MOVE_ROUTE, test $BUILD_IBM_CLOUD_VPC_MOVE_ROUTE -eq 1)
AC_PATH_PROGS(ROUTE, route)
AC_DEFINE_UNQUOTED(ROUTE, "$ROUTE", path to route command)
AC_MSG_CHECKING(ifconfig option to list interfaces)
for IFCONFIG_A_OPT in "-A" "-a" ""
do
$IFCONFIG $IFCONFIG_A_OPT > /dev/null 2>&1
if
test "$?" = 0
then
AC_DEFINE_UNQUOTED(IFCONFIG_A_OPT, "$IFCONFIG_A_OPT", option for ifconfig command)
AC_MSG_RESULT($IFCONFIG_A_OPT)
break
fi
done
AC_SUBST(IFCONFIG_A_OPT)
if test x"${MAKE}" = x""; then
AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE})
fi
# Locate the DocBook XSL stylesheet tree used to turn the agent metadata into
# man pages.  Only attempted when xsltproc was found; otherwise the prefix
# stays empty.
STYLESHEET_PREFIX=""
if test x"${XSLTPROC}" != x""; then
	AC_MSG_CHECKING(docbook to manpage transform)
	# first try to figure out correct template using xmlcatalog query,
	# resort to extensive (semi-deterministic) file search if that fails
	DOCBOOK_XSL_URI='http://docbook.sourceforge.net/release/xsl/current'
	DOCBOOK_XSL_PATH='manpages/docbook.xsl'
	# Query the catalog only when xmlcatalog was actually found; with an empty
	# XMLCATALOG the shell would try to execute an empty command name.
	if test x"${XMLCATALOG}" != x""; then
		STYLESHEET_PREFIX=$(${XMLCATALOG} "" ${DOCBOOK_XSL_URI} \
			| sed -n 's|^file://||p;q')
	fi
	if test x"${STYLESHEET_PREFIX}" = x""; then
		DIRS=$(find "${datadir}" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \
			-type d | LC_ALL=C sort)
		if test x"${DIRS}" = x""; then
			# when datadir is not standard OS path, we cannot find docbook.xsl
			# use standard OS path as backup
			DIRS=$(find "/usr/share" "/usr/local/share" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \
				-type d | LC_ALL=C sort)
		fi
		XSLT=$(basename ${DOCBOOK_XSL_PATH})
		# Take the first candidate directory, in sorted order, that really
		# contains the stylesheet and strip the trailing manpages component.
		for d in ${DIRS}; do
			if test -f "${d}/${XSLT}"; then
				STYLESHEET_PREFIX=$(echo "${d}" | sed 's/\/manpages//')
				break
			fi
		done
	fi
	if test x"${STYLESHEET_PREFIX}" = x""; then
		AC_MSG_ERROR(You need docbook-style-xsl installed in order to build ${PACKAGE})
	fi
fi
AC_MSG_RESULT($STYLESHEET_PREFIX)
AC_SUBST(STYLESHEET_PREFIX)
dnl ===============================================
dnl Libraries
dnl ===============================================
AC_CHECK_LIB(socket, socket)
AC_CHECK_LIB(gnugetopt, getopt_long) dnl if available
if test "x${enable_thread_safe}" = "xyes"; then
GPKGNAME="gthread-2.0"
else
GPKGNAME="glib-2.0"
fi
PKG_CHECK_MODULES([GLIB], [$GPKGNAME])
CPPFLAGS="$CPPFLAGS $GLIB_CFLAGS"
LIBS="$LIBS $GLIB_LIBS"
PKG_CHECK_MODULES([LIBQB], "libqb")
dnl ========================================================================
dnl Headers
dnl ========================================================================
AC_HEADER_STDC
AC_CHECK_HEADERS(sys/socket.h)
AC_CHECK_HEADERS(sys/sockio.h)
AC_CHECK_HEADERS([arpa/inet.h])
AC_CHECK_HEADERS([fcntl.h])
AC_CHECK_HEADERS([limits.h])
AC_CHECK_HEADERS([malloc.h])
AC_CHECK_HEADERS([netdb.h])
AC_CHECK_HEADERS([netinet/in.h])
AC_CHECK_HEADERS([sys/file.h])
AC_CHECK_HEADERS([sys/ioctl.h])
AC_CHECK_HEADERS([sys/param.h])
AC_CHECK_HEADERS([sys/time.h])
AC_CHECK_HEADERS([syslog.h])
dnl ========================================================================
dnl Functions
dnl ========================================================================
AC_FUNC_FORK
AC_FUNC_STRNLEN
AC_CHECK_FUNCS([alarm gettimeofday inet_ntoa memset mkdir socket uname])
AC_CHECK_FUNCS([strcasecmp strchr strdup strerror strrchr strspn strstr strtol strtoul])
AC_PATH_PROGS(REBOOT, reboot, /sbin/reboot)
AC_SUBST(REBOOT)
AC_SUBST(REBOOT_OPTIONS)
AC_DEFINE_UNQUOTED(REBOOT, "$REBOOT", path to the reboot command)
AC_DEFINE_UNQUOTED(REBOOT_OPTIONS, "$REBOOT_OPTIONS", reboot options)
AC_PATH_PROGS(POWEROFF_CMD, poweroff, /sbin/poweroff)
AC_SUBST(POWEROFF_CMD)
AC_SUBST(POWEROFF_OPTIONS)
AC_DEFINE_UNQUOTED(POWEROFF_CMD, "$POWEROFF_CMD", path to the poweroff command)
AC_DEFINE_UNQUOTED(POWEROFF_OPTIONS, "$POWEROFF_OPTIONS", poweroff options)
AC_PATH_PROGS(POD2MAN, pod2man)
AM_CONDITIONAL(BUILD_POD_DOC, test "x$POD2MAN" != "x" )
if test "x$POD2MAN" = "x"; then
AC_MSG_WARN([pod2man not installed, unable to (re-)build ldirector manual page])
fi
AC_SUBST(POD2MAN)
dnl ========================================================================
dnl Functions
dnl ========================================================================
AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT, 1, [Have getopt function]))
dnl ========================================================================
dnl sfex
dnl ========================================================================
build_sfex=no
case $host_os in
*Linux*|*linux*)
if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then
build_sfex=yes
fi
;;
esac
AM_CONDITIONAL(BUILD_SFEX, test "$build_sfex" = "yes" )
dnl ========================================================================
dnl tickle (needs port to BSD platforms)
dnl ========================================================================
AC_CHECK_MEMBERS([struct iphdr.saddr],,,[[#include <netinet/ip.h>]])
AM_CONDITIONAL(BUILD_TICKLE, test "$ac_cv_member_struct_iphdr_saddr" = "yes" )
dnl ========================================================================
dnl libnet
dnl ========================================================================
libnet=""
libnet_version="none"
LIBNETLIBS=""
LIBNETDEFINES=""
AC_MSG_CHECKING(if libnet is required)
libnet_fatal=$enable_libnet
case $enable_libnet in
no) ;;
yes|libnet10|libnet11|10|11) libnet_fatal=yes;;
try)
case $host_os in
*Linux*|*linux*) libnet_fatal=no;;
*) libnet_fatal=yes;; dnl legacy behavior
esac
;;
*) libnet_fatal=yes; enable_libnet=try;;
esac
AC_MSG_RESULT($libnet_fatal)
if test "x$enable_libnet" != "xno"; then
AC_PATH_PROGS(LIBNETCONFIG, libnet-config)
AC_CHECK_LIB(nsl, t_open) dnl -lnsl
AC_CHECK_LIB(socket, socket) dnl -lsocket
AC_CHECK_LIB(net, libnet_get_hwaddr, LIBNETLIBS=" -lnet", [])
fi
AC_MSG_CHECKING(for libnet)
if test "x$LIBNETLIBS" != "x" -o "x$enable_libnet" = "xlibnet11"; then
LIBNETDEFINES=""
if test "$ac_cv_lib_nsl_t_open" = yes; then
LIBNETLIBS="-lnsl $LIBNETLIBS"
fi
if test "$ac_cv_lib_socket_socket" = yes; then
LIBNETLIBS="-lsocket $LIBNETLIBS"
fi
libnet=net
libnet_version="libnet1.1"
fi
if test "x$enable_libnet" = "xtry" -o "x$enable_libnet" = "xlibnet10"; then
if test "x$LIBNETLIBS" = x -a "x${LIBNETCONFIG}" != "x" ; then
LIBNETDEFINES="`$LIBNETCONFIG --defines` `$LIBNETCONFIG --cflags`";
LIBNETLIBS="`$LIBNETCONFIG --libs`";
libnet_version="libnet1.0 (old)"
case $LIBNETLIBS in
*-l*) libnet=`echo $LIBNETLIBS | sed 's%.*-l%%'`;;
*) libnet_version=none;;
esac
CPPFLAGS="$CPPFLAGS $LIBNETDEFINES"
AC_CHECK_HEADERS(libnet.h)
if test "$ac_cv_header_libnet_h" = no; then
libnet_version=none
fi
fi
fi
AC_MSG_RESULT(found $libnet_version)
if test "$libnet_version" = none; then
LIBNETLIBS=""
LIBNETDEFINES=""
if test $libnet_fatal = yes; then
AC_MSG_ERROR(libnet not found)
fi
else
AC_CHECK_LIB($libnet,libnet_init,
[new_libnet=yes; AC_DEFINE(HAVE_LIBNET_1_1_API, 1, Libnet 1.1 API)],
[new_libnet=no; AC_DEFINE(HAVE_LIBNET_1_0_API, 1, Libnet 1.0 API)],$LIBNETLIBS)
AC_SUBST(LIBNETLIBS)
fi
if test "$new_libnet" = yes; then
AC_MSG_CHECKING(for libnet API 1.1.4: )
save_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -fgnu89-inline -Wall -Werror"
AC_COMPILE_IFELSE([
AC_LANG_SOURCE(#include <libnet.h>
int main(){libnet_t *l=NULL; libnet_pblock_record_ip_offset(l, l->total_size); return(0); })],
[AC_MSG_RESULT(no)],
[AC_DEFINE(HAVE_LIBNET_1_1_4_API, 1, Libnet 1.1.4 API) AC_MSG_RESULT(yes)])
CFLAGS="$save_CFLAGS"
fi
sendarp_linux=0
case $host_os in
*Linux*|*linux*) sendarp_linux=1;;
esac
redhat_based=0
AC_CHECK_FILE(/etc/redhat-release, [redhat_based=1])
AC_SUBST(LIBNETLIBS)
AC_SUBST(LIBNETDEFINES)
AM_CONDITIONAL(SENDARP_LINUX, test $sendarp_linux = 1 )
AM_CONDITIONAL(USE_LIBNET, test "x$libnet_version" != "xnone" )
AM_CONDITIONAL(NFSCONVERT, test $redhat_based = 1 )
dnl ************************************************************************
dnl * Check for netinet/icmp6.h to enable the IPv6addr resource agent
AC_CHECK_HEADERS(netinet/icmp6.h,[],[],[#include <sys/types.h>])
AM_CONDITIONAL(USE_IPV6ADDR_AGENT, test "$ac_cv_header_netinet_icmp6_h" = yes && test "$ac_cv_header_heartbeat_glue_config_h" = yes)
AM_CONDITIONAL(IPV6ADDR_COMPATIBLE, test "$ac_cv_header_netinet_icmp6_h" = yes)
dnl ========================================================================
dnl Compiler flags
dnl ========================================================================
dnl Make sure that CFLAGS is not exported. If the user did
dnl not have CFLAGS in their environment then this should have
dnl no effect. However if CFLAGS was exported from the user's
dnl environment, then the new CFLAGS will also be exported
dnl to sub processes.
CC_ERRORS=""
CC_EXTRAS=""
# When CFLAGS is currently exported, drop the export attribute but keep the
# value: unsetting removes the variable together with its export flag, and the
# plain re-assignment brings the value back as an ordinary shell variable.
# grep -F is used for the fixed-string match because the stand-alone fgrep
# command is obsolescent and warns on stderr with current GNU grep.
if export -p | grep -F " CFLAGS=" > /dev/null; then
	SAVED_CFLAGS="$CFLAGS"
	unset CFLAGS
	CFLAGS="$SAVED_CFLAGS"
	unset SAVED_CFLAGS
fi
if test "$GCC" != yes; then
CFLAGS="$CFLAGS -g"
enable_fatal_warnings=no
else
CFLAGS="$CFLAGS -ggdb3"
# We had to eliminate -Wnested-externs because of libtool changes
# Also remove -Waggregate-return because we use one libnet
# call which returns a struct
EXTRA_FLAGS="-fgnu89-inline
-fstack-protector-all
-Wall
-Wbad-function-cast
-Wcast-qual
-Wdeclaration-after-statement
-Wendif-labels
-Wfloat-equal
-Wformat=2
-Wformat-security
-Wformat-nonliteral
-Winline
-Wmissing-prototypes
-Wmissing-declarations
-Wmissing-format-attribute
-Wnested-externs
-Wno-long-long
-Wno-strict-aliasing
-Wpointer-arith
-Wstrict-prototypes
-Wunsigned-char
-Wwrite-strings
-Wno-maybe-uninitialized"
# Additional warnings it might be nice to enable one day
# -Wshadow
# -Wunreachable-code
for j in $EXTRA_FLAGS
do
if
cc_supports_flag $j
then
CC_EXTRAS="$CC_EXTRAS $j"
fi
done
dnl In lib/ais/Makefile.am there's a gcc option available as of v4.x
GCC_MAJOR=`gcc -v 2>&1 | awk 'END{print $3}' | sed 's/[.].*//'`
AM_CONDITIONAL(GCC_4, test "${GCC_MAJOR}" = 4)
dnl System specific options
case "$host_os" in
*linux*|*bsd*)
if test "${enable_fatal_warnings}" = "unknown"; then
enable_fatal_warnings=yes
fi
;;
esac
if test "x${enable_fatal_warnings}" != xno && cc_supports_flag -Werror ; then
enable_fatal_warnings=yes
else
enable_fatal_warnings=no
fi
if test "x${enable_ansi}" = xyes && cc_supports_flag -std=iso9899:199409; then
AC_MSG_NOTICE(Enabling ANSI Compatibility)
CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY"
fi
AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS})
fi
CFLAGS="$CFLAGS $CC_EXTRAS"
NON_FATAL_CFLAGS="$CFLAGS"
AC_SUBST(NON_FATAL_CFLAGS)
dnl
dnl We reset CFLAGS to include our warnings *after* all function
dnl checking goes on, so that our warning flags don't keep the
dnl AC_*FUNCS() calls above from working. In particular, -Werror will
dnl *always* cause us troubles if we set it before here.
dnl
dnl
if test "x${enable_fatal_warnings}" = xyes ; then
AC_MSG_NOTICE(Enabling Fatal Warnings)
CFLAGS="$CFLAGS -Werror"
fi
AC_SUBST(CFLAGS)
dnl This is useful for use in Makefiles that need to remove one specific flag
CFLAGS_COPY="$CFLAGS"
AC_SUBST(CFLAGS_COPY)
AC_SUBST(LOCALE)
AC_SUBST(CC)
AC_SUBST(MAKE)
dnl The Makefiles and shell scripts we output
AC_CONFIG_FILES(Makefile \
resource-agents.pc \
include/Makefile \
heartbeat/Makefile \
heartbeat/ocf-binaries \
heartbeat/ocf-directories \
heartbeat/ocf-shellfuncs \
heartbeat/shellfuncs \
systemd/Makefile \
systemd/resource-agents.conf \
tools/Makefile \
tools/nfsconvert \
tools/ocf-tester \
tools/ocft/Makefile \
tools/ocft/ocft \
tools/ocft/caselib \
tools/ocft/README \
tools/ocft/README.zh_CN \
ldirectord/Makefile \
ldirectord/ldirectord \
ldirectord/init.d/Makefile \
ldirectord/init.d/ldirectord \
ldirectord/init.d/ldirectord.debian \
ldirectord/init.d/ldirectord.debian.default \
ldirectord/systemd/Makefile \
ldirectord/systemd/ldirectord.service \
ldirectord/logrotate.d/Makefile \
ldirectord/OCF/Makefile \
ldirectord/OCF/ldirectord \
doc/Makefile \
doc/man/Makefile \
rgmanager/Makefile \
rgmanager/src/Makefile \
rgmanager/src/resources/Makefile \
rgmanager/src/resources/ocf-shellfuncs \
rgmanager/src/resources/svclib_nfslock \
rgmanager/src/resources/lvm_by_lv.sh \
rgmanager/src/resources/lvm_by_vg.sh \
rgmanager/src/resources/utils/Makefile \
rgmanager/src/resources/utils/fs-lib.sh \
rgmanager/src/resources/utils/messages.sh \
rgmanager/src/resources/utils/config-utils.sh \
rgmanager/src/resources/utils/member_util.sh \
rgmanager/src/resources/utils/ra-skelet.sh \
)
dnl Files we output that need to be executable
AC_CONFIG_FILES([heartbeat/azure-events], [chmod +x heartbeat/azure-events])
AC_CONFIG_FILES([heartbeat/azure-events-az], [chmod +x heartbeat/azure-events-az])
AC_CONFIG_FILES([heartbeat/AoEtarget], [chmod +x heartbeat/AoEtarget])
AC_CONFIG_FILES([heartbeat/ManageRAID], [chmod +x heartbeat/ManageRAID])
AC_CONFIG_FILES([heartbeat/ManageVE], [chmod +x heartbeat/ManageVE])
AC_CONFIG_FILES([heartbeat/Squid], [chmod +x heartbeat/Squid])
AC_CONFIG_FILES([heartbeat/SysInfo], [chmod +x heartbeat/SysInfo])
AC_CONFIG_FILES([heartbeat/aws-vpc-route53], [chmod +x heartbeat/aws-vpc-route53])
AC_CONFIG_FILES([heartbeat/clvm], [chmod +x heartbeat/clvm])
AC_CONFIG_FILES([heartbeat/conntrackd], [chmod +x heartbeat/conntrackd])
AC_CONFIG_FILES([heartbeat/dnsupdate], [chmod +x heartbeat/dnsupdate])
AC_CONFIG_FILES([heartbeat/dummypy], [chmod +x heartbeat/dummypy])
AC_CONFIG_FILES([heartbeat/eDir88], [chmod +x heartbeat/eDir88])
AC_CONFIG_FILES([heartbeat/fio], [chmod +x heartbeat/fio])
AC_CONFIG_FILES([heartbeat/galera], [chmod +x heartbeat/galera])
AC_CONFIG_FILES([heartbeat/gcp-pd-move], [chmod +x heartbeat/gcp-pd-move])
AC_CONFIG_FILES([heartbeat/gcp-vpc-move-ip], [chmod +x heartbeat/gcp-vpc-move-ip])
AC_CONFIG_FILES([heartbeat/gcp-vpc-move-vip], [chmod +x heartbeat/gcp-vpc-move-vip])
AC_CONFIG_FILES([heartbeat/gcp-vpc-move-route], [chmod +x heartbeat/gcp-vpc-move-route])
AC_CONFIG_FILES([heartbeat/ibm-cloud-vpc-cr-vip], [chmod +x heartbeat/ibm-cloud-vpc-cr-vip])
AC_CONFIG_FILES([heartbeat/iSCSILogicalUnit], [chmod +x heartbeat/iSCSILogicalUnit])
AC_CONFIG_FILES([heartbeat/iSCSITarget], [chmod +x heartbeat/iSCSITarget])
AC_CONFIG_FILES([heartbeat/jira], [chmod +x heartbeat/jira])
AC_CONFIG_FILES([heartbeat/kamailio], [chmod +x heartbeat/kamailio])
AC_CONFIG_FILES([heartbeat/lxc], [chmod +x heartbeat/lxc])
AC_CONFIG_FILES([heartbeat/lxd-info], [chmod +x heartbeat/lxd-info])
AC_CONFIG_FILES([heartbeat/machine-info], [chmod +x heartbeat/machine-info])
AC_CONFIG_FILES([heartbeat/mariadb], [chmod +x heartbeat/mariadb])
AC_CONFIG_FILES([heartbeat/mpathpersist], [chmod +x heartbeat/mpathpersist])
AC_CONFIG_FILES([heartbeat/nfsnotify], [chmod +x heartbeat/nfsnotify])
AC_CONFIG_FILES([heartbeat/openstack-info], [chmod +x heartbeat/openstack-info])
AC_CONFIG_FILES([heartbeat/powervs-subnet], [chmod +x heartbeat/powervs-subnet])
AC_CONFIG_FILES([heartbeat/rabbitmq-cluster], [chmod +x heartbeat/rabbitmq-cluster])
AC_CONFIG_FILES([heartbeat/redis], [chmod +x heartbeat/redis])
AC_CONFIG_FILES([heartbeat/rsyslog], [chmod +x heartbeat/rsyslog])
AC_CONFIG_FILES([heartbeat/smb-share], [chmod +x heartbeat/smb-share])
AC_CONFIG_FILES([heartbeat/sg_persist], [chmod +x heartbeat/sg_persist])
AC_CONFIG_FILES([heartbeat/slapd], [chmod +x heartbeat/slapd])
AC_CONFIG_FILES([heartbeat/storage-mon], [chmod +x heartbeat/storage-mon])
AC_CONFIG_FILES([heartbeat/sybaseASE], [chmod +x heartbeat/sybaseASE])
AC_CONFIG_FILES([heartbeat/syslog-ng], [chmod +x heartbeat/syslog-ng])
AC_CONFIG_FILES([heartbeat/vsftpd], [chmod +x heartbeat/vsftpd])
AC_CONFIG_FILES([heartbeat/CTDB], [chmod +x heartbeat/CTDB])
AC_CONFIG_FILES([rgmanager/src/resources/ASEHAagent.sh], [chmod +x rgmanager/src/resources/ASEHAagent.sh])
AC_CONFIG_FILES([rgmanager/src/resources/apache.sh], [chmod +x rgmanager/src/resources/apache.sh])
AC_CONFIG_FILES([rgmanager/src/resources/bind-mount.sh], [chmod +x rgmanager/src/resources/bind-mount.sh])
AC_CONFIG_FILES([rgmanager/src/resources/clusterfs.sh], [chmod +x rgmanager/src/resources/clusterfs.sh])
AC_CONFIG_FILES([rgmanager/src/resources/db2.sh], [chmod +x rgmanager/src/resources/db2.sh])
AC_CONFIG_FILES([rgmanager/src/resources/drbd.sh], [chmod +x rgmanager/src/resources/drbd.sh])
AC_CONFIG_FILES([rgmanager/src/resources/fs.sh], [chmod +x rgmanager/src/resources/fs.sh])
AC_CONFIG_FILES([rgmanager/src/resources/ip.sh], [chmod +x rgmanager/src/resources/ip.sh])
AC_CONFIG_FILES([rgmanager/src/resources/lvm.sh], [chmod +x rgmanager/src/resources/lvm.sh])
AC_CONFIG_FILES([rgmanager/src/resources/mysql.sh], [chmod +x rgmanager/src/resources/mysql.sh])
AC_CONFIG_FILES([rgmanager/src/resources/named.sh], [chmod +x rgmanager/src/resources/named.sh])
AC_CONFIG_FILES([rgmanager/src/resources/netfs.sh], [chmod +x rgmanager/src/resources/netfs.sh])
AC_CONFIG_FILES([rgmanager/src/resources/nfsclient.sh], [chmod +x rgmanager/src/resources/nfsclient.sh])
AC_CONFIG_FILES([rgmanager/src/resources/nfsexport.sh], [chmod +x rgmanager/src/resources/nfsexport.sh])
AC_CONFIG_FILES([rgmanager/src/resources/nfsserver.sh], [chmod +x rgmanager/src/resources/nfsserver.sh])
AC_CONFIG_FILES([rgmanager/src/resources/openldap.sh], [chmod +x rgmanager/src/resources/openldap.sh])
AC_CONFIG_FILES([rgmanager/src/resources/oracledb.sh], [chmod +x rgmanager/src/resources/oracledb.sh])
AC_CONFIG_FILES([rgmanager/src/resources/oradg.sh], [chmod +x rgmanager/src/resources/oradg.sh])
AC_CONFIG_FILES([rgmanager/src/resources/orainstance.sh], [chmod +x rgmanager/src/resources/orainstance.sh])
AC_CONFIG_FILES([rgmanager/src/resources/oralistener.sh], [chmod +x rgmanager/src/resources/oralistener.sh])
AC_CONFIG_FILES([rgmanager/src/resources/postgres-8.sh], [chmod +x rgmanager/src/resources/postgres-8.sh])
AC_CONFIG_FILES([rgmanager/src/resources/samba.sh], [chmod +x rgmanager/src/resources/samba.sh])
AC_CONFIG_FILES([rgmanager/src/resources/script.sh], [chmod +x rgmanager/src/resources/script.sh])
AC_CONFIG_FILES([rgmanager/src/resources/service.sh], [chmod +x rgmanager/src/resources/service.sh])
AC_CONFIG_FILES([rgmanager/src/resources/smb.sh], [chmod +x rgmanager/src/resources/smb.sh])
AC_CONFIG_FILES([rgmanager/src/resources/tomcat-5.sh], [chmod +x rgmanager/src/resources/tomcat-5.sh])
AC_CONFIG_FILES([rgmanager/src/resources/tomcat-6.sh], [chmod +x rgmanager/src/resources/tomcat-6.sh])
AC_CONFIG_FILES([rgmanager/src/resources/vm.sh], [chmod +x rgmanager/src/resources/vm.sh])
dnl Now process the entire list of files added by previous
dnl calls to AC_CONFIG_FILES()
AC_OUTPUT()
dnl *****************
dnl Configure summary
dnl *****************
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE configuration:])
AC_MSG_RESULT([ Version = ${VERSION}])
AC_MSG_RESULT([ Build Version = $Format:%H$])
AC_MSG_RESULT([])
AC_MSG_RESULT([ Prefix = ${prefix}])
AC_MSG_RESULT([ Executables = ${sbindir}])
AC_MSG_RESULT([ Man pages = ${mandir}])
AC_MSG_RESULT([ Libraries = ${libdir}])
AC_MSG_RESULT([ Header files = ${includedir}])
AC_MSG_RESULT([ Arch-independent files = ${datadir}])
AC_MSG_RESULT([ Documentation = ${docdir}])
AC_MSG_RESULT([ State information = ${localstatedir}])
AC_MSG_RESULT([ System configuration = ${sysconfdir}])
AC_MSG_RESULT([ HA_BIN directory prefix = ${libexecdir}])
AC_MSG_RESULT([ RA state files = ${HA_RSCTMPDIR}])
AC_MSG_RESULT([ AIS Plugins = ${LCRSODIR}])
AC_MSG_RESULT([])
AC_MSG_RESULT([ CPPFLAGS = ${CPPFLAGS}])
AC_MSG_RESULT([ CFLAGS = ${CFLAGS}])
AC_MSG_RESULT([ Libraries = ${LIBS}])
AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}])
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 27cae2d11..489826b81 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -1,1386 +1,1386 @@
#!/bin/sh
#
# $Id: IPaddr2.in,v 1.24 2006/08/09 13:01:54 lars Exp $
#
# OCF Resource Agent compliant IPaddr2 script.
#
# Based on work by Tuomo Soini, ported to the OCF RA API by Lars
# Marowsky-Brée. Implements Cluster Alias IP functionality too.
#
# Cluster Alias IP cleanup, fixes and testing by Michael Schwartzkopff
#
#
# Copyright (c) 2003 Tuomo Soini
# Copyright (c) 2004-2006 SUSE LINUX AG, Lars Marowsky-Brée
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#
# TODO:
# - There ought to be an ocf_run_cmd function which does all logging,
# timeout handling etc for us
# - Make this the standard IP address agent on Linux; the other
# platforms simply should ignore the additional parameters OR can use
# the legacy heartbeat resource script...
# - Check LVS <-> clusterip incompatibilities.
#
# OCF parameters are as below
# OCF_RESKEY_ip
# OCF_RESKEY_broadcast
# OCF_RESKEY_nic
# OCF_RESKEY_cidr_netmask
# OCF_RESKEY_iflabel
# OCF_RESKEY_mac
# OCF_RESKEY_clusterip_hash
# OCF_RESKEY_arp_interval
# OCF_RESKEY_arp_count
# OCF_RESKEY_arp_bg
# OCF_RESKEY_preferred_lft
#
# OCF_RESKEY_CRM_meta_clone
# OCF_RESKEY_CRM_meta_clone_max
#######################################################################
# Initialization:
# Load the shared OCF shell helpers and the findif helper. The functions
# directory is only defaulted when OCF_FUNCTIONS_DIR is unset.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/findif.sh

# Built-in default value of every parameter. These are also substituted into
# the XML emitted by meta_data.
OCF_RESKEY_ip_default=""
OCF_RESKEY_cidr_netmask_default=""
OCF_RESKEY_broadcast_default=""
OCF_RESKEY_proto_default=""
OCF_RESKEY_iflabel_default=""
OCF_RESKEY_lvs_support_default="false"
OCF_RESKEY_lvs_ipv6_addrlabel_default="true"
OCF_RESKEY_lvs_ipv6_addrlabel_value_default="99"
OCF_RESKEY_clusterip_hash_default="sourceip-sourceport"
OCF_RESKEY_mac_default=""
OCF_RESKEY_unique_clone_address_default="false"
OCF_RESKEY_arp_interval_default="200"
OCF_RESKEY_arp_count_default="5"
OCF_RESKEY_arp_count_refresh_default="0"
OCF_RESKEY_arp_bg_default=""
OCF_RESKEY_arp_sender_default=""
OCF_RESKEY_send_arp_opts_default=""
OCF_RESKEY_flush_routes_default="false"
OCF_RESKEY_run_arping_default="false"
OCF_RESKEY_nodad_default="false"
OCF_RESKEY_noprefixroute_default="false"
OCF_RESKEY_preferred_lft_default="forever"
OCF_RESKEY_network_namespace_default=""

# Apply the defaults. "${var=value}" assigns only when the variable is unset,
# so a parameter explicitly set to the empty string is left untouched.
: "${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}}"
: "${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}}"
: "${OCF_RESKEY_broadcast=${OCF_RESKEY_broadcast_default}}"
: "${OCF_RESKEY_proto=${OCF_RESKEY_proto_default}}"
: "${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}}"
: "${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}}"
: "${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}}"
: "${OCF_RESKEY_lvs_ipv6_addrlabel_value=${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}}"
: "${OCF_RESKEY_clusterip_hash=${OCF_RESKEY_clusterip_hash_default}}"
: "${OCF_RESKEY_mac=${OCF_RESKEY_mac_default}}"
: "${OCF_RESKEY_unique_clone_address=${OCF_RESKEY_unique_clone_address_default}}"
: "${OCF_RESKEY_arp_interval=${OCF_RESKEY_arp_interval_default}}"
: "${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default}}"
: "${OCF_RESKEY_arp_count_refresh=${OCF_RESKEY_arp_count_refresh_default}}"
: "${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}}"
: "${OCF_RESKEY_arp_sender=${OCF_RESKEY_arp_sender_default}}"
: "${OCF_RESKEY_send_arp_opts=${OCF_RESKEY_send_arp_opts_default}}"
: "${OCF_RESKEY_flush_routes=${OCF_RESKEY_flush_routes_default}}"
: "${OCF_RESKEY_run_arping=${OCF_RESKEY_run_arping_default}}"
: "${OCF_RESKEY_nodad=${OCF_RESKEY_nodad_default}}"
: "${OCF_RESKEY_noprefixroute=${OCF_RESKEY_noprefixroute_default}}"
: "${OCF_RESKEY_preferred_lft=${OCF_RESKEY_preferred_lft_default}}"
: "${OCF_RESKEY_network_namespace=${OCF_RESKEY_network_namespace_default}}"
#######################################################################
# "proto" holds a ready-made "proto <value>" fragment, or nothing at all
# when the proto parameter is empty.
if [ -n "$OCF_RESKEY_proto" ]; then
    proto="proto $OCF_RESKEY_proto"
else
    proto=""
fi

# Helper binaries and state locations used by the functions below.
SENDARP="$HA_BIN/send_arp"
SENDUA="$HA_BIN/send_ua"
FINDIF="findif"
VLDIR="$HA_RSCTMP"
SENDARPPIDDIR="$HA_RSCTMP"
CIP_lockfile="$HA_RSCTMP/IPaddr2-CIP-${OCF_RESKEY_ip}"
IPADDR2_CIP_IPTABLES="$IPTABLES"
#######################################################################
# meta_data: print the OCF resource-agent metadata (XML) on stdout and exit
# with $OCF_SUCCESS. Parameter defaults are interpolated from the
# OCF_RESKEY_*_default variables, so the here-document must stay unquoted.
# Parameters that the agent evaluates with ocf_is_true and that have a
# true/false default (run_arping, nodad, noprefixroute) are advertised as
# type "boolean", consistent with lvs_support and flush_routes.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="IPaddr2" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This Linux-specific resource manages IP alias IP addresses.
It can add an IP alias, or remove one.
In addition, it can implement Cluster Alias IP functionality
if invoked as a clone resource.
If used as a clone, "shared address with a trivial, stateless
(autonomous) load-balancing/mutual exclusion on ingress" mode gets
applied (as opposed to "assume resource uniqueness" mode otherwise).
For that, Linux firewall (kernel and userspace) is assumed, and since
recent distributions are ambivalent in plain "iptables" command to
particular back-end resolution, "iptables-legacy" (when present) gets
prioritized so as to avoid incompatibilities (note that respective
ipt_CLUSTERIP firewall extension in use here is, at the same time,
marked deprecated, yet said "legacy" layer can make it workable,
literally, to this day) with "netfilter" one (as in "iptables-nft").
In that case, you should explicitly set clone-node-max &gt;= 2,
and/or clone-max &lt; number of nodes. In case of node failure,
clone instances need to be re-allocated on surviving nodes.
This would not be possible if there is already an instance
on those nodes, and clone-node-max=1 (which is the default).
When the specified IP address gets assigned to a respective interface, the
resource agent sends unsolicited ARP (Address Resolution Protocol, IPv4) or NA
(Neighbor Advertisement, IPv6) packets to inform neighboring machines about the
change. This functionality is controlled for both IPv4 and IPv6 by shared
'arp_*' parameters.
</longdesc>
<shortdesc lang="en">Manages virtual IPv4 and IPv6 addresses (Linux specific version)</shortdesc>
<parameters>
<parameter name="ip" unique="1" required="1">
<longdesc lang="en">
The IPv4 (dotted quad notation) or IPv6 address (colon hexadecimal notation)
example IPv4 "192.168.1.1".
example IPv6 "2001:db8:DC28:0:0:FC57:D4C8:1FFF".
</longdesc>
<shortdesc lang="en">IPv4 or IPv6 address</shortdesc>
<content type="string" default="${OCF_RESKEY_ip_default}" />
</parameter>
<parameter name="nic" unique="0">
<longdesc lang="en">
The base network interface on which the IP address will be brought
online.
If left empty, the script will try and determine this from the
routing table.
Do NOT specify an alias interface in the form eth0:1 or anything here;
rather, specify the base interface only.
If you want a label, see the iflabel parameter.
Prerequisite:
There must be at least one static IP address, which is not managed by
the cluster, assigned to the network interface.
If you can not assign any static IP address on the interface,
modify this kernel parameter:
sysctl -w net.ipv4.conf.all.promote_secondaries=1 # (or per device)
</longdesc>
<shortdesc lang="en">Network interface</shortdesc>
<content type="string"/>
</parameter>
<parameter name="cidr_netmask">
<longdesc lang="en">
The netmask for the interface in CIDR format
(e.g., 24 and not 255.255.255.0)
If unspecified, the script will also try to determine this from the
routing table.
</longdesc>
<shortdesc lang="en">CIDR netmask</shortdesc>
<content type="string" default="${OCF_RESKEY_cidr_netmask_default}"/>
</parameter>
<parameter name="broadcast">
<longdesc lang="en">
Broadcast address associated with the IP. It is possible to use the
special symbols '+' and '-' instead of the broadcast address. In this
case, the broadcast address is derived by setting/resetting the host
bits of the interface prefix.
</longdesc>
<shortdesc lang="en">Broadcast address</shortdesc>
<content type="string" default="${OCF_RESKEY_broadcast_default}"/>
</parameter>
<parameter name="proto">
<longdesc lang="en">
Proto to match when finding network. E.g. "kernel".
</longdesc>
<shortdesc lang="en">Proto</shortdesc>
<content type="string" default="${OCF_RESKEY_proto_default}" />
</parameter>
<parameter name="iflabel">
<longdesc lang="en">
You can specify an additional label for your IP address here.
This label is appended to your interface name.
The kernel allows alphanumeric labels up to a maximum length of 15
characters including the interface name and colon (e.g. eth0:foobar1234)
A label can be specified in nic parameter but it is deprecated.
If a label is specified in nic name, this parameter has no effect.
</longdesc>
<shortdesc lang="en">Interface label</shortdesc>
<content type="string" default="${OCF_RESKEY_iflabel_default}"/>
</parameter>
<parameter name="lvs_support">
<longdesc lang="en">
Enable support for LVS Direct Routing configurations. In case an IP
address is stopped, only move it to the loopback device to allow the
local node to continue to service requests, but no longer advertise it
on the network.
Notes for IPv6:
It is not necessary to enable this option on IPv6.
Instead, enable 'lvs_ipv6_addrlabel' option for LVS-DR usage on IPv6.
</longdesc>
<shortdesc lang="en">Enable support for LVS DR</shortdesc>
<content type="boolean" default="${OCF_RESKEY_lvs_support_default}"/>
</parameter>
<parameter name="lvs_ipv6_addrlabel">
<longdesc lang="en">
Enable adding IPv6 address label so IPv6 traffic originating from
the address's interface does not use this address as the source.
This is necessary for LVS-DR health checks to realservers to work. Without it,
the most recently added IPv6 address (probably the address added by IPaddr2)
will be used as the source address for IPv6 traffic from that interface and
since that address exists on loopback on the realservers, the realserver
response to pings/connections will never leave its loopback.
See RFC3484 for the detail of the source address selection.
See also 'lvs_ipv6_addrlabel_value' parameter.
</longdesc>
<shortdesc lang="en">Enable adding IPv6 address label.</shortdesc>
<content type="boolean" default="${OCF_RESKEY_lvs_ipv6_addrlabel_default}"/>
</parameter>
<parameter name="lvs_ipv6_addrlabel_value">
<longdesc lang="en">
Specify IPv6 address label value used when 'lvs_ipv6_addrlabel' is enabled.
The value should be an unused label in the policy table
which is shown by 'ip addrlabel list' command.
You would rarely need to change this parameter.
</longdesc>
<shortdesc lang="en">IPv6 address label value.</shortdesc>
<content type="integer" default="${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}"/>
</parameter>
<parameter name="mac">
<longdesc lang="en">
Set the interface MAC address explicitly. Currently only used in case of
the Cluster IP Alias. Leave empty to choose automatically.
</longdesc>
<shortdesc lang="en">Cluster IP MAC address</shortdesc>
<content type="string" default="${OCF_RESKEY_mac_default}"/>
</parameter>
<parameter name="clusterip_hash">
<longdesc lang="en">
Specify the hashing algorithm used for the Cluster IP functionality.
</longdesc>
<shortdesc lang="en">Cluster IP hashing function</shortdesc>
<content type="string" default="${OCF_RESKEY_clusterip_hash_default}"/>
</parameter>
<parameter name="unique_clone_address">
<longdesc lang="en">
If true, add the clone ID to the supplied value of IP to create
a unique address to manage
</longdesc>
<shortdesc lang="en">Create a unique address for cloned instances</shortdesc>
<content type="boolean" default="${OCF_RESKEY_unique_clone_address_default}"/>
</parameter>
<parameter name="arp_interval">
<longdesc lang="en">
Specify the interval between unsolicited ARP (IPv4) or NA (IPv6) packets in
milliseconds.
This parameter is deprecated and used for the backward compatibility only.
It is effective only for the send_arp binary which is built with libnet,
and send_ua for IPv6. It has no effect for other arp_sender.
</longdesc>
<shortdesc lang="en">ARP/NA packet interval in ms (deprecated)</shortdesc>
<content type="integer" default="${OCF_RESKEY_arp_interval_default}"/>
</parameter>
<parameter name="arp_count">
<longdesc lang="en">
Number of unsolicited ARP (IPv4) or NA (IPv6) packets to send at resource
initialization.
</longdesc>
<shortdesc lang="en">ARP/NA packet count sent during initialization</shortdesc>
<content type="integer" default="${OCF_RESKEY_arp_count_default}"/>
</parameter>
<parameter name="arp_count_refresh">
<longdesc lang="en">
For IPv4, number of unsolicited ARP packets to send during resource monitoring.
Doing so helps mitigate issues of stuck ARP caches resulting from split-brain
situations.
</longdesc>
<shortdesc lang="en">ARP packet count sent during monitoring</shortdesc>
<content type="integer" default="${OCF_RESKEY_arp_count_refresh_default}"/>
</parameter>
<parameter name="arp_bg">
<longdesc lang="en">
Whether or not to send the ARP (IPv4) or NA (IPv6) packets in the background.
The default is true for IPv4 and false for IPv6.
</longdesc>
<shortdesc lang="en">ARP/NA from background</shortdesc>
<content type="string" default="${OCF_RESKEY_arp_bg_default}"/>
</parameter>
<parameter name="arp_sender">
<longdesc lang="en">
For IPv4, the program to send ARP packets with on start. Available options are:
- send_arp: default
- ipoibarping: default for infiniband interfaces if ipoibarping is available
- iputils_arping: use arping in iputils package
- libnet_arping: use another variant of arping based on libnet
</longdesc>
<shortdesc lang="en">ARP sender</shortdesc>
<content type="string" default="${OCF_RESKEY_arp_sender_default}"/>
</parameter>
<parameter name="send_arp_opts">
<longdesc lang="en">
For IPv4, extra options to pass to the arp_sender program.
Available options vary depending on which arp_sender is used.
A typical use case is specifying '-A' for iputils_arping to use
ARP REPLY instead of ARP REQUEST as Gratuitous ARPs.
</longdesc>
<shortdesc lang="en">Options for ARP sender</shortdesc>
<content type="string" default="${OCF_RESKEY_send_arp_opts_default}"/>
</parameter>
<parameter name="flush_routes">
<longdesc lang="en">
Flush the routing table on stop. This is for
applications which use the cluster IP address
and which run on the same physical host that the
IP address lives on. The Linux kernel may force that
application to take a shortcut to the local loopback
interface, instead of the interface the address
is really bound to. Under those circumstances, an
application may, somewhat unexpectedly, continue
to use connections for some time even after the
IP address is deconfigured. Set this parameter in
order to immediately disable said shortcut when the
IP address goes away.
</longdesc>
<shortdesc lang="en">Flush kernel routing table on stop</shortdesc>
<content type="boolean" default="${OCF_RESKEY_flush_routes_default}"/>
</parameter>
<parameter name="run_arping">
<longdesc lang="en">
For IPv4, whether or not to run arping for collision detection check.
</longdesc>
<shortdesc lang="en">Run arping for IPv4 collision detection check</shortdesc>
<content type="boolean" default="${OCF_RESKEY_run_arping_default}"/>
</parameter>
<parameter name="nodad">
<longdesc lang="en">
For IPv6, do not perform Duplicate Address Detection when adding the address.
</longdesc>
<shortdesc lang="en">Use nodad flag</shortdesc>
<content type="boolean" default="${OCF_RESKEY_nodad_default}"/>
</parameter>
<parameter name="noprefixroute">
<longdesc lang="en">
Use noprefixroute flag (see 'man ip-address').
</longdesc>
<shortdesc lang="en">Use noprefixroute flag</shortdesc>
<content type="boolean" default="${OCF_RESKEY_noprefixroute_default}"/>
</parameter>
<parameter name="preferred_lft">
<longdesc lang="en">
For IPv6, set the preferred lifetime of the IP address.
This can be used to ensure that the created IP address will not
be used as a source address for routing.
Expects a value as specified in section 5.5.4 of RFC 4862.
</longdesc>
<shortdesc lang="en">IPv6 preferred lifetime</shortdesc>
<content type="string" default="${OCF_RESKEY_preferred_lft_default}"/>
</parameter>
<parameter name="network_namespace">
<longdesc lang="en">
Specifies the network namespace to operate within.
The namespace must already exist, and the interface to be used must be within
the namespace.
</longdesc>
<shortdesc lang="en">Network namespace to use</shortdesc>
<content type="string" default="${OCF_RESKEY_network_namespace_default}"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="status" depth="0" timeout="20s" interval="10s" />
<action name="monitor" depth="0" timeout="20s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
    exit $OCF_SUCCESS
}
# ip_init: validate the environment and the resource parameters, then derive
# the globals used by the action handlers (BASEIP, BRDCAST, NIC, NETMASK,
# IFLABEL, IF_MAC, IP_INC_GLOBAL, IP_INC_NO, FAMILY, METRIC, SENDARPPIDFILE
# and, for Cluster IP mode, IP_CIP, IP_CIP_HASH and IP_CIP_FILE).
# It does not return an error: every failure path calls exit with an OCF
# status code.
ip_init() {
local rc
if [ X`uname -s` != "XLinux" ]; then
ocf_exit_reason "IPaddr2 only supported Linux."
exit $OCF_ERR_INSTALLED
fi
# The ip parameter may be missing only for the stop action.
if [ X"$OCF_RESKEY_ip" = "X" ] && [ "$__OCF_ACTION" != "stop" ]; then
ocf_exit_reason "IP address (the ip parameter) is mandatory"
exit $OCF_ERR_CONFIGURED
fi
# Only start and stop require root; every other action passes this check.
if
case $__OCF_ACTION in
start|stop) ocf_is_root;;
*) true;;
esac
then
: YAY!
else
ocf_exit_reason "You must be root for $__OCF_ACTION operation."
exit $OCF_ERR_PERM
fi
BASEIP="$OCF_RESKEY_ip"
BRDCAST="$OCF_RESKEY_broadcast"
NIC="$OCF_RESKEY_nic"
# Note: We had a version out there for a while which used
# netmask instead of cidr_netmask. Don't remove this aliasing code!
if
[ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ]
then
OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask
export OCF_RESKEY_cidr_netmask
fi
NETMASK="$OCF_RESKEY_cidr_netmask"
IFLABEL="$OCF_RESKEY_iflabel"
IF_MAC="$OCF_RESKEY_mac"
# Clone bookkeeping: total number of clone instances (defaults to 1) and the
# 1-based number of this instance.
IP_INC_GLOBAL=${OCF_RESKEY_CRM_meta_clone_max:-1}
IP_INC_NO=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + 1`
# NOTE(review): IP_INC_GLOBAL is compared numerically here before it is
# validated as a decimal by the check that follows.
if ocf_is_true ${OCF_RESKEY_lvs_support} && [ $IP_INC_GLOBAL -gt 1 ]; then
ocf_exit_reason "LVS and load sharing do not go together well"
exit $OCF_ERR_CONFIGURED
fi
if ocf_is_decimal "$IP_INC_GLOBAL" && [ $IP_INC_GLOBAL -gt 0 ]; then
:
else
ocf_exit_reason "Invalid meta-attribute clone_max [$IP_INC_GLOBAL], should be positive integer"
exit $OCF_ERR_CONFIGURED
fi
# Address family detection: an address containing ":" is treated as IPv6.
echo $OCF_RESKEY_ip | grep -qs ":"
if [ $? -ne 0 ];then
FAMILY=inet
# arp_bg left empty defaults to true for IPv4.
if [ -z "$OCF_RESKEY_arp_bg" ]; then
OCF_RESKEY_arp_bg=true
fi
else
FAMILY=inet6
# address sanitization defined in RFC5952
SANITIZED_IP=$($IP2UTIL route get $OCF_RESKEY_ip 2> /dev/null | awk '$1~/:/ {print $1} $2~/:/ {print $2}')
if [ -n "$SANITIZED_IP" ]; then
OCF_RESKEY_ip="$SANITIZED_IP"
fi
if ocf_is_true $OCF_RESKEY_lvs_support ;then
ocf_exit_reason "The IPv6 does not support lvs_support"
exit $OCF_ERR_CONFIGURED
fi
if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
# The check accepts 0 as well (-ge 0), although the message says "positive".
if ocf_is_decimal "$OCF_RESKEY_lvs_ipv6_addrlabel_value" && [ $OCF_RESKEY_lvs_ipv6_addrlabel_value -ge 0 ]; then
:
else
ocf_exit_reason "Invalid lvs_ipv6_addrlabel_value [$OCF_RESKEY_lvs_ipv6_addrlabel_value], should be positive integer"
exit $OCF_ERR_CONFIGURED
fi
fi
# arp_bg left empty defaults to false for IPv6.
if [ -z "$OCF_RESKEY_arp_bg" ]; then
OCF_RESKEY_arp_bg=false
fi
fi
# support nic:iflabel format in nic parameter
case $NIC in
*:*)
IFLABEL=`echo $NIC | sed 's/[^:]*://'`
NIC=`echo $NIC | sed 's/:.*//'`
# only the base name should be passed to findif
OCF_RESKEY_nic=$NIC
;;
esac
# $FINDIF takes its parameters from the environment
#
NICINFO=`$FINDIF`
rc=$?
if
[ $rc -eq 0 ]
then
# Strip the "netmask", "broadcast" and "metric" keywords so the remaining
# space-separated fields are: nic, netmask, broadcast, metric.
NICINFO=`echo "$NICINFO" | sed -e 's/netmask\ //;s/broadcast\ //;s/metric\ //'`
NIC=`echo "$NICINFO" | cut -d" " -f1`
NETMASK=`echo "$NICINFO" | cut -d" " -f2`
BRDCAST=`echo "$NICINFO" | cut -d" " -f3`
METRIC=`echo "$NICINFO" | cut -d" " -f4`
else
# findif couldn't find the interface
# A probe reports "not running", stop reports success, and any other action
# fails with findif's own exit code.
if ocf_is_probe; then
ocf_log info "[$FINDIF] failed"
exit $OCF_NOT_RUNNING
elif [ "$__OCF_ACTION" = stop ]; then
ocf_log warn "[$FINDIF] failed"
exit $OCF_SUCCESS
else
ocf_exit_reason "[$FINDIF] failed"
exit $rc
fi
fi
SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
# The final label is "<nic>:<iflabel>" and must not exceed 15 characters.
if [ -n "$IFLABEL" ]; then
IFLABEL=${NIC}:${IFLABEL}
if [ ${#IFLABEL} -gt 15 ]; then
ocf_exit_reason "Interface label [$IFLABEL] exceeds maximum character limit of 15"
exit $OCF_ERR_CONFIGURED
fi
fi
# Cluster IP mode: more than one clone instance and no unique per-clone
# address requested.
if [ "$IP_INC_GLOBAL" -gt 1 ] && ! ocf_is_true "$OCF_RESKEY_unique_clone_address"; then
IP_CIP="yes"
IP_CIP_HASH="${OCF_RESKEY_clusterip_hash}"
if [ -z "$IF_MAC" ]; then
# Choose a MAC
# 1. Concatenate some input together
# 2. This doesn't need to be a cryptographically
# secure hash.
# 3. Drop everything after the first 6 octets (12 chars)
# 4. Delimit the octets with ':'
# 5. Make sure the first octet is odd,
# so the result is a multicast MAC
IF_MAC=`echo $OCF_RESKEY_ip $NETMASK $BRDCAST | \
md5sum | \
sed -e 's#\(............\).*#\1#' \
-e 's#..#&:#g; s#:$##' \
-e 's#^\(.\)[02468aAcCeE]#\11#'`
fi
IP_CIP_FILE="/proc/net/ipt_CLUSTERIP/$OCF_RESKEY_ip"
fi
}
#
# Find out which interfaces serve the given IP address and netmask.
# The arguments are an IP address and a netmask.
# It prints the names of the matching interfaces, one per line (e.g., "eth0" and "eth1").
#
# find_interface <ipaddr> <netmask>
# Print the name of every interface whose address listing contains
# " <ipaddr>/<netmask>" for the current $FAMILY, one per line.
# Always returns 0, even when nothing matches (the output is then empty).
find_interface() {
    local ipaddr netmask iface
    ipaddr="$1"
    netmask="$2"
    # "ip -o" prints one address per line with the interface name in field 2.
    # FreeS/WAN ipsecN virtual interfaces are excluded from the result.
    iface=$($IP2UTIL -o -f $FAMILY addr show \
        | grep " $ipaddr/$netmask" \
        | cut -d ' ' -f2 \
        | grep -v '^ipsec[0-9][0-9]*$')
    echo "$iface"
    return 0
}
#
# Delete an interface
#
# delete_interface <ipaddr> <iface> <netmask>
# Remove <ipaddr>/<netmask> from <iface>. When flush_routes is enabled the
# routing cache is flushed afterwards, and for IPv6 with lvs_ipv6_addrlabel
# enabled the matching address label is removed as well.
# Returns $OCF_ERR_GENERIC if the address could not be deleted, otherwise
# $OCF_SUCCESS.
# All working variables are local so that callers' variables of the same
# name are not overwritten (matches add_interface).
delete_interface () {
    local ipaddr iface netmask cmd
    ipaddr="$1"
    iface="$2"
    netmask="$3"
    cmd="$IP2UTIL -f $FAMILY addr delete $ipaddr/$netmask dev $iface"
    ocf_run $cmd || return $OCF_ERR_GENERIC
    if ocf_is_true $OCF_RESKEY_flush_routes; then
        ocf_run $IP2UTIL route flush cache
    fi
    if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
        delete_ipv6_addrlabel $ipaddr
    fi
    return $OCF_SUCCESS
}
#
# Add an interface
#
# add_interface <ipaddr> <netmask> <broadcast|none> <iface> [label] [metric]
# Add the address to <iface> and bring the device up.
# For IPv4 with run_arping enabled, arping is used first to detect a
# duplicate address. For IPv6 with lvs_ipv6_addrlabel enabled, an address
# label is installed before the address is added.
# Returns $OCF_ERR_GENERIC on an address collision or when either ip command
# fails, otherwise $OCF_SUCCESS.
add_interface () {
    local cmd msg extra_opts ipaddr netmask broadcast iface label metric
    ipaddr="$1"; netmask="$2"; broadcast="$3"
    iface="$4"; label="$5"; metric="$6"

    if [ "$FAMILY" = "inet" ] && ocf_is_true $OCF_RESKEY_run_arping &&
       check_binary arping; then
        arping -q -c 2 -w 3 -D -I $iface $ipaddr
        # Only exit status 1 is treated as a collision.
        case $? in
            1)
                ocf_log err "IPv4 address collision $ipaddr [DAD]"
                return $OCF_ERR_GENERIC
                ;;
        esac
    fi

    if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
        add_ipv6_addrlabel $ipaddr
    fi

    # Assemble the command and its log message piece by piece; the broadcast
    # part is inserted only when a broadcast address was given.
    cmd="$IP2UTIL -f $FAMILY addr add $ipaddr/$netmask"
    msg="Adding $FAMILY address $ipaddr/$netmask"
    if [ "$broadcast" != "none" ]; then
        cmd="$cmd brd $broadcast"
        msg="$msg with broadcast address $broadcast"
    fi
    cmd="$cmd dev $iface"
    msg="$msg to device $iface"

    # Optional trailing flags, appended in a fixed order.
    extra_opts=""
    if [ "$FAMILY" = "inet6" ]; then
        [ -n "$metric" ] && extra_opts="$extra_opts metric $metric"
        ocf_is_true "${OCF_RESKEY_nodad}" && extra_opts="$extra_opts nodad"
    fi
    ocf_is_true "${OCF_RESKEY_noprefixroute}" && extra_opts="$extra_opts noprefixroute"
    [ -n "$label" ] && extra_opts="$extra_opts label $label"
    [ "$FAMILY" = "inet6" ] && extra_opts="$extra_opts preferred_lft $OCF_RESKEY_preferred_lft"

    if [ -n "$extra_opts" ]; then
        cmd="$cmd$extra_opts"
        msg="$msg (with$extra_opts)"
    fi

    ocf_log info "$msg"
    ocf_run $cmd || return $OCF_ERR_GENERIC

    ocf_log info "Bringing device $iface up"
    ocf_run $IP2UTIL link set dev $iface up || return $OCF_ERR_GENERIC
    return $OCF_SUCCESS
}
#
# Delete a route
#
# delete_route <prefix> <iface>
# Log and run "ip route delete <prefix> dev <iface>".
# Returns the exit status of the ip command.
# The working variables are local: the previous version overwrote the
# global names "prefix", "iface" and "CMD" as a side effect.
delete_route () {
    local prefix iface cmd
    prefix="$1"
    iface="$2"
    cmd="$IP2UTIL route delete $prefix dev $iface"
    ocf_log info "$cmd"
    $cmd
    return $?
}
# On Linux systems the (hidden) loopback interface may
# conflict with the requested IP address. If so, this
# unoriginal code will remove the offending loopback address
# and save it in VLDIR so it can be added back in later
# when the IPaddr is released.
#
# TODO: This is very ugly and should be controlled by an additional
# instance parameter. Or even: multi-state, with the IP only being
# "active" on the master!?
#
# remove_conflicting_loopback <ipaddr> <netmask> <broadcast> <ifname>
# Record the loopback address details in $VLDIR/<ipaddr> (so that
# restore_loopback can re-create it), then delete the address and its route.
# A failure to write the record is logged but does not stop the removal.
# The return status is that of the final delete_route call.
remove_conflicting_loopback() {
    ipaddr="$1"
    netmask="$2"
    broadcast="$3"
    ifname="$4"

    ocf_log info "Removing conflicting loopback $ifname."

    echo "$ipaddr $netmask $broadcast $ifname" > "$VLDIR/$ipaddr" ||
        ocf_log err "Could not save conflicting loopback $ifname." \
            "it will not be restored."

    delete_interface "$ipaddr" "$ifname" "$netmask"
    # Forcibly remove the route (if it exists) to the loopback.
    delete_route "$ipaddr" "$ifname"
}
#
# On Linux systems the (hidden) loopback interface may
# need to be restored if it has been taken down previously
# by remove_conflicting_loopback()
#
# restore_loopback <ipaddr>
# If $VLDIR/<ipaddr> exists and is non-empty, pass its whitespace-separated
# contents to add_interface as arguments and then delete the file.
# Does nothing (and returns 0) when there is no such record.
restore_loopback() {
    ipaddr="$1"
    [ -s "$VLDIR/$ipaddr" ] || return 0

    ifinfo=$(cat "$VLDIR/$ipaddr")
    ocf_log info "Restoring loopback IP Address " \
        "$ifinfo."
    # Deliberately unquoted: the saved record is split into separate arguments.
    add_interface $ifinfo
    rm -f "$VLDIR/$ipaddr"
}
# add_ipv6_addrlabel <ipaddr>
# Install an IPv6 address label for <ipaddr> using the value from the
# lvs_ipv6_addrlabel_value parameter. A failure is logged as a warning only.
add_ipv6_addrlabel() {
    local cmd ipaddr value
    ipaddr="$1"
    value="$OCF_RESKEY_lvs_ipv6_addrlabel_value"
    cmd="$IP2UTIL addrlabel add prefix $ipaddr label $value"

    ocf_log info "Adding IPv6 address label prefix $ipaddr label $value"
    if ! ocf_run $cmd; then
        ocf_log warn "$cmd failed."
    fi
}
# delete_ipv6_addrlabel <ipaddr>
# Remove the IPv6 address label for <ipaddr> (label value taken from the
# lvs_ipv6_addrlabel_value parameter). The return status is that of ocf_run;
# callers in this file ignore it.
delete_ipv6_addrlabel() {
    local cmd ipaddr value
    ipaddr="$1"
    value="$OCF_RESKEY_lvs_ipv6_addrlabel_value"
    # an error can be ignored
    ocf_run $IP2UTIL addrlabel del prefix $ipaddr label $value
}
# is_infiniband
# Succeeds (returns 0) when the link listing of $NIC contains
# "link/infiniband", and returns 1 otherwise. Prints nothing on stdout.
is_infiniband() {
    case "$($IP2UTIL link show $NIC)" in
        *link/infiniband*) return 0 ;;
        *) return 1 ;;
    esac
}
# log_arp_sender <command> [args...]
# Run the given ARP sender command, capturing stdout and stderr, and log the
# captured output at $LOGLEVEL. A non-zero exit status is logged as an error
# unless the sender is libnet_arping.
log_arp_sender() {
    local cmdline output rc
    cmdline="$*"
    output=$($cmdline 2>&1)
    rc=$?
    if [ $rc -ne 0 ]; then
        # libnet_arping always return an error as no answers
        if [ "$ARP_SENDER" != "libnet_arping" ]; then
            ocf_log err "Could not send gratuitous arps: rc=$rc"
        fi
    fi
    ocf_log $LOGLEVEL "$output"
}
# wrapper function to manage PID file to run arping in background
# run_with_pidfile <command> [args...]
# Start the command in the background, write its PID to $SENDARPPIDFILE,
# wait for it to finish, remove the PID file and return the command's exit
# status.
run_with_pidfile() {
    local cmdline pid rc

    cmdline="$*"
    $cmdline &
    pid=$!

    # The PID file exists only while the command is running.
    echo "$pid" > $SENDARPPIDFILE
    wait $pid
    rc=$?
    rm -f $SENDARPPIDFILE

    return $rc
}
# build_arp_sender_cmd
# Translate $ARP_SENDER into a complete command line stored in the global
# ARP_SENDER_CMD (ARGS, and MY_MAC for send_arp, are set as a side effect).
# Exits with $OCF_ERR_GENERIC when $ARP_SENDER is not one of the known names.
build_arp_sender_cmd() {
    case "$ARP_SENDER" in
        send_arp)
            # "auto" unless Cluster IP mode supplies an explicit MAC, in which
            # case the MAC is passed with its colons stripped.
            MY_MAC=auto
            if [ "x$IP_CIP" = "xyes" ] && [ x != "x$IF_MAC" ]; then
                # send_arp.linux should return without doing anything in this case
                MY_MAC=`echo ${IF_MAC} | sed -e 's/://g'`
            fi
            ARGS="$OCF_RESKEY_send_arp_opts -i $OCF_RESKEY_arp_interval -r $ARP_COUNT -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used"
            ARP_SENDER_CMD="$SENDARP $ARGS"
            ;;
        iputils_arping)
            ARGS="$OCF_RESKEY_send_arp_opts -U -c $ARP_COUNT -I $NIC $OCF_RESKEY_ip"
            ARP_SENDER_CMD="run_with_pidfile arping $ARGS"
            ;;
        libnet_arping)
            ARGS="$OCF_RESKEY_send_arp_opts -U -c $ARP_COUNT -i $NIC -S $OCF_RESKEY_ip $OCF_RESKEY_ip"
            ARP_SENDER_CMD="run_with_pidfile arping $ARGS"
            ;;
        ipoibarping)
            ARGS="-q -c $ARP_COUNT -U -I $NIC $OCF_RESKEY_ip"
            ARP_SENDER_CMD="ipoibarping $ARGS"
            ;;
        *)
            # should not occur
            ocf_exit_reason "unrecognized arp_sender value: $ARP_SENDER"
            exit $OCF_ERR_GENERIC
            ;;
    esac
}
#
# Send Unsolicited ARPs to update neighbor's ARP cache
#
# run_arp_sender [refresh]
# Send unsolicited ARPs for the managed address. With the argument "refresh"
# the count comes from arp_count_refresh and logging is at debug level;
# otherwise the count comes from arp_count and logging is at info level.
# Nothing is sent when the count is 0, or in Cluster IP mode with a sender
# other than send_arp. The command runs in the background when arp_bg is
# true.
run_arp_sender() {
    case "x$1" in
        xrefresh)
            ARP_COUNT=$OCF_RESKEY_arp_count_refresh
            LOGLEVEL=debug
            ;;
        *)
            ARP_COUNT=$OCF_RESKEY_arp_count
            LOGLEVEL=info
            ;;
    esac

    if [ $ARP_COUNT -eq 0 ] ; then
        return
    fi

    # do not need to send Gratuitous ARPs in the Cluster IP configuration
    # except send_arp.libnet binary to retain the old behavior
    if [ "x$IP_CIP" = "xyes" ]; then
        if [ "x$ARP_SENDER" != "xsend_arp" ]; then
            ocf_log info "Gratuitous ARPs are not sent in the Cluster IP configuration"
            return
        fi
    fi

    # prepare arguments for each arp sender program
    # $ARP_SENDER_CMD should be set
    build_arp_sender_cmd
    ocf_log $LOGLEVEL "$ARP_SENDER_CMD"

    if ocf_is_true $OCF_RESKEY_arp_bg; then
        log_arp_sender $ARP_SENDER_CMD &
    else
        log_arp_sender $ARP_SENDER_CMD
    fi
}
# log_send_ua <command> [args...]
# Run the given send_ua command, capturing stdout and stderr, log the
# captured output at info level and return the command's exit status.
# A non-zero status is additionally logged as an error.
log_send_ua() {
    local cmdline output rc
    cmdline="$*"
    output=$($cmdline 2>&1)
    rc=$?
    [ $rc -eq 0 ] ||
        ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements: rc=$rc"
    ocf_log info "$output"
    return $rc
}
#
# Run send_ua to send ICMPv6 Unsolicited Neighbor Advertisements.
#
# run_send_ua
# Wait (up to 10 polls, one second apart) for the IPv6 address to leave the
# "tentative" state, then send Unsolicited Neighbor Advertisements with
# $SENDUA. If the kernel flags the address "dadfailed" the address is deleted
# and $OCF_ERR_GENERIC is returned. Otherwise the return status is that of
# log_send_ua, or of launching it in the background when arp_bg is true.
run_send_ua() {
    local i

    # Duplicate Address Detection [DAD]
    # Kernel will flag the IP as 'tentative' until it ensured that
    # there is no duplicates.
    # If there is, it will flag it as 'dadfailed'
    i=1
    while [ $i -le 10 ]; do
        ipstatus=$($IP2UTIL -o -f $FAMILY addr show dev $NIC to $OCF_RESKEY_ip/$NETMASK)
        case "$ipstatus" in
            *dadfailed*)
                ocf_log err "IPv6 address collision $OCF_RESKEY_ip [DAD]"
                $IP2UTIL -f $FAMILY addr del dev $NIC $OCF_RESKEY_ip/$NETMASK ||
                    ocf_log err "Could not delete IPv6 address"
                return $OCF_ERR_GENERIC
                ;;
            *tentative*)
                # Only the last poll reports that DAD has not completed.
                if [ $i -eq 10 ]; then
                    ocf_log warn "IPv6 address : DAD is still in tentative"
                fi
                ;;
            *)
                break
                ;;
        esac
        sleep 1
        i=$((i + 1))
    done

    # Now the address should be usable
    ARGS="-i $OCF_RESKEY_arp_interval -c $OCF_RESKEY_arp_count $OCF_RESKEY_ip $NETMASK $NIC"
    ocf_log info "$SENDUA $ARGS"
    if ocf_is_true $OCF_RESKEY_arp_bg; then
        log_send_ua $SENDUA $ARGS &
    else
        log_send_ua $SENDUA $ARGS
    fi
}
# Do we already serve this IP address on the given $NIC?
#
# returns:
# ok = served (for CIP: + hash bucket)
# partial = served and no hash bucket (CIP only)
# partial2 = served and no CIP iptables rule
# partial3 = served with no label
# no = nothing
#
# ip_served: print one of "ok", "partial", "partial2", "partial3" or "no"
# (see the table above) describing whether $OCF_RESKEY_ip/$NETMASK is
# currently configured. Every visible path returns 0; the status is carried
# by the printed word only.
ip_served() {
if [ -z "$NIC" ]; then # no nic found or specified
echo "no"
return 0
fi
# All interfaces that currently carry the address.
cur_nic="`find_interface $OCF_RESKEY_ip $NETMASK`"
if [ -z "$cur_nic" ]; then
echo "no"
return 0
fi
# Plain (non Cluster IP) mode.
if [ -z "$IP_CIP" ]; then
for i in $cur_nic; do
# check address label
# NOTE(review): $nic is not assigned anywhere in this function (the loop
# variable is $i and the global is $NIC), so it expands to nothing here and
# the label lookup is not restricted to one device - confirm this is intended.
if [ -n "$IFLABEL" ] && [ -z "`$IP2UTIL -o -f $FAMILY addr show $nic label $IFLABEL`" ]; then
echo partial3
return 0
fi
# only mark as served when on the same interfaces as $NIC
[ "$i" = "$NIC" ] || continue
echo "ok"
return 0
done
# There used to be logic here to pretend "not served",
# if ${OCF_RESKEY_lvs_support} was enabled, and the IP was
# found active on "lo*" only. With lvs_support on, you should
# have NIC != lo, so thats already filtered
# by the continue above.
echo "no"
return 0
fi
# Special handling for the CIP:
# A missing $IP_CIP_FILE is reported as "partial2".
if [ ! -e $IP_CIP_FILE ]; then
echo "partial2"
return 0
fi
# "ok" only when this instance's number ($IP_INC_NO) is listed in the
# comma-separated contents of $IP_CIP_FILE.
- if egrep -q "(^|,)${IP_INC_NO}(,|$)" $IP_CIP_FILE ; then
+ if $EGREP -q "(^|,)${IP_INC_NO}(,|$)" $IP_CIP_FILE ; then
echo "ok"
return 0
else
echo "partial"
return 0
fi
# NOTE(review): not reachable - both branches of the if above return.
exit $OCF_ERR_GENERIC
}
#######################################################################
# ip_usage: print a two-line usage summary for this agent on stdout.
ip_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|status|monitor|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}
# ip_start: handler for the "start" action. Brings the address up on $NIC
# (adding the CLUSTERIP iptables rule or hash bucket first in Cluster IP
# mode) and then announces it with ARP (IPv4) or Neighbor Advertisements
# (IPv6). This function never returns: every path ends in exit with an OCF
# status code.
ip_start() {
if [ -z "$NIC" ]; then
ocf_exit_reason "No nic found or specified"
exit $OCF_ERR_CONFIGURED
fi
if [ -n "$IP_CIP" ]; then
# Cluster IPs need special processing when the first bucket
# is added to the node... take a lock to make sure only one
# process executes that code
ocf_take_lock $CIP_lockfile
ocf_release_lock_on_exit $CIP_lockfile
fi
#
# Do we already service this IP address on $NIC?
#
local ip_status=`ip_served`
# Already fully configured: nothing to do.
if [ "$ip_status" = "ok" ]; then
exit $OCF_SUCCESS
fi
if [ "$ip_status" = "partial3" ]; then
ocf_exit_reason "IP $OCF_RESKEY_ip available, but label missing"
exit $OCF_ERR_GENERIC
fi
# Cluster IP mode with no CLUSTERIP rule yet ("no" or "partial2"): insert the
# iptables rule. The inner test runs in a ( ... ) subshell.
if [ -n "$IP_CIP" ] && ([ $ip_status = "no" ] || [ $ip_status = "partial2" ]); then
$MODPROBE ip_conntrack
$IPADDR2_CIP_IPTABLES -I INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \
--new \
--clustermac $IF_MAC \
--total-nodes $IP_INC_GLOBAL \
--local-node $IP_INC_NO \
--hashmode $IP_CIP_HASH
if [ $? -ne 0 ]; then
ocf_exit_reason "iptables failed"
exit $OCF_ERR_GENERIC
fi
fi
# Rule present but this instance's bucket is missing: add the bucket by
# writing "+<n>" to $IP_CIP_FILE.
if [ -n "$IP_CIP" ] && [ $ip_status = "partial" ]; then
echo "+$IP_INC_NO" >$IP_CIP_FILE
fi
if [ "$ip_status" = "no" ]; then
# With lvs_support, an address held on a lo* interface with a /32 mask is
# removed first (and recorded by remove_conflicting_loopback).
if ocf_is_true ${OCF_RESKEY_lvs_support}; then
for i in `find_interface $OCF_RESKEY_ip 32`; do
case $i in
lo*)
remove_conflicting_loopback $OCF_RESKEY_ip 32 255.255.255.255 lo
;;
esac
done
fi
add_interface "$OCF_RESKEY_ip" "$NETMASK" "${BRDCAST:-none}" "$NIC" "$IFLABEL" "$METRIC"
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
ocf_exit_reason "Failed to add $OCF_RESKEY_ip"
exit $rc
fi
# Re-check: the address must now be reported as fully served.
ip_status=`ip_served`
if [ "$ip_status" != "ok" ]; then
ocf_exit_reason "Failed to add $OCF_RESKEY_ip with error $ip_status"
exit $OCF_ERR_GENERIC
fi
fi
case $NIC in
lo*)
: no need to run send_arp on loopback
;;
*)
if [ $FAMILY = "inet" ];then
run_arp_sender
else
# For IPv6 the advertisement is skipped when $SENDUA is not executable.
if [ -x $SENDUA ]; then
run_send_ua
if [ $? -ne 0 ]; then
ocf_exit_reason "run_send_ua failed."
exit $OCF_ERR_GENERIC
fi
fi
fi
;;
esac
exit $OCF_SUCCESS
}
# ip_stop: OCF "stop" action -- take $OCF_RESKEY_ip down on $NIC.
#
# This function never returns; every path ends in "exit":
#   OCF_SUCCESS      the address was not served, was removed, or (cluster
#                    IP) other buckets remain so the interface is kept
#   OCF_ERR_GENERIC  delete_interface failed
#
# A send_arp process recorded in $SENDARPPIDFILE is killed first.
ip_stop() {
	local ip_del_if="yes"

	if [ -n "$IP_CIP" ]; then
		# Cluster IPs need special processing when the last bucket
		# is removed from the node... take a lock to make sure only one
		# process executes that code
		ocf_take_lock $CIP_lockfile
		ocf_release_lock_on_exit $CIP_lockfile
	fi

	if [ -f "$SENDARPPIDFILE" ] ; then
		kill `cat "$SENDARPPIDFILE"`
		if [ $? -ne 0 ]; then
			ocf_log warn "Could not kill previously running send_arp for $OCF_RESKEY_ip"
		else
			ocf_log info "killed previously running send_arp for $OCF_RESKEY_ip"
		fi
		rm -f "$SENDARPPIDFILE"
	fi

	local ip_status=`ip_served`
	ocf_log info "IP status = $ip_status, IP_CIP=$IP_CIP"

	# $ip_status is quoted in every test below so that an empty value
	# compares as "no match" instead of breaking "[".
	if [ "$ip_status" = "no" ]; then
		: Requested interface not in use
		exit $OCF_SUCCESS
	fi

	if [ -n "$IP_CIP" ] && [ "$ip_status" != "partial2" ]; then
		if [ "$ip_status" = "partial" ]; then
			exit $OCF_SUCCESS
		fi

		# "-N" written to the CLUSTERIP control file drops bucket N here.
		echo "-$IP_INC_NO" >$IP_CIP_FILE
		if [ "x$(cat $IP_CIP_FILE)" = "x" ]; then
			# No bucket left on this node: remove the iptables rule
			# for every possible local-node number.
			ocf_log info $OCF_RESKEY_ip, $IP_CIP_HASH
			i=1
			while [ $i -le $IP_INC_GLOBAL ]; do
				ocf_log info $i
				$IPADDR2_CIP_IPTABLES -D INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \
					--new \
					--clustermac $IF_MAC \
					--total-nodes $IP_INC_GLOBAL \
					--local-node $i \
					--hashmode $IP_CIP_HASH
				i=`expr $i + 1`
			done
		else
			# Other buckets are still active: keep the address.
			ip_del_if="no"
		fi
	fi

	if [ "$ip_del_if" = "yes" ]; then
		delete_interface $OCF_RESKEY_ip $NIC $NETMASK
		if [ $? -ne 0 ]; then
			ocf_exit_reason "Unable to remove IP [${OCF_RESKEY_ip}] from interface [ $NIC ]"
			exit $OCF_ERR_GENERIC
		fi

		if ocf_is_true ${OCF_RESKEY_lvs_support}; then
			restore_loopback "$OCF_RESKEY_ip"
		fi
	fi

	exit $OCF_SUCCESS
}
# ip_monitor: OCF "monitor" action.
#
# Maps the word printed by ip_served to an OCF status:
#   ok        refresh the ARP announcement, return OCF_SUCCESS
#   no        exit (not return) with OCF_NOT_RUNNING
#   anything else  return OCF_ERR_GENERIC
ip_monitor() {
	# TODO: Implement more elaborate monitoring like checking for
	# interface health maybe via a daemon like FailSafe etc...

	local ip_status=`ip_served`

	if [ "$ip_status" = "ok" ]; then
		run_arp_sender refresh
		return $OCF_SUCCESS
	fi

	if [ "$ip_status" = "no" ]; then
		exit $OCF_NOT_RUNNING
	fi

	# Errors on this interface?
	return $OCF_ERR_GENERIC
}
# make sure that we have something to send ARPs with
#
# Sets ARP_SENDER to the name of the ARP announcement program to use.
# - With OCF_RESKEY_arp_sender set, the named sender is validated (its
#   binary is checked) and used; an unknown name exits with
#   OCF_ERR_CONFIGURED.
# - Without it, send_arp is the default; on infiniband ipoibarping is
#   preferred and send_arp is the fallback when ipoibarping is missing.
set_send_arp_program() {
	ARP_SENDER=send_arp

	if [ -z "$OCF_RESKEY_arp_sender" ]; then
		if is_infiniband; then
			ARP_SENDER=ipoibarping
			if ! have_binary ipoibarping; then
				# Only nag once per start, not on every monitor.
				if [ "$__OCF_ACTION" = start ]; then
					ocf_log warn "using send_arp for infiniband because ipoibarping is not available (set arp_sender to \"send_arp\" to suppress this message)"
				fi
				check_binary $SENDARP
				ARP_SENDER=send_arp
			fi
		fi
	else
		case "$OCF_RESKEY_arp_sender" in
		send_arp)
			check_binary $SENDARP
			;;
		iputils_arping|libnet_arping)
			# both flavours are installed under the name "arping"
			check_binary arping
			;;
		ipoibarping)
			check_binary ipoibarping
			;;
		*)
			ocf_exit_reason "unrecognized arp_sender value: $OCF_RESKEY_arp_sender"
			exit $OCF_ERR_CONFIGURED
			;;
		esac
		ARP_SENDER="$OCF_RESKEY_arp_sender"
	fi
}
# ip_validate: check the environment and parameters before any action runs.
#
# Exits with OCF_ERR_CONFIGURED on an invalid parameter; the check_binary
# calls verify the tools this agent needs. Returns normally when
# everything is acceptable.
ip_validate() {
check_binary $IP2UTIL
IP_CIP=
# When a network namespace is configured, re-execute this script inside it.
# The variable is cleared for the re-executed copy so it does not re-enter
# this branch. "exec" replaces the current process, so nothing below runs
# in the outer invocation.
if [ -n "$OCF_RESKEY_network_namespace" ]; then
OCF_RESKEY_network_namespace= exec $IP2UTIL netns exec "$OCF_RESKEY_network_namespace" "$0" "$__OCF_ACTION"
fi
ip_init
set_send_arp_program
# Cluster-IP mode needs iptables (the legacy binary is preferred when it
# is available) and modprobe.
if [ -n "$IP_CIP" ]; then
if have_binary "$IPTABLES_LEGACY"; then
IPADDR2_CIP_IPTABLES="$IPTABLES_LEGACY"
fi
check_binary "$IPADDR2_CIP_IPTABLES"
check_binary $MODPROBE
fi
# $BASEIP, $NETMASK, $NIC , $IP_INC_GLOBAL, and $BRDCAST have been checked within ip_init,
# do not bother here.
if ocf_is_true "$OCF_RESKEY_unique_clone_address" &&
! ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
ocf_exit_reason "unique_clone_address makes sense only with meta globally_unique set"
exit $OCF_ERR_CONFIGURED
fi
# arp_interval and arp_count must both be decimal numbers greater than 0.
# The numeric test only runs after ocf_is_decimal succeeded.
if ocf_is_decimal "$OCF_RESKEY_arp_interval" && [ $OCF_RESKEY_arp_interval -gt 0 ]; then
:
else
ocf_exit_reason "Invalid OCF_RESKEY_arp_interval [$OCF_RESKEY_arp_interval]"
exit $OCF_ERR_CONFIGURED
fi
if ocf_is_decimal "$OCF_RESKEY_arp_count" && [ $OCF_RESKEY_arp_count -gt 0 ]; then
:
else
ocf_exit_reason "Invalid OCF_RESKEY_arp_count [$OCF_RESKEY_arp_count]"
exit $OCF_ERR_CONFIGURED
fi
if [ -z "$OCF_RESKEY_preferred_lft" ]; then
ocf_exit_reason "Empty value is invalid for OCF_RESKEY_preferred_lft"
exit $OCF_ERR_CONFIGURED
fi
# Additional checks that only apply in cluster-IP mode.
if [ -n "$IP_CIP" ]; then
local valid=1
case $IP_CIP_HASH in
sourceip|sourceip-sourceport|sourceip-sourceport-destport)
;;
*)
ocf_exit_reason "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]"
exit $OCF_ERR_CONFIGURED
;;
esac
if ocf_is_true ${OCF_RESKEY_lvs_support}; then
ocf_exit_reason "LVS and load sharing not advised to try"
exit $OCF_ERR_CONFIGURED
fi
# The MAC must be six two-character groups joined by a single
# non-alphanumeric separator, and the second character of the first
# group must be an odd hex digit.
# NOTE(review): an odd first octet presumably means "multicast MAC is
# required for CLUSTERIP" -- confirm. The pattern accepts any letter,
# not only hex digits, in the other positions.
case $IF_MAC in
[0-9a-zA-Z][13579bBdDfF][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z])
;;
*)
valid=0
;;
esac
if [ $valid -eq 0 ]; then
ocf_exit_reason "Invalid IF_MAC [$IF_MAC]"
exit $OCF_ERR_CONFIGURED
fi
fi
}
# With unique_clone_address every clone instance manages its own address:
# the clone number (0 when unset) is added to the last dotted component of
# the configured IP.
if ocf_is_true "$OCF_RESKEY_unique_clone_address"; then
	prefix=`echo $OCF_RESKEY_ip | awk -F. '{print $1"."$2"."$3}'`
	suffix=`echo $OCF_RESKEY_ip | awk -F. '{print $4}'`
	suffix=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + $suffix`
	OCF_RESKEY_ip="$prefix.$suffix"
fi

# Actions that are handled before validation runs.
case $__OCF_ACTION in
meta-data)	meta_data
		;;
usage|help)	ip_usage
		exit $OCF_SUCCESS
		;;
esac

ip_validate

# Dispatch the requested action.
case $__OCF_ACTION in
start)		ip_start
		;;
stop)		ip_stop
		;;
status)		ip_status=`ip_served`
		# Quoted so that an empty result is reported as "stopped"
		# without a "[" syntax error on stderr.
		if [ "$ip_status" = "ok" ]; then
			echo "running"
			exit $OCF_SUCCESS
		else
			echo "stopped"
			exit $OCF_NOT_RUNNING
		fi
		;;
monitor)	ip_monitor
		;;
validate-all)	;;
*)		ip_usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
# vi:sw=4:ts=8:
diff --git a/heartbeat/ManageVE.in b/heartbeat/ManageVE.in
index f07ca5bdc..540addd94 100644
--- a/heartbeat/ManageVE.in
+++ b/heartbeat/ManageVE.in
@@ -1,320 +1,320 @@
#!@BASH_SHELL@
#
# ManageVE OCF RA. Manages OpenVZ Virtual Environments (VEs)
#
# (c) 2006-2010 Matthias Dahl, Florian Haas,
# and Linux-HA contributors
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#
# This OCF compliant resource agent manages OpenVZ VEs and thus requires
# a proper OpenVZ installation including a recent vzctl util.
#
# rev. 1.00.4
#
# Changelog
#
# 21/Oct/10 1.00.4 implement migrate_from/migrate_to
# 12/Sep/06 1.00.3 more cleanup
# 12/Sep/06 1.00.2 fixed some logic in start_ve
# general cleanup all over the place
# 11/Sep/06 1.00.1 fixed some typos
# 07/Sep/06 1.00.0 it's alive... muahaha... ALIVE... :-)
#
###
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Parameter defaults
OCF_RESKEY_veid_default=""
: ${OCF_RESKEY_veid=${OCF_RESKEY_veid_default}}
###
# required utilities
VZCTL=/usr/sbin/vzctl
#
# usage()
#
# Print the list of supported actions to stdout. $0 expands to the name
# the script was invoked with.
#
usage()
{
cat <<-EOF
usage: $0 {start|stop|status|monitor|migrate_from|migrate_to|validate-all|usage|meta-data}
EOF
}
#
# meta_data()
#
# Print the OCF meta-data XML for this agent to stdout: its single
# parameter (veid) and the supported actions with their timeouts.
# The here-document is unquoted, so ${OCF_RESKEY_veid_default} is expanded
# when the text is printed.
#
meta_data()
{
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ManageVE" version="1.00.4">
<version>1.0</version>
<longdesc lang="en">
This OCF compliant resource agent manages OpenVZ VEs and thus requires
a proper OpenVZ installation including a recent vzctl util.
</longdesc>
<shortdesc lang="en">Manages an OpenVZ Virtual Environment (VE)</shortdesc>
<parameters>
<parameter name="veid" unique="0" required="1">
<longdesc lang="en">
OpenVZ ID of virtual environment (see output of vzlist -a for all assigned IDs)
</longdesc>
<shortdesc lang="en">OpenVZ ID of VE</shortdesc>
<content type="integer" default="${OCF_RESKEY_veid_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="75s" />
<action name="stop" timeout="75s" />
<action name="status" depth="0" timeout="10s" interval="10s" />
<action name="monitor" depth="0" timeout="10s" interval="10s" />
<action name="migrate_to" timeout="75s" />
<action name="migrate_from" timeout="75s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}
#
# start_ve()
#
# Start VE $VEID through "vzctl start". When status_ve already reports the
# VE as running, only an info message is logged.
# Returns OCF_SUCCESS; exits with OCF_ERR_GENERIC when vzctl fails.
#
start_ve()
{
	status_ve && {
		ocf_log info "VE $VEID already running."
		return $OCF_SUCCESS
	}

	if ! ocf_run $VZCTL start $VEID; then
		exit $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}
#
# stop_ve()
#
# Stop VE $VEID through "vzctl stop". When status_ve reports
# OCF_NOT_RUNNING, only an info message is logged.
# Returns OCF_SUCCESS; exits with OCF_ERR_GENERIC when vzctl fails.
#
# ATTENTION: The following code relies on vzctl's exit codes, especially:
#
# 0 : success
#
# In case any of those exit codes change, this function will need fixing.
#
stop_ve()
{
	status_ve
	[ $? -eq $OCF_NOT_RUNNING ] && {
		ocf_log info "VE $VEID already stopped."
		return $OCF_SUCCESS
	}

	if ! ocf_run $VZCTL stop $VEID; then
		exit $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}
#
# migrate_to_ve()
#
# Source side of a resource migration: checkpoint the running VE with
# "vzctl chkpnt". For this to work, vzctl must create the dump file in a
# place which the migration target has access to (an NFS mount, a DRBD
# device, etc.).
# Returns OCF_SUCCESS; exits with OCF_ERR_GENERIC when the VE is not
# running or the checkpoint fails.
#
migrate_to_ve()
{
	status_ve || {
		ocf_log err "VE $VEID is not running, aborting"
		exit $OCF_ERR_GENERIC
	}

	if ! ocf_run $VZCTL chkpnt $VEID; then
		exit $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}
#
# migrate_from_ve()
#
# In the process of a resource migration, restores the VE. For this to
# work, vzctl must obviously have access to the dump file which was
# created on the migration source (on an NFS mount, a DRBD device,
# etc.).
#
migrate_from_ve()
{
	# Restore the VE from its dump; any vzctl failure is fatal.
	if ocf_run $VZCTL restore $VEID; then
		return $OCF_SUCCESS
	fi

	exit $OCF_ERR_GENERIC
}
#
# status_ve()
#
# Report the state of VE $VEID based on the output of "vzctl status".
#
# Returns:
#   OCF_SUCCESS      the VE exists and its status is "running"
#   OCF_NOT_RUNNING  the VE does not exist, or its status is "down"
# Exits with OCF_ERR_GENERIC when vzctl itself fails or prints a status
# word that is neither "running" nor "down".
#
# Side effect: sets the global variables veexists and vestatus.
#
# ATTENTION: The following code relies on vzctl's status output. The third
# column is compared against "exist" and the fifth column is interpreted
# as the VE status (either running or down).
#
# In case the output format should change, this function will need fixing.
#
status_ve()
{
	declare -i retcode
	local vzoutput

	# Run vzctl exactly once and record ITS exit status. Reading $? after
	# a "vzctl | awk" pipeline only yields awk's status, which hides
	# vzctl failures.
	vzoutput=`$VZCTL status $VEID 2>/dev/null`
	retcode=$?

	veexists=`echo "$vzoutput" | $AWK '{print $3}'`
	vestatus=`echo "$vzoutput" | $AWK '{print $5}'`

	if [[ $retcode != 0 ]]; then
		# log error only if expected to find running
		if [ "$__OCF_ACTION" = "monitor" ] && ! ocf_is_probe; then
			ocf_log err "vzctl status $VEID returned: $retcode"
		fi
		exit $OCF_ERR_GENERIC
	fi

	if [[ $veexists != "exist" ]]; then
		ocf_log err "vzctl status $VEID returned: $VEID does not exist."
		return $OCF_NOT_RUNNING
	fi

	case "$vestatus" in
	running)
		return $OCF_SUCCESS
		;;
	down)
		return $OCF_NOT_RUNNING
		;;
	*)
		ocf_log err "vzctl status $VEID, wrong output format. (5th column: $vestatus)"
		exit $OCF_ERR_GENERIC
		;;
	esac
}
#
# validate_all_ve()
#
# Check that VE $VEID can be queried through status_ve.
#
# Returns OCF_SUCCESS when status_ve reports either OCF_SUCCESS or
# OCF_NOT_RUNNING; any other status from status_ve is returned unchanged.
#
# ATTENTION: status_ve relies on vzctl's status output. In case the output
# format should change, status_ve will need fixing.
#
validate_all_ve()
{
declare -i retcode
# VEID should be a valid VE
# status_ve is run inside a command substitution, i.e. in a subshell: the
# "exit" calls inside status_ve end only that subshell and show up here as
# $? instead of terminating the whole script.
# NOTE(review): whatever status_ve prints on stdout would be executed as a
# command by the backticks -- presumably ocf_log does not write to stdout;
# confirm.
`status_ve`
retcode=$?
if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then
return $retcode
fi
return $OCF_SUCCESS
}
# Exactly one argument -- the action name -- is required.
[[ $# == 1 ]] || {
	usage
	exit $OCF_ERR_ARGS
}

# meta-data and usage are answered right away, before any parameter or
# binary check further down gets a chance to fail.
if [[ "$1" == "meta-data" ]]; then
	meta_data
	exit $OCF_SUCCESS
elif [[ "$1" == "usage" ]]; then
	usage
	exit $OCF_SUCCESS
fi
#
# check relevant environment variables for sanity and security
#
# empty string?
`test -z "$OCF_RESKEY_veid"`
declare -i veidtest1=$?
# really a number?
-`echo "$OCF_RESKEY_veid" | egrep -q '^[[:digit:]]+$'`
+`echo "$OCF_RESKEY_veid" | $EGREP -q '^[[:digit:]]+$'`
if [[ $veidtest1 != 1 || $? != 0 ]]; then
ocf_log err "OCF_RESKEY_veid not set or not a number."
exit $OCF_ERR_ARGS
fi
declare -i VEID=$OCF_RESKEY_veid
#
# check that all relevant utilities are available
#
check_binary $VZCTL
check_binary $AWK
#
# finally... let's see what we are ordered to do :-)
# The script ends with the status of the handler that was selected.
#
case "$1" in
	start)		start_ve ;;
	stop)		stop_ve ;;
	status|monitor)	status_ve ;;
	migrate_to)	migrate_to_ve ;;
	migrate_from)	migrate_from_ve ;;
	validate-all)	validate_all_ve ;;
	*)
		usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac

exit $?
diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance
index 26fd54136..95140e9c4 100755
--- a/heartbeat/SAPInstance
+++ b/heartbeat/SAPInstance
@@ -1,1076 +1,1076 @@
#!/bin/sh
#
# SAPInstance
#
# Description: Manages a single SAP Instance as a High-Availability
# resource. One SAP Instance is defined by one
# SAP Instance-Profile. start/stop handles all services
# of the START-Profile, status and monitor care only
# about essential services.
#
# Author: Alexander Krauth, June 2006
# Support: linux@sap.com
# License: GNU General Public License (GPL)
# Copyright: (c) 2006-2008 Alexander Krauth
#
# An example usage:
# See usage() function below for more details...
#
# OCF instance parameters:
# OCF_RESKEY_InstanceName
# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default)
# OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default)
# OCF_RESKEY_START_PROFILE (optional, well known directories will be searched by default)
# OCF_RESKEY_START_WAITTIME (optional, to solve timing problems during J2EE-Addin start)
# OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery using cleanipc, default is false)
# OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor critical services only)
# OCF_RESKEY_SHUTDOWN_METHOD (optional, defaults to NORMAL, KILL: terminate the SAP instance with OS commands - faster, at your own risk)
# OCF_RESKEY_ERS_InstanceName (optional, InstanceName of the ERS instance in a Promotable configuration)
# OCF_RESKEY_ERS_START_PROFILE (optional, START_PROFILE of the ERS instance in a Promotable configuration)
# OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started)
# OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started)
# OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped)
# OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped)
# OCF_RESKEY_IS_ERS (needed for ENQ/REPL NW 740)
# OCF_RESKEY_MINIMAL_PROBE (optional but needed for simple mount structure architecture)
#
# TODO: - Option to shutdown sapstartsrv for non-active instances -> that means: do probes only with OS tools (sapinstance_status)
# - Option for better standalone enqueue server monitoring, using ensmon (test enque-deque)
# - Option for cleanup abandoned enqueue replication tables
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Parameter defaults
# One <name>_default per parameter; the same values are advertised in the
# meta-data XML.
OCF_RESKEY_InstanceName_default=""
OCF_RESKEY_DIR_EXECUTABLE_default=""
OCF_RESKEY_DIR_PROFILE_default=""
OCF_RESKEY_START_PROFILE_default=""
OCF_RESKEY_START_WAITTIME_default="3600"
OCF_RESKEY_AUTOMATIC_RECOVER_default="false"
OCF_RESKEY_MONITOR_SERVICES_default="disp+work|msg_server|enserver|enrepserver|jcontrol|jstart|enq_server|enq_replicator"
OCF_RESKEY_SHUTDOWN_METHOD_default="normal"
OCF_RESKEY_ERS_InstanceName_default=""
OCF_RESKEY_ERS_START_PROFILE_default=""
OCF_RESKEY_PRE_START_USEREXIT_default=""
OCF_RESKEY_POST_START_USEREXIT_default=""
OCF_RESKEY_PRE_STOP_USEREXIT_default=""
OCF_RESKEY_POST_STOP_USEREXIT_default=""
OCF_RESKEY_IS_ERS_default="false"
OCF_RESKEY_MINIMAL_PROBE_default="false"

# Apply the default to every parameter that is not set in the environment.
# ${var=value} assigns only when var is unset, so a parameter explicitly
# set to the empty string is left alone.
: ${OCF_RESKEY_InstanceName=${OCF_RESKEY_InstanceName_default}}
: ${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}}
: ${OCF_RESKEY_DIR_PROFILE=${OCF_RESKEY_DIR_PROFILE_default}}
: ${OCF_RESKEY_START_PROFILE=${OCF_RESKEY_START_PROFILE_default}}
: ${OCF_RESKEY_START_WAITTIME=${OCF_RESKEY_START_WAITTIME_default}}
: ${OCF_RESKEY_AUTOMATIC_RECOVER=${OCF_RESKEY_AUTOMATIC_RECOVER_default}}
: ${OCF_RESKEY_MONITOR_SERVICES=${OCF_RESKEY_MONITOR_SERVICES_default}}
: ${OCF_RESKEY_SHUTDOWN_METHOD=${OCF_RESKEY_SHUTDOWN_METHOD_default}}
: ${OCF_RESKEY_ERS_InstanceName=${OCF_RESKEY_ERS_InstanceName_default}}
: ${OCF_RESKEY_ERS_START_PROFILE=${OCF_RESKEY_ERS_START_PROFILE_default}}
: ${OCF_RESKEY_PRE_START_USEREXIT=${OCF_RESKEY_PRE_START_USEREXIT_default}}
: ${OCF_RESKEY_POST_START_USEREXIT=${OCF_RESKEY_POST_START_USEREXIT_default}}
: ${OCF_RESKEY_PRE_STOP_USEREXIT=${OCF_RESKEY_PRE_STOP_USEREXIT_default}}
: ${OCF_RESKEY_POST_STOP_USEREXIT=${OCF_RESKEY_POST_STOP_USEREXIT_default}}
: ${OCF_RESKEY_IS_ERS=${OCF_RESKEY_IS_ERS_default}}
# The parameter is named MINIMAL_PROBE (see the meta-data and the _default
# above). The former spelling OCF_RESKEY_IS_MINIMAL_PROBE matched neither,
# so the real parameter never received its "false" default.
: ${OCF_RESKEY_MINIMAL_PROBE=${OCF_RESKEY_MINIMAL_PROBE_default}}
#######################################################################
SH=/bin/sh
# sapinstance_usage: print the usage text to stdout.
# The action list comes from sapinstance_methods, joined with "|"; the
# result is left in the global variable "methods".
sapinstance_usage() {
	# Unquoted on purpose: word splitting folds the one-per-line list
	# into a single space-separated line before tr inserts the pipes.
	methods=$(echo $(sapinstance_methods) | tr ' ' '|')

	cat <<-EOF
usage: $0 ($methods)
$0 manages a SAP Instance as an HA resource.
The 'start' operation starts the instance or the ERS instance in a Promotable configuration
The 'stop' operation stops the instance
The 'status' operation reports whether the instance is running
The 'monitor' operation reports whether the instance seems to be working
The 'promote' operation starts the primary instance in a Promotable configuration
The 'demote' operation stops the primary instance and starts the ERS instance
The 'reload' operation allows changed parameters (non-unique only) without restarting the service
The 'notify' operation always returns SUCCESS
The 'validate-all' operation reports whether the parameters are valid
The 'methods' operation reports on the methods $0 supports
EOF
}
sapinstance_meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SAPInstance" version="2.14">
<version>1.0</version>
<longdesc lang="en">
Usually a SAP system consists of one database and at least one or more SAP instances (sometimes called application servers). One SAP Instance is defined by having exactly one instance profile. The instance profiles can usually be found in the directory /sapmnt/SID/profile. Each instance must be configured as it's own resource in the cluster configuration.
The resource agent supports the following SAP versions:
- SAP WebAS ABAP Release 6.20 - 7.40
- SAP WebAS Java Release 6.40 - 7.40
- SAP WebAS ABAP + Java Add-In Release 6.20 - 7.40 (Java is not monitored by the cluster in that case)
When using a SAP Kernel 6.40 please check and implement the actions from the section "Manual postprocessing" from SAP note 995116 (http://sdn.sap.com).
Other versions may also work with this agent, but have not been verified.
All operations of the SAPInstance resource agent are done by using the startup framework called SAP Management Console or sapstartsrv that was introduced with SAP kernel release 6.40. Find more information about the SAP Management Console in SAP note 1014480. Using this framework defines a clear interface for the Heartbeat cluster, how it sees the SAP system. The options for monitoring the SAP system are also much better than other methods like just watching the ps command for running processes or doing some pings to the application. sapstartsrv uses SOAP messages to request the status of running SAP processes. Therefore it can actually ask a process itself what it's status is, independent from other problems that might exist at the same time.
sapstartsrv knows 4 status colours:
- GREEN = everything is fine
- YELLOW = something is wrong, but the service is still working
- RED = the service does not work
- GRAY = the service has not been started
The SAPInstance resource agent will interpret GREEN and YELLOW as OK. That means that minor problems will not be reported to the Heartbeat cluster. This prevents the cluster from doing an unwanted failover.
The statuses RED and GRAY are reported as NOT_RUNNING to the cluster. Depending on the status the cluster expects from the resource, it will do a restart, failover or just nothing.
</longdesc>
<shortdesc lang="en">Manages a SAP instance as an HA resource.</shortdesc>
<parameters>
<parameter name="InstanceName" unique="1" required="1">
<longdesc lang="en">The full qualified SAP instance name. e.g. P01_DVEBMGS00_sapp01ci. Usually this is the name of the SAP instance profile.</longdesc>
<shortdesc lang="en">Instance name: SID_INSTANCE_VIR-HOSTNAME</shortdesc>
<content type="string" default="${OCF_RESKEY_InstanceName_default}" />
</parameter>
<parameter name="DIR_EXECUTABLE" unique="0" required="0">
<longdesc lang="en">The full qualified path where to find sapstartsrv and sapcontrol. Specify this parameter, if you have changed the SAP kernel directory location after the default SAP installation.</longdesc>
<shortdesc lang="en">Path of sapstartsrv and sapcontrol</shortdesc>
<content type="string" default="${OCF_RESKEY_DIR_EXECUTABLE_default}" />
</parameter>
<parameter name="DIR_PROFILE" unique="0" required="0">
<longdesc lang="en">The full qualified path where to find the SAP START profile. Specify this parameter, if you have changed the SAP profile directory location after the default SAP installation.</longdesc>
<shortdesc lang="en">Path of start profile</shortdesc>
<content type="string" default="${OCF_RESKEY_DIR_PROFILE_default}" />
</parameter>
<parameter name="START_PROFILE" unique="1" required="0">
<longdesc lang="en">The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than.</longdesc>
<shortdesc lang="en">Start profile name</shortdesc>
<content type="string" default="${OCF_RESKEY_START_PROFILE_default}" />
</parameter>
<parameter name="START_WAITTIME" unique="0" required="0">
<longdesc lang="en">After that time in seconds a monitor operation is executed by the resource agent. Does the monitor return SUCCESS, the start ishandled as SUCCESS. This is useful to resolve timing problems with e.g. the J2EE-Addin instance.Usually the resource agent waits until all services are started and the SAP Management Console reports a GREEN status. A double stack installation (ABAP + Java AddIn) consists of an ABAP dispatcher and a JAVA instance. Normally the start of the JAVA instance takes much longer than the start of the ABAP instance. For a JAVA Instance you may need to configure a much higher timeout for the start operation of the resource in Heartbeat. The disadvantage here is, that the discovery of a failed start by the cluster takes longer. Somebody might say: For me it is important, that the ABAP instance is up and running. A failure of the JAVA instance shall not cause a failover of the SAP instance.
Actually the SAP MC reports a YELLOW status, if the JAVA instance of a double stack system fails. From the resource agent point of view YELLOW means:everything is OK. Setting START_WAITTIME to a lower value determines the resource agent to check the status of the instance during a start operation after that time. As it would wait normally for a GREEN status, now it reports SUCCESS to the cluster in case of a YELLOW status already after the specified time.
That is only useful for double stack systems.
</longdesc>
<shortdesc lang="en">Check the successful start after that time (do not wait for J2EE-Addin)</shortdesc>
<content type="string" default="${OCF_RESKEY_START_WAITTIME_default}" />
</parameter>
<parameter name="AUTOMATIC_RECOVER" unique="0" required="0">
<longdesc lang="en">The SAPInstance resource agent tries to recover a failed start attempt automatically one time. This is done by killing running instance processes, removing the kill.sap file and executing cleanipc. Sometimes a crashed SAP instance leaves some processes and/or shared memory segments behind. Setting this option to true will try to remove those leftovers during a start operation. That is to reduce manual work for the administrator.</longdesc>
<shortdesc lang="en">Enable or disable automatic startup recovery</shortdesc>
<content type="boolean" default="${OCF_RESKEY_AUTOMATIC_RECOVER_default}"/>
</parameter>
<parameter name="MONITOR_SERVICES" unique="0" required="0">
<longdesc lang="en">Within a SAP instance there can be several services. Usually you will find the defined services in the START profile of the related instance (Attention: with SAP Release 7.10 the START profile content was moved to the instance profile). Not all of those services are worth to monitor by the cluster. For example you properly do not like to failover your SAP instance, if the central syslog collector daemon fails.
Those services are monitored within the SAPInstance resource agent:
- disp+work
- msg_server
- enserver (ENSA1)
- enq_server (ENSA2)
- enrepserver (ENSA1)
- enq_replicator (ENSA2)
- jcontrol
- jstart
Some other services could be monitored as well. They have to be
given with the parameter MONITOR_SERVICES, e.g.:
- sapwebdisp
- TREXDaemon.x
That names match the strings used in the output of the command 'sapcontrol -nr [Instance-Nr] -function GetProcessList'.
The default should fit most cases where you want to manage a SAP Instance from the cluster. You may change this with this parameter, if you like to monitor more/less or other services that sapstartsrv supports.
You may specify multiple services separated by a | (pipe) sign in this parameter: disp+work|msg_server|enserver
</longdesc>
<shortdesc lang="en">Services to monitor</shortdesc>
<content type="string" default="${OCF_RESKEY_MONITOR_SERVICES_default}"/>
</parameter>
<parameter name="SHUTDOWN_METHOD" unique="0" required="0">
<longdesc lang="en">Usually a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the graceful stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !!</longdesc>
<shortdesc lang="en">Shutdown graceful or kill a SAP instance by terminating the processes. (normal|KILL)</shortdesc>
<content type="string" default="${OCF_RESKEY_SHUTDOWN_METHOD_default}"/>
</parameter>
<parameter name="ERS_InstanceName" unique="1" required="0">
<longdesc lang="en">Only used in a Promotable resource configuration:
The full qualified SAP enqueue replication instance name. e.g. P01_ERS02_sapp01ers. Usually this is the name of the SAP instance profile.
The enqueue replication instance must be installed, before you want to configure a promotable cluster resource.
The promotable configuration in the cluster must use this properties:
clone_max = 2
clone_node_max = 1
master_node_max = 1
master_max = 1
</longdesc>
<shortdesc lang="en">Enqueue replication instance name: SID_INSTANCE_VIR-HOSTNAME</shortdesc>
<content type="string" default="${OCF_RESKEY_ERS_InstanceName_default}"/>
</parameter>
<parameter name="ERS_START_PROFILE" unique="1" required="0">
<longdesc lang="en">Only used in a Promotable resource configuration:
The parameter ERS_InstanceName must also be set in this configuration.
The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than.
</longdesc>
<shortdesc lang="en">Enqueue replication start profile name</shortdesc>
<content type="string" default="${OCF_RESKEY_ERS_START_PROFILE_default}"/>
</parameter>
<parameter name="PRE_START_USEREXIT" unique="0" required="0">
<longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets started.</longdesc>
<shortdesc lang="en">Path to a pre-start script</shortdesc>
<content type="string" default="${OCF_RESKEY_PRE_START_USEREXIT_default}" />
</parameter>
<parameter name="POST_START_USEREXIT" unique="0" required="0">
<longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got started.</longdesc>
<shortdesc lang="en">Path to a post-start script</shortdesc>
<content type="string" default="${OCF_RESKEY_POST_START_USEREXIT_default}" />
</parameter>
<parameter name="PRE_STOP_USEREXIT" unique="0" required="0">
<longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets stopped.</longdesc>
<shortdesc lang="en">Path to a pre-stop script</shortdesc>
<content type="string" default="${OCF_RESKEY_PRE_STOP_USEREXIT_default}" />
</parameter>
<parameter name="POST_STOP_USEREXIT" unique="0" required="0">
<longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got stopped.</longdesc>
<shortdesc lang="en">Path to a post-stop script</shortdesc>
<content type="string" default="${OCF_RESKEY_POST_STOP_USEREXIT_default}" />
</parameter>
<parameter name="IS_ERS" unique="0" required="0">
<longdesc lang="en">Only used for ASCS/ERS SAP Netweaver installations without implementing a promotable resource to
allow the ASCS to 'find' the ERS running on another cluster node after a resource failure. This parameter should be set
to true 'only' for the ERS instance for implementations following the SAP NetWeaver 7.40 HA certification (NW-HA-CLU-740). This includes also
systems for NetWeaver less than 7.40, if you like to implement the NW-HA-CLU-740 scenario.
</longdesc>
<shortdesc lang="en">Mark SAPInstance as ERS instance</shortdesc>
<content type="boolean" default="${OCF_RESKEY_IS_ERS_default}" />
</parameter>
<parameter name="MINIMAL_PROBE" unique="0" required="0">
<longdesc lang="en">Setting MINIMAL_PROBE=true forces the resource agent to do only minimal check during a probe. This is needed for special
file system setups. The MINIMAL_PROBE=true is only supported, if requested either by your vendor's support or if described in an architecture document
from your HA vendor.
</longdesc>
<shortdesc lang="en">Switch probe action from full to minimal check</shortdesc>
<content type="boolean" default="${OCF_RESKEY_MINIMAL_PROBE_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="180s" />
<action name="stop" timeout="240s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="60s" interval="120s" />
<action name="monitor" depth="0" timeout="60s" interval="121s" role="Unpromoted" />
<action name="monitor" depth="0" timeout="60s" interval="119s" role="Promoted" />
<action name="promote" timeout="320s" />
<action name="demote" timeout="320s" />
<action name="reload" timeout="320s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
<action name="methods" timeout="5s" />
</actions>
</resource-agent>
END
}
#
# methods: What methods/operations do we support?
#
sapinstance_methods() {
    # Print the name of every action this agent understands, one per line.
    printf '%s\n' \
        start \
        stop \
        status \
        monitor \
        promote \
        demote \
        reload \
        notify \
        validate-all \
        methods \
        meta-data \
        usage
}
#
# is_clone : find out if we are configured to run in a Master/Slave configuration
#
is_clone() {
    # Returns 0 when the resource is NOT cloned, 1 when it runs as a
    # correctly configured Master/Slave clone. A misconfigured clone makes
    # the agent exit with a configuration/argument error.

    # clone_max unset or not a positive number: plain (non-clone) resource
    if [ -z "$OCF_RESKEY_CRM_meta_clone_max" ] \
        || ! [ "$OCF_RESKEY_CRM_meta_clone_max" -gt 0 ]
    then
        return 0
    fi

    # only one clone layout is accepted
    if [ "$OCF_RESKEY_CRM_meta_clone_max" -ne 2 ] || \
        [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] || \
        [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] || \
        [ "$OCF_RESKEY_CRM_meta_master_max" -ne 1 ]
    then
        ocf_log err "Clone options misconfigured. (expect: clone_max=2,clone_node_max=1,master_node_max=1,master_max=1)"
        exit $OCF_ERR_CONFIGURED
    fi

    # the second (ERS) instance must be named in a Master/Slave setup
    if [ -z "$OCF_RESKEY_ERS_InstanceName" ]
    then
        ocf_log err "In a Master/Slave configuration the ERS_InstanceName parameter is mandatory."
        exit $OCF_ERR_ARGS
    fi

    return 1
}
#
# abnormal_end : essential things are missing, but in the nature of a SAP installation - which can be very different
# from customer to customer - we cannot always handle this as an error.
# This would be the case if the software is installed on shared disks and not visible
# to all cluster nodes at all times.
#
abnormal_end() {
    # Terminate the agent because something essential is missing.
    #   $1 - message to log as an error
    # This function never returns; it always exits:
    #   - during a probe: with the result of the OS-level status check
    #   - during stop:    after cleaning up the instance, with OCF_SUCCESS
    #   - otherwise:      with OCF_ERR_CONFIGURED
    # The message is kept quoted so that it reaches ocf_log unchanged
    # (no word splitting, no pathname expansion of characters like '*').
    local err_msg="$1"

    if ocf_is_probe; then
        sapinstance_status
        exit $?
    fi

    ocf_log err "$err_msg"

    if [ "$ACTION" = "stop" ]
    then
        cleanup_instance
        exit $OCF_SUCCESS
    fi
    exit $OCF_ERR_CONFIGURED
}
#
# sapinstance_init : Define global variables with default values, if optional parameters are not set
#
#
sapinstance_init() {
# Populate the agent's global variables for one SAP instance.
#   $1 - instance profile name; it is split on '_' into
#        field 1 = SID, field 2 = instance name, field 3 = virtual host name
# Sets: SID, InstanceName, InstanceNr, SAPVIRHOST, DIR_EXECUTABLE, SAPSTARTSRV,
#       SAPCONTROL, sidadm, DIR_PROFILE, is_ers, SAPSTARTPROFILE, SYSTEMCTL,
#       and exports defaults for START_WAITTIME / MONITOR_SERVICES and LD_LIBRARY_PATH.
# Returns $OCF_SUCCESS; ends the agent through abnormal_end when neither
# sapstartsrv nor sapcontrol can be located.
local myInstanceName="$1"
SID=`echo "$myInstanceName" | cut -d_ -f1`
InstanceName=`echo "$myInstanceName" | cut -d_ -f2`
# the instance number is the trailing two digits of the instance name
InstanceNr=`echo "$InstanceName" | sed 's/.*\([0-9][0-9]\)$/\1/'`
SAPVIRHOST=`echo "$myInstanceName" | cut -d_ -f3`
# make sure we do not reuse the value left over from a previous run of sapinstance_init
DIR_EXECUTABLE=""
SYSTEMCTL="systemctl"
# optional OCF parameters, we try to guess which directories are correct
if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ]
then
# no explicit directory: prefer the instance-local exe directory, then the system-wide one
if have_binary /usr/sap/$SID/$InstanceName/exe/sapstartsrv && have_binary /usr/sap/$SID/$InstanceName/exe/sapcontrol
then
DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe"
SAPSTARTSRV="/usr/sap/$SID/$InstanceName/exe/sapstartsrv"
SAPCONTROL="/usr/sap/$SID/$InstanceName/exe/sapcontrol"
elif have_binary /usr/sap/$SID/SYS/exe/run/sapstartsrv && have_binary /usr/sap/$SID/SYS/exe/run/sapcontrol
then
DIR_EXECUTABLE="/usr/sap/$SID/SYS/exe/run"
SAPSTARTSRV="/usr/sap/$SID/SYS/exe/run/sapstartsrv"
SAPCONTROL="/usr/sap/$SID/SYS/exe/run/sapcontrol"
fi
else
# explicit directory: accepted only if both binaries are found there
if have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" && have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol"
then
DIR_EXECUTABLE="$OCF_RESKEY_DIR_EXECUTABLE"
SAPSTARTSRV="$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv"
SAPCONTROL="$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol"
fi
fi
# OS user name: lower-cased SID followed by "adm"
sidadm="`echo $SID | tr '[:upper:]' '[:lower:]'`adm"
# DIR_EXECUTABLE is still empty when none of the lookups above succeeded
[ -z "$DIR_EXECUTABLE" ] && abnormal_end "Cannot find sapstartsrv and sapcontrol executable, please set DIR_EXECUTABLE parameter!"
if [ -z "$OCF_RESKEY_DIR_PROFILE" ]
then
DIR_PROFILE="/usr/sap/$SID/SYS/profile"
else
DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE"
fi
# when initialised for a name other than OCF_RESKEY_InstanceName, use the ERS start profile parameter
if [ "$myInstanceName" != "$OCF_RESKEY_InstanceName" ]
then
currentSTART_PROFILE=$OCF_RESKEY_ERS_START_PROFILE
else
currentSTART_PROFILE=$OCF_RESKEY_START_PROFILE
fi
if [ -z "$OCF_RESKEY_IS_ERS" ]; then
is_ers="no"
else
is_ers="$OCF_RESKEY_IS_ERS"
fi
if [ -z "$currentSTART_PROFILE" ]
then
# no profile configured: use <SID>_<InstanceName>_<host> only when the START_ profile
# is not readable and that file is; otherwise fall back to the START_ profile name
if [ ! -r "$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" -a -r "$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}" ]; then
SAPSTARTPROFILE="$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}"
else
SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}"
fi
else
SAPSTARTPROFILE="$currentSTART_PROFILE"
fi
if [ -z "$OCF_RESKEY_START_WAITTIME" ]
then
export OCF_RESKEY_START_WAITTIME="${OCF_RESKEY_START_WAITTIME_default}"
fi
if [ -z "$OCF_RESKEY_MONITOR_SERVICES" ]
then
export OCF_RESKEY_MONITOR_SERVICES="${OCF_RESKEY_MONITOR_SERVICES_default}"
fi
# as root user we need the library path to the SAP kernel to be able to call sapcontrol
# (prepended only when LD_LIBRARY_PATH does not already start with DIR_EXECUTABLE)
if [ `echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>"` -eq 0 ]; then
LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH
export LD_LIBRARY_PATH
fi
return $OCF_SUCCESS
}
#
# check_systemd_integration : Check, if SAP instance is controlled by systemd unit file SAP<SID>_<InstanceNr>.service
# rc == 0 : sap instance is controlled by the unit file (file at least exists)
# rc == 1 : sap instance is NOT controlled by the unit file (file does not exist)
#
check_systemd_integration() {
    # Returns 0 when a unit file SAP<SID>_<InstanceNr>.service is listed by
    # systemctl, 1 when it is not (or when systemctl itself is unavailable).
    local unit_file="SAP${SID}_${InstanceNr}.service"

    # without systemctl there is nothing to look up
    which "$SYSTEMCTL" 1>/dev/null 2>/dev/null || return 1

    # awk exits 1 unless the first column of some line equals the unit file name
    if $SYSTEMCTL list-unit-files | \
        awk '$1 == service { found=1 } END { if (! found) {exit 1}}' service="$unit_file"
    then
        return 0
    fi
    return 1
}
#
# check_sapstartsrv : Before using sapcontrol we make sure that the sapstartsrv is running for the correct instance.
# We cannot use sapinit and the /usr/sap/sapservices file in case of an enquerep instance,
# because then we have two instances with the same instance number.
#
check_sapstartsrv() {
# Make sure a sapstartsrv for this instance is available before sapcontrol is used.
# With systemd integration the unit is started if it is not active and 0 is returned
# (the result of the start request itself is not checked).
# Otherwise the running sapstartsrv is inspected and, if needed, killed and restarted.
# Returns $OCF_SUCCESS, or after a failed restart $OCF_ERR_GENERIC
# ($OCF_NOT_RUNNING during a probe).
local restart=0
local runninginst=""
local chkrc=$OCF_SUCCESS
local output=""
# check for sapstartsrv/systemd integration
if check_systemd_integration; then
# do it the systemd way
local systemd_unit_name="SAP${SID}_${InstanceNr}"
if $SYSTEMCTL status "$systemd_unit_name" 1>/dev/null 2>/dev/null; then
ocf_log info "systemd service $systemd_unit_name is active"
else
ocf_log warn "systemd service $systemd_unit_name is not active, it will be started using systemd"
$SYSTEMCTL start "$systemd_unit_name" 1>/dev/null 2>/dev/null
# use start, because restart does also stop sap instance
fi
return 0
else # otherwise continue with old code...
# a missing unix domain socket is taken as "sapstartsrv not running"
if [ ! -S /tmp/.sapstream5${InstanceNr}13 ]; then
ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName (no UDS), it will be started now"
restart=1
else
# ask the running sapstartsrv which instance it serves
output=`$SAPCONTROL -nr $InstanceNr -function ParameterValue INSTANCE_NAME -format script`
if [ $? -eq 0 ]
then
runninginst=`echo "$output" | grep '^0 : ' | cut -d' ' -f3`
if [ "$runninginst" != "$InstanceName" ]
then
ocf_log warn "sapstartsrv is running for instance $runninginst, that service will be killed"
restart=1
else
# right instance: verify that we are allowed to call Start
output=`$SAPCONTROL -nr $InstanceNr -function AccessCheck Start`
if [ $? -ne 0 ]; then
ocf_log warn "FAILED : sapcontrol -nr $InstanceNr -function AccessCheck Start (`ls -ld1 /tmp/.sapstream5${InstanceNr}13`)"
ocf_log warn "sapstartsrv will be restarted to try to solve this situation, otherwise please check sapstsartsrv setup (SAP Note 927637)"
restart=1
fi
fi
else
ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName, it will be started now"
restart=1
fi
fi
# when no running instance name was determined, the pkill/pgrep patterns below use our own instance name
if [ -z "$runninginst" ]; then runninginst=$InstanceName; fi
if [ $restart -eq 1 ]
then
# NOTE(review): DIR_PROFILE is reset to the default location here, regardless of what sapinstance_init set - confirm this is intended
if [ -d /usr/sap/$SID/SYS/profile/ ]
then
DIR_PROFILE="/usr/sap/$SID/SYS/profile"
else
abnormal_end "Expected /usr/sap/$SID/SYS/profile/ to be a directory, please set DIR_PROFILE parameter!"
fi
[ ! -r $SAPSTARTPROFILE ] && abnormal_end "Expected $SAPSTARTPROFILE to be the instance START profile, please set START_PROFILE parameter!"
pkill -9 -f "sapstartsrv.*$runninginst"
# removing the unix domain socket files as they might have wrong permissions
# or ownership - they will be recreated by sapstartsrv during next start
rm -f /tmp/.sapstream5${InstanceNr}13
rm -f /tmp/.sapstream5${InstanceNr}14
$SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm
# now make sure the daemon has been started and is able to respond
# (poll once per second while sapcontrol returns 1 and a matching sapstartsrv process exists)
local srvrc=1
while [ $srvrc -eq 1 -a `pgrep -f "sapstartsrv.*$runninginst" | wc -l` -gt 0 ]
do
sleep 1
$SAPCONTROL -nr $InstanceNr -function GetProcessList > /dev/null 2>&1
srvrc=$?
done
# srvrc still 1 means the loop ended because the sapstartsrv process is gone
if [ $srvrc -ne 1 ]
then
ocf_log info "sapstartsrv for instance $SID-$InstanceName was restarted !"
chkrc=$OCF_SUCCESS
else
ocf_log error "sapstartsrv for instance $SID-$InstanceName could not be started!"
chkrc=$OCF_ERR_GENERIC
ocf_is_probe && chkrc=$OCF_NOT_RUNNING
fi
fi
return $chkrc
fi
}
#
# sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems.
# These specialties do not allow a totally generic SAP cluster resource agent.
# Someone should write a resource agent for each additional process you need, if it
# is required to monitor that process within the cluster manager. To enable
# you to extend this resource agent without developing a new one, this user exit
# was introduced.
#
sapuserexit() {
    # Run an optional customer script.
    #   $1 - name of the user exit (used for logging only)
    #   $2 - path of the script; nothing happens when it is empty
    # The script's output is discarded and its return code is only logged.
    # Always returns 0.
    local NAME="$1"
    local VALUE="$2"

    # user exit not configured
    [ -n "$VALUE" ] || return 0

    if ! have_binary "$VALUE"
    then
        ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable"
        return 0
    fi

    ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}"
    "$VALUE" >/dev/null 2>&1
    # $? below is still the script's return code: it is expanded before ocf_log runs
    ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?"
    return 0
}
#
# cleanup_instance : remove resources (processes and shared memory) from a crashed instance
#
cleanup_instance() {
    # Forcefully remove what a crashed instance left behind: its processes,
    # its shared memory and a few state files. Always returns 0.
    local leftover

    pkill -9 -f -U $sidadm $InstanceName
    ocf_log info "Terminated instance using 'pkill -9 -f -U $sidadm $InstanceName'"

    # it is necessary to call cleanipc as user sidadm if the system has 'vmcj/enable = ON' set - otherwise SHM-segments in /dev/shm/SAP_ES2* cannot be removed
    su - $sidadm -c "cleanipc $InstanceNr remove"
    ocf_log info "Tried to remove shared memory resources using 'cleanipc $InstanceNr remove' as user $sidadm"

    # state files below the instance directory
    for leftover in work/kill.sap work/shutdown.sap data/rslgcpid data/rslgspid
    do
        ocf_run rm -fv /usr/sap/$SID/$InstanceName/$leftover
    done
    return 0
}
#
# sapinstance_start : Start the SAP instance
#
sapinstance_start() {
# Start the SAP instance and wait until sapcontrol reports it as started.
# Runs the PRE_START user exit first and the POST_START user exit after a successful start.
# Returns $OCF_SUCCESS on success, $OCF_ERR_GENERIC when the Start request itself
# fails, and $OCF_NOT_RUNNING when the instance did not come up.
sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT"
local rc=$OCF_NOT_RUNNING
local output=""
local loopcount=0
# at most two start attempts; the second one only happens after an automatic recovery (see below)
while [ $loopcount -lt 2 ]
do
loopcount=$(($loopcount + 1))
check_sapstartsrv
rc=$?
if [ $rc -eq $OCF_SUCCESS ]; then
output=`$SAPCONTROL -nr $InstanceNr -function Start`
rc=$?
ocf_log info "Starting SAP Instance $SID-$InstanceName: $output"
fi
if [ $rc -ne 0 ]
then
ocf_log err "SAP Instance $SID-$InstanceName start failed."
return $OCF_ERR_GENERIC
fi
local startrc=1
# inner loop: repeat WaitforStarted while startrc stays positive
while [ $startrc -gt 0 ]
do
local waittime_start=`date +%s`
output=`$SAPCONTROL -nr $InstanceNr -function WaitforStarted $OCF_RESKEY_START_WAITTIME 10`
startrc=$?
local waittime_stop=`date +%s`
if [ $startrc -ne 0 ]
then
if [ $(($waittime_stop - $waittime_start)) -ge $OCF_RESKEY_START_WAITTIME ]
then
# the full wait time elapsed: let the monitor decide; if it does not report
# success, startrc stays positive and WaitforStarted is called again
sapinstance_monitor NOLOG
if [ $? -eq $OCF_SUCCESS ]
then
output="START_WAITTIME ($OCF_RESKEY_START_WAITTIME) has elapsed, but instance monitor returned SUCCESS. Instance considered running."
startrc=0; loopcount=2
fi
else
# WaitforStarted failed before the wait time elapsed: clean up and retry once
# if AUTOMATIC_RECOVER is true and this was the first attempt, otherwise give up
if [ $loopcount -eq 1 ] && ocf_is_true $OCF_RESKEY_AUTOMATIC_RECOVER
then
ocf_log warn "SAP Instance $SID-$InstanceName start failed: $output"
ocf_log warn "Try to recover $SID-$InstanceName"
cleanup_instance
else
loopcount=2
fi
# a negative value leaves the inner loop and marks the attempt as failed
startrc=-1
fi
else
loopcount=2
fi
done
done
if [ $startrc -eq 0 ]
then
ocf_log info "SAP Instance $SID-$InstanceName started: $output"
rc=$OCF_SUCCESS
sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT"
# for an ERS instance, record in the node attribute runs_ers_<SID> whether it runs here
if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 1 -l reboot; fi
else
ocf_log err "SAP Instance $SID-$InstanceName start failed: $output"
rc=$OCF_NOT_RUNNING
if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 0 -l reboot; fi
fi
return $rc
}
#
# sapinstance_recover: Try startup of failed instance by cleaning up resources
#
sapinstance_recover() {
    # Clean up the remains of a failed instance, then start it again.
    # Returns whatever sapinstance_start returns.
    local rc

    cleanup_instance
    sapinstance_start
    rc=$?
    return $rc
}
#
# sapinstance_stop: Stop the SAP instance
#
sapinstance_stop() {
    # Stop the SAP instance.
    # With SHUTDOWN_METHOD=KILL the instance is cleaned up forcefully and
    # OCF_SUCCESS is returned right away (no POST_STOP user exit in that case).
    # Otherwise sapcontrol Stop + WaitforStopped are used; returns OCF_SUCCESS
    # or OCF_ERR_GENERIC.
    local output=""
    local rc

    sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT"

    if [ "$OCF_RESKEY_SHUTDOWN_METHOD" = "KILL" ]
    then
        ocf_log info "Stopping SAP Instance $SID-$InstanceName with shutdown method KILL!"
        cleanup_instance
        return $OCF_SUCCESS
    fi

    # sapcontrol needs a working sapstartsrv
    check_sapstartsrv
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        output=$($SAPCONTROL -nr $InstanceNr -function Stop)
        rc=$?
        ocf_log info "Stopping SAP Instance $SID-$InstanceName: $output"
    fi

    if [ $rc -ne 0 ]
    then
        # sapstartsrv unavailable or the Stop request was rejected
        ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output"
        rc=$OCF_ERR_GENERIC
    elif output=$($SAPCONTROL -nr $InstanceNr -function WaitforStopped 3600 1)
    then
        ocf_log info "SAP Instance $SID-$InstanceName stopped: $output"
        rc=$OCF_SUCCESS
    else
        ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output"
        rc=$OCF_ERR_GENERIC
    fi

    sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT"

    # an ERS instance no longer runs on this node
    if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 0 -l reboot; fi
    return $rc
}
#
# sapinstance_monitor: Can the given SAP instance do anything useful?
#
sapinstance_monitor() {
# Check the instance through sapcontrol GetProcessList.
#   $1 - pass "NOLOG" to suppress the error log messages
# Returns $OCF_SUCCESS when every monitored service reports GREEN or YELLOW,
# $OCF_NOT_RUNNING when a monitored service has another status, and - when no
# monitored service is listed at all - $OCF_NOT_RUNNING during a probe or
# $OCF_ERR_GENERIC otherwise. A failing check_sapstartsrv result is passed through.
local MONLOG=$1
local rc
if ocf_is_probe && ocf_is_true "$OCF_RESKEY_MINIMAL_PROBE"; then
# code for minimal probe: # grep for sapstartsrv and maybe also for sapstart
# TODO: Do we need to improve this minimal test?
if pgrep -f -l "sapstartsrv .*pf=.*${SID}_${InstanceName}_${SAPVIRHOST}"; then
rc="$OCF_SUCCESS"
elif pgrep -f -l "sapstart .*pf=.*${SID}_${InstanceName}_${SAPVIRHOST}"; then
rc="$OCF_SUCCESS"
else
rc="$OCF_NOT_RUNNING"
fi
else
# standard probe and monitoring code
check_sapstartsrv
rc=$?
fi
if [ $rc -eq $OCF_SUCCESS ]
then
# count becomes 1 as soon as one listed service matches MONITOR_SERVICES
local count=0
local SERVNO
local output
output=`$SAPCONTROL -nr $InstanceNr -function GetProcessList -format script`
# we have to parse the output, because the returncode doesn't tell anything about the instance status
# NOTE(review): the pattern '^[0-9] ' only selects lines starting with a single digit - confirm that services numbered 10 and above cannot occur
for SERVNO in `echo "$output" | grep '^[0-9] ' | cut -d' ' -f1 | sort -u`
do
local COLOR=`echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3`
local SERVICE=`echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3`
local STATE=0
local SEARCH
case $COLOR in
GREEN|YELLOW) STATE=$OCF_SUCCESS;;
*) STATE=$OCF_NOT_RUNNING;;
esac
# escape '+' and '.' so MONITOR_SERVICES can be used as an extended regular expression
SEARCH=`echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'`
- if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ]
+ if [ `echo "$SERVICE" | $EGREP -c "$SEARCH"` -eq 1 ]
then
if [ $STATE -eq $OCF_NOT_RUNNING ]
then
[ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !"
rc=$STATE
fi
count=1
fi
done
# nothing to monitor was found although everything looked fine so far
if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ]
then
if ocf_is_probe
then
rc=$OCF_NOT_RUNNING
else
[ "$MONLOG" != "NOLOG" ] && ocf_log err "The SAP instance does not run any services which this RA could monitor!"
rc=$OCF_ERR_GENERIC
fi
fi
fi
return $rc
}
#
# sapinstance_status: Lightweight check of SAP instance only with OS tools
#
sapinstance_status() {
    # Lightweight "is it running" check that only uses OS tools.
    # Reads the process ids recorded in the instance's kill.sap file (third
    # column, numeric entries only) and returns $OCF_SUCCESS as soon as one of
    # them belongs to a process of user $sidadm whose command line matches
    # $InstanceName. Returns $OCF_NOT_RUNNING when kill.sap does not exist or
    # none of the recorded pids is alive.
    local pid
    local pids
    local killfile="/usr/sap/$SID/$InstanceName/work/kill.sap"

    [ ! -f "$killfile" ] && return $OCF_NOT_RUNNING

    pids=$(awk '$3 ~ "^[0-9]+$" { print $3 }' "$killfile")
    for pid in $pids
    do
        # -x: the whole line must equal the pid, so that e.g. pid 12 does not
        # match a running process 123 or 4125
        pgrep -f -U "$sidadm" "$InstanceName" | grep -qx "$pid" && return $OCF_SUCCESS
    done
    return $OCF_NOT_RUNNING
}
#
# sapinstance_validate: Check the semantics of the input parameters
#
sapinstance_validate() {
    # Check that the values parsed from the instance profile name look sane.
    # Every failing check is logged; returns $OCF_ERR_ARGS if at least one
    # check failed, $OCF_SUCCESS otherwise.
    local rc=$OCF_SUCCESS
    local hits

    # SID: an upper-case letter followed by two upper-case letters or digits
    hits=$(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$')
    if [ $hits -ne 1 ]; then
        ocf_log err "Parsing instance profile name: '$SID' is not a valid system ID!"
        rc=$OCF_ERR_ARGS
    fi

    # instance name: starts with an upper-case letter, ends with two digits
    hits=$(echo "$InstanceName" | grep -c '^[A-Z].*[0-9][0-9]$')
    if [ $hits -ne 1 ]; then
        ocf_log err "Parsing instance profile name: '$InstanceName' is not a valid instance name!"
        rc=$OCF_ERR_ARGS
    fi

    # instance number: exactly two digits
    hits=$(echo "$InstanceNr" | grep -c '^[0-9][0-9]$')
    if [ $hits -ne 1 ]; then
        ocf_log err "Parsing instance profile name: '$InstanceNr' is not a valid instance number!"
        rc=$OCF_ERR_ARGS
    fi

    # host name: a letter followed by letters, digits, '_' or '-'
    hits=$(echo "$SAPVIRHOST" | grep -c '^[A-Za-z][A-Za-z0-9_-]*$')
    if [ $hits -ne 1 ]; then
        ocf_log err "Parsing instance profile name: '$SAPVIRHOST' is not a valid hostname!"
        rc=$OCF_ERR_ARGS
    fi

    return $rc
}
#
# sapinstance_start_clone
#
sapinstance_start_clone() {
    # Clone start: initialise for the ERS instance, set the master
    # preference of this node to 50 and start the instance.
    # Returns the result of sapinstance_start.
    local rc

    sapinstance_init $OCF_RESKEY_ERS_InstanceName
    ${HA_SBIN_DIR}/crm_master -v 50 -l reboot
    sapinstance_start
    rc=$?
    return $rc
}
#
# sapinstance_stop_clone
#
sapinstance_stop_clone() {
    # Clone stop: initialise for the ERS instance, set the master
    # preference of this node to 0 and stop the instance.
    # Returns the result of sapinstance_stop.
    local rc

    sapinstance_init $OCF_RESKEY_ERS_InstanceName
    ${HA_SBIN_DIR}/crm_master -v 0 -l reboot
    sapinstance_stop
    rc=$?
    return $rc
}
#
# sapinstance_monitor_clone
#
sapinstance_monitor_clone() {
# Monitor for the Master/Slave setup.
# First the instance named by OCF_RESKEY_InstanceName is checked: if it is running
# and healthy, $OCF_RUNNING_MASTER is returned; if it is running but the monitor
# fails, $OCF_FAILED_MASTER is returned.
# Otherwise the ERS instance is checked and the result of that check is returned.
# first check with the status function (OS tools) if there could be something like a SAP instance running
# as we do not know here, if we are in master or slave state we do not want to start our monitoring
# agents (sapstartsrv) on the wrong host
local rc
sapinstance_init $OCF_RESKEY_InstanceName
if sapinstance_status; then
if sapinstance_monitor; then
${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot
return $OCF_RUNNING_MASTER
fi
# by nature of the SAP enqueue server we have to make sure
# that we do a failover to the slave (enqueue replication server)
# in case the enqueue process has failed. We signal this to the
# cluster by setting our master preference to a lower value than the slave.
${HA_SBIN_DIR}/crm_master -v 10 -l reboot
return $OCF_FAILED_MASTER
fi
# nothing of the first instance is running here: look at the ERS instance instead
sapinstance_init $OCF_RESKEY_ERS_InstanceName
# sapinstance_monitor only runs when the OS-level status check succeeded
sapinstance_status && sapinstance_monitor
rc=$?
if [ $rc -eq $OCF_SUCCESS ]; then
${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot
fi
return $rc
}
#
# sapinstance_promote_clone: In a Master/Slave configuration get Master by starting the SCS instance and stopping the ERS instance
# The order is important here to behave correct from the application levels view
#
sapinstance_promote_clone() {
    # Promote: start the instance named by OCF_RESKEY_InstanceName and, only
    # if that worked, stop the ERS instance. Returns the result of the last
    # step that was executed.
    local rc

    sapinstance_init $OCF_RESKEY_InstanceName
    ocf_log info "Promoting $SID-$InstanceName to running Master."
    sapinstance_start
    rc=$?

    # start failed: leave the ERS instance untouched
    [ $rc -eq $OCF_SUCCESS ] || return $rc

    sapinstance_init $OCF_RESKEY_ERS_InstanceName
    sapinstance_stop
    rc=$?
    return $rc
}
#
# sapinstance_demote_clone: In a Master/Slave configuration get Slave by stopping the SCS instance and starting the ERS instance
#
sapinstance_demote_clone() {
    # Demote: stop the instance named by OCF_RESKEY_InstanceName and, only if
    # that worked, start the ERS instance. Returns the result of the last
    # step that was executed.
    local rc

    sapinstance_init $OCF_RESKEY_InstanceName
    ocf_log info "Demoting $SID-$InstanceName to a slave."
    sapinstance_stop
    rc=$?

    # stop failed: do not start the ERS instance
    [ $rc -eq $OCF_SUCCESS ] || return $rc

    sapinstance_init $OCF_RESKEY_ERS_InstanceName
    sapinstance_start
    rc=$?
    return $rc
}
#
# sapinstance_notify: Handle master scoring - to make sure a slave gets the next master
#
sapinstance_notify() {
    # Adjust this node's master preference in reaction to clone notifications.
    # Reads the notification type/operation from the OCF_RESKEY_CRM_meta_notify_*
    # variables; only "post promote" and "pre demote" are acted upon.
    local n_type="$OCF_RESKEY_CRM_meta_notify_type"
    local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
    local n_uname

    case "${n_type}_${n_op}" in
    post_promote)
        # After promotion of one master in the cluster, we make sure that all clones reset their master
        # value back to 100. This is because a failed monitor on a master might have degraded one clone
        # instance to score 10.
        ${HA_SBIN_DIR}/crm_master -v 100 -l reboot
        ;;
    pre_demote)
        # if we are a slave and a demote event is announced, make sure we are highest on the list to become master
        # that is, when a slave resource was started after the promote event of an already running master (e.g. node of slave was down)
        # We also have to make sure to overrule the globally set resource_stickiness or any fail-count factors => INFINITY
        n_uname="$OCF_RESKEY_CRM_meta_notify_demote_uname"
        # The demote list is treated as a space-separated list of node names.
        # Nothing is done when either name is unknown, or when this node is
        # itself on the list. The former unquoted "[ a != b ]" test broke with
        # a shell error on an empty or multi-word list.
        if [ -n "$n_uname" ] && [ -n "$NODENAME" ]; then
            case " $n_uname " in
            *" $NODENAME "*)
                ;;
            *)
                ${HA_SBIN_DIR}/crm_master -v INFINITY -l reboot
                ;;
            esac
        fi
        ;;
    esac
}
#
# 'main' starts here...
#
## GLOBALS
# Global state shared by all functions; filled in by sapinstance_init.
SID=""
sidadm=""
InstanceName=""
InstanceNr=""
SAPVIRHOST=""
DIR_EXECUTABLE=""
SAPSTARTSRV=""
SAPCONTROL=""
DIR_PROFILE=""
SAPSTARTPROFILE=""
# CLONE receives the return value of is_clone further down (1 = Master/Slave clone)
CLONE=0
NODENAME=$(ocf_local_nodename)
# exactly one argument - the action - is expected
if
( [ $# -ne 1 ] )
then
sapinstance_usage
exit $OCF_ERR_ARGS
fi
ACTION=$1
# "status" is handled as an alias for "monitor"
if [ "$ACTION" = "status" ]; then
ACTION=monitor
fi
# These operations don't require OCF instance parameters to be set
case "$ACTION" in
usage|methods) sapinstance_$ACTION
exit $OCF_SUCCESS;;
meta-data) sapinstance_meta_data
exit $OCF_SUCCESS;;
notify) sapinstance_notify
exit $OCF_SUCCESS;;
*);;
esac
if ! ocf_is_root
then
ocf_log err "$0 must be run as root"
exit $OCF_ERR_PERM
fi
# parameter check
if [ -z "$OCF_RESKEY_InstanceName" ]
then
ocf_log err "Please set OCF_RESKEY_InstanceName to the name to the SAP instance profile!"
exit $OCF_ERR_ARGS
fi
is_clone; CLONE=$?
if [ ${CLONE} -eq 1 ]
then
# in clone mode the *_clone variants are dispatched below; they call sapinstance_init themselves
CLACT=_clone
else
if [ "$ACTION" = "promote" -o "$ACTION" = "demote" ]
then
ocf_log err "$ACTION called in a non master/slave environment"
exit $OCF_ERR_ARGS
fi
sapinstance_init $OCF_RESKEY_InstanceName
fi
# What kind of method was invoked?
# NOTE(review): CLACT is only assigned in clone mode; otherwise its value is whatever the environment provides (normally empty) - confirm
case "$ACTION" in
start|stop|monitor|promote|demote) sapinstance_$ACTION$CLACT
exit $?;;
validate-all) sapinstance_validate
exit $?;;
reload )
ocf_log info "reloading SAPInstance parameters"
exit $OCF_SUCCESS;;
*) sapinstance_methods
exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index 3905695ae..7db42bd12 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -1,1158 +1,1158 @@
#!/bin/sh
#
# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# Resource Agent for domains managed by the libvirt API.
# Requires a running libvirt daemon (libvirtd).
#
# (c) 2008-2010 Florian Haas, Dejan Muhamedagic,
# and Linux-HA contributors
#
# usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}
#
#######################################################################
# Initialization:
# Load the common OCF shell function library (location can be overridden through OCF_FUNCTIONS_DIR).
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
OCF_RESKEY_config_default=""
OCF_RESKEY_migration_transport_default=""
OCF_RESKEY_migration_downtime_default=0
OCF_RESKEY_migration_speed_default=0
OCF_RESKEY_migration_network_suffix_default=""
OCF_RESKEY_force_stop_default=0
OCF_RESKEY_monitor_scripts_default=""
OCF_RESKEY_autoset_utilization_cpu_default="true"
OCF_RESKEY_autoset_utilization_host_memory_default="true"
OCF_RESKEY_autoset_utilization_hv_memory_default="true"
OCF_RESKEY_unset_utilization_cpu_default="false"
OCF_RESKEY_unset_utilization_host_memory_default="false"
OCF_RESKEY_unset_utilization_hv_memory_default="false"
# default migration port: 49152 plus a value in 0..63 derived from ocf_maybe_random
OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 ))
OCF_RESKEY_CRM_meta_timeout_default=90000
OCF_RESKEY_save_config_on_stop_default=false
OCF_RESKEY_sync_config_on_stop_default=false
OCF_RESKEY_snapshot_default=""
OCF_RESKEY_backingfile_default=""
OCF_RESKEY_stateless_default="false"
OCF_RESKEY_copyindirs_default=""
OCF_RESKEY_shutdown_mode_default=""
OCF_RESKEY_start_resources_default="false"
# Apply the defaults: ${VAR=value} assigns only when VAR is unset,
# so a parameter that was explicitly set (even to an empty string) is kept.
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_migration_transport=${OCF_RESKEY_migration_transport_default}}
: ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}}
: ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}}
: ${OCF_RESKEY_migration_network_suffix=${OCF_RESKEY_migration_network_suffix_default}}
: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
: ${OCF_RESKEY_monitor_scripts=${OCF_RESKEY_monitor_scripts_default}}
: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}}
: ${OCF_RESKEY_autoset_utilization_host_memory=${OCF_RESKEY_autoset_utilization_host_memory_default}}
: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}}
: ${OCF_RESKEY_unset_utilization_cpu=${OCF_RESKEY_unset_utilization_cpu_default}}
: ${OCF_RESKEY_unset_utilization_host_memory=${OCF_RESKEY_unset_utilization_host_memory_default}}
: ${OCF_RESKEY_unset_utilization_hv_memory=${OCF_RESKEY_unset_utilization_hv_memory_default}}
: ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}}
: ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}}
: ${OCF_RESKEY_save_config_on_stop=${OCF_RESKEY_save_config_on_stop_default}}
: ${OCF_RESKEY_sync_config_on_stop=${OCF_RESKEY_sync_config_on_stop_default}}
: ${OCF_RESKEY_snapshot=${OCF_RESKEY_snapshot_default}}
: ${OCF_RESKEY_backingfile=${OCF_RESKEY_backingfile_default}}
: ${OCF_RESKEY_stateless=${OCF_RESKEY_stateless_default}}
: ${OCF_RESKEY_copyindirs=${OCF_RESKEY_copyindirs_default}}
: ${OCF_RESKEY_shutdown_mode=${OCF_RESKEY_shutdown_mode_default}}
: ${OCF_RESKEY_start_resources=${OCF_RESKEY_start_resources_default}}
# enabling sync_config_on_stop also turns on save_config_on_stop
if ocf_is_true ${OCF_RESKEY_sync_config_on_stop}; then
OCF_RESKEY_save_config_on_stop="true"
fi
#######################################################################
## I'd very much suggest to make this RA use bash,
## and then use magic $SECONDS.
## But for now:
# seconds since the epoch, captured once when the agent starts
NOW=$(date +%s)
usage() {
    # Print a one-line synopsis listing the supported actions.
    local actions="start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all"
    echo "usage: $0 {${actions}}"
}
# Print the OCF resource-agent metadata (XML) for VirtualDomain on stdout.
# The here-document delimiter is unquoted, so every ${OCF_RESKEY_*_default}
# reference below is expanded by the shell when the function runs.
VirtualDomain_meta_data() {
	cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="VirtualDomain" version="1.1">
<version>1.0</version>
<longdesc lang="en">
Resource agent for a virtual domain (a.k.a. domU, virtual machine,
virtual environment etc., depending on context) managed by libvirtd.
</longdesc>
<shortdesc lang="en">Manages virtual domains through the libvirt virtualization framework</shortdesc>
<parameters>
<parameter name="config" unique="1" required="1">
<longdesc lang="en">
Absolute path to the libvirt configuration file,
for this virtual domain.
</longdesc>
<shortdesc lang="en">Virtual domain configuration file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="hypervisor" unique="0" required="0">
<longdesc lang="en">
Hypervisor URI to connect to. See the libvirt documentation for
details on supported URI formats. The default is system dependent.
Determine the system's default uri by running 'virsh --quiet uri'.
</longdesc>
<shortdesc lang="en">Hypervisor URI</shortdesc>
<content type="string"/>
</parameter>
<parameter name="force_stop" unique="0" required="0">
<longdesc lang="en">
Always forcefully shut down ("destroy") the domain on stop. The default
behavior is to resort to a forceful shutdown only after a graceful
shutdown attempt has failed. You should only set this to true if
your virtual domain (or your virtualization backend) does not support
graceful shutdown.
</longdesc>
<shortdesc lang="en">Always force shutdown on stop</shortdesc>
<content type="boolean" default="${OCF_RESKEY_force_stop_default}" />
</parameter>
<parameter name="migration_transport" unique="0" required="0">
<longdesc lang="en">
Transport used to connect to the remote hypervisor while
migrating. Please refer to the libvirt documentation for details on
transports available. If this parameter is omitted, the resource will
use libvirt's default transport to connect to the remote hypervisor.
</longdesc>
<shortdesc lang="en">Remote hypervisor transport</shortdesc>
<content type="string" default="${OCF_RESKEY_migration_transport_default}" />
</parameter>
<parameter name="migration_user" unique="0" required="0">
<longdesc lang="en">
The username will be used in the remote libvirt remoteuri/migrateuri. No user will be
given (which means root) in the username if omitted
If remoteuri is set, migration_user will be ignored.
</longdesc>
<shortdesc lang="en">Remote username for the remoteuri</shortdesc>
<content type="string" />
</parameter>
<parameter name="migration_downtime" unique="0" required="0">
<longdesc lang="en">
Define max downtime during live migration in milliseconds
</longdesc>
<shortdesc lang="en">Live migration downtime</shortdesc>
<content type="integer" default="${OCF_RESKEY_migration_downtime_default}" />
</parameter>
<parameter name="migration_speed" unique="0" required="0">
<longdesc lang="en">
Define live migration speed per resource in MiB/s
</longdesc>
<shortdesc lang="en">Live migration speed</shortdesc>
<content type="integer" default="${OCF_RESKEY_migration_speed_default}" />
</parameter>
<parameter name="migration_network_suffix" unique="0" required="0">
<longdesc lang="en">
Use a dedicated migration network. The migration URI is composed by
adding this parameters value to the end of the node name. If the node
name happens to be an FQDN (as opposed to an unqualified host name),
insert the suffix immediately prior to the first period (.) in the FQDN.
At the moment Qemu/KVM and Xen migration via a dedicated network is supported.
Note: Be sure this composed host name is locally resolvable and the
associated IP is reachable through the favored network. This suffix will
be added to the remoteuri and migrateuri parameters.
See also the migrate_options parameter below.
</longdesc>
<shortdesc lang="en">Migration network host name suffix</shortdesc>
<content type="string" default="${OCF_RESKEY_migration_network_suffix_default}" />
</parameter>
<parameter name="migrateuri" unique="0" required="0">
<longdesc lang="en">
You can also specify here if the calculated migrate URI is unsuitable for your
environment.
If migrateuri is set then migration_network_suffix, migrateport and
--migrateuri in migrate_options are effectively ignored. Use "%n" as the
placeholder for the target node name.
Please refer to the libvirt documentation for details on guest
migration.
</longdesc>
<shortdesc lang="en">Custom migrateuri for migration state transfer</shortdesc>
<content type="string" />
</parameter>
<parameter name="migrate_options" unique="0" required="0">
<longdesc lang="en">
Extra virsh options for the guest live migration. You can also specify
here --migrateuri if the calculated migrate URI is unsuitable for your
environment. If --migrateuri is set then migration_network_suffix
and migrateport are effectively ignored. Use "%n" as the placeholder
for the target node name.
Please refer to the libvirt documentation for details on guest
migration.
</longdesc>
<shortdesc lang="en">live migrate options</shortdesc>
<content type="string" />
</parameter>
<parameter name="monitor_scripts" unique="0" required="0">
<longdesc lang="en">
To additionally monitor services within the virtual domain, add this
parameter with a list of scripts to monitor.
Note: when monitor scripts are used, the start and migrate_from operations
will complete only when all monitor scripts have completed successfully.
Be sure to set the timeout of these operations to accommodate this delay.
</longdesc>
<shortdesc lang="en">space-separated list of monitor scripts</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_scripts_default}" />
</parameter>
<parameter name="autoset_utilization_cpu" unique="0" required="0">
<longdesc lang="en">
If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it
into the CPU utilization of the resource when the monitor is executed.
</longdesc>
<shortdesc lang="en">Enable auto-setting the CPU utilization of the resource</shortdesc>
<content type="boolean" default="${OCF_RESKEY_autoset_utilization_cpu_default}" />
</parameter>
<parameter name="autoset_utilization_host_memory" unique="0" required="0">
<longdesc lang="en">
If set true, the agent will detect the number of *Max memory* from virsh, and put it
into the host_memory utilization of the resource when the monitor is executed.
</longdesc>
<shortdesc lang="en">Enable auto-setting the host_memory utilization of the resource</shortdesc>
<content type="boolean" default="${OCF_RESKEY_autoset_utilization_host_memory_default}" />
</parameter>
<parameter name="autoset_utilization_hv_memory" unique="0" required="0">
<longdesc lang="en">
If set true, the agent will detect the number of *Max memory* from virsh, and put it
into the hv_memory utilization of the resource when the monitor is executed.
</longdesc>
<shortdesc lang="en">Enable auto-setting the hv_memory utilization of the resource</shortdesc>
<content type="boolean" default="${OCF_RESKEY_autoset_utilization_hv_memory_default}" />
</parameter>
<parameter name="unset_utilization_cpu" unique="0" required="0">
<longdesc lang="en">
If set true then the agent will remove the cpu utilization resource when the monitor
is executed.
</longdesc>
<shortdesc lang="en">Enable auto-removing the CPU utilization of the resource</shortdesc>
<content type="boolean" default="${OCF_RESKEY_unset_utilization_cpu_default}" />
</parameter>
<parameter name="unset_utilization_host_memory" unique="0" required="0">
<longdesc lang="en">
If set true then the agent will remove the host_memory utilization resource when the monitor
is executed.
</longdesc>
<shortdesc lang="en">Enable auto-removing the host_memory utilization of the resource</shortdesc>
<content type="boolean" default="${OCF_RESKEY_unset_utilization_host_memory_default}" />
</parameter>
<parameter name="unset_utilization_hv_memory" unique="0" required="0">
<longdesc lang="en">
If set true then the agent will remove the hv_memory utilization resource when the monitor
is executed.
</longdesc>
<shortdesc lang="en">Enable auto-removing the hv_memory utilization of the resource</shortdesc>
<content type="boolean" default="${OCF_RESKEY_unset_utilization_hv_memory_default}" />
</parameter>
<parameter name="migrateport" unique="0" required="0">
<longdesc lang="en">
This port will be used in the qemu migrateuri. If unset, the port will be a random highport.
</longdesc>
<shortdesc lang="en">Port for migrateuri</shortdesc>
<content type="integer" />
</parameter>
<parameter name="remoteuri" unique="0" required="0">
<longdesc lang="en">
Use this URI as virsh connection URI to communicate with a remote hypervisor.
If remoteuri is set then migration_user and migration_network_suffix are
effectively ignored. Use "%n" as the placeholder for the target node name.
Please refer to the libvirt documentation for details on guest
migration.
</longdesc>
<shortdesc lang="en">Custom remoteuri to communicate with a remote hypervisor</shortdesc>
<content type="string" />
</parameter>
<parameter name="save_config_on_stop" unique="0" required="0">
<longdesc lang="en">
Changes to a running VM's config are normally lost on stop.
This parameter instructs the RA to save the configuration back to the xml file provided in the "config" parameter.
</longdesc>
<shortdesc lang="en">Save running VM's config back to its config file</shortdesc>
<content type="boolean" />
</parameter>
<parameter name="sync_config_on_stop" unique="0" required="0">
<longdesc lang="en">
Setting this automatically enables save_config_on_stop.
When enabled this parameter instructs the RA to
call csync2 -x to synchronize the file to all nodes.
csync2 must be properly set up for this to work.
</longdesc>
<shortdesc lang="en">Synchronize the saved config file to all nodes with csync2</shortdesc>
<content type="boolean" />
</parameter>
<parameter name="snapshot">
<longdesc lang="en">
Path to the snapshot directory where the virtual machine image will be stored. When this
parameter is set, the virtual machine's RAM state will be saved to a file in the snapshot
directory when stopped. If on start a state file is present for the domain, the domain
will be restored to the same state it was in right before it stopped last. This option
is incompatible with the 'force_stop' option.
</longdesc>
<shortdesc lang="en">
Restore state on start/stop
</shortdesc>
<content type="string" default="${OCF_RESKEY_snapshot_default}"/>
</parameter>
<parameter name="backingfile" unique="0" required="0">
<longdesc lang="en">
When the VM is used in Copy-On-Write mode, this is the backing file to use (with its full path).
The VMs image will be created based on this backing file.
This backing file will never be changed during the life of the VM.
</longdesc>
<shortdesc lang="en">If the VM is wanted to work with Copy-On-Write mode, this is the backing file to use (with its full path)</shortdesc>
<content type="string" default="${OCF_RESKEY_backingfile_default}" />
</parameter>
<parameter name="stateless" unique="0" required="0">
<longdesc lang="en">
If set to true and backingfile is defined, the start of the VM will systematically create a new qcow2 based on
the backing file, therefore the VM will always be stateless. If set to false, the start of the VM will use the
COW (&lt;vmname&gt;.qcow2) file if it exists, otherwise the first start will create a new qcow2 based on the backing
file given as backingfile.
</longdesc>
<shortdesc lang="en">If set to true, the (&lt;vmname&gt;.qcow2) file will be re-created at each start, based on the backing file (if defined)</shortdesc>
<content type="boolean" default="${OCF_RESKEY_stateless_default}" />
</parameter>
<parameter name="copyindirs" unique="0" required="0">
<longdesc lang="en">
List of directories for the virt-copy-in before booting the VM. Used only in stateless mode.
</longdesc>
<shortdesc lang="en">List of directories for the virt-copy-in before booting the VM stateless mode.</shortdesc>
<content type="string" default="${OCF_RESKEY_copyindirs_default}" />
</parameter>
<parameter name="shutdown_mode">
<longdesc lang="en">
virsh shutdown method to use. Please verify that it is supported by your virsh toolset with 'virsh help shutdown'
When this parameter is set --mode shutdown_mode is passed as an additional argument to the 'virsh shutdown' command.
One can use this option in case default acpi method does not work. Verify that this mode is supported
by your VM. By default --mode is not passed.
</longdesc>
<shortdesc lang="en">
Instruct virsh to use specific shutdown mode
</shortdesc>
<content type="string" default="${OCF_RESKEY_shutdown_mode_default}"/>
</parameter>
<parameter name="start_resources">
<longdesc lang="en">
Start the virtual storage pools and networks used by the virtual machine before starting it or before live migrating it.
</longdesc>
<shortdesc lang="en">
Ensure the needed virtual storage pools and networks are started
</shortdesc>
<content type="boolean" default="${OCF_RESKEY_start_resources_default}"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="90s" />
<action name="stop" timeout="90s" />
<action name="status" depth="0" timeout="30s" interval="10s" />
<action name="monitor" depth="0" timeout="30s" interval="10s" />
<action name="migrate_from" timeout="60s" />
<action name="migrate_to" timeout="120s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="5s" />
</actions>
</resource-agent>
EOF
}
# Set utilization attribute $1 of this resource to value $2 through
# crm_resource, unless the stored value already equals it.
set_util_attr() {
	local name=$1 wanted=$2
	local current errout

	current=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $name 2>/dev/null)
	if [ $? -ne 0 ] && [ -z "$current" ]; then
		# The query failed and produced nothing: re-run it to see whether
		# the reason is that crm_resource reports "not connected".
		if crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $name 2>&1 | grep -e "not connected" > /dev/null 2>&1; then
			ocf_log debug "Unable to set utilization attribute, cib is not available"
			return
		fi
	fi

	# Nothing to do when the attribute already carries the wanted value.
	[ "$current" = "$wanted" ] && return

	errout=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $name -v $wanted 2>&1) ||
		ocf_log warn "crm_resource failed to set utilization attribute $name: $errout"
}
# Delete utilization attribute $1 from this resource via crm_resource;
# a failure is only logged as a warning.
unset_util_attr() {
	local name=$1
	local errout

	if ! errout=$(crm_resource --resource=$OCF_RESOURCE_INSTANCE --utilization --delete-parameter=$name 2>&1); then
		ocf_log warn "crm_resource failed to unset utilization attribute $name: $errout"
	fi
}
# Synchronise the cpu / host_memory / hv_memory utilization attributes of
# the resource with what "virsh dominfo" reports, according to the
# autoset_utilization_* and unset_utilization_* parameters.
update_utilization() {
	local vcpus max_mib

	# cpu: second field of the "CPU(s)" line of dominfo
	if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then
		vcpus=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/CPU\(s\)/{print $2}')
		[ -n "$vcpus" ] && set_util_attr cpu $vcpus
	elif ocf_is_true "$OCF_RESKEY_unset_utilization_cpu"; then
		unset_util_attr cpu
	fi

	# host_memory: third field of the "Max memory" line, divided by 1024
	if ocf_is_true "$OCF_RESKEY_autoset_utilization_host_memory"; then
		max_mib=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/Max memory/{printf("%d", $3/1024)}')
		[ -n "$max_mib" ] && set_util_attr host_memory "$max_mib"
	elif ocf_is_true "$OCF_RESKEY_unset_utilization_host_memory"; then
		unset_util_attr host_memory
	fi

	# hv_memory: same computation, stored under a different attribute name
	if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then
		max_mib=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/Max memory/{printf("%d", $3/1024)}')
		[ -n "$max_mib" ] && set_util_attr hv_memory "$max_mib"
	elif ocf_is_true "$OCF_RESKEY_unset_utilization_hv_memory"; then
		unset_util_attr hv_memory
	fi
}
# Print the basename of the domain's emulator binary, or nothing when it
# cannot be determined. Sources are tried in this order: the live domain
# XML from virsh, the cached $EMULATOR_STATE file, the config file.
get_emulator()
{
	local emu_path=""
	local emu_filter='s/^.*<emulator>\(.*\)<\/emulator>.*$/\1/p'

	emu_path=$(virsh $VIRSH_OPTIONS dumpxml $DOMAIN_NAME 2>/dev/null | sed -n -e "$emu_filter")

	if [ -z "$emu_path" ] && [ -e "$EMULATOR_STATE" ]; then
		emu_path=$(cat $EMULATOR_STATE)
	fi

	if [ -z "$emu_path" ]; then
		emu_path=$(cat ${OCF_RESKEY_config} | sed -n -e "$emu_filter")
	fi

	if [ -z "$emu_path" ]; then
		return 0
	fi
	basename $emu_path
}
# Store the emulator name reported by get_emulator in $EMULATOR_STATE;
# the file is left untouched when no emulator could be determined.
update_emulator_cache()
{
	local current

	current=$(get_emulator)
	[ -z "$current" ] || echo $current > $EMULATOR_STATE
}
# Try to determine the domain status without libvirt, by looking for the
# emulator process (or asking the Xen tools).
# Returns $OCF_SUCCESS, $OCF_NOT_RUNNING, or $OCF_ERR_GENERIC when no
# method was applicable.
pid_status()
{
	local result=$OCF_ERR_GENERIC
	local emulator=$(get_emulator)
	# An emulator is not required, so only report message in debug mode
	local level="debug"

	if ocf_is_probe; then
		level="notice"
	fi

	case "$emulator" in
		qemu-kvm|qemu-dm|qemu-system-*)
			result=$OCF_NOT_RUNNING
			if ps awx | grep -E "[q]emu-(kvm|dm|system).*-name ($DOMAIN_NAME|[^ ]*guest=$DOMAIN_NAME(,[^ ]*)?) " > /dev/null 2>&1; then
				result=$OCF_SUCCESS
			fi
			;;
		libvirt_lxc)
			result=$OCF_NOT_RUNNING
			if ps awx | grep -E "[l]ibvirt_lxc.*-name ($DOMAIN_NAME|[^ ]*guest=$DOMAIN_NAME(,[^ ]*)?) " > /dev/null 2>&1; then
				result=$OCF_SUCCESS
			fi
			;;
		# This can be expanded to check for additional emulators
		*)
			# We may be running xen with PV domains, they don't
			# have an emulator set. try xl list or xen-lists
			if have_binary xl; then
				result=$OCF_NOT_RUNNING
				if xl list $DOMAIN_NAME >/dev/null 2>&1; then
					result=$OCF_SUCCESS
				fi
			elif have_binary xen-list; then
				result=$OCF_NOT_RUNNING
				if xen-list $DOMAIN_NAME 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null; then
					result=$OCF_SUCCESS
				fi
			else
				ocf_log $level "Unable to determine emulator for $DOMAIN_NAME"
			fi
			;;
	esac

	if [ $result -eq $OCF_SUCCESS ]; then
		ocf_log debug "Virtual domain $DOMAIN_NAME is currently running."
	elif [ $result -eq $OCF_NOT_RUNNING ]; then
		ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running."
	fi

	return $result
}
# Query the domain state with "virsh domstate" and map it to an OCF code.
# Returns $OCF_SUCCESS for a live domain, $OCF_NOT_RUNNING when it is shut
# off or unknown to libvirt, and $OCF_ERR_GENERIC otherwise. While virsh
# yields no usable state the query is retried once per second; during
# "stop" it gives up after three tries, during "monitor" it falls back to
# pid_status.
# Side effects: assigns the variables "rc" and "status", which are not
# declared local here.
VirtualDomain_status() {
	local try=0
	rc=$OCF_ERR_GENERIC
	status="no state"
	while [ "$status" = "no state" ]; do
		try=$(($try + 1 ))
		# stderr is captured too and everything is lower-cased so the
		# patterns below can match virsh error text
		status=$(LANG=C virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z')
		case "$status" in
			*"error:"*"domain not found"*|*"error:"*"failed to get domain"*|"shut off")
				# shut off: domain is defined, but not started, will not happen if
				# domain is created but not defined
				# "Domain not found" or "failed to get domain": domain is not defined
				# and thus not started
				# (the "domain not found" pattern ends in "*" like the one in
				# force_stop, so trailing detail text does not defeat the match)
				ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)"
				rc=$OCF_NOT_RUNNING
				;;
			running|paused|idle|blocked|"in shutdown")
				# running: domain is currently actively consuming cycles
				# paused: domain is paused (suspended)
				# idle: domain is running but idle
				# blocked: synonym for idle used by legacy Xen versions
				# in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed.
				ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status."
				rc=$OCF_SUCCESS
				;;
			""|*"failed to "*"connect to the hypervisor"*|"no state")
				# Empty string may be returned when virsh does not
				# receive a reply from libvirtd.
				# "no state" may occur when the domain is currently
				# being migrated (on the migration target only), or
				# whenever virsh can't reliably obtain the domain
				# state.
				status="no state"
				if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then
					# During the stop operation, we want to bail out
					# quickly, so as to be able to force-stop (destroy)
					# the domain if necessary.
					ocf_exit_reason "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out."
					return $OCF_ERR_GENERIC;
				elif [ "$__OCF_ACTION" = "monitor" ]; then
					pid_status
					rc=$?
					if [ $rc -ne $OCF_ERR_GENERIC ]; then
						# we've successfully determined the domains status outside of libvirt
						return $rc
					fi
				else
					# During all other actions, we just wait and try
					# again, relying on the CRM/LRM to time us out if
					# this takes too long.
					ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying."
				fi
				sleep 1
				;;
			*)
				# any other output is unexpected; the loop ends because
				# $status is no longer "no state" and rc stays at its
				# last value
				ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!"
				sleep 1
				;;
		esac
	done
	return $rc
}
# virsh undefine removes configuration files if they are in
# directories which are managed by libvirt. such directories
# include also subdirectories of /etc (for instance
# /etc/libvirt/*) which may be surprising. VirtualDomain didn't
# include the undefine call before, hence this wasn't an issue
# before.
#
# There seems to be no way to find out which directories are
# managed by libvirt.
#
# Undefine $DOMAIN_NAME if libvirt currently lists it, keeping a copy of
# the config file so it can be put back should undefine delete it.
verify_undefined() {
	local tmpf

	# "virsh list --name" prints one domain name per line; match the whole
	# line as a fixed string so that neither regex metacharacters in the
	# name nor a longer name containing it (e.g. "web" vs. "web-1") count
	# as a hit.
	if virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null | grep -Fxqs -- "$DOMAIN_NAME"
	then
		tmpf=$(mktemp -t vmcfgsave.XXXXXX)
		if [ ! -r "$tmpf" ]; then
			ocf_log warn "unable to create temp file, disk full?"
			# we must undefine the domain
			virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1
		else
			cp -p "$OCF_RESKEY_config" "$tmpf"
			virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1
			# restore the config file if undefine removed it
			[ -f "$OCF_RESKEY_config" ] || cp -f "$tmpf" "$OCF_RESKEY_config"
			rm -f "$tmpf"
		fi
	fi
}
# Make sure every storage pool and virtual network referenced in the
# domain config file is active on the hypervisor whose URI is $1.
# Returns $OCF_ERR_GENERIC as soon as one of them cannot be started,
# $OCF_SUCCESS otherwise.
start_resources() {
	local virsh_opts="--connect=$1 --quiet"
	local pool_state net_state

	# storage pools: every distinct pool='...' / pool="..." value
	for pool in $(sed -n "s/^.*pool=['\"]\([^'\"]\+\)['\"].*\$/\1/gp" ${OCF_RESKEY_config} | sort | uniq); do
		pool_state=$(LANG=C virsh ${virsh_opts} pool-info ${pool} | sed -n 's/^State: \+\(.*\)$/\1/gp')
		if [ "$pool_state" = "running" ]; then
			virsh ${virsh_opts} pool-refresh $pool
		else
			if ! virsh ${virsh_opts} pool-start $pool; then
				ocf_exit_reason "Failed to start required virtual storage pool ${pool}."
				return $OCF_ERR_GENERIC
			fi
		fi
	done

	# networks: every distinct network='...' / network="..." value
	for net in $(sed -n "s/^.*network=['\"]\([^'\"]\+\)['\"].*\$/\1/gp" ${OCF_RESKEY_config} | sort | uniq); do
		net_state=$(LANG=C virsh ${virsh_opts} net-info ${net} | sed -n 's/^Active: \+\(.*\)$/\1/gp')
		if [ "$net_state" != "yes" ]; then
			if ! virsh ${virsh_opts} net-start $net; then
				ocf_exit_reason "Failed to start required virtual network ${net}."
				return $OCF_ERR_GENERIC
			fi
		fi
	done

	return $OCF_SUCCESS
}
# Start the virtual domain.
# Returns $OCF_SUCCESS when the domain is already running, was restored
# from a snapshot state file, or was started and VirtualDomain_monitor
# succeeded; returns $OCF_ERR_GENERIC when any step fails.
VirtualDomain_start() {
	local snapshotimage

	if VirtualDomain_status; then
		ocf_log info "Virtual domain $DOMAIN_NAME already running."
		return $OCF_SUCCESS
	fi

	# systemd drop-in to stop domain before libvirtd terminates services
	# during shutdown/reboot
	if systemd_is_running ; then
		systemd_drop_in "99-VirtualDomain-libvirt" "After" "libvirtd.service"
		systemd_drop_in "99-VirtualDomain-machines" "Wants" "virt-guest-shutdown.target"
		systemctl start virt-guest-shutdown.target
	fi

	snapshotimage="$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state"
	if [ -n "$OCF_RESKEY_snapshot" ] && [ -f "$snapshotimage" ]; then
		# NOTE(review): restore is issued without $VIRSH_OPTIONS, i.e. on
		# virsh's default connection; VirtualDomain_stop saves the state
		# the same way, so the two are kept in step here.
		virsh restore $snapshotimage
		if [ $? -eq 0 ]; then
			rm -f $snapshotimage
			return $OCF_SUCCESS
		fi
		ocf_exit_reason "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory."
		return $OCF_ERR_GENERIC
	fi

	# Make sure domain is undefined before creating.
	# The 'create' command guarantees that the domain will be
	# undefined on shutdown, but requires the domain to be undefined.
	# if a user defines the domain
	# outside of this agent, we have to ensure that the domain
	# is restored to an 'undefined' state before creating.
	verify_undefined

	if ocf_is_true "${OCF_RESKEY_start_resources}"; then
		start_resources ${OCF_RESKEY_hypervisor}
		rc=$?
		if [ $rc -eq $OCF_ERR_GENERIC ]; then
			return $rc
		fi
	fi

	if [ -z "${OCF_RESKEY_backingfile}" ]; then
		# No backing file: create a transient domain straight from the config.
		virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config}
		if [ $? -ne 0 ]; then
			ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}."
			return $OCF_ERR_GENERIC
		fi
	else
		# Copy-on-write mode. The overlay image path is the config path with
		# everything from its first "." replaced by ".qcow2".
		if ocf_is_true "${OCF_RESKEY_stateless}" || [ ! -s "${OCF_RESKEY_config%%.*}.qcow2" ]; then
			# Create the Stateless image
			qemu-img create -f qcow2 -b ${OCF_RESKEY_backingfile} ${OCF_RESKEY_config%%.*}.qcow2
			if [ $? -ne 0 ]; then
				ocf_exit_reason "Failed qemu-img create ${DOMAIN_NAME} with backing file ${OCF_RESKEY_backingfile}."
				return $OCF_ERR_GENERIC
			fi

			# Define on the same connection that "virsh start" uses below.
			virsh $VIRSH_OPTIONS define ${OCF_RESKEY_config}
			if [ $? -ne 0 ]; then
				ocf_exit_reason "Failed to define virtual domain ${DOMAIN_NAME}."
				return $OCF_ERR_GENERIC
			fi

			if [ -n "${OCF_RESKEY_copyindirs}" ]; then
				# Inject copyindirs directories and files
				virt-copy-in -d ${DOMAIN_NAME} ${OCF_RESKEY_copyindirs} /
				if [ $? -ne 0 ]; then
					ocf_exit_reason "Failed on virt-copy-in command ${DOMAIN_NAME}."
					return $OCF_ERR_GENERIC
				fi
			fi
		else
			# Define on the same connection that "virsh start" uses below.
			virsh $VIRSH_OPTIONS define ${OCF_RESKEY_config}
			if [ $? -ne 0 ]; then
				ocf_exit_reason "Failed to define virtual domain ${DOMAIN_NAME}."
				return $OCF_ERR_GENERIC
			fi
		fi

		virsh $VIRSH_OPTIONS start ${DOMAIN_NAME}
		if [ $? -ne 0 ]; then
			ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}."
			return $OCF_ERR_GENERIC
		fi
	fi

	# Block until a full monitor succeeds.
	while ! VirtualDomain_monitor; do
		sleep 1
	done

	return $OCF_SUCCESS
}
# Forcefully stop the domain with "virsh destroy".
# Returns $OCF_SUCCESS when the domain is gone (or was not there in the
# first place) and $OCF_ERR_GENERIC when destroy failed for another reason.
force_stop()
{
	local destroy_out destroy_rc lowered
	# Keep this name: VirtualDomain_status assigns to "status" as well.
	local status=0

	ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}."
	destroy_out=$(LANG=C virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1)
	destroy_rc=$?
	lowered=$(echo $destroy_out|tr 'A-Z' 'a-z')
	echo >&2 "$lowered"

	# Match on the exit code immediately followed by the lower-cased output.
	case $destroy_rc$lowered in
		*"error:"*"domain is not running"*|*"error:"*"domain not found"*|\
		*"error:"*"failed to get domain"*)
			: # unexpected path to the intended outcome, all is well
			;;
		[!0]*)
			ocf_exit_reason "forced stop failed"
			return $OCF_ERR_GENERIC
			;;
		0*)
			# destroy succeeded: poll until the domain is reported as not running
			while [ $status != $OCF_NOT_RUNNING ]; do
				VirtualDomain_status
				status=$?
			done
			;;
	esac

	return $OCF_SUCCESS
}
# Run "csync2 -x" on the domain config file; a failure is logged as a
# warning only.
sync_config(){
	ocf_log info "Syncing $DOMAIN_NAME config file with csync2 -x ${OCF_RESKEY_config}"
	csync2 -x ${OCF_RESKEY_config} ||
		ocf_log warn "Syncing ${OCF_RESKEY_config} failed.";
}
# Dump the domain's inactive XML (including security info) and, when it
# is non-empty, differs from the current config file and passes
# virt-xml-validate, write it over ${OCF_RESKEY_config}. Calls sync_config
# afterwards when sync_config_on_stop is true. Every problem is logged as
# a warning and leaves the existing config file untouched.
save_config(){
	CFGTMP=$(mktemp -t vmcfgsave.XXXXXX)
	# Without a usable temp file the redirections below would either fail
	# or, worse, "cat > config" would run with no input file and truncate
	# the config, so bail out early.
	if [ -z "${CFGTMP}" ] || [ ! -w "${CFGTMP}" ]; then
		ocf_log warn "unable to create temp file, disk full?"
		return
	fi

	virsh $VIRSH_OPTIONS dumpxml --inactive --security-info ${DOMAIN_NAME} > "${CFGTMP}"
	if [ -s "${CFGTMP}" ]; then
		if ! cmp -s "${CFGTMP}" "${OCF_RESKEY_config}"; then
			if virt-xml-validate "${CFGTMP}" domain 2>/dev/null ; then
				ocf_log info "Saving domain $DOMAIN_NAME to ${OCF_RESKEY_config}. Please make sure it's present on all nodes or sync_config_on_stop is on."
				# cat + redirect writes into the existing file instead of
				# replacing it with the temp file
				if cat "${CFGTMP}" > "${OCF_RESKEY_config}" ; then
					ocf_log info "Saved $DOMAIN_NAME domain's configuration to ${OCF_RESKEY_config}."
					if ocf_is_true "$OCF_RESKEY_sync_config_on_stop"; then
						sync_config
					fi
				else
					ocf_log warn "Moving ${CFGTMP} to ${OCF_RESKEY_config} failed."
				fi
			else
				ocf_log warn "Domain $DOMAIN_NAME config failed to validate after dump. Skipping config update."
			fi
		fi
	else
		ocf_log warn "Domain $DOMAIN_NAME config has 0 size. Skipping config update."
	fi
	rm -f "${CFGTMP}"
}
# Stop the virtual domain.
# A running domain is destroyed right away when force_stop is true.
# Otherwise its state is saved (when "snapshot" is set) and/or a graceful
# "virsh shutdown" is requested, and the domain is polled until roughly
# five seconds before the operation timeout; if it is still there, or its
# status cannot be determined, force_stop is used as the last resort.
# Returns $OCF_SUCCESS once the domain is stopped, else force_stop's result.
VirtualDomain_stop() {
	# "status" must keep this name: VirtualDomain_status assigns to it.
	local status
	local shutdown_timeout
	local shutdown_mode shutdown_opts
	local needshutdown=1

	VirtualDomain_status
	status=$?

	case $status in
		$OCF_SUCCESS)
			if ocf_is_true $OCF_RESKEY_force_stop; then
				# if force stop, don't bother attempting graceful shutdown.
				force_stop
				return $?
			fi

			ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}."

			if [ -n "$OCF_RESKEY_snapshot" ]; then
				virsh save $DOMAIN_NAME "$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state"
				if [ $? -eq 0 ]; then
					needshutdown=0
				else
					ocf_log error "Failed to save snapshot state of ${DOMAIN_NAME} on stop"
				fi
			fi

			# save config if needed
			if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
				save_config
			fi

			# issue the shutdown if save state didn't shutdown for us
			if [ $needshutdown -eq 1 ]; then
				# Issue a graceful shutdown request.
				# The instance parameter is named "shutdown_mode", which
				# reaches the agent as OCF_RESKEY_shutdown_mode; the
				# historical OCF_RESKEY_CRM_shutdown_mode is still
				# honoured as a fallback.
				shutdown_mode="${OCF_RESKEY_shutdown_mode:-${OCF_RESKEY_CRM_shutdown_mode}}"
				if [ -n "${shutdown_mode}" ]; then
					shutdown_opts="--mode ${shutdown_mode}"
				fi
				virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME} $shutdown_opts
			fi

			# The "shutdown_timeout" we use here is the operation
			# timeout specified in the CIB, minus 5 seconds
			shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 ))

			# Loop on status until we reach $shutdown_timeout
			while [ $NOW -lt $shutdown_timeout ]; do
				VirtualDomain_status
				status=$?
				case $status in
					$OCF_NOT_RUNNING)
						# This was a graceful shutdown.
						return $OCF_SUCCESS
						;;
					$OCF_SUCCESS)
						# Domain is still running, keep
						# waiting (until shutdown_timeout
						# expires)
						sleep 1
						;;
					*)
						# Something went wrong. Bail out and
						# resort to forced stop (destroy).
						break
						;;
				esac
				NOW=$(date +%s)
			done
			;;
		$OCF_NOT_RUNNING)
			ocf_log info "Domain $DOMAIN_NAME already stopped."
			return $OCF_SUCCESS
			;;
	esac

	# OK. Now if the above graceful shutdown hasn't worked, kill
	# off the domain with destroy. If that too does not work,
	# have the LRM time us out.
	force_stop
}
# Print the migration URI for the migration target node when a dedicated
# migration network suffix is configured; print nothing otherwise.
mk_migrateuri() {
	local node
	local mig_host
	local hv_scheme

	node="$OCF_RESKEY_CRM_meta_migrate_target"

	# A typical migration URI via a special migration network looks
	# like "tcp://bar-mig:49152". The port would be randomly chosen
	# by libvirt from the range 49152-49215 if omitted, at least since
	# version 0.7.4 ...
	if [ -z "${OCF_RESKEY_migration_network_suffix}" ]; then
		return 0
	fi

	# Hypervisor URI up to (not including) the first "+" or ":"
	hv_scheme="${OCF_RESKEY_hypervisor%%[+:]*}"

	# Hostname might be a FQDN: the suffix goes right after the first label
	mig_host=$(echo ${node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},")

	case $hv_scheme in
		qemu)
			# For quiet ancient libvirt versions a migration port is needed
			# and the URI must not contain the "//". Newer versions can handle
			# the "bad" URI.
			echo "tcp:${mig_host}:${OCF_RESKEY_migrateport}"
			;;
		xen)
			echo "${mig_host}"
			;;
		*)
			ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hv_scheme}."
			;;
	esac
}
# Live-migrate the domain to the node named in
# $OCF_RESKEY_CRM_meta_migrate_target.
# Builds the remote connection URI and the migrate URI, optionally saves
# the config and starts pools/networks on the target, then runs
# "virsh migrate --live" in the background and waits for it.
# Returns $OCF_SUCCESS when virsh migrate exits 0; returns
# $OCF_ERR_GENERIC when the domain is not active locally, when
# start_resources fails, or when the migration fails.
VirtualDomain_migrate_to() {
local rc
local target_node
local remoteuri
local transport_suffix
local migrateuri
local migrate_opts
local migrate_pid
target_node="$OCF_RESKEY_CRM_meta_migrate_target"
if VirtualDomain_status; then
# Find out the remote hypervisor to connect to. That is, turn
# something like "qemu://foo:9999/system" into
# "qemu+tcp://bar:9999/system"
if [ -n "${OCF_RESKEY_remoteuri}" ]; then
# user-supplied remoteuri: only the %n placeholder is substituted
remoteuri=`echo "${OCF_RESKEY_remoteuri}" |
sed "s/%n/$target_node/g"`
else
if [ -n "${OCF_RESKEY_migration_transport}" ]; then
transport_suffix="+${OCF_RESKEY_migration_transport}"
fi
# append user defined suffix if virsh target should differ from cluster node name
if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then
# Hostname might be a FQDN
target_node=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},")
fi
# a remote user has been defined to connect to target_node
# (only values matching ^[a-z][-a-z0-9]*$ are used; an empty or
# otherwise non-matching migration_user is silently ignored)
if echo ${OCF_RESKEY_migration_user} | grep -q "^[a-z][-a-z0-9]*$" ; then
target_node="${OCF_RESKEY_migration_user}@${target_node}"
fi
# Scared of that sed expression? So am I. :-)
# It swaps the host part of the hypervisor URI for target_node and
# appends transport_suffix to the scheme, keeping port and path.
remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,")
fi
# User defined migrateuri or do we make one?
migrate_opts="$OCF_RESKEY_migrate_options"
# migration_uri is directly set
if [ -n "${OCF_RESKEY_migrateuri}" ]; then
migrateuri=`echo "${OCF_RESKEY_migrateuri}" |
sed "s/%n/$target_node/g"`
# extract migrationuri from options
- elif echo "$migrate_opts" | fgrep -qs -- "--migrateuri="; then
+ elif echo "$migrate_opts" | $FGREP -qs -- "--migrateuri="; then
migrateuri=`echo "$migrate_opts" |
sed "s/.*--migrateuri=\([^ ]*\).*/\1/;s/%n/$target_node/g"`
# auto generate
else
migrateuri=`mk_migrateuri`
fi
# remove --migrateuri from migration_opts
migrate_opts=`echo "$migrate_opts" |
sed "s/\(.*\)--migrateuri=[^ ]*\(.*\)/\1\2/"`
# save config if needed
if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
save_config
fi
if ocf_is_true "${OCF_RESKEY_start_resources}"; then
# start the pools/networks on the migration target, via remoteuri
start_resources $remoteuri
rc=$?
if [ $rc -eq $OCF_ERR_GENERIC ]; then
return $rc
fi
fi
# Live migration speed limit
if [ ${OCF_RESKEY_migration_speed} -ne 0 ]; then
ocf_log info "$DOMAIN_NAME: Setting live migration speed limit for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed})."
virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed}
fi
# OK, we know where to connect to. Now do the actual migration.
ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using: virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)."
# run in the background so the downtime can be set while it is in progress
virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri &
migrate_pid=${!}
# Live migration downtime interval
# Note: You can set downtime only while live migration is in progress
if [ ${OCF_RESKEY_migration_downtime} -ne 0 ]; then
sleep 2
ocf_log info "$DOMAIN_NAME: Setting live migration downtime for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime})."
virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime}
fi
# collect the exit status of the background virsh migrate
wait ${migrate_pid}
rc=$?
if [ $rc -ne 0 ]; then
ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc"
return $OCF_ERR_GENERIC
else
ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded."
return $OCF_SUCCESS
fi
else
ocf_exit_reason "$DOMAIN_NAME: migrate_to: Not active locally!"
return $OCF_ERR_GENERIC
fi
}
# Target-side half of a live migration: wait until VirtualDomain_monitor
# succeeds locally, optionally save the config, and return $OCF_SUCCESS.
VirtualDomain_migrate_from() {
	# systemd drop-in to stop domain before libvirtd terminates services
	# during shutdown/reboot
	if systemd_is_running ; then
		systemd_drop_in "99-VirtualDomain-libvirt" "After" "libvirtd.service"
		systemd_drop_in "99-VirtualDomain-machines" "Wants" "virt-guest-shutdown.target"
		systemctl start virt-guest-shutdown.target
	fi

	# Poll once per second; there is no local retry limit.
	until VirtualDomain_monitor; do
		sleep 1
	done

	ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded."

	# save config if needed
	if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
		save_config
	fi

	return $OCF_SUCCESS
}
# Monitor the domain: check its libvirt status and, if that is fine, run
# every script listed in monitor_scripts. Afterwards the emulator cache
# and utilization attributes are refreshed and, when requested, the
# config is saved. Returns the status result, or $OCF_ERR_GENERIC when a
# monitor script fails.
VirtualDomain_monitor() {
	# First, check the domain status. If that returns anything other
	# than $OCF_SUCCESS, something is definitely wrong.
	VirtualDomain_status
	rc=$?

	if [ ${rc} -eq ${OCF_SUCCESS} ]; then
		# OK, the generic status check turned out fine. Now, if we
		# have monitor scripts defined, run them one after another.
		for script in ${OCF_RESKEY_monitor_scripts}; do
			script_output="$($script 2>&1)"
			script_rc=$?
			if [ ${script_rc} -eq ${OCF_SUCCESS} ]; then
				ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}"
				continue
			fi
			# A monitor script returned a non-success exit
			# code. Stop iterating over the list of scripts, record
			# the exit reason, and propagate $OCF_ERR_GENERIC.
			ocf_exit_reason "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}"
			rc=$OCF_ERR_GENERIC
			break
		done
	fi

	update_emulator_cache
	update_utilization

	# Save configuration on monitor as well, so we will have a better chance of
	# having fresh and up to date config files on all nodes.
	if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
		save_config
	fi

	return ${rc}
}
# Validate the resource configuration.
# Returns $OCF_ERR_CONFIGURED for contradictory or malformed parameters,
# $OCF_ERR_INSTALLED when the config file is unreadable or no domain name
# could be derived from it, and $OCF_SUCCESS otherwise.
VirtualDomain_validate_all() {
	if ocf_is_true "$OCF_RESKEY_force_stop" && [ -n "$OCF_RESKEY_snapshot" ]; then
		ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together."
		return $OCF_ERR_CONFIGURED
	fi

	# check if we can read the config file (otherwise we're unable to
	# deduce $DOMAIN_NAME from it, see below)
	# The path is quoted so that an empty value is treated as "not
	# readable" instead of collapsing the test to `[ ! -r ]`.
	if [ ! -r "$OCF_RESKEY_config" ]; then
		if ocf_is_probe; then
			ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe."
		elif [ "$__OCF_ACTION" = "stop" ]; then
			ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped."
		else
			ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or not readable."
		fi
		return $OCF_ERR_INSTALLED
	fi

	# Quoted so that a multi-word value does not make `[` fail with a
	# syntax error.
	if [ -z "$DOMAIN_NAME" ]; then
		ocf_exit_reason "Unable to determine domain name."
		return $OCF_ERR_INSTALLED
	fi

	# Check if csync2 is available when config tells us we might need it.
	if ocf_is_true "$OCF_RESKEY_sync_config_on_stop"; then
		check_binary csync2
	fi

	# Check if migration_speed is a decimal value
	if ! ocf_is_decimal "${OCF_RESKEY_migration_speed}"; then
		ocf_exit_reason "migration_speed has to be a decimal value"
		return $OCF_ERR_CONFIGURED
	fi

	# Check if migration_downtime is a decimal value
	if ! ocf_is_decimal "${OCF_RESKEY_migration_downtime}"; then
		ocf_exit_reason "migration_downtime has to be a decimal value"
		return $OCF_ERR_CONFIGURED
	fi

	if ocf_is_true "${OCF_RESKEY_stateless}" && [ -z "${OCF_RESKEY_backingfile}" ]; then
		ocf_exit_reason "Stateless functionality can't be achieved without a backing file."
		return $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}
# Derive the global settings used by the other functions:
# OCF_RESKEY_hypervisor (defaulted from "virsh --quiet uri" when unset),
# VIRSH_OPTIONS, DOMAIN_NAME and EMULATOR_STATE.
VirtualDomain_getconfig() {
# Grab the virsh uri default, but only if hypervisor isn't set
# (the "=" form without ":" leaves a hypervisor that is set but empty as is)
: ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)}
# Set options to be passed to virsh:
VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet"
# Retrieve the domain name from the xml file.
# Every line ending in a <name>...</name> element matches, so DOMAIN_NAME
# holds more than one value if the file contains several such lines.
- DOMAIN_NAME=`egrep '[[:space:]]*<name>.*</name>[[:space:]]*$' ${OCF_RESKEY_config} 2>/dev/null | sed -e 's/[[:space:]]*<name>\(.*\)<\/name>[[:space:]]*$/\1/'`
+ DOMAIN_NAME=`$EGREP '[[:space:]]*<name>.*</name>[[:space:]]*$' ${OCF_RESKEY_config} 2>/dev/null | sed -e 's/[[:space:]]*<name>\(.*\)<\/name>[[:space:]]*$/\1/'`
# Per-domain file in which update_emulator_cache stores the emulator name
EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state"
}
# Agent entry point: name the mandatory parameter and the binaries the
# agent needs, then hand the command-line arguments to ocf_rarun.
OCF_REQUIRED_PARAMS="config"
OCF_REQUIRED_BINARIES="virsh sed"
# "$@" forwards each argument unchanged; unquoted $* would re-split them
# on whitespace and expand globs.
ocf_rarun "$@"
diff --git a/heartbeat/WAS b/heartbeat/WAS
index 15b56e99e..44aa83e20 100755
--- a/heartbeat/WAS
+++ b/heartbeat/WAS
@@ -1,572 +1,572 @@
#!/bin/sh
#
#
# WAS
#
# Description: Manages a Websphere Application Server as an HA resource
#
#
# Author: Alan Robertson
# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2002 - 2005 International Business Machines, Inc.
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 WAS::/opt/WebSphere/ApplicationServer/config/server-cfg.xml
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_config
# (WAS-configuration file, used for the single server edition of WAS)
# OCF_RESKEY_port
# (WAS-<snoop>-port-number, used for the advanced edition of WAS)
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Locate the WebSphere installation: prefer /opt, fall back to /usr when the
# /opt directory is not present.
WASDIR=/opt/WebSphere/AppServer
[ -d $WASDIR ] || WASDIR=/usr/WebSphere/AppServer

# Upper bound (in one-second polls) used while waiting for WAS to come up.
STARTTIME=300 # 5 minutes
# Port used when no configuration file is available.
DEFAULT_WASPORTS="9080"

# Paths derived from the installation directory.
WASBIN=$WASDIR/bin
DEFAULT=$WASDIR/config/server-cfg.xml
#
# Print usage message
#
usage() {
    # Build the "a|b|c" operation list from WAS_methods, leaving out the
    # "methods" operation itself, then print the help text.
    methods=`WAS_methods | grep -v methods`
    methods=`echo $methods | tr ' ' '|'`
    cat <<-END
usage: $0 ($methods)
For the single server edition of WAS, you have to set the following
environment variable:
OCF_RESKEY_config
(WAS-configuration file)
For the advanced edition of WAS, you have to set the following
environment variable:
OCF_RESKEY_port
(WAS-<snoop>-port-number)
$0 manages a Websphere Application Server (WAS) as an HA resource
The 'start' operation starts WAS.
The 'stop' operation stops WAS.
The 'status' operation reports whether WAS is running
The 'monitor' operation reports whether the WAS seems to be working
(httpd also needs to be working for this case)
The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_config or OCF_RESKEY_port) is valid
The 'methods' operation reports on the methods $0 supports
This is known to work with the Single Server edition of Websphere,
and is believed to work with the Advanced edition too.
Since the Advanced Edition has no configuration file (it's in the
database) you need to give a port number instead of a
configuration file for this config parameter.
The default configuration file for the single server edition is:
$DEFAULT
The default snoop-port for the advanced edition is: $DEFAULT_WASPORTS
The start and stop operations must be run as root.
The status operation will report a pid of "-" for the
WAS root process using unless it is run as root.
If you don't have xmllint on your system, parsing of WAS
configuration files is very primitive.
In this case, the port specification we need from the XML
config file has to be on the same line as the
first part of the <transports/> tag.
We run servlet/snoop on the first transport port listed in
the config file for the "monitor" operation.
END
}
# Print the OCF resource-agent metadata (XML) for this agent on stdout.
# $DEFAULT and $DEFAULT_WASPORTS are expanded into the parameter defaults.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="WAS" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Resource script for WAS. It manages a Websphere Application Server (WAS) as
an HA resource.
</longdesc>
<shortdesc lang="en">Manages a WebSphere Application Server instance</shortdesc>
<parameters>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
The WAS-configuration file.
</longdesc>
<shortdesc lang="en">configuration file</shortdesc>
<content type="string" default="$DEFAULT" />
</parameter>
<parameter name="port" unique="0">
<longdesc lang="en">
The WAS-(snoop)-port-number.
</longdesc>
<shortdesc lang="en">port</shortdesc>
<content type="integer" default="$DEFAULT_WASPORTS" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="300s" />
<action name="stop" timeout="300s" />
<action name="status" depth="0" timeout="30s" interval="10s" />
<action name="monitor" depth="0" timeout="30s" interval="10s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
<action name="methods" timeout="5s" />
</actions>
</resource-agent>
END
}
#
# Reformat the XML document in a sort of canonical form
# if we can. If we don't have xmllint, we just cat it out
# and hope for the best ;-)
#
xmlcat() {
    # Pick the formatter once and remember it in XMLcat for later calls:
    # xmllint (with --recover --format) when an executable one is found,
    # plain cat otherwise.
    if [ -z "$XMLcat" ]; then
        XMLcat=$(which xmllint 2>/dev/null)
        if [ "X${XMLcat}" != X -a -x "${XMLcat}" ]; then
            XMLcat="$XMLcat --recover --format"
        else
            XMLcat=cat
        fi
    fi

    # Emit every file given on the command line through the formatter.
    for j in "$@"; do
        ${XMLcat} "$j"
    done
}
#
#This is a bit skanky, but it works anyway...
#
#<transports xmi:type="applicationserver:HTTPTransport" xmi:id="HttpTransport_1" hostname="*" port="9080"/>
#<transports xmi:type="applicationserver:HTTPTransport" xmi:id="HttpTransport_2" hostname="*" port="9443" sslEnabled="true"/>
#<transports xmi:type="applicationserver:HTTPTransport" xmi:id="HttpTransport_3" hostname="*" port="9090" external="false"/>
#
# It's not really skanky if we can find xmllint on the system, because it
# reformats tags so they are all on one line, which is all we need...
#
#
# Get the numbers of the ports WAS should be listening on...
#
# If we don't have xmllint around, then the applicationserver and the
# port= specification have to be on the same line in the XML config file.
#
GetWASPorts() {
    # An argument starting with a digit is taken as a comma-separated port
    # list and is simply split into one port per line.
    case $1 in
        [0-9]*)
            echo "$1" | tr ',' '\012'
            return
            ;;
    esac

    # Otherwise the argument is a configuration file: pull the port out of
    # each HTTPTransport <transports/> line.
    #  - first sed expression: delete everything up to port=, plus an
    #    optional quote and optional white space;
    #  - second sed expression: drop everything from the first non-digit on.
    # That leaves the port number all by itself.
    xmlcat $1 \
        | grep -i 'transports.*applicationserver:HTTPTransport' \
        | grep port= \
        | sed -e 's%.*port= *"* *%%' -e 's%[^0-9][^0-9]*.*$%%'
}
#
# We assume that the first port listed in the <transports/>
# is the one we should run servlet/snoop on.
#
GetWASSnoopPort() {
    # Only the first port reported by GetWASPorts is of interest.
    GetWASPorts "$@" | sed -n '1p'
}
#
# Return information on the processname/id for the WAS ports
#
# pid/java is the expected output. Several lines, one per port...
#
#
WASPortInfo() {
# Arguments: the port numbers to look for.
# Prints, for each matching listening socket, whatever netstat shows after
# the LISTEN column.
pat=""
# NOTE(review): "once" is assigned here but not read anywhere in this function.
once=yes
PortCount=0
# Join the ports into an alternation: port1|port2|...
for j in $*
do
case $pat in
"") pat="$j";;
*) pat="$pat|$j";;
esac
PortCount=`expr $PortCount + 1`
done
# Keep only LISTEN lines mentioning one of the ports, then strip everything
# up to and including "LISTEN". netstat's own errors are discarded.
# NOTE(review): the patch pair below swaps the literal egrep for $EGREP,
# which is not defined in this chunk; confirm where it is set.
- netstat -ltnp 2>/dev/null| egrep -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%'
+ netstat -ltnp 2>/dev/null| $EGREP -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%'
}
#
# Return the number of WAS ports which are open
#
CheckWASPortsInUse() {
    # One line of WASPortInfo output per open port; the unquoted echo drops
    # any padding wc puts around the number.
    count=$(WASPortInfo "$@" | wc -l)
    echo $count
}
#
# Return the pid(s) of the processes that have WAS ports open
#
WASPIDs() {
    # WASPortInfo lines look like pid/name: de-duplicate them and keep the
    # part before the slash.
    WASPortInfo "$@" \
        | sort -u \
        | cut -d/ -f1
}
#
# The version of ps that returns all processes and their (long) args
# It's only used by WAS_procs, which isn't used for anything ;-)
#
ps_long() {
# Thin wrapper so the exact ps invocation lives in one place.
ps axww
}
#
# The total set of WAS processes (single server only)
#
WAS_procs() {
    # From the full process listing keep lines that mention both
    # "config=<arg>" and "java" (case-insensitively), then print the first
    # space-delimited field of each.
    ps_long \
        | grep -i "config=$1" \
        | grep -i java \
        | cut -d' ' -f1
}
#
# methods: What methods/operations do we support?
#
WAS_methods() {
    # Operations that are always available, one per line.
    printf '%s\n' start stop status methods validate-all meta-data usage

    # "monitor" is only offered when the wget binary is available.
    if have_binary $WGET; then
        echo monitor
    fi
}
#
# Return WAS status (silently)
#
WAS_status() {
    # Succeeds unless the number of open WAS ports is exactly 0.
    WASPorts=$(GetWASPorts $1)
    PortsInUse=$(CheckWASPortsInUse $WASPorts)
    [ "$PortsInUse" != 0 ]
}
#
# Report on WAS status to stdout...
#
WAS_report_status() {
    WASPorts=$(GetWASPorts $1)
    # Number of configured ports; the second echo strips padding from wc.
    PortCount=$(echo $WASPorts | wc -w)
    PortCount=$(echo $PortCount)
    PortsInUse=$(CheckWASPortsInUse $WASPorts)

    # No port open at all: the server is considered stopped.
    if [ "$PortsInUse" = 0 ]; then
        ocf_log debug "WAS: server $1 is stopped."
        return $OCF_NOT_RUNNING
    fi

    # At least one port is open: report the owning pids and whether every
    # configured port is being listened on.
    pids=$(WASPIDs $WASPorts)
    if [ $PortsInUse -ge $PortCount ]; then
        ocf_log debug "WAS: server $1 is running (pid" $pids "et al)."
    else
        ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports."
    fi
    return $OCF_SUCCESS
}
#
# Monitor WAS - does it really seem to be working?
#
# For this we invoke the snoop applet via wget.
#
# This is actually faster than WAS_status above...
#
WAS_monitor() {
    # Clean up the temporary file when the shell exits.
    trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0
    tmpfile=$(maketempfile) || return 1

    # Fetch the snoop servlet from the first WAS port into the temp file.
    SnoopPort=$(GetWASSnoopPort $1)
    output=$($WGET -nv -O$tmpfile http://localhost:$SnoopPort/servlet/snoop 2>&1)
    rc=$?

    if [ $rc -ne 0 ]; then
        ocf_log "err" "WAS: $1: wget failure: $output"
        rc=$OCF_ERR_GENERIC
    elif ! grep -i 'user-agent.*Wget' $tmpfile >/dev/null; then
        # The page came back but does not echo our user agent.
        ocf_log "err" "WAS: $1: no user-agent from snoop application"
        rc=$OCF_ERR_GENERIC
    fi
    return $rc
}
#
# Start WAS instance
#
WAS_start() {
    # Launch Arguments:
    #
    # -configFile <configFile>
    # -nodeName <nodeName>
    # -serverName <serverName>
    # -oltEnabled
    # -oltHost <hostname>
    # -oltPort <port>
    # -debugEnabled
    # -jdwpPort <port>
    # -debugSource <sourcePath>
    # -serverTrace <traceString>
    # -serverTraceFile <traceFile>
    # -script [<scriptFile>]
    # -platform <platformName>
    # -noExecute
    # -help

    # Use startServer.sh with the config file when it is executable,
    # otherwise fall back to startupServer.sh.
    if [ -x $WASBIN/startServer.sh ]; then
        cmd="$WASBIN/startServer.sh -configFile $1"
    else
        cmd="$WASBIN/startupServer.sh"
    fi

    # A failing launcher is an immediate error.
    ocf_run $cmd || return $OCF_ERR_GENERIC

    # The launcher returned OK; wait until WAS is actually listening.
    if ! WAS_wait_4_start $STARTTIME "$@"; then
        ocf_log "err" "WAS server $1 did not start correctly"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
#
# Wait for WAS to actually start up.
#
# It seems to take between 30 and 60 seconds for it to
# start up on a trivial WAS instance.
#
WAS_wait_4_start() {
    # $1 is the maximum number of one-second polls; the remaining arguments
    # are passed through to WAS_status.
    max=$1
    retries=0
    shift

    while [ $retries -lt $max ]; do
        WAS_status "$@" && return $OCF_SUCCESS
        sleep 1
        retries=$((retries + 1))
    done

    # Out of retries: the result of one last check is the return status.
    WAS_status "$@"
}
#
# Shut down WAS
#
WAS_stop() {
    # They don't return good return codes...
    # And, they seem to allow anyone to stop WAS (!)
    # Prefer the vendor stop script; without it, kill whatever owns the
    # WAS ports.
    if [ -x $WASBIN/stopServer.sh ]; then
        ocf_run $WASBIN/stopServer.sh -configFile $1
    else
        WASPorts=$(GetWASPorts $1)
        kill $(WASPIDs $WASPorts)
    fi

    # Success is judged by the ports being closed, not by the exit status
    # of the commands above.
    if ! WAS_status $1; then
        return $OCF_SUCCESS
    fi
    ocf_log "err" "WAS: $1 did not stop correctly"
    return $OCF_ERR_GENERIC
}
#
# Check if the port is valid
#
CheckPort() {
    # Valid means: accepted by ocf_is_decimal and greater than zero.
    ocf_is_decimal "$1" && test $1 -gt 0
}
# Validate the instance parameter held in the global $arg.
#  - When startServer.sh is executable, $arg must be an existing
#    configuration file that yields at least one valid port number.
#  - When only startupServer.sh is executable, $arg itself must be a valid
#    port number.
#  - With neither script present, validation is assumed to be OK.
# Validation failures terminate the agent via exit (OCF_ERR_ARGS or
# OCF_ERR_CONFIGURED) rather than returning.
WAS_validate_all() {
    if [ -x $WASBIN/startServer.sh ]; then
        # $arg should be config file
        if [ ! -f "$arg" ]; then
            ocf_log err "Configuration file [$arg] does not exist"
            exit $OCF_ERR_ARGS
        fi
        # $arg should specify a valid port number at the very least
        local WASPorts=`GetWASPorts $arg`
        if [ -z "$WASPorts" ]; then
            ocf_log err "No port number specified in configuration file [$arg]"
            exit $OCF_ERR_CONFIGURED
        fi
        local port
        local have_valid_port=false
        for port in $WASPorts; do
            if CheckPort $port; then
                have_valid_port=true
                break
            fi
        done
        if [ "false" = "$have_valid_port" ]; then
            ocf_log err "No valid port number specified in configuration file [$arg]"
            exit $OCF_ERR_CONFIGURED
        fi
    elif [ -x $WASBIN/startupServer.sh ]; then
        # $arg should be port number; complain only when it is NOT valid
        # (the test used to be inverted and rejected every valid port).
        if ! CheckPort "$arg"; then
            ocf_log err "Port number is required but [$arg] is not valid port number"
            exit $OCF_ERR_ARGS
        fi
    else
        # Do not know how to validate_all
        ocf_log warn "Do not know how to validate-all, assuming validation OK"
        return $OCF_SUCCESS
    fi
}
#
# 'main' starts here...
#
# Exactly one argument, the operation name, is expected.
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

# These operations do not depend on the instance parameters, so serve them
# before the configuration is resolved and checked; otherwise a missing
# configuration file would make e.g. meta-data fail.
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS;;
    usage)
        usage
        exit $OCF_SUCCESS;;
    methods)
        WAS_methods
        exit $?;;
esac

#
# Supply default configuration parameter(s)
#
# Precedence: OCF_RESKEY_config, then OCF_RESKEY_port, then the default
# configuration file if it exists, then the default port.
if [ -z "$OCF_RESKEY_config" ] && [ -z "$OCF_RESKEY_port" ]; then
    if [ -f $DEFAULT ]; then
        arg=$DEFAULT
    else
        arg=$DEFAULT_WASPORTS
    fi
elif [ -n "$OCF_RESKEY_config" ]; then
    arg=$OCF_RESKEY_config
else
    arg=$OCF_RESKEY_port
fi

# Anything that is not an existing file must at least look like a port.
if [ ! -f "$arg" ]; then
    case $arg in
        [0-9]*) ;; # ignore port numbers...
        *)
            ocf_log "err" "WAS configuration file $arg does not exist!"
            usage
            exit $OCF_ERR_ARGS;;
    esac
fi

# What kind of method was invoked?
case "$1" in
    start)
        WAS_start $arg
        exit $?;;
    stop)
        WAS_stop $arg
        exit $?;;
    status)
        WAS_report_status $arg
        exit $?;;
    monitor)
        WAS_monitor $arg
        exit $?;;
    validate-all)
        WAS_validate_all $arg
        exit $?;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/WAS6 b/heartbeat/WAS6
index 9e18cd682..e71eec930 100755
--- a/heartbeat/WAS6
+++ b/heartbeat/WAS6
@@ -1,546 +1,546 @@
#!/bin/sh
# WAS6
#
# Description: Manages a Websphere Application Server as an HA resource
#
#
# Author: Ru Xiang Min
# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2006 International Business Machines China, Ltd., Inc.
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 WAS::/opt/IBM/WebSphere/AppServer/profiles/default/config/cells/Node01Cell/nodes/Node01/serverindex.xml
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_profile
# (WAS profile name, used for the single server edition of WAS6)
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Locate the WebSphere installation: prefer /opt/IBM, fall back to /usr/IBM
# when the /opt directory is not present.
WAS_DIR=/opt/IBM/WebSphere/AppServer
[ -d $WAS_DIR ] || WAS_DIR=/usr/IBM/WebSphere/AppServer

# Upper bound (in one-second polls) used while waiting for WAS to come up.
STARTTIME=300 # 5 minutes
DEFAULT_WASPORTS="9080"

# Derived path and the profile name used when none is configured.
WAS_BIN=$WAS_DIR/bin
DEFAULT=default
#
# Print usage message
#
usage() {
    # Build the "a|b|c" operation list from WAS_methods, leaving out the
    # "methods" operation itself, then print the help text.
    methods=`WAS_methods | grep -v methods`
    methods=`echo $methods | tr ' ' '|'`
    cat <<-END
usage: $0 ($methods)
For the single server edition of WAS6, you have to set the following
environment variable:
OCF_RESKEY_profile
(WAS profile name)
$0 manages a Websphere Application Server 6(WAS6) as an HA resource
The 'start' operation starts WAS6.
The 'stop' operation stops WAS6.
The 'status' operation reports whether WAS6 is running
The 'monitor' operation reports whether the WAS6 seems to be working
(httpd also needs to be working for this case)
The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_profile) is valid
The 'methods' operation reports on the methods $0 supports
This is known to work with the Single Server edition of Websphere.
The default profile name for the single server edition is:
$DEFAULT
The start and stop operations must be run as root.
The status operation will report a pid of "-" for the
WAS root process using unless it is run as root.
If you don't have xmllint on your system, parsing of WAS
configuration files is very primitive.
We run servlet/snoop on the seventh transport port listed in
the config file for the "monitor" operation.
END
}
# Print the OCF resource-agent metadata (XML) for WAS6 on stdout.
# The heredoc is unquoted, so $DEFAULT is expanded into the default of the
# "profile" parameter.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="WAS6" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Resource script for WAS6. It manages a Websphere Application Server (WAS6) as
an HA resource.
</longdesc>
<shortdesc lang="en">Manages a WebSphere Application Server 6 instance</shortdesc>
<parameters>
<parameter name="profile" unique="0" required="0">
<longdesc lang="en">
The WAS profile name.
</longdesc>
<shortdesc lang="en">profile name</shortdesc>
<content type="string" default="$DEFAULT" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="300s" />
<action name="stop" timeout="300s" />
<action name="status" depth="0" timeout="30s" interval="10s" />
<action name="monitor" depth="0" timeout="30s" interval="10s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
<action name="methods" timeout="5s" />
</actions>
</resource-agent>
END
}
#
# Reformat the XML document in a sort of canonical form
# if we can. If we don't have xmllint, we just cat it out
# and hope for the best ;-)
#
xmlcat() {
    # Pick the formatter once and remember it in XMLcat for later calls:
    # xmllint (with --recover --format) when an executable one is found,
    # plain cat otherwise.
    if [ -z "$XMLcat" ]; then
        XMLcat=$(which xmllint 2>/dev/null)
        if [ "X${XMLcat}" != X -a -x "${XMLcat}" ]; then
            XMLcat="$XMLcat --recover --format"
        else
            XMLcat=cat
        fi
    fi

    # Emit every file given on the command line through the formatter.
    for j in "$@"; do
        ${XMLcat} "$j"
    done
}
#
#This is a bit skanky, but it works anyway...
#
# It's not really skanky if we can find xmllint on the system, because it
# reformats tags so they are all on one line, which is all we need...
#
#
# Get the numbers of the ports WAS should be listening on...
#
# If we don't have xmllint around, then the applicationserver and the
# port= specification have to be on the same line in the XML config file.
#
GetWASPorts() {
    # An argument starting with a digit is taken as a comma-separated port
    # list and is simply split into one port per line.
    case $1 in
        [0-9]*)
            echo "$1" | tr ',' '\012'
            return
            ;;
    esac

    # Otherwise read the ports from the profile's serverindex.xml (located
    # via WAS_DIR, WAS_PROFILE_NAME, WAS_CELL and WAS_NODE).
    #  - first sed expression: delete everything up to port=, plus an
    #    optional quote and optional white space;
    #  - second sed expression: drop everything from the first non-digit on.
    # That leaves the port number all by itself.
    xmlcat ${WAS_DIR}/profiles/${WAS_PROFILE_NAME}/config/cells/${WAS_CELL}/nodes/${WAS_NODE}/serverindex.xml \
        | grep port= \
        | sed -e 's%.*port= *"* *%%' -e 's%[^0-9][^0-9]*.*$%%'
}
#
# We assume that the seventh port listed in the serverindex.xml
# is the one we should run servlet/snoop on.
#
GetWASSnoopPort() {
    # Keep only the seventh port reported by GetWASPorts.
    GetWASPorts "$@" | sed '7!d'
}
#
# Return information on the processname/id for the WAS ports
#
# pid/java is the expected output. Several lines, one per port...
#
#
WASPortInfo() {
# Arguments: the port numbers to look for.
# Prints, for each matching listening socket, whatever netstat shows after
# the LISTEN column.
pat=""
# NOTE(review): "once" is assigned here but not read anywhere in this function.
once=yes
PortCount=0
# Join the ports into an alternation: port1|port2|...
for j in $*
do
case $pat in
"") pat="$j";;
*) pat="$pat|$j";;
esac
PortCount=`expr $PortCount + 1`
done
# Keep only LISTEN lines mentioning one of the ports, then strip everything
# up to and including "LISTEN". netstat's own errors are discarded.
# NOTE(review): the patch pair below swaps the literal egrep for $EGREP,
# which is not defined in this chunk; confirm where it is set.
- netstat -ltnp 2>/dev/null| egrep -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%'
+ netstat -ltnp 2>/dev/null| $EGREP -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%'
}
#
# Return the number of WAS ports which are open
#
CheckWASPortsInUse() {
    # One line of WASPortInfo output per open port; the unquoted echo drops
    # any padding wc puts around the number.
    count=$(WASPortInfo "$@" | wc -l)
    echo $count
}
#
# Return the pid(s) of the processes that have WAS ports open
#
WASPIDs() {
    # WASPortInfo lines look like pid/name: de-duplicate them and keep the
    # part before the slash.
    WASPortInfo "$@" \
        | sort -u \
        | cut -d/ -f1
}
#
# The version of ps that returns all processes and their (long) args
# It's only used by WAS_procs, which isn't used for anything ;-)
#
ps_long() {
# Thin wrapper so the exact ps invocation lives in one place.
ps axww
}
#
# The total set of WAS processes (single server only)
#
WAS_procs() {
    # From the full process listing keep lines that mention both
    # "config=<arg>" and "java" (case-insensitively), then print the first
    # space-delimited field of each.
    ps_long \
        | grep -i "config=$1" \
        | grep -i java \
        | cut -d' ' -f1
}
#
# methods: What methods/operations do we support?
#
WAS_methods() {
    # Operations that are always available, one per line.
    printf '%s\n' start stop status methods validate-all meta-data usage

    # "monitor" is only offered when the wget binary is available.
    if have_binary $WGET; then
        echo " monitor"
    fi
}
#
# Return WAS status (silently)
#
WAS_status() {
    # Succeeds unless the number of open WAS ports is exactly 0.
    WASPorts=$(GetWASPorts $1)
    PortsInUse=$(CheckWASPortsInUse $WASPorts)
    [ "$PortsInUse" != 0 ]
}
#
# Report on WAS status to stdout...
#
WAS_report_status() {
    WASPorts=$(GetWASPorts $1)
    # Number of configured ports; the second echo strips padding from wc.
    PortCount=$(echo $WASPorts | wc -w)
    PortCount=$(echo $PortCount)
    PortsInUse=$(CheckWASPortsInUse $WASPorts)

    # No port open at all: the server is considered stopped.
    if [ "$PortsInUse" = 0 ]; then
        ocf_log debug "WAS: server $1 is stopped."
        return $OCF_NOT_RUNNING
    fi

    # At least one port is open: report the owning pids and whether every
    # configured port is being listened on.
    pids=$(WASPIDs $WASPorts)
    if [ $PortsInUse -ge $PortCount ]; then
        ocf_log debug "WAS: server $1 is running (pid" $pids "et al)."
    else
        ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports."
    fi
    return $OCF_SUCCESS
}
#
# Monitor WAS - does it really seem to be working?
#
# For this we invoke the snoop applet via wget.
#
# This is actually faster than WAS_status above...
#
WAS_monitor() {
    # Clean up the temporary file when the shell exits.
    trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0
    tmpfile=$(maketempfile) || exit 1

    # Fetch the snoop page from the selected WAS port into the temp file.
    SnoopPort=$(GetWASSnoopPort $1)
    output=$($WGET -nv -O$tmpfile http://localhost:$SnoopPort/snoop 2>&1)
    rc=$?

    if [ $rc -ne 0 ]; then
        ocf_log "err" "WAS: $1: wget failure: $output"
        rc=$OCF_ERR_GENERIC
    elif ! grep -i 'user-agent.*Wget' $tmpfile >/dev/null; then
        # The page came back but does not echo our user agent.
        ocf_log "err" "WAS: $1: no user-agent from snoop application"
        rc=$OCF_ERR_GENERIC
    fi
    return $rc
}
#
# Start WAS instance
#
WAS_start() {
    # Launch Arguments:
    # -nowait
    # -quiet
    # -logfile <filename>
    # -replacelog
    # -trace
    # -script [<script filename >] [-background]
    # -timeout <seconds>
    # -statusport <portnumber>
    # -profileName <profile>
    # -help

    # NOTE(review): cmd is only assigned when startServer.sh is executable;
    # otherwise ocf_run below receives whatever cmd already held.
    if [ -x $WAS_BIN/startServer.sh ]; then
        cmd="$WAS_BIN/startServer.sh server1 -profileName $1"
    fi

    # The launcher's exit status is not acted upon: success and failure
    # both lead to the same wait-and-verify step.
    ocf_run $cmd

    if WAS_wait_4_start $STARTTIME "$@"; then
        return $OCF_SUCCESS
    fi
    ocf_log "err" "WAS server $1 did not start correctly"
    return $OCF_ERR_GENERIC
}
#
# Wait for WAS to actually start up.
#
# It seems to take between 30 and 60 seconds for it to
# start up on a trivial WAS instance.
#
WAS_wait_4_start() {
    # $1 is the maximum number of one-second polls; the remaining arguments
    # are passed through to WAS_status.
    max=$1
    retries=0
    shift

    while [ $retries -lt $max ]; do
        WAS_status "$@" && return $OCF_SUCCESS
        sleep 1
        retries=$((retries + 1))
    done

    # Out of retries: the result of one last check is the return status.
    WAS_status "$@"
}
#
# Shut down WAS
#
WAS_stop() {
    # They don't return good return codes...
    # And, they seem to allow anyone to stop WAS (!)
    # Prefer the vendor stop script; without it, kill whatever owns the
    # WAS ports.
    if [ -x $WAS_BIN/stopServer.sh ]; then
        ocf_run $WAS_BIN/stopServer.sh server1 -profileName $1
    else
        WASPorts=$(GetWASPorts $1)
        kill $(WASPIDs $WASPorts)
    fi

    # Success is judged by the ports being closed, not by the exit status
    # of the commands above.
    if ! WAS_status $1; then
        return $OCF_SUCCESS
    fi
    ocf_log "err" "WAS: $1 did not stop correctly"
    return $OCF_ERR_GENERIC
}
#
# Check if the port is valid
#
CheckPort() {
    # Valid means: accepted by ocf_is_decimal and greater than zero.
    ocf_is_decimal "$1" && test $1 -gt 0
}
# Validate the instance parameter held in the global $arg.
#  - When startServer.sh is executable, $arg must name a profile whose
#    serverindex.xml exists and yields at least one valid port number.
#  - When only startupServer.sh is executable, $arg itself must be a valid
#    port number.
#  - With neither script present, validation is assumed to be OK.
# Validation failures terminate the agent via exit (OCF_ERR_ARGS or
# OCF_ERR_CONFIGURED) rather than returning.
WAS_validate_all() {
    if [ -x $WAS_BIN/startServer.sh ]; then
        # $arg should be profile name
        if [ ! -f ${WAS_DIR}/profiles/${arg}/config/cells/${WAS_CELL}/nodes/${WAS_NODE}/serverindex.xml ]; then
            ocf_log err "profile [$arg] does not exist"
            exit $OCF_ERR_ARGS
        fi
        # $arg should specify a valid port number at the very least
        local WASPorts=`GetWASPorts $arg`
        if [ -z "$WASPorts" ]; then
            ocf_log err "No port number specified in configuration file of profile [$arg]"
            exit $OCF_ERR_CONFIGURED
        fi
        local port
        local have_valid_port=false
        for port in $WASPorts; do
            if CheckPort $port; then
                have_valid_port=true
                break
            fi
        done
        if [ "false" = "$have_valid_port" ]; then
            ocf_log err "No valid port number specified in configuration file of profile [$arg]"
            exit $OCF_ERR_CONFIGURED
        fi
    elif [ -x $WAS_BIN/startupServer.sh ]; then
        # $arg should be port number; complain only when it is NOT valid
        # (the test used to be inverted and rejected every valid port).
        if ! CheckPort "$arg"; then
            ocf_log err "Port number is required but [$arg] is not valid port number"
            exit $OCF_ERR_ARGS
        fi
    else
        # Do not know how to validate_all
        ocf_log warn "Do not know how to validate-all, assuming validation OK"
        return $OCF_SUCCESS
    fi
}
#
# 'main' starts here...
#
# Exactly one argument, the operation name, is expected.
if
( [ $# -ne 1 ] )
then
usage
exit $OCF_ERR_ARGS
fi
# These operations don't require OCF instance parameters to be set
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage) usage
exit $OCF_SUCCESS;;
methods) WAS_methods
exit $?;;
*);;
esac
#
# Supply default configuration parameter(s)
#
# Use the configured profile name, or $DEFAULT when none is set.
if
[ -z $OCF_RESKEY_profile ]
then
arg=$DEFAULT
else
arg=$OCF_RESKEY_profile
fi
# The profile must exist as a directory under the installation.
if
[ ! -d ${WAS_DIR}/profiles/$arg ]
then
ocf_log "err" "WAS profile $arg does not exist!"
usage
exit $OCF_ERR_ARGS
fi
WAS_PROFILE_NAME=$arg
# Source the per-profile fsdb script when it exists.
# NOTE(review): WAS_CELL and WAS_NODE, used elsewhere in this agent, are not
# assigned in this file - presumably they come from the scripts sourced
# below; confirm.
if [ "${WAS_PROFILE_NAME:=}" != "" ]; then
WAS_PROFILE_FSDB_SCRIPT=${WAS_DIR}/properties/fsdb/${WAS_PROFILE_NAME}.sh
fi
if [ "${WAS_PROFILE_FSDB_SCRIPT:=}" != "" ] && [ -f ${WAS_PROFILE_FSDB_SCRIPT} ]; then
. ${WAS_PROFILE_FSDB_SCRIPT}
fi
# If WAS_USER_SCRIPT is non-empty at this point, source it as well.
if [ "${WAS_USER_SCRIPT:=}" != "" ]; then
. ${WAS_USER_SCRIPT}
fi
# What kind of method was invoked?
case "$1" in
start) WAS_start $arg
exit $?;;
stop) WAS_stop $arg
exit $?;;
status) WAS_report_status $arg
exit $?;;
monitor) WAS_monitor $arg
exit $?;;
validate-all) WAS_validate_all $arg
exit $?;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/docker b/heartbeat/docker
index 50523db93..d51c46897 100755
--- a/heartbeat/docker
+++ b/heartbeat/docker
@@ -1,605 +1,605 @@
#!/bin/sh
#
# The docker HA resource agent creates and launches a docker container
# based off a supplied docker image. Containers managed by this agent
# are both created and removed upon the agent's start and stop actions.
#
# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Parameter defaults
# "reuse" defaults to 0; the ${var=value} form below assigns the default
# only when OCF_RESKEY_reuse is unset, so an explicitly provided value wins.
OCF_RESKEY_reuse_default="0"
: ${OCF_RESKEY_reuse=${OCF_RESKEY_reuse_default}}
#######################################################################
# Print the OCF resource-agent metadata (XML) for the docker agent on stdout.
# The heredoc is unquoted, so ${OCF_RESKEY_reuse_default} is expanded into
# the default of the "reuse" parameter.
meta_data()
{
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="docker" version="1.0">
<version>1.0</version>
<longdesc lang="en">
The docker HA resource agent creates and launches a docker container
based off a supplied docker image. Containers managed by this agent
are both created and removed upon the agent's start and stop actions.
</longdesc>
<shortdesc lang="en">Docker container resource agent.</shortdesc>
<parameters>
<parameter name="image" required="1" unique="0">
<longdesc lang="en">
The docker image to base this container off of.
</longdesc>
<shortdesc lang="en">docker image</shortdesc>
<content type="string"/>
</parameter>
<parameter name="name" required="0" unique="0">
<longdesc lang="en">
The name to give the created container. By default this will
be that resource's instance name.
</longdesc>
<shortdesc lang="en">docker container name</shortdesc>
<content type="string"/>
</parameter>
<parameter name="allow_pull" unique="0">
<longdesc lang="en">
Allow the image to be pulled from the configured docker registry when
the image does not exist locally. NOTE, this can drastically increase
the time required to start the container if the image repository is
pulled over the network.
</longdesc>
<shortdesc lang="en">Allow pulling non-local images</shortdesc>
<content type="boolean"/>
</parameter>
<parameter name="run_opts" required="0" unique="0">
<longdesc lang="en">
Add options to be appended to the 'docker run' command which is used
when creating the container during the start action. This option allows
users to do things such as setting a custom entry point and injecting
environment variables into the newly created container. Note the '-d'
option is supplied regardless of this value to force containers to run
in the background.
NOTE: Do not explicitly specify the --name argument in the run_opts. This
agent will set --name using either the resource's instance or the name
provided in the 'name' argument of this agent.
</longdesc>
<shortdesc lang="en">run options</shortdesc>
<content type="string"/>
</parameter>
<parameter name="run_cmd" required="0" unique="0">
<longdesc lang="en">
Specify a command to launch within the container once
it has initialized.
</longdesc>
<shortdesc lang="en">run command</shortdesc>
<content type="string"/>
</parameter>
<parameter name="mount_points" required="0" unique="0">
<longdesc lang="en">
A comma separated list of directories that the container is expecting to use.
The agent will ensure they exist by running 'mkdir -p'
</longdesc>
<shortdesc lang="en">Required mount points</shortdesc>
<content type="string"/>
</parameter>
<parameter name="monitor_cmd" required="0" unique="0">
<longdesc lang="en">
Specify the full path of a command to launch within the container to check
the health of the container. This command must return 0 to indicate that
the container is healthy. A non-zero return code will indicate that the
container has failed and should be recovered.
If 'docker exec' is supported, it is used to execute the command. If not,
nsenter is used.
Note: Using this method for monitoring processes inside a container
is not recommended, as containerd tries to track processes running
inside the container and does not deal well with many short-lived
processes being spawned. Ensure that your container monitors its
own processes and terminates on fatal error rather than invoking
a command from the outside.
</longdesc>
<shortdesc lang="en">monitor command</shortdesc>
<content type="string"/>
</parameter>
<parameter name="force_kill" required="0" unique="0">
<longdesc lang="en">
Kill a container immediately rather than waiting for it to gracefully
shutdown
</longdesc>
<shortdesc lang="en">force kill</shortdesc>
<content type="boolean"/>
</parameter>
<parameter name="reuse" required="0" unique="0">
<longdesc lang="en">
Allow the container to be reused once it is stopped. By default,
containers get removed once they are stopped. Enable this option
to have the particular one persist when this happens.
</longdesc>
<shortdesc lang="en">reuse container</shortdesc>
<content type="boolean" default="${OCF_RESKEY_reuse_default}"/>
</parameter>
<parameter name="query_docker_health" required="0" unique="0">
<longdesc lang="en">
Query the builtin healthcheck of docker (v1.12+) to determine health of the
container. If left empty or set to false it will not be used.
The healthcheck itself has to be configured within docker, e.g. via
HEALTHCHECK in Dockerfile. This option just queries in what condition
docker considers the container to be and lets ocf do its thing accordingly.
Note that the time a container is in "starting" state counts against the
monitor timeout.
This is an additional check besides the standard check for the container
to be running, and the optional monitor_cmd check. It doesn't disable or
override them, so all of them (if used) have to come back healthy for the
container to be considered healthy.
</longdesc>
<shortdesc lang="en">use healthcheck</shortdesc>
<content type="boolean"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="90s" />
<action name="stop" timeout="90s" />
<action name="monitor" timeout="30s" interval="30s" depth="0" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="30s" />
</actions>
</resource-agent>
END
}
#######################################################################
# Flag consulted by docker_start: when it equals 1 the image is pulled
# before the container is started. It starts out as 0 here.
REQUIRE_IMAGE_PULL=0
# Print a short usage message listing the supported actions.
docker_usage()
{
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
# Run the optional user-supplied monitor command inside the container.
# Returns OCF_SUCCESS when no command is configured or the command exits 0,
# OCF_ERR_GENERIC on any other failure; exits the agent with OCF_ERR_ARGS
# when the command exits with 127.
monitor_cmd_exec()
{
    local status=$OCF_SUCCESS
    local output

    # Nothing configured, nothing to check.
    [ -z "$OCF_RESKEY_monitor_cmd" ] && return $status

    # Prefer 'docker exec' when this docker supports it; otherwise feed the
    # command to nsenter targeting the container's pid.
    if docker exec --help >/dev/null 2>&1; then
        output=$(docker exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
        status=$?
    else
        output=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --type=container --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
        status=$?
    fi

    case $status in
        0)
            ocf_log debug "monitor cmd passed: exit code = $status"
            ;;
        127)
            ocf_log err "monitor cmd failed (rc=$status), output: $output"
            ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container."
            # there is no recovering from this, exit immediately
            exit $OCF_ERR_ARGS
            ;;
        *)
            ocf_exit_reason "monitor cmd failed (rc=$status), output: $output"
            status=$OCF_ERR_GENERIC
            ;;
    esac
    return $status
}
# Check whether $CONTAINER is known to docker.
# Returns OCF_SUCCESS when 'docker inspect' succeeds and 1 when docker
# reports "No such container"; any other inspect error terminates the agent
# with OCF_ERR_GENERIC.
container_exists()
{
    local errmsg

    # Only stderr is captured; the inspect output itself is discarded.
    errmsg=$(docker inspect --type=container $CONTAINER 2>&1 >/dev/null)
    [ $? -eq $OCF_SUCCESS ] && return $OCF_SUCCESS

    case $errmsg in
        *"No such container"*)
            # Return failure instead of exiting if container does not exist
            return 1
            ;;
        *)
            # Exit if error running command
            ocf_exit_reason "$errmsg"
            exit $OCF_ERR_GENERIC
            ;;
    esac
    return $OCF_SUCCESS
}
# Remove the (inactive) container unless reuse is enabled or the container
# does not exist. Returns 0 when there is nothing to do, otherwise the
# status of the 'docker rm' run.
remove_container()
{
    # never remove the container if we have reuse enabled.
    ocf_is_true "$OCF_RESKEY_reuse" && return 0

    # don't attempt to remove a container that doesn't exist
    container_exists || return 0

    ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
    ocf_run docker rm $CONTAINER
}
# Basic liveness check for $CONTAINER.
# Returns OCF_ERR_INSTALLED when the docker binary is missing, OCF_SUCCESS
# when the container exists and its State.Running attribute is true, and
# OCF_NOT_RUNNING in every other case.
docker_simple_status()
{
    local running

    if [ ! -x "$(command -v docker)" ]; then
        ocf_exit_reason "docker is not installed on this host"
        return $OCF_ERR_INSTALLED
    fi

    # let's first check if the daemon is up and running.
    VERSION_OUT=$(docker version)
    version_ret=$?
    if [ $version_ret -eq 1 ]; then
        ocf_log err "Docker service is not running or in error state while checking for ${CONTAINER}, based on image, ${OCF_RESKEY_image}: ${VERSION_OUT}"
        return $OCF_NOT_RUNNING
    fi

    container_exists || return $OCF_NOT_RUNNING

    # retrieve the 'Running' attribute for the container; a failing inspect
    # means the container was not found, i.e. it is not running
    running=$(docker inspect --type=container --format {{.State.Running}} $CONTAINER 2>/dev/null) || return $OCF_NOT_RUNNING

    # container exists and is running
    ocf_is_true "$running" && return $OCF_SUCCESS

    return $OCF_NOT_RUNNING
}
# docker_health_status: optional check of docker's own health state.
# Returns 0 immediately unless 'query_docker_health' is true. Otherwise polls
# State.Health.Status once per second for as long as it reads "starting" and
# returns $OCF_SUCCESS for "healthy", $OCF_NOT_RUNNING for anything else
# (including a missing container or a failing inspect).
docker_health_status()
{
    local val

    ocf_is_true "$OCF_RESKEY_query_docker_health" || return 0

    container_exists || return $OCF_NOT_RUNNING

    # Wait until the instance has left the "starting" state. There is no
    # internal limit here; the loop only ends when the status changes or
    # inspect fails.
    while :; do
        val=$(docker inspect --type=container --format {{.State.Health.Status}} $CONTAINER 2>/dev/null) || return $OCF_NOT_RUNNING
        [ "$val" = "starting" ] || break
        sleep 1
    done

    # Only an explicit "healthy" counts as success.
    [ "$val" = "healthy" ] && return $OCF_SUCCESS
    return $OCF_NOT_RUNNING
}
# docker_monitor: full monitor action.
# Runs the checks in order (container running, docker health, monitor_cmd)
# and returns the status of the first one that fails, or the status of
# monitor_cmd_exec when the first two pass.
docker_monitor()
{
    docker_simple_status || return $?
    docker_health_status || return $?
    monitor_cmd_exec
}
# docker_create_mounts: create every directory listed in the comma-separated
# $OCF_RESKEY_mount_points (parents included, existing directories are fine).
# IFS is changed only for the duration of this function: declaring it local
# restores the caller's value on return, including the case where it was unset.
docker_create_mounts() {
    local directory
    local IFS=","

    for directory in $OCF_RESKEY_mount_points; do
        # Skip empty entries such as those produced by "a,,b".
        [ -n "$directory" ] || continue
        mkdir -p "$directory"
    done
}
# docker_start: start action.
# Creates the configured mount points, returns $OCF_SUCCESS right away when
# the container is already running, pulls the image when image_exists flagged
# REQUIRE_IMAGE_PULL, then either restarts an existing container (reuse) or
# removes any stale one and runs a new one. Afterwards it loops until
# monitor_cmd_exec succeeds; the loop itself has no iteration limit.
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
docker_start()
{
    local run_opts="-d --name=${CONTAINER}"
    local launch_rc

    docker_create_mounts

    # Already started? Then there is nothing left to do.
    docker_simple_status
    if [ $? -eq $OCF_SUCCESS ]; then
        return $OCF_SUCCESS
    fi

    # Append user supplied run options, if any.
    run_opts="$run_opts${OCF_RESKEY_run_opts:+ $OCF_RESKEY_run_opts}"

    if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then
        ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}"
        if ! docker pull "${OCF_RESKEY_image}"; then
            ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}"
            return $OCF_ERR_GENERIC
        fi
    fi

    if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then
        ocf_log info "starting existing container $CONTAINER."
        ocf_run docker start $CONTAINER
        launch_rc=$?
    else
        # Clear out any leftover container carrying our name; the status
        # check above already established that it is not running.
        remove_container
        ocf_log info "running container $CONTAINER for the first time"
        ocf_run docker run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
        launch_rc=$?
    fi

    if [ $launch_rc -ne 0 ]; then
        ocf_exit_reason "docker failed to launch container"
        return $OCF_ERR_GENERIC
    fi

    # The start only counts once the monitor command passes.
    while :; do
        docker_simple_status
        if [ $? -ne $OCF_SUCCESS ]; then
            ocf_exit_reason "Newly created docker container exited after start"
            return $OCF_ERR_GENERIC
        fi

        monitor_cmd_exec
        if [ $? -eq $OCF_SUCCESS ]; then
            ocf_log notice "Container $CONTAINER started successfully"
            return $OCF_SUCCESS
        fi

        ocf_exit_reason "waiting on monitor_cmd to pass after start"
        sleep 1
    done
}
# docker_stop: stop action.
# A container that is not running is only cleaned up. Otherwise the container
# is killed (force_kill) or stopped with a grace period derived from the
# operation timeout, and then removed. Returns $OCF_SUCCESS or
# $OCF_ERR_GENERIC. Sets the global variable ret as a side effect.
docker_stop()
{
    local timeout=60
    local halt_rc

    docker_simple_status
    ret=$?
    case $ret in
        $OCF_NOT_RUNNING)
            remove_container
            return $OCF_SUCCESS
            ;;
        $OCF_ERR_GENERIC)
            return $OCF_ERR_GENERIC
            ;;
    esac

    # Grace period: operation timeout (given in ms) minus 10s, never below 10s.
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        timeout=$(( $OCF_RESKEY_CRM_meta_timeout / 1000 - 10 ))
        [ $timeout -lt 10 ] && timeout=10
    fi

    if ocf_is_true "$OCF_RESKEY_force_kill"; then
        ocf_run docker kill $CONTAINER
        halt_rc=$?
    else
        ocf_log debug "waiting $timeout second[s] before killing container"
        ocf_run docker stop -t=$timeout $CONTAINER
        halt_rc=$?
    fi

    if [ $halt_rc -ne 0 ]; then
        ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
        return $OCF_ERR_GENERIC
    fi

    if ! remove_container; then
        ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}
# image_exists: decide whether the configured image can be used.
# Splits ${OCF_RESKEY_image} into an optional prefix (everything before the
# last '/'), an image name and a tag (default "latest"), then searches the
# "repository:tag" pairs printed by `docker images` for a match.
# Returns 0 when the image is listed locally, or when it is not listed but
# 'allow_pull' is true (REQUIRE_IMAGE_PULL is then set to 1 so that the start
# action pulls it). Returns 1 otherwise.
image_exists()
{
# if no tag was specified, use default "latest"
local COLON_FOUND=0
local SLASH_FOUND=0
local SERVER_NAME=""
local IMAGE_NAME="${OCF_RESKEY_image}"
local IMAGE_TAG="latest"
# Count the '/' characters in the image reference.
SLASH_FOUND="$(echo "${OCF_RESKEY_image}" | grep -o '/' | grep -c .)"
if [ ${SLASH_FOUND} -ge 1 ]; then
# Fields 1..N of an N-slash string are everything before the last '/';
# the last field is the bare image name (possibly with a tag).
SERVER_NAME="$(echo ${IMAGE_NAME} | cut -d / -f 1-${SLASH_FOUND})"
IMAGE_NAME="$(echo ${IMAGE_NAME} | awk -F'/' '{print $NF}')"
fi
# Colons are counted on the bare image name only, so a "host:port" prefix
# that was split off above is not mistaken for a tag.
COLON_FOUND="$(echo "${IMAGE_NAME}" | grep -o ':' | grep -c .)"
if [ ${COLON_FOUND} -ge 1 ]; then
# The text after the last ':' is the tag, the rest is the name.
IMAGE_TAG="$(echo ${IMAGE_NAME} | awk -F':' '{print $NF}')"
IMAGE_NAME="$(echo ${IMAGE_NAME} | cut -d : -f 1-${COLON_FOUND})"
fi
# IMAGE_NAME might be following formats:
# - image
# - repository:port/image
# - docker.io/image (some distro will display "docker.io/" as prefix)
# NOTE(review): the next two lines are the removed (-) and added (+) forms of
# one statement in the enclosing patch; they differ only in the grep command
# used (egrep vs. $EGREP).
- docker images | awk '{print $1 ":" $2}' | egrep -q -s "^(docker.io\/|${SERVER_NAME}\/)?${IMAGE_NAME}:${IMAGE_TAG}\$"
+ docker images | awk '{print $1 ":" $2}' | $EGREP -q -s "^(docker.io\/|${SERVER_NAME}\/)?${IMAGE_NAME}:${IMAGE_TAG}\$"
if [ $? -eq 0 ]; then
# image found
return 0
fi
if ocf_is_true "$OCF_RESKEY_allow_pull"; then
# Defer the pull to the start action instead of failing validation.
REQUIRE_IMAGE_PULL=1
ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start"
return 0
fi
# image not found.
return 1
}
# docker_validate: validate-all action, also run before start.
# Requires the docker binary and the 'image' parameter. When 'monitor_cmd' is
# configured and this docker has no working `docker exec`, nsenter is required
# as well. Finally the image must be present locally or be pullable (see
# image_exists).
# Exits with $OCF_ERR_CONFIGURED on a missing or unusable image parameter;
# returns $OCF_SUCCESS otherwise.
docker_validate()
{
    check_binary docker

    if [ -z "$OCF_RESKEY_image" ]; then
        ocf_exit_reason "'image' option is required"
        exit $OCF_ERR_CONFIGURED
    fi

    if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
        # Test the exit status of the probe directly. The former
        # `[ ! $? ]` only tested whether the string "$?" was empty, which it
        # never is, so the nsenter check could never run.
        if ! docker exec --help >/dev/null 2>&1; then
            ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified"
            check_binary nsenter
        fi
    fi

    image_exists
    if [ $? -ne 0 ]; then
        ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
        exit $OCF_ERR_CONFIGURED
    fi

    return $OCF_SUCCESS
}
# TODO:
# With globally-unique clones, several instances started on one node cannot be
# given distinct 'name' parameters.
# With 'reuse' set, the resource agent cannot associate several clone
# instances with a single container.
if ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
    if [ -n "$OCF_RESKEY_name" ]; then
        # An explicit name can only serve one instance per node.
        if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ]
        then
            ocf_exit_reason "Cannot make plural clones from the same name parameter."
            exit $OCF_ERR_CONFIGURED
        fi
        if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ] && [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ]
        then
            ocf_exit_reason "Cannot make plural master from the same name parameter."
            exit $OCF_ERR_CONFIGURED
        fi
    fi
    # Default name: the resource instance with every ':' replaced by '-'.
    : ${OCF_RESKEY_name=`echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-'`}
else
    : ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}}
fi

if [ -n "$OCF_RESKEY_container" ]; then
    # we'll keep the container attribute around for a bit in order not to break
    # any existing deployments. The 'name' attribute is preferred now though.
    CONTAINER=$OCF_RESKEY_container
    ocf_log warn "The 'container' attribute is deprecated"
else
    CONTAINER=$OCF_RESKEY_name
fi

# Dispatch the requested action; the status of the handler becomes the exit
# code of the agent unless the branch exits on its own.
case $__OCF_ACTION in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    start)
        docker_validate
        docker_start
        ;;
    stop)
        docker_stop
        ;;
    monitor)
        docker_monitor
        ;;
    validate-all)
        docker_validate
        ;;
    usage|help)
        docker_usage
        exit $OCF_SUCCESS
        ;;
    *)
        docker_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/eDir88.in b/heartbeat/eDir88.in
index cd945d2c3..9a21ff852 100644
--- a/heartbeat/eDir88.in
+++ b/heartbeat/eDir88.in
@@ -1,476 +1,476 @@
#!@BASH_SHELL@
#
# eDirectory Resource Agent (RA) for Heartbeat.
# This script is only compatible with eDirectory 8.8 and later
#
# Copyright (c) 2007 Novell Inc, Yan Fitterer
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#
# OCF parameters:
# OCF_RESKEY_eDir_config_file - full filename to instance configuration file
# OCF_RESKEY_eDir_monitor_ldap - Should we monitor LDAP (0/1 - 1 is true)
# OCF_RESKEY_eDir_monitor_idm - Should we monitor IDM (0/1 - 1 is true)
# OCF_RESKEY_eDir_jvm_initial_heap - Value of the DHOST_JVM_INITIAL_HEAP java env var
# OCF_RESKEY_eDir_jvm_max_heap - Value of the DHOST_JVM_MAX_HEAP java env var
# OCF_RESKEY_eDir_jvm_options - Value of the DHOST_JVM_OPTIONS java env var
###############################################################################
#######################################################################
# Initialization:
# Locate and load the shared OCF shell function library. OCF_FUNCTIONS_DIR is
# only given a value here when the caller has not set it already.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Source the ndspath script shipped with eDirectory when it is present; all of
# its output is discarded.
test -f /opt/novell/eDirectory/bin/ndspath &&
. /opt/novell/eDirectory/bin/ndspath 2>/dev/null >/dev/null
# Parameter defaults
OCF_RESKEY_eDir_config_file_default="/etc/opt/novell/eDirectory/conf/nds.conf"
OCF_RESKEY_eDir_monitor_ldap_default="0"
OCF_RESKEY_eDir_monitor_idm_default="0"
OCF_RESKEY_eDir_jvm_initial_heap_default=""
OCF_RESKEY_eDir_jvm_max_heap_default=""
OCF_RESKEY_eDir_jvm_options_default=""
# Apply each default only when the parameter is unset (a parameter that is set
# but empty is left as it is).
: ${OCF_RESKEY_eDir_config_file=${OCF_RESKEY_eDir_config_file_default}}
: ${OCF_RESKEY_eDir_monitor_ldap=${OCF_RESKEY_eDir_monitor_ldap_default}}
: ${OCF_RESKEY_eDir_monitor_idm=${OCF_RESKEY_eDir_monitor_idm_default}}
: ${OCF_RESKEY_eDir_jvm_initial_heap=${OCF_RESKEY_eDir_jvm_initial_heap_default}}
: ${OCF_RESKEY_eDir_jvm_max_heap=${OCF_RESKEY_eDir_jvm_max_heap_default}}
: ${OCF_RESKEY_eDir_jvm_options=${OCF_RESKEY_eDir_jvm_options_default}}
#######################################################################
# usage: print a short description of the supported actions to stdout.
# Sets the global variable ME to the base name of the running script.
usage() {
ME=$(basename "$0")
# "<<-" strips leading tab characters from the here-document lines.
cat <<-EOFA
usage: $ME start|stop|status|monitor|validate-all
$ME manages an eDirectory instance as an HA resource.
The 'start' operation starts the instance.
The 'stop' operation stops the instance.
The 'status' operation reports if the instance is running.
The 'monitor' operation reports if the instance is running, and runs additional checks.
The 'validate-all' operation checks the validity of the arguments (environment variables).
EOFA
}
# eDir_meta_data: print the OCF resource agent metadata (XML) to stdout.
# Parameter defaults are filled in from the OCF_RESKEY_*_default variables.
# The JVM parameters are described with the environment variable names that
# eDir_start actually exports (DHOST_JVM_*).
# Always returns $OCF_SUCCESS.
eDir_meta_data() {
cat <<-EOFB
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="eDir88" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Resource script for managing an eDirectory instance. Manages a single instance
of eDirectory as an HA resource. The "multiple instances" feature of
eDirectory has been added in version 8.8. This script will not work for any
version of eDirectory prior to 8.8. This RA can be used to load multiple
eDirectory instances on the same host.
It is very strongly recommended to put eDir configuration files (as per the
eDir_config_file parameter) on local storage on each node. This is necessary for
this RA to be able to handle situations where the shared storage has become
unavailable. If the eDir configuration file is not available, this RA will fail,
and heartbeat will be unable to manage the resource. Side effects include
STONITH actions, unmanageable resources, etc...
Setting a high action timeout value is _very_ _strongly_ recommended. eDir
with IDM can take in excess of 10 minutes to start. If heartbeat times out
before eDir has had a chance to start properly, mayhem _WILL ENSUE_.
The LDAP module seems to be one of the very last to start. So this script will
take even longer to start on installations with IDM and LDAP if the monitoring
of IDM and/or LDAP is enabled, as the start command will wait for IDM and LDAP
to be available.
</longdesc>
<shortdesc lang="en">Manages a Novell eDirectory directory server</shortdesc>
<parameters>
<parameter name="eDir_config_file" unique="1" required="0">
<longdesc lang="en">
Path to configuration file for eDirectory instance.
</longdesc>
<shortdesc lang="en">eDir config file</shortdesc>
<content type="string" default="${OCF_RESKEY_eDir_config_file_default}" />
</parameter>
<parameter name="eDir_monitor_ldap" required="0">
<longdesc lang="en">
Should we monitor if LDAP is running for the eDirectory instance?
</longdesc>
<shortdesc lang="en">eDir monitor ldap</shortdesc>
<content type="boolean" default="${OCF_RESKEY_eDir_monitor_ldap_default}" />
</parameter>
<parameter name="eDir_monitor_idm" required="0">
<longdesc lang="en">
Should we monitor if IDM is running for the eDirectory instance?
</longdesc>
<shortdesc lang="en">eDir monitor IDM</shortdesc>
<content type="boolean" default="${OCF_RESKEY_eDir_monitor_idm_default}" />
</parameter>
<parameter name="eDir_jvm_initial_heap" required="0">
<longdesc lang="en">
Value for the DHOST_JVM_INITIAL_HEAP java environment variable. If unset, java defaults will be used.
</longdesc>
<shortdesc lang="en">DHOST_JVM_INITIAL_HEAP value</shortdesc>
<content type="integer" default="${OCF_RESKEY_eDir_jvm_initial_heap_default}" />
</parameter>
<parameter name="eDir_jvm_max_heap" required="0">
<longdesc lang="en">
Value for the DHOST_JVM_MAX_HEAP java environment variable. If unset, java defaults will be used.
</longdesc>
<shortdesc lang="en">DHOST_JVM_MAX_HEAP value</shortdesc>
<content type="integer" default="${OCF_RESKEY_eDir_jvm_max_heap_default}" />
</parameter>
<parameter name="eDir_jvm_options" required="0">
<longdesc lang="en">
Value for the DHOST_JVM_OPTIONS java environment variable. If unset, original values will be used.
</longdesc>
<shortdesc lang="en">DHOST_JVM_OPTIONS value</shortdesc>
<content type="string" default="${OCF_RESKEY_eDir_jvm_options_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="600s" />
<action name="stop" timeout="600s" />
<action name="monitor" timeout="60s" interval="30s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="5s" />
</actions>
</resource-agent>
EOFB
return $OCF_SUCCESS
}
#
# eDir_start: Start eDirectory instance
#
# Returns $OCF_SUCCESS when the instance is already running or once
# eDir_monitor succeeds after the start command; returns $OCF_ERR_GENERIC when
# the start command itself fails. The wait loop has no limit of its own and
# relies on the cluster's start timeout.
# Uses the global counter CNT.
#
eDir_start() {
    if eDir_status ; then
        ocf_log info "eDirectory is already running ($NDSCONF)."
        return $OCF_SUCCESS
    fi

    # Hand the optional JVM settings to the instance through its environment.
    if [ -n "$OCF_RESKEY_eDir_jvm_initial_heap" ]; then
        DHOST_JVM_INITIAL_HEAP=$OCF_RESKEY_eDir_jvm_initial_heap
        export DHOST_JVM_INITIAL_HEAP
    fi
    if [ -n "$OCF_RESKEY_eDir_jvm_max_heap" ]; then
        DHOST_JVM_MAX_HEAP=$OCF_RESKEY_eDir_jvm_max_heap
        export DHOST_JVM_MAX_HEAP
    fi
    if [ -n "$OCF_RESKEY_eDir_jvm_options" ]; then
        DHOST_JVM_OPTIONS=$OCF_RESKEY_eDir_jvm_options
        export DHOST_JVM_OPTIONS
    fi

    # Start eDirectory instance
    if $NDSMANAGE start --config-file "$NDSCONF" > /dev/null 2>&1; then
        ocf_log info "eDir start command sent for $NDSCONF."
    else
        # Report through ocf_log like every other failure in this agent,
        # rather than printing to stdout.
        ocf_log err "Can't start eDirectory for $NDSCONF."
        return $OCF_ERR_GENERIC
    fi

    CNT=0
    while ! eDir_monitor ; do
        # Apparently, LDAP will only start after all other services
        # Startup time can be in excess of 10 minutes.
        # Leave a very long heartbeat timeout on the start action
        # We're relying on heartbeat to bail us out...
        CNT=$((CNT + 1))
        ocf_log info "eDirectory start waiting for ${CNT}th retry for $NDSCONF."
        sleep 10
    done

    ocf_log info "eDirectory start verified for $NDSCONF."
    return $OCF_SUCCESS
}
#
# eDir_stop: Stop eDirectory instance
# Returns $OCF_SUCCESS when eDir_status reports the instance as not running,
# either before or after the stop command has been issued, and
# $OCF_ERR_GENERIC when the instance is still up afterwards.
#
eDir_stop() {
    # Already down: nothing to stop.
    eDir_status || return $OCF_SUCCESS

    $NDSMANAGE stop --config-file "$NDSCONF" >/dev/null 2>&1

    # The outcome is judged by a fresh status check, not by the exit code of
    # the stop command.
    if ! eDir_status ; then
        ocf_log info "eDirectory stop verified for $NDSCONF."
        return $OCF_SUCCESS
    fi

    # eDir failed to stop.
    ocf_log err "eDirectory instance failed to stop for $NDSCONF"
    return $OCF_ERR_GENERIC
}
#
# eDir_status: is eDirectory instance up ?
#
# Returns 0 when exactly one listening ndsd socket is found for every
# interface listed in the configuration file, and 1 when no socket matches and
# no ndsd process is found through the pid file. Every inconsistent situation
# (unreadable or incomplete configuration, several sockets for one interface,
# only some interfaces matched, process without socket) makes the agent exit
# with $OCF_ERR_GENERIC.
# Uses the globals IFACE, IFACE2, counter, NDSD_SOCKS, PIDDIR, NDSD_PID and RC.
#
eDir_status() {
    if [ ! -r "$NDSCONF" ] ; then
        ocf_log err "Config file missing ($NDSCONF)."
        exit $OCF_ERR_GENERIC
    fi

    # Find how many ndsd processes have open listening sockets
    # with the IP of this eDir instance
    # ('@' in the configured value is turned into ':' so that it can be
    # matched against the netstat output below).
    IFACE=$(grep -i "n4u.server.interfaces" "$NDSCONF" | cut -f2 -d= | tr '@' ':')
    if [ -z "$IFACE" ] ; then
        ocf_log err "Cannot retrieve interfaces from $NDSCONF. eDirectory may not be correctly configured."
        exit $OCF_ERR_GENERIC
    fi

    # In case of multiple IP's split into an array
    # and check all of them
    IFS=', ' read -a IFACE2 <<< "$IFACE"
    ocf_log debug "Found ${#IFACE2[@]} interfaces from $NDSCONF."

    # counter starts at the number of interfaces and is decremented for each
    # one that has exactly one matching socket.
    counter=${#IFACE2[@]}
    for IFACE in "${IFACE2[@]}"
    do
        ocf_log debug "Checking ndsd instance for $IFACE"
        NDSD_SOCKS=$(netstat -ntlp | grep -ce "$IFACE.*ndsd")
        if [ "$NDSD_SOCKS" -eq 1 ] ; then
            let counter=counter-1
            ocf_log debug "Found ndsd instance for $IFACE"
        elif [ "$NDSD_SOCKS" -gt 1 ] ; then
            ocf_log err "More than 1 ndsd listening socket matched. Likely misconfiguration of eDirectory."
            exit $OCF_ERR_GENERIC
        fi
    done

    if [ $counter -eq 0 ] ; then
        # Correct ndsd instance is definitely running
        ocf_log debug "All ndsd instances found."
        return 0;
    elif [ $counter -lt ${#IFACE2[@]} ]; then
        ocf_log err "Only some ndsd listening sockets matched, something is very wrong."
        exit $OCF_ERR_GENERIC
    fi

    # No listening socket. Make sure we don't have the process running...
    PIDDIR=$(grep -i "n4u.server.vardir" "$NDSCONF" | cut -f2 -d=)
    if [ -z "$PIDDIR" ] ; then
        ocf_log err "Cannot get vardir from nds config ($NDSCONF). Probable eDir configuration error."
        exit $OCF_ERR_GENERIC
    fi

    NDSD_PID=$(cat $PIDDIR/ndsd.pid 2>/dev/null)
    if [ -z "$NDSD_PID" ] ; then
        # PID file unavailable or empty.
        # This will happen if the PIDDIR is not available
        # on this node at this time.
        return 1
    fi

    RC=$(ps -p "$NDSD_PID" | grep -c ndsd)
    if [ "$RC" -gt 0 ] ; then
        # process found but no listening socket. ndsd likely not operational
        ocf_log err "ndsd process found, but no listening socket. Something's gone wrong ($NDSCONF)"
        exit $OCF_ERR_GENERIC
    fi

    ocf_log debug "ndsd instance is not running, but no other error detected."
    return 1
}
#
# eDir_monitor: Do more in-depth checks to ensure that eDirectory is fully functional
# LDAP and IDM checks are only done if requested.
# Returns $OCF_NOT_RUNNING when eDir_status reports the instance as down, 1
# when the ndsstat health check fails, $OCF_ERR_GENERIC when a requested IDM or
# LDAP check fails, and $OCF_SUCCESS otherwise.
#
eDir_monitor() {
if ! eDir_status ; then
ocf_log info "eDirectory instance is down ($NDSCONF)"
return $OCF_NOT_RUNNING
fi
# We know the right ndsd is running locally, check health
$NDSSTAT --config-file "$NDSCONF" >/dev/null 2>&1
if [ $? -ne 0 ] ; then
return 1
fi
# Monitor IDM first, as it will start before LDAP
if [ $MONITOR_IDM -eq 1 ]; then
# The module list must contain a line starting with "vrdim" that also
# mentions "Running" (matched case-insensitively); RET is the first field of
# that line.
# NOTE(review): the next two lines are the removed (-) and added (+) forms of
# one statement in the enclosing patch; they differ only in the grep command
# used (egrep vs. $EGREP).
- RET=$($NDSTRACE --config-file "$NDSCONF" -c modules | egrep -i '^vrdim.*Running' | awk '{print $1}')
+ RET=$($NDSTRACE --config-file "$NDSCONF" -c modules | $EGREP -i '^vrdim.*Running' | awk '{print $1}')
if [ "$RET" != "vrdim" ]; then
ocf_log err "eDirectory IDM engine isn't running ($NDSCONF)."
return $OCF_ERR_GENERIC
fi
fi
if [ $MONITOR_LDAP -eq 1 ] ; then
# The exit status of the nldap call decides whether LDAP counts as running.
$NDSNLDAP -c --config-file "$NDSCONF" >/dev/null 2>&1
if [ $? -ne 0 ]; then
ocf_log err "eDirectory LDAP server isn't running ($NDSCONF)."
return $OCF_ERR_GENERIC
fi
fi
ocf_log debug "eDirectory monitor success ($NDSCONF)"
return $OCF_SUCCESS
}
#
# eDir_validate: Validate environment
#
# Checks that the script runs as root, that ndsmanage and ndsstat pass
# check_binary, that the configuration file is readable and that the monitor
# and heap parameters hold acceptable values. As a side effect MONITOR_LDAP and
# MONITOR_IDM are normalized to 0 or 1.
# Returns $OCF_SUCCESS, or the code of the last problem found
# ($OCF_ERR_GENERIC for the root check, $OCF_ERR_ARGS for the others).
#
eDir_validate() {
    declare rc=$OCF_SUCCESS

    # Script must be run as root
    if ! ocf_is_root ; then
        ocf_log err "$0 must be run as root"
        rc=$OCF_ERR_GENERIC
    fi

    # ndsmanage must be available and runnable
    check_binary $NDSMANAGE

    # ndsstat must be available and runnable
    check_binary $NDSSTAT

    # Config file must be readable
    if [ ! -r "$NDSCONF" ] ; then
        ocf_log err "eDirectory configuration file [$NDSCONF] is not readable"
        rc=$OCF_ERR_ARGS
    fi

    # monitor_ldap must be unambiguously resolvable to a truth value
    # (the tr sets are quoted so that the shell cannot expand them as
    # filename patterns)
    MONITOR_LDAP=$(echo "$MONITOR_LDAP" | tr 'A-Z' 'a-z')
    case "$MONITOR_LDAP" in
        yes|true|1)
            MONITOR_LDAP=1;;
        no|false|0)
            MONITOR_LDAP=0;;
        *)
            ocf_log err "Configuration parameter eDir_monitor_ldap has invalid value [$MONITOR_LDAP]"
            rc=$OCF_ERR_ARGS;;
    esac

    # monitor_idm must be unambiguously resolvable to a truth value
    MONITOR_IDM=$(echo "$MONITOR_IDM" | tr 'A-Z' 'a-z')
    case "$MONITOR_IDM" in
        yes|true|1)
            MONITOR_IDM=1;;
        no|false|0)
            MONITOR_IDM=0;;
        *)
            ocf_log err "Configuration parameter eDir_monitor_idm has invalid value [$MONITOR_IDM]"
            rc=$OCF_ERR_ARGS;;
    esac

    # eDir_jvm_initial_heap must be blank or numeric
    if [ -n "$OCF_RESKEY_eDir_jvm_initial_heap" ] ; then
        if ! ocf_is_decimal "$OCF_RESKEY_eDir_jvm_initial_heap" ; then
            ocf_log err "Configuration parameter eDir_jvm_initial_heap has invalid" \
                "value [$OCF_RESKEY_eDir_jvm_initial_heap]"
            rc=$OCF_ERR_ARGS
        fi
    fi

    # eDir_jvm_max_heap must be blank or numeric
    if [ -n "$OCF_RESKEY_eDir_jvm_max_heap" ] ; then
        if ! ocf_is_decimal "$OCF_RESKEY_eDir_jvm_max_heap" ; then
            ocf_log err "Configuration parameter eDir_jvm_max_heap has invalid" \
                "value [$OCF_RESKEY_eDir_jvm_max_heap]"
            rc=$OCF_ERR_ARGS
        fi
    fi

    if [ $rc -ne $OCF_SUCCESS ] ; then
        ocf_log err "Invalid environment"
    fi
    return $rc
}
#
# Start of main logic
#
# The action given as $1 is dispatched in two phases: actions that need no
# validated environment are handled (and exit) in the first case statement;
# start, stop and monitor fall through, are validated with eDir_validate and
# are then run by the second case statement.
#
ocf_log debug "$0 started with arguments \"$*\""
# Locations of the eDirectory tools used by this agent.
NDSBASE=/opt/novell/eDirectory
NDSNLDAP=$NDSBASE/sbin/nldap
NDSMANAGE=$NDSBASE/bin/ndsmanage
NDSSTAT=$NDSBASE/bin/ndsstat
NDSTRACE=$NDSBASE/bin/ndstrace
# Working copies of the parameters; an unset or empty parameter falls back to
# the value given after ":-".
NDSCONF=${OCF_RESKEY_eDir_config_file:-/etc/opt/novell/eDirectory/conf/nds.conf}
MONITOR_LDAP=${OCF_RESKEY_eDir_monitor_ldap:-0}
MONITOR_IDM=${OCF_RESKEY_eDir_monitor_idm:-0}
# What kind of method was invoked?
case "$1" in
validate-all) eDir_validate; exit $?;;
meta-data) eDir_meta_data; exit $OCF_SUCCESS;;
status) if eDir_status ; then
ocf_log info "eDirectory instance is up ($NDSCONF)"
exit $OCF_SUCCESS
else
ocf_log info "eDirectory instance is down ($NDSCONF)"
exit $OCF_NOT_RUNNING
fi;;
# The next three actions do nothing here and continue below.
start) : skip;;
stop) : skip;;
monitor) : skip;;
usage) usage; exit $OCF_SUCCESS;;
*) ocf_log err "Invalid argument [$1]"
usage; exit $OCF_ERR_ARGS;;
esac
# From now on we must have a valid environment to continue.
# NOTE(review): the original remark here said that stop is handled in the list
# above so that it can clean up after a start that failed due to bad args.
# As written, however, stop is only skipped above and goes through
# eDir_validate like start and monitor - confirm which behavior is intended.
eDir_validate
RC=$?
if [ $RC -ne $OCF_SUCCESS ]; then
exit $RC
fi
# Run the validated action; its status becomes the exit code of the agent.
case "$1" in
start) eDir_start;;
stop) eDir_stop;;
monitor) eDir_monitor;;
esac
exit $?
diff --git a/heartbeat/mysql-proxy b/heartbeat/mysql-proxy
index 013c5e4ec..2815860d7 100755
--- a/heartbeat/mysql-proxy
+++ b/heartbeat/mysql-proxy
@@ -1,741 +1,741 @@
#!/bin/sh
#
# Resource script for MySQL Proxy
#
# Description: Manages MySQL Proxy as an OCF resource in
# a high-availability setup.
#
# Tested with MySQL Proxy 0.8.1 and 0.8.3 on Debian 6.0.
#
# Based on the mysql and Pure-Ftpd OCF resource agents.
#
# Author: Raoul Bhatia <r.bhatia@ipax.at> : Original Author
# License: GNU General Public License (GPL)
#
#
# usage: $0 {start|stop|reload|status|monitor|validate-all|meta-data}
#
# The "start" arg starts a MySQL Proxy instance
#
# The "stop" arg stops it.
#
# TODO
# * add in-depth monitoring by querying the mysql-proxy admin port
#
# Test via
# (note: this did not work with MySQL Proxy 0.8.1 and ocf-tester from resource-agents 3.9.2 on Debian 6.0)
#
# * /usr/sbin/ocf-tester -n mp -o binary="/usr/sbin/mysql-proxy" -o defaults_file="" -o parameters="--proxy-skip-profiling" \
# -o admin_address="127.0.0.1:4041" -o admin_username="root" -o admin_password="la" -o admin_lua_script="/usr/lib/mysql-proxy/lua/admin.lua" \
# -o proxy_backend_addresses="192.168.100.200:42006" -o proxy_address="/var/run/mysqld/mysqld.sock" /usr/lib/ocf/resource.d/heartbeat/mysql-proxy
#
#
# OCF parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_client_binary
# OCF_RESKEY_defaults_file
# OCF_RESKEY_proxy_backend_addresses
# OCF_RESKEY_proxy_read_only_backend_addresses
# OCF_RESKEY_proxy_address
# OCF_RESKEY_log_level
# OCF_RESKEY_keepalive
# OCF_RESKEY_plugins
# OCF_RESKEY_admin_address
# OCF_RESKEY_admin_username
# OCF_RESKEY_admin_password
# OCF_RESKEY_admin_lua_script
# OCF_RESKEY_test_table
# OCF_RESKEY_test_user
# OCF_RESKEY_test_passwd
# OCF_RESKEY_parameters
# OCF_RESKEY_pidfile
#
##########################################################################
# Initialization:
# Locate and load the shared OCF shell function library. OCF_FUNCTIONS_DIR is
# only given a value here when the caller has not set it already.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Parameter defaults
OCF_RESKEY_binary_default="/usr/sbin/mysql-proxy"
OCF_RESKEY_client_binary_default="mysql"
OCF_RESKEY_defaults_file_default=""
OCF_RESKEY_proxy_backend_addresses_default="127.0.0.1:3306"
OCF_RESKEY_proxy_read_only_backend_addresses_default=""
OCF_RESKEY_proxy_address_default=":4040"
OCF_RESKEY_log_level_default=""
OCF_RESKEY_keepalive_default=""
OCF_RESKEY_plugins_default=""
OCF_RESKEY_admin_address_default="127.0.0.1:4041"
OCF_RESKEY_admin_username_default=""
OCF_RESKEY_admin_password_default=""
OCF_RESKEY_admin_lua_script_default=""
OCF_RESKEY_test_table_default="mysql.user"
OCF_RESKEY_test_user_default=""
OCF_RESKEY_test_passwd_default=""
OCF_RESKEY_parameters_default=""
# The default pid file name is derived from the resource instance name.
OCF_RESKEY_pidfile_default="${HA_RSCTMP}/mysql-proxy-${OCF_RESOURCE_INSTANCE}.pid"
# Apply each default only when the parameter is unset (a parameter that is set
# but empty is left as it is).
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}}
: ${OCF_RESKEY_defaults_file=${OCF_RESKEY_defaults_file_default}}
: ${OCF_RESKEY_proxy_backend_addresses=${OCF_RESKEY_proxy_backend_addresses_default}}
: ${OCF_RESKEY_proxy_read_only_backend_addresses=${OCF_RESKEY_proxy_read_only_backend_addresses_default}}
: ${OCF_RESKEY_proxy_address=${OCF_RESKEY_proxy_address_default}}
: ${OCF_RESKEY_log_level=${OCF_RESKEY_log_level_default}}
: ${OCF_RESKEY_keepalive=${OCF_RESKEY_keepalive_default}}
: ${OCF_RESKEY_plugins=${OCF_RESKEY_plugins_default}}
: ${OCF_RESKEY_admin_address=${OCF_RESKEY_admin_address_default}}
: ${OCF_RESKEY_admin_username=${OCF_RESKEY_admin_username_default}}
: ${OCF_RESKEY_admin_password=${OCF_RESKEY_admin_password_default}}
: ${OCF_RESKEY_admin_lua_script=${OCF_RESKEY_admin_lua_script_default}}
: ${OCF_RESKEY_test_table=${OCF_RESKEY_test_table_default}}
: ${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}}
: ${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}}
: ${OCF_RESKEY_parameters=${OCF_RESKEY_parameters_default}}
: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}}
# One-line usage text printed by usage().
USAGE="Usage: $0 {start|stop|reload|status|monitor|validate-all|meta-data}"
##########################################################################
# usage: write the one-line usage text held in $USAGE to stderr.
usage() {
    # $USAGE is expanded unquoted, so runs of whitespace collapse.
    echo $USAGE 1>&2
}
# meta_data: print the OCF resource agent metadata (XML) to stdout.
# The default attribute of every parameter is filled in from the matching
# OCF_RESKEY_*_default variable.
meta_data() {
# Unquoted delimiter: variables inside the here-document are expanded.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mysql-proxy" version="0.1">
<version>1.0</version>
<longdesc lang="en">
This script manages MySQL Proxy as an OCF resource in a high-availability setup.
The default monitor operation will verify that mysql-proxy is running.
The level 10 monitor operation is left out intentionally for possible future enhancements in conjunction with the admin plugin.
The level 20 monitor operation will perform a SELECT on a given table to verify that the connection to a back-end server is actually working.
Tested with MySQL Proxy 0.8.1 and 0.8.3 on Debian 6.0.
</longdesc>
<shortdesc lang="en">Manages a MySQL Proxy instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Full path to the MySQL Proxy binary.
For example, "/usr/sbin/mysql-proxy".
</longdesc>
<shortdesc lang="en">Full path to MySQL Proxy binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary.
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="defaults_file" unique="0" required="0">
<longdesc lang="en">
Full path to a MySQL Proxy configuration file.
For example, "/etc/mysql-proxy.conf".
</longdesc>
<shortdesc lang="en">Full path to configuration file</shortdesc>
<content type="string" default="${OCF_RESKEY_defaults_file_default}" />
</parameter>
<parameter name="proxy_backend_addresses" unique="0" required="0">
<longdesc lang="en">
Address:port of the remote back-end servers (default: 127.0.0.1:3306).
</longdesc>
<shortdesc lang="en">MySQL Proxy back-end servers</shortdesc>
<content type="string" default="${OCF_RESKEY_proxy_backend_addresses_default}" />
</parameter>
<parameter name="proxy_read_only_backend_addresses" unique="0" required="0">
<longdesc lang="en">
Address:port of the remote (read only) unpromoted-server (default: ).
</longdesc>
<shortdesc lang="en">MySql Proxy read only back-end servers</shortdesc>
<content type="string" default="${OCF_RESKEY_proxy_read_only_backend_addresses_default}" />
</parameter>
<parameter name="proxy_address" unique="0" required="0">
<longdesc lang="en">
Listening address:port of the proxy server (default: :4040).
You can also specify a socket like "/var/run/mysql-proxy.sock".
</longdesc>
<shortdesc lang="en">MySQL Proxy listening address</shortdesc>
<content type="string" default="${OCF_RESKEY_proxy_address_default}" />
</parameter>
<parameter name="log_level" unique="0" required="0">
<longdesc lang="en">
Log all messages of level (error|warning|info|message|debug|) or higher.
An empty value disables logging.
</longdesc>
<shortdesc lang="en">MySQL Proxy log level.</shortdesc>
<content type="string" default="${OCF_RESKEY_log_level_default}" />
</parameter>
<parameter name="keepalive" unique="0" required="0">
<longdesc lang="en">
Try to restart the proxy if it crashed (default: ).
Valid values: true or false. An empty value equals "false".
</longdesc>
<shortdesc lang="en">Use keepalive option</shortdesc>
<content type="string" default="${OCF_RESKEY_keepalive_default}" />
</parameter>
<parameter name="plugins" unique="0" required="0">
<longdesc lang="en">
Whitespace separated list of plugins to load (default: ).
Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter.
</longdesc>
<shortdesc lang="en">MySQL Proxy plugins</shortdesc>
<content type="string" default="${OCF_RESKEY_plugins_default}" />
</parameter>
<parameter name="admin_address" unique="0" required="0">
<longdesc lang="en">
Listening address:port of the admin plugin (default: 127.0.0.1:4041).
Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter.
</longdesc>
<shortdesc lang="en">MySQL Proxy admin plugin listening address</shortdesc>
<content type="string" default="${OCF_RESKEY_admin_address_default}" />
</parameter>
<parameter name="admin_username" unique="0" required="0">
<longdesc lang="en">
Username for the admin plugin (default: ).
Required since MySQL Proxy 0.8.1, if the admin plugin is loaded.
Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter.
</longdesc>
<shortdesc lang="en">MySQL Proxy admin plugin username</shortdesc>
<content type="string" default="${OCF_RESKEY_admin_username_default}" />
</parameter>
<parameter name="admin_password" unique="0" required="0">
<longdesc lang="en">
Password for the admin plugin (default: ).
Required since MySQL Proxy 0.8.1, if the admin plugin is loaded.
Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter.
</longdesc>
<shortdesc lang="en">MySQL Proxy admin plugin password</shortdesc>
<content type="string" default="${OCF_RESKEY_admin_password_default}" />
</parameter>
<parameter name="admin_lua_script" unique="0" required="0">
<longdesc lang="en">
Script to execute by the admin plugin.
Required since MySQL Proxy 0.8.1, if the admin plugin is loaded.
Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter.
</longdesc>
<shortdesc lang="en">MySQL Proxy admin plugin lua script</shortdesc>
<content type="string" default="${OCF_RESKEY_admin_lua_script_default}" />
</parameter>
<parameter name="test_table" unique="0" required="0">
<longdesc lang="en">
Table to be tested in monitor statement (in database.table notation)
</longdesc>
<shortdesc lang="en">MySQL test table</shortdesc>
<content type="string" default="${OCF_RESKEY_test_table_default}" />
</parameter>
<parameter name="test_user" unique="0" required="0">
<longdesc lang="en">
MySQL test user
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="${OCF_RESKEY_test_user_default}" />
</parameter>
<parameter name="test_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL test user password
</longdesc>
<shortdesc lang="en">MySQL test user password</shortdesc>
<content type="string" default="${OCF_RESKEY_test_passwd_default}" />
</parameter>
<parameter name="parameters" unique="0" required="0">
<longdesc lang="en">
The MySQL Proxy daemon may be called with additional parameters.
Specify any of them here.
</longdesc>
<shortdesc lang="en">MySQL Proxy additional parameters</shortdesc>
<content type="string" default="${OCF_RESKEY_parameters_default}" />
</parameter>
<parameter name="pidfile" unique="1" required="0">
<longdesc lang="en">PID file</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="${OCF_RESKEY_pidfile_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="30s" />
<action name="stop" timeout="30s" />
<action name="reload" timeout="30s" />
<action name="monitor" depth="0" timeout="20s" interval="60s" />
<action name="validate-all" timeout="30s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}
# isRunning: succeed when signal 0 can be sent to the process whose pid is
# given as $1. Error output of kill is suppressed; the status of kill is
# returned.
isRunning()
{
    local probe_pid="$1"

    kill -s 0 "$probe_pid" 2>/dev/null
}
# mysqlproxy_status: report whether MySQL Proxy is running.
# Reads the pid from the first line of ${pidfile} and returns the result of
# isRunning for it. Returns 1 when the pid file is missing or its first line
# is empty.
mysqlproxy_status()
{
    local proxy_pid

    # No pid file: MySQL Proxy is not running.
    [ -f "${pidfile}" ] || return 1

    proxy_pid=$(head -n 1 "${pidfile}")

    # An empty pid cannot be checked.
    [ -n "$proxy_pid" ] || return 1

    isRunning "$proxy_pid"
}
# mysqlproxy_start: start action.
# Returns $OCF_SUCCESS when MySQL Proxy is already running. Otherwise builds
# the command line from the configured settings, creates the pid directory and
# any socket directory that is missing, and launches $binary with --daemon.
# Returns $OCF_ERR_GENERIC when the binary exits with a non-zero status.
# Sets the global variable ret as a side effect.
mysqlproxy_start()
{
    local PARAM_PREFIX OPTIONS
    local p pa pba proba
    local pid_dir socket_dir

    # Nothing to do when the proxy is already up.
    if mysqlproxy_status ; then
        ocf_log info "MySQL Proxy already running."
        return $OCF_SUCCESS
    fi

    PARAM_PREFIX=''

    # One --plugins option per plugin to load.
    # @TODO check if the plugins are actually available?
    if ocf_is_true $plugin_support; then
        for p in $plugins; do
            PARAM_PREFIX="$PARAM_PREFIX --plugins=$p"
        done
    fi

    # The defaults file is only passed on when it exists.
    if [ -f "$defaults_file" ]; then
        PARAM_PREFIX="$PARAM_PREFIX --defaults-file=$defaults_file"
    fi

    # Log level: added only when a value is configured.
    PARAM_PREFIX="$PARAM_PREFIX${log_level:+ --log-level=$log_level}"

    # Keepalive is switched on by the literal value "true" only.
    if [ "$keepalive" = "true" ]; then
        PARAM_PREFIX="$PARAM_PREFIX --keepalive"
    fi

    # admin_* settings: each one is added only when it has a value.
    PARAM_PREFIX="$PARAM_PREFIX${admin_username:+ --admin-username=$admin_username}"
    PARAM_PREFIX="$PARAM_PREFIX${admin_password:+ --admin-password=$admin_password}"
    PARAM_PREFIX="$PARAM_PREFIX${admin_lua_script:+ --admin-lua-script=$admin_lua_script}"

    # The directory holding the pid file has to exist before the start.
    pid_dir=$(dirname $pidfile)
    if [ ! -d $pid_dir ] ; then
        ocf_log info "Creating PID directory '$pid_dir'."
        mkdir -p $pid_dir
    fi

    # One --proxy-address option per configured address.
    # Several addresses are currently unsupported, but handled for the future.
    for pa in $proxy_address; do
        [ -n "$pa" ] || continue
        OPTIONS=" $OPTIONS --proxy-address=$pa"

        # An address containing a slash is a socket path; its directory
        # has to exist as well.
        if echo "$pa" | grep -q '/' ; then
            socket_dir=$(dirname $pa)
            if [ ! -d $socket_dir ] ; then
                ocf_log info "Creating socket directory '$socket_dir'."
                mkdir -p $socket_dir
            fi
        fi
    done

    # One --proxy-backend-addresses option per back-end server.
    for pba in $proxy_backend_addresses; do
        [ -n "$pba" ] || continue
        OPTIONS=" $OPTIONS --proxy-backend-addresses=$pba"
    done

    # One --proxy-read-only-backend-addresses option per read-only back-end.
    for proba in $proxy_read_only_backend_addresses; do
        [ -n "$proba" ] || continue
        OPTIONS=" $OPTIONS --proxy-read-only-backend-addresses=$proba"
    done

    # Final command line: prefix options, address options, admin address
    # and pid file, followed by any additional user parameters.
    OPTIONS="$PARAM_PREFIX $OPTIONS --admin-address=$admin_address --pid-file=${pidfile}"
    OPTIONS="$OPTIONS${parameters:+ $parameters}"

    # Launch MySQL Proxy as a daemon.
    $binary --daemon $OPTIONS
    ret=$?
    if [ $ret -ne 0 ]; then
        ocf_log err "MySQL Proxy returned error: " $ret
        return $OCF_ERR_GENERIC
    fi

    # @TODO add an initial monitoring action?
    return $OCF_SUCCESS
}
# Stop MySQL Proxy.
#
# Sends the default signal of /bin/kill to the PID(s) stored in
# ${pidfile}, waits one second, and verifies that the proxy is gone.
# Leftover socket files named in $proxy_address and the pidfile are
# removed afterwards.
#
# Returns $OCF_SUCCESS when the proxy was not running or has been
# stopped, $OCF_ERR_GENERIC when kill fails or the proxy is still
# running after the grace period.
mysqlproxy_stop()
{
    local ret
    local pa

    # nothing to do when the proxy is not running
    mysqlproxy_status || return $OCF_SUCCESS

    #start-stop-daemon --stop --quiet --retry 3 --exec $binary --pidfile $pidfile
    /bin/kill $(cat "${pidfile}")
    ret=$?
    if [ $ret -ne 0 ]; then
        ocf_log err "MySQL Proxy returned an error while stopping: " $ret
        return $OCF_ERR_GENERIC
    fi

    # grant some time for shutdown and recheck
    sleep 1
    if mysqlproxy_status ; then
        ocf_log err "MySQL Proxy failed to stop."
        return $OCF_ERR_GENERIC
    fi

    # remove dangling socketfile, if specified
    for pa in $proxy_address; do
        [ -S "$pa" ] || continue
        ocf_log info "Removing dangling socket file '$pa'."
        rm -f "$pa"
    done

    # remove dangling pidfile
    if [ -f "${pidfile}" ]; then
        ocf_log info "Removing dangling pidfile '${pidfile}'."
        rm -f "${pidfile}"
    fi

    return $OCF_SUCCESS
}
# Ask a running MySQL Proxy to reload by sending it SIGHUP.
#
# Does nothing (and returns 0) when mysqlproxy_status reports that the
# proxy is not running; otherwise returns the exit status of kill.
mysqlproxy_reload()
{
    local PID

    # mysqlproxy_status already rejects a missing or empty pidfile
    if mysqlproxy_status; then
        ocf_log info "Reloading MySQL Proxy."
        # Signal exactly the PID that mysqlproxy_status verified: the
        # first line of the pidfile. The path is quoted so that a pidfile
        # location containing whitespace still works.
        PID=$(head -n 1 "${pidfile}")
        kill -s HUP "$PID"
    fi
}
# Monitor action.
#
# On a probe (OCF_RESKEY_CRM_meta_interval is 0 or unset) the
# configuration is validated first and a validation failure is returned
# unchanged. Returns $OCF_NOT_RUNNING when mysqlproxy_status fails.
# With OCF_CHECK_LEVEL equal to 20 the result of mysqlproxy_monitor_20 is
# returned when it is non-zero. Otherwise returns $OCF_SUCCESS.
mysqlproxy_monitor()
{
    local rc

    if [ "${OCF_RESKEY_CRM_meta_interval:-0}" -eq "0" ]; then
        # in case of probe, monitor operation is surely treated as
        # under suspension. This will call start operation.
        # (c/p from ocf:heartbeat:sfex)
        mysqlproxy_validate_all || return $?
    fi

    mysqlproxy_status || return $OCF_NOT_RUNNING

    if [ $OCF_CHECK_LEVEL -eq 20 ]; then
        # in-depth check through the MySQL client
        mysqlproxy_monitor_20 || return $?
    fi

    return $OCF_SUCCESS
}
# In-depth monitor (OCF_CHECK_LEVEL=20).
#
# Runs "SELECT COUNT(*)" on $OCF_RESKEY_test_table through every address
# in $proxy_address using the MySQL client in $mysql. A socket path is
# passed as --socket, anything else is split at ':' into --host/--port.
#
# Returns $OCF_SUCCESS when every query succeeds or when the test
# parameters are incomplete (a warning is logged and the check is
# skipped), $OCF_ERR_GENERIC on the first failing query.
mysqlproxy_monitor_20()
{
    local rc
    local mysql_options pa
    local mysql_server_parameter mysql_server_host mysql_server_port

    # Skip when test_table is empty, or when test_user and test_passwd
    # are BOTH empty (the grouping the original test expression yields).
    if [ -z "$OCF_RESKEY_test_table" ] ||
       { [ -z "$OCF_RESKEY_test_user" ] && [ -z "$OCF_RESKEY_test_passwd" ]; }
    then
        ocf_log warn "Missing proper configuration for OCF_CHECK_LEVEL=20 (test_table=[$OCF_RESKEY_test_table] test_user=[$OCF_RESKEY_test_user] test_password=[$OCF_RESKEY_test_passwd]). Not running in-depth monitoring."
        return $OCF_SUCCESS
    fi

    mysql_options="--connect_timeout=10 --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"

    # cycle each address
    for pa in $proxy_address; do
        # build correct connect parameter
        if [ -S "$pa" ]; then
            # we need to monitor a mysql socket
            mysql_server_parameter="--socket=$pa"
        else
            # we need to monitor a host address
            mysql_server_parameter=""

            # split host:port
            # @TODO correctly handle IPv6 address
            # @TODO correctly handle 0.0.0.0 address
            mysql_server_host=$(echo $pa | cut -d : -f 1)
            mysql_server_port=$(echo $pa | cut -d : -f 2)

            if [ -n "$mysql_server_host" ]; then
                mysql_server_parameter="$mysql_server_parameter --host=$mysql_server_host"
            fi
            if [ -n "$mysql_server_port" ]; then
                mysql_server_parameter="$mysql_server_parameter --port=$mysql_server_port"
            fi
        fi

        # Check for test table
        ocf_run $mysql $mysql_server_parameter $mysql_options \
            -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table"
        rc=$?
        if [ $rc -ne 0 ]; then
            ocf_log err "Failed to select from $OCF_RESKEY_test_table: " $rc
            return $OCF_ERR_GENERIC
        fi
    done

    return $OCF_SUCCESS
}
# Validate the resource configuration.
#
# Checks the proxy binary (and the MySQL client binary when
# OCF_CHECK_LEVEL is greater than 0), the log level, and the admin_*
# parameters required by the detected MySQL Proxy version. As a side
# effect it may overwrite the globals "plugins" and "has_plugin_admin".
#
# Returns $OCF_SUCCESS, or $OCF_ERR_CONFIGURED for an invalid log level;
# missing admin_* parameters terminate the agent with
# "exit $OCF_ERR_CONFIGURED".
mysqlproxy_validate_all()
{
# local variables
local config_error=0
# check that the MySQL Proxy binary exists and can be executed
check_binary $binary
# check MySQL client binary only if in-depth monitoring is requested
# do not break backwards compatibility otherwise
if [ $OCF_CHECK_LEVEL -gt 0 ]; then
check_binary $mysql
fi
# check for valid log-level
# (the trailing "|" inside the group also accepts an empty log level)
- echo $log_level | egrep -q "^(error|warning|info|message|debug|)$"
+ echo $log_level | $EGREP -q "^(error|warning|info|message|debug|)$"
if [ $? -ne 0 ]; then
ocf_log err "MySQL Proxy log level '$log_level' not in valid range error|warning|info|message|debug"
return $OCF_ERR_CONFIGURED
fi
# if we're running MySQL Proxy > 0.8.1 and there is any admin parameter set,
# explicitly load the admin (and the proxy) plugin.
# (version 0.8.2 does not load the admin plugin by default anymore)
ocf_version_cmp "$version" "0.8.1"
ret=$?
# ret is used below as: 1 = version equals 0.8.1, 2 = version is newer
if [ $ret -eq 2 ]; then
# simple check: concat all parameters and check if the string has non-zero length
if [ -n "$admin_username$admin_password$admin_lua_script$admin_address" ]; then
plugins="proxy admin"
has_plugin_admin=1
else
has_plugin_admin=0
fi
fi
# check for required admin_* parameters for 0.8.1 and 0.8.2 (with admin module)
# translated: if (version == 0.8.1 or (version > 0.8.1 and has_plugin_admin))
if [ $ret -eq 1 -o \( $ret -eq 2 -a $has_plugin_admin -eq 1 \) ]; then
if [ -z "$admin_username" ]; then
ocf_log err "Missing required parameter \"admin_username\""
config_error=1
fi
if [ -z "$admin_password" ]; then
ocf_log err "Missing required parameter \"admin_password\""
config_error=1
fi
if [ -z "$admin_lua_script" ]; then
ocf_log err "Missing required parameter \"admin_lua_script\""
config_error=1
fi
# check if the admin_lua_script, if specified, exists
# NOTE(review): unlike the checks above this branch only logs and does
# not set config_error, so a missing script does not fail validation --
# confirm whether that is intended.
if [ -n "$admin_lua_script" -a ! -e "$admin_lua_script" ]; then
ocf_log err "MySQL Proxy admin lua script '$admin_lua_script' does not exist or is not readable."
fi
fi
# issue a warning during start if the user wants to load a plugin
# but this version of MySQL Proxy does not support the plugin architecture.
if [ -n "$plugins" ] && ocf_is_false "$plugin_support" && [ $__OCF_ACTION = 'start' ]; then
ocf_log warn "You are running MySQL Proxy version '$version'. This version does not support the plugin architecture. Please use version 0.7.0 or later to load the plugins '$plugins'."
fi
# exit in case we have found relevant config errors
if [ $config_error -eq 1 ]; then
exit $OCF_ERR_CONFIGURED
fi
return $OCF_SUCCESS
}
#
# Main
#

# exactly one argument (the action) is expected
[ $# -eq 1 ] || {
    usage
    exit $OCF_ERR_ARGS
}

# map the OCF resource parameters onto short working names
pidfile="$OCF_RESKEY_pidfile"
binary="$OCF_RESKEY_binary"
defaults_file="$OCF_RESKEY_defaults_file"
proxy_backend_addresses="$OCF_RESKEY_proxy_backend_addresses"
proxy_read_only_backend_addresses="$OCF_RESKEY_proxy_read_only_backend_addresses"
admin_address="$OCF_RESKEY_admin_address"
admin_username="$OCF_RESKEY_admin_username"
admin_password="$OCF_RESKEY_admin_password"
admin_lua_script="$OCF_RESKEY_admin_lua_script"
proxy_address="$OCF_RESKEY_proxy_address"
log_level="$OCF_RESKEY_log_level"
keepalive="$OCF_RESKEY_keepalive"
# one plugin per line, sorted, duplicates removed
plugins=$(echo $OCF_RESKEY_plugins | tr "[:space:]" "\n" | sort -u)
mysql="$OCF_RESKEY_client_binary"
parameters="$OCF_RESKEY_parameters"

# initial values; adjusted once the MySQL Proxy version is known
plugin_support=false
has_plugin_admin=0 # 0 because this simplifies the if statements
# debugging stuff
#echo OCF_RESKEY_binary=$OCF_RESKEY_binary >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_defaults_file=$OCF_RESKEY_defaults_file >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_proxy_backend_addresses=$OCF_RESKEY_proxy_backend_addresses >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_proxy_read_only_backend_addresses=$OCF_RESKEY_proxy_read_only_backend_addresses >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_proxy_address=$OCF_RESKEY_proxy_address >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_log_level=$OCF_RESKEY_log_level >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_keepalive=$OCF_RESKEY_keepalive >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_admin_address=$OCF_RESKEY_admin_address >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_admin_username=$OCF_RESKEY_admin_username >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_admin_password=$OCF_RESKEY_admin_password >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_admin_lua_script=$OCF_RESKEY_admin_lua_script >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_parameters=$OCF_RESKEY_parameters >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_pidfile=$OCF_RESKEY_pidfile >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
# handle some parameters before performing any additional checks
# (these actions must work without the MySQL Proxy binary)
case $1 in
    meta-data)
        meta_data
        exit $?
        ;;
    usage)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# determine MySQL Proxy version: last field of the "mysql-proxy ..." line
# printed by "$binary --version"
check_binary $binary
version=$($binary --version | grep ^mysql-proxy | awk '{print $NF}')

# version 0.7.0 (and later) support the plugin architecture and load the admin plugin by default
# version 0.8.1 loads admin plugin by default and requires the admin parameters to be set
# version 0.8.2 does not load the admin plugin by default anymore
ocf_version_cmp "$version" "0.7.0"
ret=$?
case $ret in
    1|2)
        plugin_support=true
        has_plugin_admin=1
        ;;
esac
# perform action; every branch terminates the agent with the exit code
# of the action
case $1 in
    start)
        mysqlproxy_validate_all && mysqlproxy_start
        exit $?
        ;;
    stop)
        mysqlproxy_validate_all && mysqlproxy_stop
        exit $?
        ;;
    reload)
        mysqlproxy_reload
        exit $?
        ;;
    status)
        mysqlproxy_status || {
            ocf_log info "MySQL Proxy is stopped."
            exit $OCF_NOT_RUNNING
        }
        ocf_log info "MySQL Proxy is running."
        exit $OCF_SUCCESS
        ;;
    monitor)
        mysqlproxy_monitor
        exit $?
        ;;
    validate-all)
        mysqlproxy_validate_all
        exit $?
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
diff --git a/heartbeat/portblock b/heartbeat/portblock
index e88ecc2a1..450e37208 100755
--- a/heartbeat/portblock
+++ b/heartbeat/portblock
@@ -1,672 +1,672 @@
#!/bin/sh
#
# portblock: iptables temporary portblocking control
#
# Author: Sun Jiang Dong (initial version)
# Philipp Reisner (per-IP filtering)
#
# License: GNU General Public License (GPL)
#
# Copyright: (C) 2005 International Business Machines
#
# OCF parameters are as below:
# OCF_RESKEY_protocol
# OCF_RESKEY_portno
# OCF_RESKEY_action
# OCF_RESKEY_ip
# OCF_RESKEY_tickle_dir
# OCF_RESKEY_sync_script
#######################################################################
# Initialization:
# Load the shared OCF shell function library; OCF_FUNCTIONS_DIR may be
# preset by the caller, otherwise it is derived from OCF_ROOT.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
# (these values are also published through the meta-data action)
OCF_RESKEY_protocol_default=""
OCF_RESKEY_portno_default=""
OCF_RESKEY_direction_default="in"
OCF_RESKEY_action_default=""
OCF_RESKEY_ip_default="0.0.0.0/0"
OCF_RESKEY_reset_local_on_unblock_stop_default="false"
OCF_RESKEY_tickle_dir_default=""
OCF_RESKEY_sync_script_default=""
# Apply a default only when the parameter is unset; "=" (not ":=") keeps
# a parameter that was explicitly set to the empty string.
: ${OCF_RESKEY_protocol=${OCF_RESKEY_protocol_default}}
: ${OCF_RESKEY_portno=${OCF_RESKEY_portno_default}}
: ${OCF_RESKEY_direction=${OCF_RESKEY_direction_default}}
: ${OCF_RESKEY_action=${OCF_RESKEY_action_default}}
: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}}
: ${OCF_RESKEY_reset_local_on_unblock_stop=${OCF_RESKEY_reset_local_on_unblock_stop_default}}
: ${OCF_RESKEY_tickle_dir=${OCF_RESKEY_tickle_dir_default}}
: ${OCF_RESKEY_sync_script=${OCF_RESKEY_sync_script_default}}
#######################################################################
# Name this agent was invoked as; used in usage and status messages.
CMD=`basename $0`
# Helper program that is fed the recorded connection list on stdin.
TICKLETCP=$HA_BIN/tickle_tcp
# Print the usage text to stderr: the supported actions, an example crm
# configuration, and a description of the tickle ACK feature.
# The here-document delimiter is unquoted, so $CMD is expanded and each
# "\\" is printed as a single backslash.
usage()
{
cat <<END >&2
usage: $CMD {start|stop|status|monitor|meta-data|validate-all}
$CMD is used to temporarily block ports using iptables.
It can be used to blackhole a port before bringing
up an IP address, and enable it after a service is started.
To do that for samba, the following can be used:
crm configure <<EOF
primitive portblock-samba ocf:heartbeat:portblock \\
params protocol=tcp portno=137,138 action=block
primitive portunblock-samba ocf:heartbeat:portblock \\
params protocol=tcp portno=137,138 action=unblock
primitive samba-vip ocf:heartbeat:IPaddr2 \\
params ip=10.10.10.20
group g-samba \\
portblock-samba samba-vip nmbd smbd portunblock-samba
EOF
This will do the following things:
- DROP all incoming packets for TCP ports 137 and 138
- Bring up the IP alias 10.10.10.20
- start the nmbd and smbd services
- Re-enable TCP ports 137 and 138
(enable normal firewall rules on those ports)
This prevents clients from getting TCP RST if they try to reconnect
to the service after the alias is enabled but before nmbd and smbd
are running. These packets will cause some clients to give up
attempting to reconnect to the server.
Attempts to connect to UDP and other non-TCP ports which have nothing
listening can result in ICMP port unreachable responses, which can
have the same undesirable affect on some clients.
NOTE: iptables is Linux-specific.
An additional feature in the portblock RA is the tickle ACK function
enabled by specifying the tickle_dir parameter. The tickle ACK
triggers the clients to faster reconnect their TCP connections to the
fail-overed server.
Please note that this feature is often used for the floating IP fail-
over scenario where the long-lived TCP connections need to be tickled.
It doesn't support the cluster alias IP scenario.
When using the tickle ACK function, in addition to the normal usage
of portblock RA, the parameter tickle_dir must be specified in the
action=unblock instance of the portblock resources.
For example, you may stack resources like below:
portblock action=block
services
portblock action=unblock tickle_dir=/tickle/state/dir
If you want to tickle all the TCP connections which connected to _one_
floating IP but different ports, no matter how many portblock resources
you have defined, you should enable tickles for _one_ portblock
resource(action=unblock) only.
The tickle_dir is a location which stores the established TCP
connections. It can be a shared directory(which is cluster-visible to
all nodes) or a local directory.
If you use the shared directory, you needn't do any other things.
If you use the local directory, you must also specify the sync_script
paramater. We recommend you to use csync2 as the sync_script.
For example, if you use the local directory /tmp/tickle as tickle_dir,
you could setup the csync2 as the csync2 documentation says and
configure your /etc/csync2/csync2.cfg like:
group ticklegroup {
host node1;
host node2;
key /etc/csync2/ticklegroup.key;
include /etc/csync2/csync2.cfg;
include /tmp/tickle;
auto younger;
}
Then specify the parameter sync_script as "csync2 -xv".
END
}
# Print the OCF resource agent meta-data (XML) to stdout.
# The here-document delimiter is unquoted, so every
# ${OCF_RESKEY_*_default} reference is replaced by the default value
# defined at the top of this agent.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="portblock" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Resource script for portblock. It is used to temporarily block ports
using iptables. In addition, it may allow for faster TCP reconnects
for clients on failover. Use that if there are long lived TCP
connections to an HA service. This feature is enabled by setting the
tickle_dir parameter and only in concert with action set to unblock.
Note that the tickle ACK function is new as of version 3.0.2 and
hasn't yet seen widespread use.
</longdesc>
<shortdesc lang="en">Block and unblocks access to TCP and UDP ports</shortdesc>
<parameters>
<parameter name="protocol" unique="0" required="1">
<longdesc lang="en">
The protocol used to be blocked/unblocked.
</longdesc>
<shortdesc lang="en">protocol</shortdesc>
<content type="string" default="${OCF_RESKEY_protocol_default}" />
</parameter>
<parameter name="portno" unique="0" required="1">
<longdesc lang="en">
The port number used to be blocked/unblocked.
</longdesc>
<shortdesc lang="en">portno</shortdesc>
<content type="string" default="${OCF_RESKEY_portno_default}" />
</parameter>
<parameter name="action" unique="0" required="1">
<longdesc lang="en">
The action (block/unblock) to be done on the protocol::portno.
</longdesc>
<shortdesc lang="en">action</shortdesc>
<content type="string" default="${OCF_RESKEY_action_default}" />
</parameter>
<parameter name="reset_local_on_unblock_stop" unique="0" required="0">
<longdesc lang="en">
If for some reason the long lived server side TCP sessions won't be cleaned up
by a reconfiguration/flush/stop of whatever services this portblock protects,
they would linger in the connection table, even after the IP is gone
and services have been switched over to another node.
An example would be the default NFS kernel server.
These "known" connections may seriously confuse and delay a later switchback.
Enabling this option will cause this agent to try to get rid of these connections
by injecting a temporary iptables rule to TCP-reset outgoing packets from the
blocked ports, and additionally tickle them locally,
just before it starts to DROP incoming packets on "unblock stop".
</longdesc>
<shortdesc lang="en">(try to) reset server TCP sessions when unblock stops</shortdesc>
<content type="boolean" default="${OCF_RESKEY_reset_local_on_unblock_stop_default}" />
</parameter>
<parameter name="ip" unique="0" required="0">
<longdesc lang="en">
The IP address used to be blocked/unblocked.
</longdesc>
<shortdesc lang="en">ip</shortdesc>
<content type="string" default="${OCF_RESKEY_ip_default}" />
</parameter>
<parameter name="tickle_dir" unique="0" required="0">
<longdesc lang="en">
The shared or local directory (_must_ be absolute path) which
stores the established TCP connections.
</longdesc>
<shortdesc lang="en">Tickle directory</shortdesc>
<content type="string" default="${OCF_RESKEY_tickle_dir_default}" />
</parameter>
<parameter name="sync_script" unique="0" required="0">
<longdesc lang="en">
If the tickle_dir is a local directory, then the TCP connection state
file has to be replicated to other nodes in the cluster. It can be
csync2 (default), some wrapper of rsync, or whatever. It takes the
file name as a single argument. For csync2, set it to "csync2 -xv".
</longdesc>
<shortdesc lang="en">Connection state file synchronization script</shortdesc>
<content type="string" default="${OCF_RESKEY_sync_script_default}" />
</parameter>
<parameter name="direction" unique="0" required="0">
<longdesc lang="en">
Whether to block incoming or outgoing traffic. Can be either "in",
"out", or "both".
If "in" is used, the incoming ports are blocked on the INPUT chain.
If "out" is used, the outgoing ports are blocked on the OUTPUT chain.
If "both" is used, both the incoming and outgoing ports are blocked.
</longdesc>
<shortdesc lang="en">Whether to block incoming or outgoing traffic, or both</shortdesc>
<content type="string" default="${OCF_RESKEY_direction_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="status" depth="0" timeout="10s" interval="10s" />
<action name="monitor" depth="0" timeout="10s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="5s" />
</actions>
</resource-agent>
END
}
#
# Because this is the normal usage, we consider "block"
# resources to be pseudo-resources -- that is, their status can't
# be reliably determined through external means.
# This is because we expect an "unblock" resource to come along
# and disable us -- but we're still in some sense active...
#
#active_grep_pat {udp|tcp} portno,portno ip {d|s}
#	d = look for destination ports
#	s = look for source ports
#
# Prints an extended regular expression that matches the line
# "iptables -n -L" shows for this agent's DROP rule.
# With "d" the given ip is expected in the destination column and
# 0.0.0.0/0 in the source column; with "s" the columns are swapped.
# All scratch variables are local so nothing leaks into the caller.
active_grep_pat()
{
    local w any src dst prot

    # one or more blanks between columns
    w="[ ][ ]*"
    any="0\\.0\\.0\\.0/0"

    if [ "$4" = "s" ]; then
        src=$3
        dst=$any
    else
        src=$any
        dst=$3
    fi

    # iptables 1.8.9 briefly broke the output format, returning the
    # numeric protocol value instead of a string. Support both variants.
    if [ "$1" = "tcp" ]; then
        prot="(tcp|6)"
    else
        prot="(udp|17)"
    fi

    echo "^DROP${w}${prot}${w}--${w}${src}${w}${dst}${w}multiport${w}${4}ports${w}${2}$"
}
#chain_isactive {udp|tcp} portno,portno ip chain
#
# Succeeds when the listing of the given chain contains this agent's
# DROP rule. For the OUTPUT chain source ports are matched, for any
# other chain destination ports. Sets the globals "ds" and "PAT".
chain_isactive()
{
    case "$4" in
        OUTPUT) ds="s" ;;
        *)      ds="d" ;;
    esac

    PAT=$(active_grep_pat "$1" "$2" "$3" "$ds")
    $IPTABLES $wait -n -L "$4" | grep -qE "$PAT"
}
# netstat -tn and ss -Htn, split on whitespace and colon,
# look very similar:
# tcp        0      0 10.43.55.1 675 10.43.9.8 2049 ESTABLISHED
# ESTAB      0      0 10.43.55.1 675 10.43.9.8 2049
# so we can write one awk script for both
#
# Prints one "addr:port<TAB>addr:port" line for every established
# connection whose local address equals $OCF_RESKEY_ip. Without an
# argument the local endpoint is printed first; with any non-empty
# argument the remote endpoint is printed first.
get_established_tcp_connections()
{
    local columns

    case "$1" in
        '')
            columns='$4,$5, $6,$7'
            ;;
        *)
            # swap local and remote for "tickle_local"
            columns='$6,$7, $4,$5'
            ;;
    esac

    $ss_or_netstat | awk -F '[:[:space:]]+' '
        ( $8 == "ESTABLISHED" || $1 == "ESTAB" ) && $4 == "'$OCF_RESKEY_ip'" \
        {printf "%s:%s\t%s:%s\n", '"$columns"'}'
}
# Record the currently established TCP connections of $OCF_RESKEY_ip in
# the state file $OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip.
# Does nothing when tickle_dir is not configured. Sets the global
# "statefile".
save_tcp_connections()
{
    [ -z "$OCF_RESKEY_tickle_dir" ] && return
    statefile=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip
    # If we have _no_ sync script, we probably have a shared
    # (or replicated) directory, and need to fsync, or we might
    # end up with the just truncated file after failover, exactly
    # when we need it.
    #
    # If we _do_ have a sync script, it is not that important whether
    # the local state file is fsync'ed or not, the sync script is
    # responsible to "atomically" communicate the state to the peer(s).
    if [ -z "$OCF_RESKEY_sync_script" ]; then
        get_established_tcp_connections |
            dd of="$statefile".new conv=fsync status=none &&
            mv "$statefile".new "$statefile"
    else
        # The state file is quoted (as in the branch above) so that the
        # sync script receives it as a single argument; the script itself
        # stays unquoted because it may carry options ("csync2 -xv").
        get_established_tcp_connections > "$statefile"
        $OCF_RESKEY_sync_script "$statefile" > /dev/null 2>&1 &
    fi
}
# Feed the recorded connection list of $OCF_RESKEY_ip to
# "$TICKLETCP -n 3".
# Does nothing when tickle_dir is not configured or the state file is
# not readable. Sets the global "f".
tickle_remote()
{
    [ -z "$OCF_RESKEY_tickle_dir" ] && return
    f=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip
    # quoted: an unquoted path containing whitespace makes the test fail
    # and the tickle would be skipped silently
    [ -r "$f" ] || return
    $TICKLETCP -n 3 < "$f"
}
# Tickle the local ends of the connections of $OCF_RESKEY_ip.
# Does nothing when tickle_dir is not configured or the state file is
# not readable. Sets the global "f".
tickle_local()
{
    [ -z "$OCF_RESKEY_tickle_dir" ] && return
    f=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip
    # quoted: an unquoted path containing whitespace makes the test fail
    # and the tickle would be skipped silently
    [ -r "$f" ] || return

    # swap "local" and "remote" address,
    # so we tickle ourselves.
    # We set up a REJECT with tcp-reset before we do so, so we get rid of
    # the no longer wanted potentially long lived "ESTABLISHED" connection
    # entries on the IP we are going to delete in a sec. These would get in
    # the way if we switch-over and then switch-back in quick succession.
    local i
    awk '{ print $2, $1; }' "$f" | $TICKLETCP
    # stop as soon as the IP no longer shows up in the connection list
    $ss_or_netstat | grep -Fw "$OCF_RESKEY_ip" || return
    for i in 0.1 0.5 1 2 4 ; do
        sleep $i
        # now kill what is currently in the list,
        # not what was recorded during last monitor
        get_established_tcp_connections swap | $TICKLETCP
        $ss_or_netstat | grep -Fw "$OCF_RESKEY_ip" || break
    done
}
# Print the "is running" status line for the rule described by the
# arguments.
SayActive()
{
    local status_line="$CMD DROP rule [$*] is running (OK)"

    echo "$status_line"
}
# Print the "considered to be running" status line for the rule
# described by the arguments.
SayConsideredActive()
{
    local status_line="$CMD DROP rule [$*] considered to be running (OK)"

    echo "$status_line"
}
# Print the "is inactive" status line for the rule described by the
# arguments.
SayInactive()
{
    local status_line="$CMD DROP rule [$*] is inactive"

    echo "$status_line"
}
#IptablesStatus {udp|tcp} portno,portno ip {in|out|both} {block|unblock}
#
# Status/monitor action. Outcomes:
#   DROP rule present on every requested chain:
#     block   -> running
#     unblock -> not running
#   DROP rule missing on a requested chain:
#     the pseudo-resource state decides; an "unblock" resource that is
#     running additionally records the established connections.
# Returns $OCF_SUCCESS or $OCF_NOT_RUNNING. Sets the globals "is_active"
# and "r".
IptablesStatus() {
    local rc
    rc=$OCF_ERR_GENERIC

    # is_active holds the worst chain_isactive status over all chains
    is_active=0
    case "$4" in
        in|both)
            chain_isactive "$1" "$2" "$3" INPUT
            is_active=$?
            ;;
    esac
    case "$4" in
        out|both)
            chain_isactive "$1" "$2" "$3" OUTPUT
            r=$?
            if [ $r -gt $is_active ]; then
                is_active=$r
            fi
            ;;
    esac

    if [ $is_active -eq 0 ]; then
        if [ "$5" = "block" ]; then
            SayActive $*
            rc=$OCF_SUCCESS
        else
            SayInactive $*
            rc=$OCF_NOT_RUNNING
        fi
    elif ! ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" status; then
        SayInactive $*
        rc=$OCF_NOT_RUNNING
    elif [ "$5" = "block" ]; then
        SayConsideredActive $*
        rc=$OCF_SUCCESS
    else
        SayActive $*
        #This is only run on real monitor events.
        save_tcp_connections
        rc=$OCF_SUCCESS
    fi

    return $rc
}
#DoIptables {-I|-D} {udp|tcp} portno,portno ip chain
#
# Insert (-I) or delete (-D) this agent's DROP rule on the given chain,
# but only when the chain is not already in the requested state.
# Prints the current and the wanted state. Returns 0 when nothing had to
# be done, otherwise the exit status of iptables.
# Sets the globals op, proto, ports, ip, chain, active, want_active, ds.
DoIptables()
{
    op=$1 proto=$2 ports=$3 ip=$4 chain=$5

    if chain_isactive "$proto" "$ports" "$ip" "$chain"; then
        active=1
    else
        active=0
    fi

    case "$op" in
        -I) want_active=1 ;;
        *)  want_active=0 ;;
    esac

    echo "active: $active want_active: $want_active"

    # Chain already in desired state
    [ $active -eq $want_active ] && return 0

    # source ports/address on OUTPUT, destination ports/address otherwise
    if [ "$chain" = "OUTPUT" ]; then
        ds="s"
    else
        ds="d"
    fi
    $IPTABLES $wait "$op" "$chain" -p "$proto" -${ds} "$ip" -m multiport --${ds}ports "$ports" -j DROP
}
#IptablesBLOCK {udp|tcp} portno,portno ip {in|out|both} {block|unblock}
#
# Install the DROP rule(s) for the requested direction(s).
# When an "unblock" resource for tcp is stopped and
# reset_local_on_unblock_stop is true, a temporary REJECT (tcp-reset)
# rule is inserted on OUTPUT and the local connections are tickled
# before the INPUT DROP rule goes in; the REJECT rule is removed again
# afterwards.
# Returns the larger of the INPUT and OUTPUT iptables exit codes.
IptablesBLOCK()
{
    local rc_in=0
    local rc_out=0
    local try_reset

    case "$4" in
    in|both)
        try_reset=false
        if [ "$1/$5/$__OCF_ACTION" = tcp/unblock/stop ] &&
           ocf_is_true $reset_local_on_unblock_stop
        then
            try_reset=true
        fi

        # nothing to do when the INPUT chain is already active
        if ! chain_isactive "$1" "$2" "$3" INPUT; then
            if [ "$try_reset" = true ]; then
                $IPTABLES $wait -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset
                tickle_local
            fi

            $IPTABLES $wait -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP
            rc_in=$?

            if [ "$try_reset" = true ]; then
                $IPTABLES $wait -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset
            fi
        fi
        ;;
    esac

    case "$4" in
    out|both)
        DoIptables -I "$1" "$2" "$3" OUTPUT
        rc_out=$?
        ;;
    esac

    if [ $rc_in -gt $rc_out ]; then
        return $rc_in
    fi
    return $rc_out
}
#IptablesUNBLOCK {udp|tcp} portno,portno ip {in|out|both}
#
# Remove the DROP rule(s) for the requested direction(s).
# Returns the larger of the INPUT and OUTPUT results (0 when both
# succeeded or nothing had to be removed), mirroring IptablesBLOCK.
# Each result is captured right after its DoIptables call; taking $?
# after the last "if" would only ever report the OUTPUT branch and
# would always be 0 for direction "in".
IptablesUNBLOCK()
{
    local rc_in=0
    local rc_out=0

    if [ "$4" = "in" ] || [ "$4" = "both" ]; then
        DoIptables -D "$1" "$2" "$3" INPUT
        rc_in=$?
    fi
    if [ "$4" = "out" ] || [ "$4" = "both" ]; then
        DoIptables -D "$1" "$2" "$3" OUTPUT
        rc_out=$?
    fi

    if [ $rc_in -gt $rc_out ]; then
        return $rc_in
    fi
    return $rc_out
}
#IptablesStart {udp|tcp} portno,portno ip {in|out|both} {block|unblock}
#
# Start action: marks the pseudo-resource as started, then installs the
# DROP rule(s) for action "block" or removes them for action "unblock".
# After unblocking, the recorded connections are tickled; the result of
# the tickle is ignored. Any other action prints the usage and returns 1.
# Sets the global "rc" on the unblock path.
IptablesStart()
{
    ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" start

    if [ "$5" = "block" ]; then
        IptablesBLOCK "$@"
        return $?
    fi

    if [ "$5" = "unblock" ]; then
        IptablesUNBLOCK "$@"
        rc=$?
        #ignore run_tickle_tcp exit code!
        tickle_remote
        return $rc
    fi

    usage
    return 1
}
#IptablesStop {udp|tcp} portno,portno ip {in|out|both} {block|unblock}
#
# Stop action: marks the pseudo-resource as stopped, then does the
# inverse of start -- a "block" resource removes its DROP rule(s), an
# "unblock" resource records the established connections and installs
# the DROP rule(s). Any other action prints the usage and returns 1.
IptablesStop()
{
    ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" stop

    if [ "$5" = "block" ]; then
        IptablesUNBLOCK "$@"
    elif [ "$5" = "unblock" ]; then
        save_tcp_connections
        IptablesBLOCK "$@"
    else
        usage
        return 1
    fi

    return $?
}
#
# Check if the port is valid, this function code is not decent, but works
#
# $1 must consist entirely (-x) of comma separated entries, each being
# either a number or a "number:number" range. Succeeds when it matches.
CheckPort() {
# Examples of valid port: "1080", "1", "0080"
# Examples of invalid port: "1080bad", "0", "0000", ""
# NOTE(review): the expression below only checks for digits, so "0" and
# "0000" are accepted although they are listed as invalid above.
- echo $1 |egrep -qx '[0-9]+(:[0-9]+)?(,[0-9]+(:[0-9]+)?)*'
+ echo $1 | $EGREP -qx '[0-9]+(:[0-9]+)?(,[0-9]+(:[0-9]+)?)*'
}
# Validate the resource configuration.
#
# Checks, in this order: the iptables binary, the protocol (tcp or udp),
# the port list, the tickle_dir constraints (only with action=unblock,
# directory must exist), the action (block or unblock) and the
# reset_local_on_unblock_stop constraints.
# Terminates the agent with $OCF_ERR_CONFIGURED or $OCF_ERR_INSTALLED on
# the first violation; returns $OCF_SUCCESS otherwise.
IptablesValidateAll()
{
    check_binary $IPTABLES

    if [ "$protocol" != "tcp" ] && [ "$protocol" != "udp" ]; then
        ocf_log err "Invalid protocol $protocol!"
        exit $OCF_ERR_CONFIGURED
    fi

    if ! CheckPort "$portno"; then
        ocf_log err "Invalid port number $portno!"
        exit $OCF_ERR_CONFIGURED
    fi

    if [ -n "$OCF_RESKEY_tickle_dir" ]; then
        [ x"$action" = x"unblock" ] || {
            ocf_log err "Tickles are only useful with action=unblock!"
            exit $OCF_ERR_CONFIGURED
        }
        [ -d "$OCF_RESKEY_tickle_dir" ] || {
            ocf_log err "The tickle dir doesn't exist!"
            exit $OCF_ERR_INSTALLED
        }
    fi

    if [ "$action" != "block" ] && [ "$action" != "unblock" ]; then
        ocf_log err "Invalid action $action!"
        exit $OCF_ERR_CONFIGURED
    fi

    if ocf_is_true $reset_local_on_unblock_stop; then
        if [ $action != unblock ] ; then
            ocf_log err "reset_local_on_unblock_stop is only relevant with action=unblock"
            exit $OCF_ERR_CONFIGURED
        fi
        if [ -z $OCF_RESKEY_tickle_dir ] ; then
            ocf_log warn "reset_local_on_unblock_stop works best with tickle_dir enabled as well"
        fi
    fi

    return $OCF_SUCCESS
}
# exactly one argument (the action) is expected
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

# actions that need no resource parameters
case $1 in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# every other action requires protocol, portno and action to be set
[ -n "$OCF_RESKEY_protocol" ] || {
    ocf_log err "Please set OCF_RESKEY_protocol"
    exit $OCF_ERR_CONFIGURED
}
[ -n "$OCF_RESKEY_portno" ] || {
    ocf_log err "Please set OCF_RESKEY_portno"
    exit $OCF_ERR_CONFIGURED
}
[ -n "$OCF_RESKEY_action" ] || {
    ocf_log err "Please set OCF_RESKEY_action"
    exit $OCF_ERR_CONFIGURED
}
# iptables v1.4.20+ is required to use -w (wait)
#
# "iptables -V" may print a backend suffix after the version, e.g.
# "iptables v1.8.7 (nf_tables)". Only the dotted number following " v"
# is kept, so that a plain version string is handed to ocf_version_cmp.
# The binary probed is $IPTABLES, i.e. the one used for all rule changes.
version=$($IPTABLES -V | sed -n 's/^.* v\([0-9][0-9.]*\).*$/\1/p' | head -n 1)
ocf_version_cmp "$version" "1.4.19.1"
# 2 means "$version" is newer than 1.4.19.1
if [ "$?" -eq "2" ]; then
    wait="-w"
else
    wait=""
fi
# map the OCF resource parameters onto short working names
protocol="$OCF_RESKEY_protocol"
portno="$OCF_RESKEY_portno"
direction="$OCF_RESKEY_direction"
action="$OCF_RESKEY_action"
ip="$OCF_RESKEY_ip"
reset_local_on_unblock_stop="$OCF_RESKEY_reset_local_on_unblock_stop"

# If "tickle" is enabled, we need to record the list of currently established
# connections during monitor. Use ss where available, and netstat otherwise.
case "$OCF_RESKEY_tickle_dir" in
    "")
        # tickle not enabled: no connection listing tool needed
        ;;
    *)
        if have_binary ss ; then
            ss_or_netstat="ss -Htn"
        elif have_binary netstat ; then
            ss_or_netstat="netstat -tn"
        else
            ocf_log err "Neither ss nor netstat found, but needed to record estblished connections."
            exit $OCF_ERR_INSTALLED
        fi
        ;;
esac
# perform the requested action and exit with its result
case $1 in
    start)
        IptablesStart $protocol $portno $ip $direction $action
        exit $?
        ;;
    stop)
        IptablesStop $protocol $portno $ip $direction $action
        exit $?
        ;;
    status|monitor)
        IptablesStatus $protocol $portno $ip $direction $action
        exit $?
        ;;
    validate-all)
        IptablesValidateAll
        exit $?
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
diff --git a/heartbeat/rabbitmq-server-ha b/heartbeat/rabbitmq-server-ha
index 8b3cd9ee9..f48338035 100755
--- a/heartbeat/rabbitmq-server-ha
+++ b/heartbeat/rabbitmq-server-ha
@@ -1,2444 +1,2444 @@
#!/bin/sh
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# See usage() function below for more details ...
#
# Note that the script uses an external file to setup RabbitMQ policies
# so make sure to create it from an example shipped with the package.
#
#######################################################################
# Initialization:
# Load the shared OCF shell helpers.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Fill in some defaults if no values are specified
PATH=/sbin:/usr/sbin:/bin:/usr/bin
OCF_RESKEY_binary_default="/usr/sbin/rabbitmq-server"
OCF_RESKEY_ctl_default="/usr/sbin/rabbitmqctl"
OCF_RESKEY_debug_default=false
OCF_RESKEY_username_default="rabbitmq"
OCF_RESKEY_groupname_default="rabbitmq"
OCF_RESKEY_admin_user_default="guest"
OCF_RESKEY_admin_password_default="guest"
OCF_RESKEY_definitions_dump_file_default="/etc/rabbitmq/definitions"
OCF_RESKEY_pid_file_default="/var/run/rabbitmq/pid"
OCF_RESKEY_log_dir_default="/var/log/rabbitmq"
OCF_RESKEY_mnesia_base_default="/var/lib/rabbitmq/mnesia"
OCF_RESKEY_mnesia_schema_base_default="/var/lib/rabbitmq"
# NOTE(review): host_ip_default is only advertised in the meta-data; no
# OCF_RESKEY_host_ip fallback is applied below - confirm whether that is intended.
OCF_RESKEY_host_ip_default="127.0.0.1"
OCF_RESKEY_node_port_default=5672
OCF_RESKEY_default_vhost_default="/"
OCF_RESKEY_erlang_cookie_default=false
OCF_RESKEY_erlang_cookie_file_default="/var/lib/rabbitmq/.erlang.cookie"
OCF_RESKEY_use_fqdn_default=false
OCF_RESKEY_fqdn_prefix_default=""
OCF_RESKEY_max_rabbitmqctl_timeouts_default=3
OCF_RESKEY_policy_file_default="/usr/local/sbin/set_rabbitmq_policy"
OCF_RESKEY_rmq_feature_health_check_default=true
OCF_RESKEY_rmq_feature_local_list_queues_default=true
OCF_RESKEY_limit_nofile_default=65535
OCF_RESKEY_avoid_using_iptables_default=false
OCF_RESKEY_allowed_cluster_nodes_default=""
: ${HA_LOGTAG="lrmd"}
: ${HA_LOGFACILITY="daemon"}
# Apply each default only when the parameter is completely unset
# ("=" rather than ":=", so an explicitly empty value is kept).
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_ctl=${OCF_RESKEY_ctl_default}}
: ${OCF_RESKEY_debug=${OCF_RESKEY_debug_default}}
: ${OCF_RESKEY_username=${OCF_RESKEY_username_default}}
: ${OCF_RESKEY_groupname=${OCF_RESKEY_groupname_default}}
: ${OCF_RESKEY_admin_user=${OCF_RESKEY_admin_user_default}}
: ${OCF_RESKEY_admin_password=${OCF_RESKEY_admin_password_default}}
: ${OCF_RESKEY_definitions_dump_file=${OCF_RESKEY_definitions_dump_file_default}}
: ${OCF_RESKEY_log_dir=${OCF_RESKEY_log_dir_default}}
: ${OCF_RESKEY_mnesia_base=${OCF_RESKEY_mnesia_base_default}}
: ${OCF_RESKEY_mnesia_schema_base=${OCF_RESKEY_mnesia_schema_base_default}}
: ${OCF_RESKEY_pid_file=${OCF_RESKEY_pid_file_default}}
: ${OCF_RESKEY_node_port=${OCF_RESKEY_node_port_default}}
: ${OCF_RESKEY_default_vhost=${OCF_RESKEY_default_vhost_default}}
: ${OCF_RESKEY_erlang_cookie=${OCF_RESKEY_erlang_cookie_default}}
: ${OCF_RESKEY_erlang_cookie_file=${OCF_RESKEY_erlang_cookie_file_default}}
: ${OCF_RESKEY_use_fqdn=${OCF_RESKEY_use_fqdn_default}}
: ${OCF_RESKEY_fqdn_prefix=${OCF_RESKEY_fqdn_prefix_default}}
: ${OCF_RESKEY_max_rabbitmqctl_timeouts=${OCF_RESKEY_max_rabbitmqctl_timeouts_default}}
: ${OCF_RESKEY_policy_file=${OCF_RESKEY_policy_file_default}}
: ${OCF_RESKEY_rmq_feature_health_check=${OCF_RESKEY_rmq_feature_health_check_default}}
: ${OCF_RESKEY_rmq_feature_local_list_queues=${OCF_RESKEY_rmq_feature_local_list_queues_default}}
: ${OCF_RESKEY_limit_nofile=${OCF_RESKEY_limit_nofile_default}}
: ${OCF_RESKEY_avoid_using_iptables=${OCF_RESKEY_avoid_using_iptables_default}}
: ${OCF_RESKEY_allowed_cluster_nodes=${OCF_RESKEY_allowed_cluster_nodes_default}}
#######################################################################
# Timing values derived from the operation timeout
# (with the 30000 default: start/stop time = 7, TIMEOUT_ARG = 35).
OCF_RESKEY_CRM_meta_timeout_default=30000
: ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}}
OCF_RESKEY_start_time_default=$((OCF_RESKEY_CRM_meta_timeout / 6000 + 2))
: ${OCF_RESKEY_start_time=${OCF_RESKEY_start_time_default}}
OCF_RESKEY_stop_time_default=${OCF_RESKEY_start_time_default}
# Use the stop-specific default so the two cannot drift apart silently.
: ${OCF_RESKEY_stop_time=${OCF_RESKEY_stop_time_default}}
OCF_RESKEY_command_timeout_default=""
: ${OCF_RESKEY_command_timeout=${OCF_RESKEY_command_timeout_default}}
TIMEOUT_ARG=$((OCF_RESKEY_CRM_meta_timeout / 6000 + 30))
# Prefix used to wrap issued commands in timeout(1).
COMMAND_TIMEOUT="/usr/bin/timeout ${OCF_RESKEY_command_timeout} ${TIMEOUT_ARG}"
# Resource name without the ":<suffix>" part of the instance name.
RESOURCE_NAME=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1`
#######################################################################
# Prints a short usage summary for this resource agent on stdout.
# The action list mirrors the <actions> advertised by meta_data().
usage() {
cat <<UEND
usage: $0 (start|stop|promote|demote|notify|validate-all|meta-data|status|monitor)
$0 manages an ${OCF_RESKEY_binary} process as an HA resource
The 'start' operation starts the RabbitMQ server.
The 'stop' operation stops the RabbitMQ server.
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the RabbitMQ server is running
The 'monitor' operation reports whether the RabbitMQ server seems to be working
UEND
}
# Prints the OCF resource-agent meta-data XML on stdout.
# Every advertised default comes from the matching OCF_RESKEY_*_default
# variable, never from the (possibly overridden) runtime value.
meta_data() {
# The EXTENDED_OCF_PARAMS parameter below does not exist by default
# and hence converted to an empty string unless overridden. It
# could be used by an extension script to add new parameters. For
# example see https://review.openstack.org/#/c/249180/10
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="rabbitmq-server-ha">
<version>1.0</version>
<longdesc lang="en">
Resource agent for RabbitMQ promotes a node, then cluster nodes can join it
</longdesc>
<shortdesc lang="en">Resource agent for RabbitMQ HA cluster</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
RabbitMQ binary
</longdesc>
<shortdesc lang="en">RabbitMQ binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="ctl" unique="0" required="0">
<longdesc lang="en">
rabbitmqctl binary
</longdesc>
<shortdesc lang="en">rabbitmqctl binary</shortdesc>
<content type="string" default="${OCF_RESKEY_ctl_default}" />
</parameter>
<parameter name="pid_file" unique="0" required="0">
<longdesc lang="en">
RabbitMQ PID file
</longdesc>
<shortdesc lang="en">RabbitMQ PID file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_file_default}" />
</parameter>
<parameter name="log_dir" unique="0" required="0">
<longdesc lang="en">
RabbitMQ log directory
</longdesc>
<shortdesc lang="en">RabbitMQ log directory</shortdesc>
<content type="string" default="${OCF_RESKEY_log_dir_default}" />
</parameter>
<parameter name="username" unique="0" required="0">
<longdesc lang="en">
RabbitMQ user name
</longdesc>
<shortdesc lang="en">RabbitMQ user name</shortdesc>
<content type="string" default="${OCF_RESKEY_username_default}" />
</parameter>
<parameter name="groupname" unique="0" required="0">
<longdesc lang="en">
RabbitMQ group name
</longdesc>
<shortdesc lang="en">RabbitMQ group name</shortdesc>
<content type="string" default="${OCF_RESKEY_groupname_default}" />
</parameter>
<parameter name="admin_user" unique="0" required="0">
<longdesc lang="en">
RabbitMQ default admin user for API
</longdesc>
<shortdesc lang="en">RabbitMQ admin user</shortdesc>
<content type="string" default="${OCF_RESKEY_admin_user_default}" />
</parameter>
<parameter name="admin_password" unique="0" required="0">
<longdesc lang="en">
RabbitMQ default admin user password for API
</longdesc>
<shortdesc lang="en">RabbitMQ admin password</shortdesc>
<content type="string" default="${OCF_RESKEY_admin_password_default}" />
</parameter>
<parameter name="definitions_dump_file" unique="0" required="0">
<longdesc lang="en">
RabbitMQ default definitions dump file
</longdesc>
<shortdesc lang="en">RabbitMQ definitions dump file</shortdesc>
<content type="string" default="${OCF_RESKEY_definitions_dump_file_default}" />
</parameter>
<parameter name="command_timeout" unique="0" required="0">
<longdesc lang="en">
Timeout command arguments for issued commands termination (value is auto evaluated)
</longdesc>
<shortdesc lang="en">Arguments for timeout wrapping command</shortdesc>
<content type="string" default="${OCF_RESKEY_command_timeout_default}" />
</parameter>
<parameter name="start_time" unique="0" required="0">
<longdesc lang="en">
Timeout for start rabbitmq server
</longdesc>
<shortdesc lang="en">Timeout for start rabbitmq server</shortdesc>
<content type="string" default="${OCF_RESKEY_start_time_default}" />
</parameter>
<parameter name="stop_time" unique="0" required="0">
<longdesc lang="en">
Timeout for stopping rabbitmq server
</longdesc>
<shortdesc lang="en">Timeout for stopping rabbitmq server</shortdesc>
<content type="string" default="${OCF_RESKEY_stop_time_default}" />
</parameter>
<parameter name="debug" unique="0" required="0">
<longdesc lang="en">
The debug flag for agent (${OCF_RESKEY_binary}) instance.
In the /tmp/ directory will be created rmq-* files for log
some operations and ENV values inside OCF-script.
</longdesc>
<shortdesc lang="en">AMQP server (${OCF_RESKEY_binary}) debug flag</shortdesc>
<content type="boolean" default="${OCF_RESKEY_debug_default}" />
</parameter>
<parameter name="mnesia_base" unique="0" required="0">
<longdesc lang="en">
Base directory for storing Mnesia files
</longdesc>
<shortdesc lang="en">Base directory for storing Mnesia files</shortdesc>
<content type="string" default="${OCF_RESKEY_mnesia_base_default}" />
</parameter>
<parameter name="mnesia_schema_base" unique="0" required="0">
<longdesc lang="en">
Parent directory for Mnesia schema directory
</longdesc>
<shortdesc lang="en">Parent directory for Mnesia schema directory</shortdesc>
<content type="string" default="${OCF_RESKEY_mnesia_schema_base_default}" />
</parameter>
<parameter name="host_ip" unique="0" required="0">
<longdesc lang="en">
${OCF_RESKEY_binary} should listen on this IP address
</longdesc>
<shortdesc lang="en">${OCF_RESKEY_binary} should listen on this IP address</shortdesc>
<content type="string" default="${OCF_RESKEY_host_ip_default}" />
</parameter>
<parameter name="node_port" unique="0" required="0">
<longdesc lang="en">
${OCF_RESKEY_binary} should listen on this port
</longdesc>
<shortdesc lang="en">${OCF_RESKEY_binary} should listen on this port</shortdesc>
<content type="integer" default="${OCF_RESKEY_node_port_default}" />
</parameter>
<parameter name="default_vhost" unique="0" required="0">
<longdesc lang="en">
Default virtual host used for monitoring if a node is fully synchronized with
the rest of the cluster. In normal operation, the resource agent will wait for
queues from this virtual host on this node to be synchronized elsewhere before
stopping RabbitMQ. This also means queues in other virtual hosts may not be
fully synchronized on stop operations.
</longdesc>
<shortdesc lang="en">Default virtual host used for waiting for synchronization</shortdesc>
<content type="string" default="${OCF_RESKEY_default_vhost_default}" />
</parameter>
<parameter name="erlang_cookie" unique="0" required="0">
<longdesc lang="en">
Erlang cookie for clustering. If specified, will be updated at the mnesia reset
</longdesc>
<shortdesc lang="en">Erlang cookie</shortdesc>
<content type="string" default="${OCF_RESKEY_erlang_cookie_default}" />
</parameter>
<parameter name="erlang_cookie_file" unique="0" required="0">
<longdesc lang="en">
Erlang cookie file path where the cookie will be put, if requested
</longdesc>
<shortdesc lang="en">Erlang cookie file</shortdesc>
<content type="string" default="${OCF_RESKEY_erlang_cookie_file_default}" />
</parameter>
<parameter name="use_fqdn" unique="0" required="0">
<longdesc lang="en">
Either to use FQDN or a shortname for the rabbitmq node
</longdesc>
<shortdesc lang="en">Use FQDN</shortdesc>
<content type="boolean" default="${OCF_RESKEY_use_fqdn_default}" />
</parameter>
<parameter name="fqdn_prefix" unique="0" required="0">
<longdesc lang="en">
Optional FQDN prefix for RabbitMQ nodes in cluster.
FQDN prefix can be specified to host multiple RabbitMQ instances on a node or
in case of RabbitMQ running in dedicated network/interface.
</longdesc>
<shortdesc lang="en">FQDN prefix</shortdesc>
<content type="string" default="${OCF_RESKEY_fqdn_prefix_default}" />
</parameter>
<parameter name="max_rabbitmqctl_timeouts" unique="0" required="0">
<longdesc lang="en">
If during monitor call rabbitmqctl times out, the timeout is ignored
unless it is Nth timeout in a row. Here N is the value of the current parameter.
If too many timeouts happen in a row, the monitor call will return with error.
</longdesc>
<shortdesc lang="en">Fail only if that many rabbitmqctl timeouts in a row occurred</shortdesc>
<content type="string" default="${OCF_RESKEY_max_rabbitmqctl_timeouts_default}" />
</parameter>
<parameter name="policy_file" unique="0" required="0">
<longdesc lang="en">
A path to the shell script to setup RabbitMQ policies
</longdesc>
<shortdesc lang="en">A policy file path</shortdesc>
<content type="string" default="${OCF_RESKEY_policy_file_default}" />
</parameter>
<parameter name="rmq_feature_health_check" unique="0" required="0">
<longdesc lang="en">
Since rabbit 3.6.4 list_queues/list_channels-based monitoring should
be replaced with "node_health_check" command, as it creates no network
load at all.
</longdesc>
<shortdesc lang="en">Use node_health_check for monitoring</shortdesc>
<content type="boolean" default="${OCF_RESKEY_rmq_feature_health_check_default}" />
</parameter>
<parameter name="rmq_feature_local_list_queues" unique="0" required="0">
<longdesc lang="en">
For rabbit version that implements --local flag for list_queues, this
can greatly reduce network overhead in cases when node is
stopped/demoted.
</longdesc>
<shortdesc lang="en">Use --local option for list_queues</shortdesc>
<content type="boolean" default="${OCF_RESKEY_rmq_feature_local_list_queues_default}" />
</parameter>
<parameter name="limit_nofile" unique="0" required="0">
<longdesc lang="en">
Soft and hard limit for NOFILE
</longdesc>
<shortdesc lang="en">NOFILE limit</shortdesc>
<content type="string" default="${OCF_RESKEY_limit_nofile_default}" />
</parameter>
<parameter name="avoid_using_iptables" unique="0" required="0">
<longdesc lang="en">
When set to true the iptables calls to block client access become
noops. This is useful when we run inside containers.
</longdesc>
<shortdesc lang="en">Disable iptables use entirely</shortdesc>
<content type="boolean" default="${OCF_RESKEY_avoid_using_iptables_default}" />
</parameter>
<parameter name="allowed_cluster_nodes" unique="0" required="0">
<longdesc lang="en">
When set to anything other than the empty string it must contain the list of
cluster node names, separated by spaces, where the rabbitmq resource is allowed to run.
This is needed when rabbitmq is running on a subset of nodes part of a larger
cluster. The default ("") is to assume that all nodes part of the cluster will
run the rabbitmq resource.
</longdesc>
<shortdesc lang="en">List of cluster nodes where rabbitmq is allowed to run</shortdesc>
<content type="string" default="${OCF_RESKEY_allowed_cluster_nodes_default}" />
</parameter>
$EXTENDED_OCF_PARAMS
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="status" timeout="20s" />
<action name="monitor" depth="0" timeout="30s" interval="5s" />
<action name="monitor" depth="0" timeout="30s" interval="3s" role="Promoted"/>
<action name="promote" timeout="30s" />
<action name="demote" timeout="30s" />
<action name="notify" timeout="20s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}
# Fixed master-score values, presumably the lowest and the preferred score a
# node can advertise.
# NOTE(review): neither constant is referenced in this part of the file -
# confirm how the promotion logic uses them before changing the values.
MIN_MASTER_SCORE=100
BEST_MASTER_SCORE=1000
#######################################################################
# Functions invoked by resource manager actions
#TODO(bogdando) move proc_kill, proc_stop to shared OCF functions
# to be shipped with HA cluster packages
###########################################################
# Attempts to kill a process with retries and checks procfs
# to make sure the process is stopped.
#
# Globals:
#   LL
# Arguments:
#   $1 - pid of the process to try and kill, or "none" to kill by name match
#   $2 - service name used for logging and match-based kill, if the pid is "none"
#   $3 - signal to use, defaults to SIGTERM
#   $4 - number of retries, defaults to 5
#   $5 - time to sleep between retries, defaults to 2
# Returns:
#   0 - if successful
#   1 - if process is still running according to procfs
#   2 - if invalid parameters passed in
###########################################################
proc_kill()
{
    local pid="${1}"
    local service_name="${2}"
    local signal="${3:-SIGTERM}"
    local count="${4:-5}"
    local process_sleep="${5:-2}"
    local LH="${LL} proc_kill():"
    local pgrp=""
    local matched

    # Without a pid (or "none") there is nothing sensible to signal.
    if [ -z "${pid}" ]; then
        ocf_log err "${LH} no pid provided"
        return 2
    fi

    if [ "${pid}" = "none" ]; then
        matched="$(pgrep -fla "${service_name}")"
        if [ -z "${matched}" ] ; then
            ocf_log info "${LH} cannot find any processes matching the ${service_name}, considering target process to be already dead"
            return 0
        fi
        ocf_log debug "${LH} no pid provided, will try the ${service_name}, matched list: ${matched}"
        # Re-check for survivors before every attempt so that no signal is sent
        # (and no sleep wasted) once the last matching process is gone.
        while [ "${count}" -gt 0 ] && pgrep -f "${service_name}" > /dev/null; do
            ocf_log debug "${LH} Stopping ${service_name} with ${signal}..."
            ocf_run pkill -f -"${signal}" "${service_name}"
            sleep $process_sleep
            count=$(( count-1 ))
        done
        if pgrep -f "${service_name}" > /dev/null; then
            ocf_log warn "${LH} Failed to stop ${service_name} with ${signal}"
            return 1
        fi
        ocf_log debug "${LH} Stopped ${service_name} with ${signal}"
        return 0
    fi

    # A real pid was given: signal its whole process group.
    pgrp="$(ps -o pgid= ${pid} 2>/dev/null | tr -d '[[:space:]]')"
    if [ "${pgrp}" = "1" ] ; then
        ocf_log err "${LH} shall not kill by the bad pid 1 (init)!"
        return 2
    fi
    while [ "${count}" -gt 0 ] && [ -d "/proc/${pid}" ]; do
        ocf_log debug "${LH} Stopping ${service_name} with ${signal}..."
        ocf_run pkill -"${signal}" -g "${pgrp}"
        sleep $process_sleep
        count=$(( count-1 ))
    done
    # Check if the process ended after the last sleep
    if [ ! -d "/proc/${pid}" ] ; then
        ocf_log debug "${LH} Stopped ${service_name} with ${signal}"
        return 0
    fi
    ocf_log warn "${LH} Failed to stop ${service_name} with ${signal}"
    return 1
}
###########################################################
# Attempts to kill a process with the given pid or pid file
# using proc_kill and will retry with sigkill if sigterm is
# unsuccessful.
#
# Globals:
# OCF_ERR_GENERIC
# OCF_SUCCESS
# LL
# Arguments:
# $1 - pidfile or pid or 'none', if stopping by the name matching
# $2 - service name used for logging or for the failback stopping method
# $3 - stop process timeout (in sec), used to determine how many times we try
# SIGTERM and an upper limit on how long this function should try and
# stop the process. Defaults to 15.
# Returns:
# OCF_SUCCESS - if successful
# OCF_ERR_GENERIC - if process is still running according to procfs
###########################################################
# Resolves $1 to one or more pids (literal number, pid file content, or
# "none" for name matching) and stops each of them through proc_kill:
# SIGTERM with retries first, then a single SIGKILL.
proc_stop()
{
local pid_param="${1}"
local service_name="${2}"
local timeout="${3:-15}"
local LH="${LL} proc_stop():"
local i
local pid
local pidfile
if [ "${pid_param}" = "none" ] ; then
pid="none"
else
# check whether the parameter is just a number (i.e. a literal pid)
- echo "${pid_param}" | egrep -q '^[0-9]+$'
+ echo "${pid_param}" | $EGREP -q '^[0-9]+$'
if [ $? -eq 0 ]; then
pid="${pid_param}"
elif [ -e "${pid_param}" ]; then # check if passed in a pid file
pidfile="${pid_param}"
# one pid per line, duplicates removed; several pids may be listed in the file
pid=$(cat "${pidfile}" 2>/dev/null | tr -s " " "\n" | sort -u)
else
ocf_log warn "${LH} pid param ${pid_param} is not a file or a number, try match by ${service_name}"
pid="none"
fi
fi
# number of times to try a SIGTERM is (timeout - 5 seconds) / 2 seconds
local stop_count=$(( ($timeout-5)/2 ))
# make sure we stop at least once
if [ $stop_count -le 0 ]; then
stop_count=1
fi
# an empty or unreadable pid file leaves pid empty: fall back to name matching
if [ -z "${pid}" ] ; then
ocf_log warn "${LH} unable to get PID from ${pidfile}, try match by ${service_name}"
pid="none"
fi
if [ -n "${pid}" ]; then
for i in ${pid} ; do
[ "${i}" ] || break
ocf_log info "${LH} Stopping ${service_name} by PID ${i}"
proc_kill "${i}" "${service_name}" SIGTERM $stop_count
if [ $? -ne 0 ]; then
# SIGTERM failed, send a single SIGKILL
proc_kill "${i}" "${service_name}" SIGKILL 1 2
if [ $? -ne 0 ]; then
# note: returns before the pid file cleanup below
ocf_log err "${LH} ERROR: could not stop ${service_name}"
return "${OCF_ERR_GENERIC}"
fi
fi
done
fi
# Remove the pid file here which will remove empty pid files as well
if [ -n "${pidfile}" ]; then
rm -f "${pidfile}"
fi
ocf_log info "${LH} Stopped ${service_name}"
return "${OCF_SUCCESS}"
}
# Runs a command as the configured RabbitMQ user, wrapped in timeout(1).
#
# Arguments:
#   -t <args> - optional; timeout arguments used instead of COMMAND_TIMEOUT
#   $1        - command line to run, defaults to "status"
# Returns:
#   the exit status of the su invocation
su_rabbit_cmd() {
    local LH="${LL} su_rabbit_cmd():"
    local timeout="${COMMAND_TIMEOUT}"

    # An explicit "-t <args>" pair overrides the global timeout wrapper.
    if [ "$1" = "-t" ]; then
        timeout="/usr/bin/timeout ${OCF_RESKEY_command_timeout} $2"
        shift 2
    fi

    local cmd="${1:-status}"
    local user="${OCF_RESKEY_username}"
    local run_mail=/var/spool/mail/rabbitmq
    local run_dir=/var/lib/rabbitmq
    local rc=1
    # Environment handed to the command, followed by the wrapped command itself.
    local script="USER=${user} MAIL=${run_mail} PWD=${run_dir} HOME=${run_dir} LOGNAME=${user} ${timeout} ${cmd}"

    ocf_log debug "${LH} invoking a command: ${cmd}"
    su $user -s /bin/sh -c "${script}"
    rc=$?
    ocf_log info "${LH} the invoked command exited ${rc}: ${cmd}"
    return $rc
}
# Prints the current time as seconds since the Unix epoch.
now() {
    date -u '+%s'
}
# Raises the open-files limit of the current shell to OCF_RESKEY_limit_nofile
# when that is higher than the limit reported for the RabbitMQ user.
# Nothing is changed when the requested value is unset or not a number, or
# when the current limit is not numeric (e.g. "unlimited").
set_limits() {
    local current_limit
    current_limit=$(su $OCF_RESKEY_username -s /bin/sh -c "ulimit -n")

    # Both operands must be plain integers, otherwise "-gt" would raise a
    # shell error instead of giving an answer.
    case "${OCF_RESKEY_limit_nofile}" in
        ''|*[!0-9]*) return 0 ;;
    esac
    case "${current_limit}" in
        ''|*[!0-9]*) return 0 ;;
    esac

    if [ "${OCF_RESKEY_limit_nofile}" -gt "${current_limit}" ] ; then
        ulimit -n "${OCF_RESKEY_limit_nofile}"
    fi
}
# Sets the master score attribute of this node through crm_master.
#
# Arguments:
#   $1 - score to set; 0 is used when omitted
# Returns:
#   OCF_SUCCESS, or OCF_ERR_GENERIC when crm_master fails
master_score() {
    local LH="${LL} master_score():"
    local score="${1:-0}"

    ocf_log info "${LH} Updating master score attribute with ${score}"
    if ocf_run crm_master -N $THIS_PCMK_NODE -l reboot -v $score; then
        return $OCF_SUCCESS
    fi
    return $OCF_ERR_GENERIC
}
# Prints this host's name: the FQDN when OCF_RESKEY_use_fqdn is true, the
# short name otherwise. SunOS is handled separately because a different set
# of hostname invocations is used there.
get_hostname() {
    local kernel=$(uname -s)
    local mode=short

    if ocf_is_true "${OCF_RESKEY_use_fqdn}"; then
        mode=fqdn
    fi

    case "${mode}:${kernel}" in
        short:SunOS) echo "$(hostname | sed 's@\..*@@')" ;;
        short:*)     echo "$(hostname -s)" ;;
        fqdn:SunOS)  echo "$(hostname)" ;;
        fqdn:*)      echo "$(hostname -f)" ;;
    esac
}
# Prints $1 with OCF_RESKEY_fqdn_prefix prepended. Unless OCF_RESKEY_use_fqdn
# is true, everything from the first dot onwards is cut off.
process_fqdn() {
    local prefixed="${OCF_RESKEY_fqdn_prefix}$1"

    if ocf_is_true "${OCF_RESKEY_use_fqdn}"; then
        echo "${prefixed}"
    else
        # keep only the part before the first "."
        echo "${prefixed%%.*}"
    fi
}
# Return OCF_SUCCESS, if current host is in the list of given hosts.
# Otherwise, return 10
#
# Arguments:
#   $1 - whitespace separated list of host names
# Both sides of the comparison are normalised through process_fqdn.
my_host() {
    local hostlist="$1"
    local hostname
    local host
    local hn
    local rc=10
    local LH="${LL} my_host():"

    hostname=$(process_fqdn $(get_hostname))
    ocf_log debug "${LH} hostlist is: $hostlist"
    # "host" is declared local above so the loop does not clobber a
    # variable of the same name in the caller.
    for host in $hostlist ; do
        hn=$(process_fqdn "${host}")
        ocf_log debug "${LH} comparing '$hostname' with '$hn'"
        if [ "${hostname}" = "${hn}" ] ; then
            rc=$OCF_SUCCESS
            break
        fi
    done
    return $rc
}
# Prints the value of a reboot-lifetime node attribute, or 0 when the
# attribute cannot be read, is empty, or is reported as "(null)".
#
# Arguments:
#   $1 - node name
#   $2 - attribute name
get_integer_node_attr() {
    local attr_value

    # The third whitespace separated field of the query output has the form
    # "<key>=<value>"; only the part after "=" is kept.
    attr_value=$(crm_attribute -N $1 -l reboot --name "$2" --query 2>/dev/null | awk '{ split($3, vals, "="); if (vals[2] != "(null)") print vals[2] }')
    if [ $? -eq 0 ] && [ -n "$attr_value" ] ; then
        echo $attr_value
    else
        echo 0
    fi
}
# Prints the 'rabbit-start-time' attribute of node $1 (0 when not set).
get_node_start_time() {
    local attr_name='rabbit-start-time'
    get_integer_node_attr $1 "${attr_name}"
}
# Prints the "master-<RESOURCE_NAME>" attribute of node $1 (0 when not set).
get_node_master_score() {
    local attr_name="master-${RESOURCE_NAME}"
    get_integer_node_attr $1 "${attr_name}"
}
# Prints the RabbitMQ node name ("rabbit@<host>") for the host given in $1.
# The host part is resolved by ocf_attribute_target and then passed through
# process_fqdn, so it is an FQDN or a short name depending on
# OCF_RESKEY_use_fqdn.
rabbit_node_name() {
    local target
    local host_part

    target=$(ocf_attribute_target $1)
    host_part=$(process_fqdn ${target})
    echo "rabbit@${host_part}"
}
# Prepares the environment and the globals used by the rest of the agent:
# exports the RABBITMQ_* variables, sets MNESIA_FILES, RMQ_START_TIME,
# MASTER_FLAG_FILE, THIS_PCMK_NODE, TOTALVMEM and LL, makes sure the PID,
# mnesia and log directories are writable by the configured user, and
# finally calls update_cookie.
rmq_setup_env() {
local H
local dir
local name
H="$(get_hostname)"
export RABBITMQ_NODENAME=$(rabbit_node_name $H)
# the port is only exported when it differs from the default
if [ "$OCF_RESKEY_node_port" != "$OCF_RESKEY_node_port_default" ]; then
export RABBITMQ_NODE_PORT=$OCF_RESKEY_node_port
fi
export RABBITMQ_PID_FILE=$OCF_RESKEY_pid_file
MNESIA_FILES="${OCF_RESKEY_mnesia_base}/$(rabbit_node_name $H)"
# node name flag passed in the start args: -sname for short names, -name for FQDNs
if ! ocf_is_true "${OCF_RESKEY_use_fqdn}"; then
name="-sname"
else
name="-name"
fi
export RABBITMQ_SERVER_START_ARGS="${RABBITMQ_SERVER_START_ARGS} -mnesia dir \"${MNESIA_FILES}\" ${name} $(rabbit_node_name $H)"
RMQ_START_TIME="${MNESIA_FILES}/ocf_server_start_time.txt"
MASTER_FLAG_FILE="${MNESIA_FILES}/ocf_master_for_${OCF_RESOURCE_INSTANCE}"
THIS_PCMK_NODE=$(ocf_attribute_target)
# second column of the "Total:" line printed by free -mt
TOTALVMEM=`free -mt | awk '/Total:/ {print $2}'`
# check and make PID file dir
local PID_DIR=$( dirname $OCF_RESKEY_pid_file )
if [ ! -d ${PID_DIR} ] ; then
mkdir -p ${PID_DIR}
chown -R ${OCF_RESKEY_username}:${OCF_RESKEY_groupname} ${PID_DIR}
chmod 755 ${PID_DIR}
fi
# Regardless of whether we just created the directory or it
# already existed, check whether it is writable by the configured
# user
for dir in ${PID_DIR} "${OCF_RESKEY_mnesia_base}" "${OCF_RESKEY_log_dir}"; do
if test -e ${dir}; then
local files
# lists every entry below ${dir} that the configured user cannot write to
files=$(su -s /bin/sh - $OCF_RESKEY_username -c "find ${dir} ! -writable")
if [ "${files}" ]; then
ocf_log warn "Directory ${dir} is not writable by ${OCF_RESKEY_username}, chowning."
chown -R ${OCF_RESKEY_username}:${OCF_RESKEY_groupname} "${dir}"
fi
fi
done
# log prefix "<resource instance>[<pid of this shell>]:" used by the other functions
export LL="${OCF_RESOURCE_INSTANCE}[$$]:"
update_cookie
}
# Return a RabbitMQ node to its virgin state.
# For reset and force_reset to succeed the RabbitMQ application must have been stopped.
# If the app cannot be stopped, beam will be killed and mnesia files will be removed.
# Resets the local node: tries "reset", then "force_reset" through
# rabbitmqctl and, when neither is possible, kills the server and removes
# the Mnesia directories. Always returns OCF_SUCCESS.
reset_mnesia() {
local LH="${LL} reset_mnesia():"
# set to true when the Mnesia files have to be removed by hand
local make_amnesia=false
local rc=$OCF_ERR_GENERIC
# check status of a beam process
get_status
rc=$?
if [ $rc -eq 0 ] ; then
# beam is running
# check status of rabbit app and stop it, if it is running
get_status rabbit
rc=$?
if [ $rc -eq 0 ] ; then
# rabbit app is running, have to stop it
ocf_log info "${LH} Stopping RMQ-app prior to reset the mnesia."
stop_rmq_server_app
rc=$?
if [ $rc -ne 0 ] ; then
ocf_log warn "${LH} RMQ-app can't be stopped."
make_amnesia=true
fi
fi
if ! $make_amnesia ; then
# rabbit app is not running, reset mnesia
ocf_log info "${LH} Execute reset with timeout: ${TIMEOUT_ARG}"
su_rabbit_cmd "${OCF_RESKEY_ctl} reset"
rc=$?
if [ $rc -ne 0 ] ; then
# plain reset failed, fall back to force_reset
ocf_log info "${LH} Execute force_reset with timeout: ${TIMEOUT_ARG}"
su_rabbit_cmd "${OCF_RESKEY_ctl} force_reset"
rc=$?
if [ $rc -ne 0 ] ; then
ocf_log warn "${LH} Mnesia couldn't cleaned, even by force-reset command."
make_amnesia=true
fi
fi
fi
else
# there is no beam running
make_amnesia=true
ocf_log warn "${LH} There is no Beam process running."
fi
# remove mnesia files, if required
if $make_amnesia ; then
kill_rmq_and_remove_pid
ocf_run rm -rf "${MNESIA_FILES}"
# note: mnesia_schema_location is not declared local and stays set after the call
mnesia_schema_location="${OCF_RESKEY_mnesia_schema_base}/Mnesia.$(rabbit_node_name $(get_hostname))"
ocf_run rm -rf "$mnesia_schema_location"
ocf_log warn "${LH} Mnesia files appear corrupted and have been removed from ${MNESIA_FILES} and $mnesia_schema_location"
fi
# always return OCF SUCCESS
return $OCF_SUCCESS
}
# Adds a temporary iptables rule rejecting client connections to the
# RabbitMQ port, unless such a rule is already present.
#
# Returns:
#   OCF_SUCCESS     - the rule is in place (or iptables use is disabled)
#   OCF_ERR_GENERIC - the rule is still missing after 5 attempts
block_client_access()
{
    # When OCF_RESKEY_avoid_using_iptables is true iptables calls are noops
    if ocf_is_true "${OCF_RESKEY_avoid_using_iptables}"; then
        return $OCF_SUCCESS
    fi
    # do not add temporary RMQ blocking rule, if it is already exist
    # otherwise, try to add a blocking rule with max of 5 retries
    local tries=5
    while [ $tries -gt 0 ]; do
        if iptables -nvL --wait | grep -q 'temporary RMQ block'; then
            return $OCF_SUCCESS
        fi
        tries=$((tries-1))
        iptables --wait -I INPUT -p tcp -m tcp --dport ${OCF_RESKEY_node_port} -m state --state NEW,RELATED,ESTABLISHED \
            -m comment --comment 'temporary RMQ block' -j REJECT --reject-with tcp-reset
        sleep 1
    done
    # The last attempt may have succeeded: judge by the presence of the rule,
    # not by the number of attempts used.
    if iptables -nvL --wait | grep -q 'temporary RMQ block'; then
        return $OCF_SUCCESS
    fi
    return $OCF_ERR_GENERIC
}
# Removes every temporary iptables rule added by block_client_access.
#
# Arguments:
#   $1 - optional prefix for the log message, defaults to "none"
unblock_client_access()
{
    local lhtext="none"
    local rule_no
    # use the caller's prefix only when one was actually given
    if [ -n "$1" ] ; then
        lhtext=$1
    fi
    # When OCF_RESKEY_avoid_using_iptables is true iptables calls are noops
    if ocf_is_true "${OCF_RESKEY_avoid_using_iptables}"; then
        return
    fi
    # remove all temporary RMQ blocking rules, if there are more than one exist
    # (one delete-by-specification per listed rule; the rule number itself is not used)
    for rule_no in $(iptables -nvL --wait --line-numbers | awk '/temporary RMQ block/ {print $1}'); do
        iptables --wait -D INPUT -p tcp -m tcp --dport ${OCF_RESKEY_node_port} -m state --state NEW,RELATED,ESTABLISHED \
            -m comment --comment 'temporary RMQ block' -j REJECT --reject-with tcp-reset
    done
    ocf_log info "${lhtext} unblocked access to RMQ port"
}
# Prints the cluster nodes known to the local Mnesia as a space separated list.
#
# Arguments:
#   $1 - 'nodes' to query db_nodes, 'running' to query running_db_nodes
# Returns:
#   OCF_SUCCESS, or OCF_ERR_GENERIC (with an empty line printed) when the
#   rabbitmqctl eval call fails
get_nodes__base(){
    local query=''
    local raw

    case "$1" in
        nodes)   query='db_nodes' ;;
        running) query='running_db_nodes' ;;
    esac

    if ! raw=$(${OCF_RESKEY_ctl} eval "mnesia:system_info(${query})." 2>/dev/null) ; then
        echo ''
        return $OCF_ERR_GENERIC
    fi

    # translate line like '{running_nodes,['rabbit@node-1','rabbit@node-2','rabbit@node-3']},' to node_list
    echo $(echo "${raw}" | awk -F, '{ for (i=1;i<=NF;i++) { if ($i ~ /@/) { gsub(/[\[\]}{]/,"",$i); print $i; } }}' | tr -d "\'")
    return $OCF_SUCCESS
}
# Prints all cluster nodes known to the local Mnesia.
# Returns the status of get_nodes__base, so a failed query is visible to
# the caller instead of being masked by the status of echo.
get_nodes() {
    local node_list
    local rc

    node_list=$(get_nodes__base nodes)
    rc=$?
    echo $node_list
    return $rc
}
# Prints the cluster nodes the local Mnesia reports as running.
# Returns the status of get_nodes__base, so a failed query is visible to
# the caller instead of being masked by the status of echo.
get_running_nodes() {
    local node_list
    local rc

    node_list=$(get_nodes__base running)
    rc=$?
    echo $node_list
    return $rc
}
# Get alive cluster nodes in visible partition, but the specified one
#
# Arguments:
#   $1 - optional node name to leave out of the result
# Prints the remaining node names separated by spaces. When
# OCF_RESKEY_allowed_cluster_nodes is set, only nodes also listed there
# are printed.
get_alive_pacemaker_nodes_but()
{
    local excluded="$1"
    local member

    # Compare whole names: a substring removal would also mangle nodes whose
    # name merely starts with the excluded one (node-1 vs. node-10).
    # tmp_pcmk_node_list / pcmk_node_list are left global as before.
    tmp_pcmk_node_list=""
    for member in $(crm_node -l -p | sed -e '/(null)/d'); do
        if [ -n "${excluded}" ] && [ "${member}" = "${excluded}" ]; then
            continue
        fi
        tmp_pcmk_node_list="${tmp_pcmk_node_list} ${member}"
    done
    # If OCF_RESKEY_allowed_cluster_nodes is set then we only want the intersection
    # of the cluster node output and the allowed_cluster_nodes list
    if [ -z "${OCF_RESKEY_allowed_cluster_nodes}" ]; then
        pcmk_node_list=$tmp_pcmk_node_list
    else
        # names present in both lists show up twice and are kept by uniq -d
        pcmk_node_list=`for member in $tmp_pcmk_node_list ${OCF_RESKEY_allowed_cluster_nodes}; do echo $member; done | sort | uniq -d`
    fi
    echo $pcmk_node_list
}
# Prints the name of the first alive node for which is_master succeeds;
# prints nothing when there is none. The arguments are handed to
# get_alive_pacemaker_nodes_but, so a node named there is not checked.
get_master_name_but()
{
    local node
    local candidates

    candidates=$(get_alive_pacemaker_nodes_but "$@")
    for node in ${candidates}
    do
        ocf_log info "${LH} looking if $node is master"
        if ! is_master $node; then
            continue
        fi
        ocf_log info "${LH} master is $node"
        echo $node
        return 0
    done
    return 0
}
# Evaluates an Erlang expression on the current node through
# "rabbitmqctl eval", wrapped in COMMAND_TIMEOUT.
#
# Arguments:
#   $1  - printf format string of the expression (mandatory)
#   $2+ - values for the placeholders in $1
# Returns the status of the wrapped command; its stderr is discarded.
erl_eval() {
    local fmt="${1:?}"
    local expression
    shift

    expression="$(printf "$fmt" "$@")"
    $COMMAND_TIMEOUT ${OCF_RESKEY_ctl} eval "${expression}" 2>/dev/null
}
# Returns 0 if we are clustered with the provided node, 1 otherwise.
#
# Arguments:
#   $1 - host name of the node to check
# A node counts as not clustered when it is not in our list of running
# cluster nodes, or when it is in our list of partitioned nodes. A failure
# to run either check is logged and otherwise ignored.
is_clustered_with()
{
    local LH="${LH}: is_clustered_with: "
    local node_name
    local rc
    node_name=$(rabbit_node_name $1)
    local seen_as_running
    seen_as_running=$(erl_eval "lists:member('%s', rabbit_mnesia:cluster_nodes(running))." "$node_name")
    rc=$?
    if [ "$rc" -ne 0 ]; then
        ocf_log err "${LH} Failed to check whether '$node_name' is considered running by us"
        # We had a transient local error; that doesn't mean the remote node is
        # not part of the cluster, so ignore this
    elif [ "$seen_as_running" != true ]; then
        ocf_log info "${LH} Node $node_name is not running, considering it not clustered with us"
        return 1
    fi
    local seen_as_partitioned
    seen_as_partitioned=$(erl_eval "lists:member('%s', rabbit_node_monitor:partitions())." "$node_name")
    rc=$?
    if [ "$rc" -ne 0 ]; then
        ocf_log err "${LH} Failed to check whether '$node_name' is partitioned with us"
        # We had a transient local error; that doesn't mean the remote node is
        # partitioned with us, so ignore this
    elif [ "$seen_as_partitioned" != false ]; then
        ocf_log info "${LH} Node $node_name is partitioned from us"
        return 1
    fi
    # Explicit success: "$?" here would be the exit status of the logger
    # whenever one of the ignored errors above was reported.
    return 0
}
# Tells whether the node given in $1 is missing from our running nodes.
#
# Returns:
#   0 - the node is not among the nodes printed by get_running_nodes
#   1 - the node is already listed there
check_need_join_to() {
    local wanted
    local candidate
    local running

    wanted=$(rabbit_node_name $1)
    running=$(get_running_nodes)
    for candidate in ${running} ; do
        [ "${wanted}" = "${candidate}" ] && return 1
    done
    return 0
}
# Update erlang cookie, if it has been specified
#
# Does nothing when OCF_RESKEY_erlang_cookie is 'false'. Otherwise the
# cookie file is rewritten only when its first line differs from the
# configured cookie; owner and mode (600) are enforced on every call.
# Always returns OCF_SUCCESS.
update_cookie() {
    local cookie_file_content
    if [ "${OCF_RESKEY_erlang_cookie}" != 'false' ] ; then
        if [ -f "${OCF_RESKEY_erlang_cookie_file}" ]; then
            # First line of cookie file; command substitution already drops
            # the trailing newline, so no external helper is needed for that.
            cookie_file_content=$(head -n1 "${OCF_RESKEY_erlang_cookie_file}")
        fi
        # As there is a brief period of time when the file is empty
        # (shell redirection has already opened and truncated file,
        # and echo hasn't finished its job), we are doing this write
        # only when cookie has changed.
        if [ "${OCF_RESKEY_erlang_cookie}" != "${cookie_file_content}" ]; then
            # Subshell with a tight umask: a newly created cookie file is
            # never readable by other users, not even before the chmod below.
            ( umask 077; echo "${OCF_RESKEY_erlang_cookie}" > "${OCF_RESKEY_erlang_cookie_file}" )
        fi
        # And this are idempotent operations, so we don't have to
        # check any preconditions for running them.
        chown ${OCF_RESKEY_username}:${OCF_RESKEY_groupname} "${OCF_RESKEY_erlang_cookie_file}"
        chmod 600 "${OCF_RESKEY_erlang_cookie_file}"
    fi
    return $OCF_SUCCESS
}
# Stop the rmq beam process, first through its pidfile and then by matching
# the rabbit node name on the command line.
# Returns $OCF_SUCCESS when the second (name based) proc_stop call succeeds,
# $OCF_ERR_GENERIC otherwise.
kill_rmq_and_remove_pid() {
    local LH="${LL} kill_rmq_and_remove_pid():"
    local beam_pattern="beam.*${RABBITMQ_NODENAME}"

    # First attempt goes through the pidfile with the name match as a
    # fallback; its exit code is deliberately ignored.
    proc_stop "${OCF_RESKEY_pid_file}" "${beam_pattern}" "${OCF_RESKEY_stop_time}"

    # Second attempt relies on the node name match only and decides the result.
    if proc_stop none "${beam_pattern}" "${OCF_RESKEY_stop_time}" ; then
        return $OCF_SUCCESS
    fi
    return $OCF_ERR_GENERIC
}
# Print all arguments joined into one line. The expansion passed to echo is
# intentionally unquoted: word splitting is what drops leading/trailing
# blanks and squeezes inner runs of whitespace.
trim_var(){
    local joined="$*"
    joined=${joined% }
    echo $joined
}
# Normalise the pacemaker notification variables: every
# OCF_RESKEY_CRM_meta_notify_* variable listed below is passed through
# trim_var and re-assigned. Always returns $OCF_SUCCESS.
action_validate() {
    # todo(sv): validate some incoming parameters
    local suffix

    for suffix in \
        post pre start stop \
        start_resource stop_resource active_resource inactive_resource \
        start_uname stop_uname active_uname \
        master_resource master_uname \
        demote_resource demote_uname \
        slave_resource slave_uname \
        promote_resource promote_uname
    do
        # The variable names are fixed literals, so eval only ever sees
        # "NAME=$(trim_var $NAME)" for the names above.
        eval "OCF_RESKEY_CRM_meta_notify_${suffix}=\$(trim_var \$OCF_RESKEY_CRM_meta_notify_${suffix})"
    done
    return $OCF_SUCCESS
}
# Record the current time in the 'rabbit-start-time' node attribute, but
# only when the status code given as $1 is 0. Uses the caller's ${LH} as
# log prefix.
update_rabbit_start_time_if_rc() {
    local rc=$1
    local nowtime

    [ $rc -eq 0 ] || return 0

    nowtime="$(now)"
    ocf_log info "${LH} Rabbit app started successfully. Updating start time attribute with ${nowtime}"
    ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --update "${nowtime}"
}
# Join the local RabbitMQ node to the cluster through the given node.
#   $1 - node to join through (translated with rabbit_node_name)
# Sequence: stop the rabbit app if it is running, run join_cluster, wait a
# moment, start the app again and record the start time.
# On any failure the resource is stopped via action_stop and
# $OCF_ERR_GENERIC is returned; on success the function returns 0.
join_to_cluster() {
    local node="$1"
    local rmq_node
    local rc=$OCF_ERR_GENERIC
    local LH="${LL} join_to_cluster():"

    ocf_log info "${LH} start."

    rmq_node=$(rabbit_node_name $node)
    ocf_log info "${LH} Joining to cluster by node '${rmq_node}'."

    # join_cluster needs the rabbit app stopped
    get_status rabbit
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ] ; then
        ocf_log info "${LH} rabbitmq app will be stopped."
        stop_rmq_server_app
        rc=$?
        if [ $rc -ne 0 ] ; then
            ocf_log err "${LH} Can't stop rabbitmq app by stop_app command. Stopping."
            action_stop
            return $OCF_ERR_GENERIC
        fi
    fi

    ocf_log info "${LH} Execute join_cluster with timeout: ${TIMEOUT_ARG}"
    su_rabbit_cmd "${OCF_RESKEY_ctl} join_cluster $rmq_node"
    rc=$?
    if [ $rc -ne 0 ] ; then
        ocf_log err "${LH} Can't join to cluster by node '${rmq_node}'. Stopping."
        action_stop
        return $OCF_ERR_GENERIC
    fi

    sleep 2

    try_to_start_rmq_app
    rc=$?
    if [ $rc -ne 0 ] ; then
        ocf_log err "${LH} Can't start RMQ app after join to cluster. Stopping."
        action_stop
        return $OCF_ERR_GENERIC
    fi

    update_rabbit_start_time_if_rc 0
    ocf_log info "${LH} Joined to cluster succesfully."
    ocf_log info "${LH} end."
    return $rc
}
# Remove stopped nodes from the rabbit cluster database.
#   $1 - whitespace separated list of node names on which the resource is
#        being stopped
# For every listed node (except this one) that get_nodes reports as a
# cluster member: disconnect it, wait up to 5 rounds while
# is_clustered_with still succeeds for it, then run forget_cluster_node.
# Failures are only logged; the function always returns $OCF_SUCCESS.
unjoin_nodes_from_cluster() {
    local nodelist="$1"
    local hostname
    local nodename
    local rc=$OCF_ERR_GENERIC
    local rnode
    # nodes in rabbit cluster db
    local nodes_in_cluster
    local LH="${LL} unjoin_nodes_from_cluster():"
    local tries

    nodes_in_cluster=$(get_nodes)
    rc=$?
    # get_nodes failed: nothing to compare against, nothing to do
    [ $rc -eq 0 ] || return $OCF_SUCCESS

    # unjoin all cluster nodes which are being stopped (i.e. received
    # post-stop notify), except *this* node; before unjoining a node,
    # make sure it was disconnected from *this* node
    for hostname in $nodelist ; do
        nodename=$(rabbit_node_name $hostname)
        [ "${nodename}" = "${RABBITMQ_NODENAME}" ] && continue

        for rnode in $nodes_in_cluster ; do
            [ "${nodename}" = "${rnode}" ] || continue

            # disconnect node being unjoined from this node
            ocf_run ${OCF_RESKEY_ctl} eval "disconnect_node(list_to_atom(\"${nodename}\"))." 2>&1
            rc=$?
            if [ $rc -ne $OCF_SUCCESS ] ; then
                ocf_log info "${LH} disconnecting node '${nodename}' failed."
            else
                ocf_log info "${LH} node '${nodename}' disconnected succesfully."
            fi

            # when the rabbit node went down, its status remains 'running'
            # for a while, so a few retries are required before unjoining
            tries=0
            while [ $tries -ne 5 ]; do
                tries=$((tries+1))
                is_clustered_with $nodename || break
                ocf_log info "${LH} the ${nodename} is alive and cannot be kicked from the cluster yet"
                sleep 10
            done

            ocf_log info "${LH} Execute forget_cluster_node with timeout: ${TIMEOUT_ARG}"
            su_rabbit_cmd "${OCF_RESKEY_ctl} forget_cluster_node ${nodename}"
            rc=$?
            if [ $rc -ne 0 ] ; then
                ocf_log warn "${LH} unjoining node '${nodename}' failed."
            else
                ocf_log info "${LH} node '${nodename}' unjoined succesfully."
            fi
        done
    done
    return $OCF_SUCCESS
}
# Stop the RMQ beam server process.
# Tries a graceful `stop` through the control command (with or without a
# known PID) and afterwards forces a cleanup when the pidfile or a matching
# beam process is still around.
# Returns $OCF_SUCCESS, or the result of kill_rmq_and_remove_pid when the
# forced cleanup was needed.
stop_server_process() {
    local pid
    local rc=$OCF_ERR_GENERIC
    local LH="${LL} stop_server_process():"

    pid=$(cat ${OCF_RESKEY_pid_file})
    rc=$?
    if [ $rc -ne 0 ] ; then
        # Try to stop without known PID
        ocf_log err "${LH} RMQ-server process PIDFILE was not found!"
        if su_rabbit_cmd "${OCF_RESKEY_ctl} stop >> \"${OCF_RESKEY_log_dir}/shutdown_log\" 2>&1" ; then
            ocf_log info "${LH} RMQ-server process stopped succesfully, although there was no PIDFILE found."
            ocf_log info "${LH} grant a graceful termintation window ${OCF_RESKEY_stop_time} to end its beam"
            sleep "${OCF_RESKEY_stop_time}"
        else
            kill_rmq_and_remove_pid
        fi
    elif [ "${pid}" ] ; then
        # Try to stop gracefully by known PID
        ocf_log info "${LH} Execute stop with timeout: ${TIMEOUT_ARG}"
        if su_rabbit_cmd "${OCF_RESKEY_ctl} stop ${OCF_RESKEY_pid_file} >> \"${OCF_RESKEY_log_dir}/shutdown_log\" 2>&1" ; then
            ocf_log info "${LH} RMQ-server process (PID=${pid}) stopped succesfully."
        fi
    fi

    # Ensure there is no beam process and pidfile left
    pgrep -f "beam.*${RABBITMQ_NODENAME}" > /dev/null
    rc=$?
    if [ ! -f ${OCF_RESKEY_pid_file} ] && [ $rc -ne 0 ] ; then
        return $OCF_SUCCESS
    fi

    ocf_log warn "${LH} The pidfile or beam's still exist, forcing the RMQ-server cleanup"
    kill_rmq_and_remove_pid
    return $?
}
# Stop the RMQ application (stop_app), leaving the beam process alone.
# Returns $OCF_SUCCESS when the app is stopped (or when get_status reports
# a non-zero status to begin with), $OCF_ERR_GENERIC otherwise.
# Uses the caller's ${LH} as log prefix.
stop_rmq_server_app() {
    local rc=$OCF_ERR_GENERIC

    # if the beam process isn't running, then rabbit app is stopped as well
    get_status
    rc=$?
    [ $rc -eq 0 ] || return $OCF_SUCCESS

    # stop the app
    ocf_log info "${LH} Execute stop_app with timeout: ${TIMEOUT_ARG}"
    su_rabbit_cmd "${OCF_RESKEY_ctl} stop_app >> \"${OCF_RESKEY_log_dir}/shutdown_log\" 2>&1"
    rc=$?
    if [ $rc -ne 0 ] ; then
        ocf_log err "${LH} RMQ-server app cannot be stopped."
        return $OCF_ERR_GENERIC
    fi

    # verify that the rabbit app is really gone
    get_status rabbit
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ] ; then
        ocf_log err "${LH} RMQ-server app cannot be stopped."
        return $OCF_ERR_GENERIC
    fi

    ocf_log info "${LH} RMQ-server app stopped succesfully."
    return $OCF_SUCCESS
}
# Start the beam process (Erlang VM only, RABBITMQ_NODE_ONLY=1) in the
# background as user rabbitmq.
# A leftover pidfile is cleaned up first: if it points at a live beam
# process, that process gets SIGTERM; if it points at a live process that
# is not a beam, the function gives up with $OCF_ERR_GENERIC.
# Otherwise returns $OCF_SUCCESS right after spawning the process; start-up
# errors are left for the subsequent start_app step to detect.
start_beam_process() {
    local command
    local rc=$OCF_ERR_GENERIC
    local pid
    local LH="${LL} start_beam_process():"

    # remove old PID-file if it exists
    if [ -f "${OCF_RESKEY_pid_file}" ] ; then
        ocf_log warn "${LH} found old PID-file '${OCF_RESKEY_pid_file}'."
        pid=$(cat "${OCF_RESKEY_pid_file}")
        if [ "${pid}" ] && [ -d "/proc/${pid}" ] ; then
            # Inspect the command line directly. Piping through ocf_run does
            # not work here: ocf_run captures the command output for logging,
            # so a downstream grep would never see it.
            grep -q 'bin/beam' "/proc/${pid}/cmdline" 2>/dev/null
            rc=$?
            if [ $rc -eq $OCF_SUCCESS ] ; then
                ocf_log warn "${LH} found beam process with PID=${pid}, killing...'."
                ocf_run kill -TERM "$pid"
            else
                ocf_log err "${LH} found unknown process with PID=${pid} from '${OCF_RESKEY_pid_file}'."
                return $OCF_ERR_GENERIC
            fi
        fi
        ocf_run rm -f "${OCF_RESKEY_pid_file}"
    fi

    [ -f /etc/default/rabbitmq-server ] && . /etc/default/rabbitmq-server

    # RabbitMQ requires high soft and hard limits for NOFILE
    set_limits

    # run beam process
    command="${OCF_RESKEY_binary} >> \"${OCF_RESKEY_log_dir}/startup_log\" 2>/dev/null"
    RABBITMQ_NODE_ONLY=1 su rabbitmq -s /bin/sh -c "${command}"&
    sleep 3 # give it some time, before attempting to start_app

    # PID-file is now created later, if the application started successfully
    # So assume beam.smp is started, and defer errors handling for start_app
    return $OCF_SUCCESS
}
# Ask the running node whether enabled plugins still have to be loaded.
# The Erlang snippet raises an error (non-zero exit) only when at least one
# plugin is enabled and none is active; in every other case it evaluates to
# false and the command succeeds. Re-loading with active plugins is avoided
# because plugins:setup() would remove existing dependency plugins in
# plugins_expand_dir.
# Returns the exit status of the control command.
check_plugins() {
    local probe='{ok, EnabledFile} = application:get_env(rabbit, enabled_plugins_file), EnabledPlugins = rabbit_plugins:read_enabled(EnabledFile), ActivePlugins = rabbit_plugins:active(), if length(EnabledPlugins)>0 -> if length(ActivePlugins)==0 -> erlang:error("need_to_load_plugins"); true -> false end; true -> false end.'

    ${OCF_RESKEY_ctl} eval "$probe"
}
# Load and start the enabled plugins when check_plugins signals (by a
# non-zero status) that this is required.
# Returns 0 when nothing had to be done, otherwise the exit status of the
# plugin loading command.
load_plugins() {
    if check_plugins ; then
        return 0
    fi
    ${OCF_RESKEY_ctl} eval 'ToBeLoaded = rabbit_plugins:setup(), ok = app_utils:load_applications(ToBeLoaded), StartupApps = app_utils:app_dependency_order(ToBeLoaded,false), app_utils:start_applications(StartupApps).'
}
# Print the result of rabbit_plugins:active() as reported by the control
# command; its stderr is discarded.
list_active_plugins() {
    local plugins
    plugins=$(${OCF_RESKEY_ctl} eval 'rabbit_plugins:active().' 2>/dev/null)
    echo "${plugins}"
}
# Start the rabbit application on the local node.
#   $1 - optional path of the log file receiving start_app output
#        (defaults to ${OCF_RESKEY_log_dir}/startup_log)
# Starts the beam process first when get_status does not report success,
# then runs start_app, waits on the pidfile and finally tries to load the
# plugins (a plugin failure is logged but does not change the result).
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
try_to_start_rmq_app() {
    local startup_log="${1:-${OCF_RESKEY_log_dir}/startup_log}"
    local rc=$OCF_ERR_GENERIC
    local LH="${LL} try_to_start_rmq_app():"
    local mrc
    local mlist

    get_status
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ] ; then
        ocf_log info "${LH} RMQ-runtime (beam) not started, starting..."
        start_beam_process
        rc=$?
        if [ $rc -ne $OCF_SUCCESS ]; then
            ocf_log err "${LH} Failed to start beam - returning from the function"
            return $OCF_ERR_GENERIC
        fi
    fi

    ocf_log info "${LH} begin."
    ocf_log info "${LH} Execute start_app with timeout: ${TIMEOUT_ARG}"
    su_rabbit_cmd "${OCF_RESKEY_ctl} start_app >>${startup_log} 2>&1"
    rc=$?
    if [ $rc -ne 0 ] ; then
        ocf_log info "${LH} start_app failed."
        return $OCF_ERR_GENERIC
    fi

    ocf_log info "${LH} start_app was successful."
    ocf_log info "${LH} waiting for start to finish with timeout: ${TIMEOUT_ARG}"
    su_rabbit_cmd "${OCF_RESKEY_ctl} wait ${OCF_RESKEY_pid_file}"
    rc=$?
    if [ $rc -ne 0 ] ; then
        ocf_log err "${LH} RMQ-server app failed to wait for start."
        return $OCF_ERR_GENERIC
    fi

    # Loading enabled modules
    ocf_log info "${LH} start plugins."
    load_plugins
    mrc=$?
    if [ $mrc -eq 0 ] ; then
        mlist=$(list_active_plugins)
        ocf_log info "${LH} Starting plugins: ${mlist}"
    else
        ocf_log info "${LH} Starting plugins: failed."
    fi
    return $OCF_SUCCESS
}
# Initial start check of the RMQ server application.
# Client access is blocked for the duration of the check. The app is
# started and then stopped again; when the start fails, Mnesia is reset and
# the start is retried up to 10 times. On success the master score is set
# to $MIN_MASTER_SCORE. When the app cannot be started or stopped the beam
# is killed. Client access is unblocked on every exit path after the block
# was applied.
# Returns $OCF_SUCCESS on success, otherwise a non-zero status
# ($OCF_ERR_GENERIC for the failures handled explicitly below).
start_rmq_server_app() {
    local rc=$OCF_ERR_GENERIC
    local startup_log="${OCF_RESKEY_log_dir}/startup_log"
    local startup_output
    local LH="${LL} start_rmq_server_app():"
    local attempt

    # We are performing initial start check.
    # We are not ready to provide service.
    # Clients should not have access.
    ocf_log info "${LH} begin."

    # Safe-unblock the rules, if there are any
    unblock_client_access "${LH}"
    # Apply the blocking rule
    block_client_access
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ]; then
        ocf_log err "${LH} cannot block access to RMQ port!"
        return $OCF_ERR_GENERIC
    fi
    ocf_log info "${LH} blocked access to RMQ port"

    get_status
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ] ; then
        ocf_log info "${LH} RMQ-runtime (beam) not started, starting..."
        start_beam_process
        rc=$?
        if [ $rc -ne $OCF_SUCCESS ]; then
            unblock_client_access "${LH}"
            return $OCF_ERR_GENERIC
        fi
    fi

    ocf_log info "${LH} RMQ-server app not started, starting..."
    try_to_start_rmq_app "$startup_log"
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ] ; then
        # rabbitmq-server started successfully as master of cluster
        master_score $MIN_MASTER_SCORE
        stop_rmq_server_app
        rc=$?
        if [ $rc -ne 0 ] ; then
            ocf_log err "${LH} RMQ-server app can't be stopped. Beam will be killed."
            kill_rmq_and_remove_pid
            unblock_client_access "${LH}"
            return $OCF_ERR_GENERIC
        fi
    else
        # error at start RMQ-server
        ocf_log warn "${LH} RMQ-server app can't start without Mnesia cleaning."
        for attempt in $(seq 1 10) ; do
            rc=$OCF_ERR_GENERIC
            reset_mnesia || break
            try_to_start_rmq_app "$startup_log"
            rc=$?
            # start failed again: go for the next reset/start round
            [ $rc -eq $OCF_SUCCESS ] || continue

            stop_rmq_server_app
            rc=$?
            if [ $rc -ne $OCF_SUCCESS ]; then
                ocf_log err "${LH} RMQ-server app can't be stopped during Mnesia cleaning. Beam will be killed."
                kill_rmq_and_remove_pid
                unblock_client_access "${LH}"
                return $OCF_ERR_GENERIC
            fi
            ocf_log info "${LH} RMQ-server app Mnesia cleaned successfully."
            rc=$OCF_SUCCESS
            master_score $MIN_MASTER_SCORE
            break
        done
    fi

    if [ $rc -eq $OCF_ERR_GENERIC ] ; then
        ocf_log err "${LH} RMQ-server can't be started while many tries. Beam will be killed."
        kill_rmq_and_remove_pid
    fi
    ocf_log info "${LH} end."
    unblock_client_access "${LH}"
    return $rc
}
# Check the status of the rabbit beam process or of an application
# running inside it.
#   $1 - application name to look for in which_applications() output;
#        defaults to "kernel"
# Returns:
#   $OCF_SUCCESS     - the application is listed
#   $OCF_NOT_RUNNING - the query failed and no beam process was found, or
#                      the query worked but the application is not listed
#   $OCF_ERR_GENERIC - the query failed although a beam process is running
get_status() {
    local what="${1:-kernel}"
    local rc=$OCF_NOT_RUNNING
    local LH="${LL} get_status():"
    local body
    local beam_running

    body=$( ${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} eval 'rabbit_misc:which_applications().' 2>&1 )
    rc=$?
    pgrep -f "beam.*${RABBITMQ_NODENAME}" > /dev/null
    beam_running=$?

    if [ $rc -ne 0 ] ; then
        # report not running only if the which_applications() reported an
        # error AND the beam is not running
        if [ $beam_running -ne 0 ] ; then
            ocf_log info "${LH} failed with code ${rc}. Command output: ${body}"
            return $OCF_NOT_RUNNING
        fi
        # there were errors, yet the beam is found running: generic error
        ocf_log info "${LH} found the beam process running but failed with code ${rc}. Command output: ${body}"
        return $OCF_ERR_GENERIC
    fi

    # the query exited w/o errors, so its output can be parsed
    if echo "$body" | grep "\{${what}," > /dev/null 2>&1 ; then
        return $OCF_SUCCESS
    fi
    ocf_log info "${LH} app ${what} was not found in command output: ${body}"
    return $OCF_NOT_RUNNING
}
# OCF "status" action: report whatever get_status (default check) returns.
action_status() {
    get_status
    return $?
}
# Tell whether the node given as $1 carries the 'rabbit-master' attribute
# with value 'true' (queried through crm_attribute).
# Returns 0 if it does, 1 otherwise.
is_master() {
    local attr_value

    attr_value=$(crm_attribute -N "${1}" -l reboot --name 'rabbit-master' --query 2>/dev/null |
        awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d')
    [ "${attr_value}" = 'true' ]
}
# Decide, from its return code, whether a su_rabbit_cmd invocation timed out
# and keep a per-operation counter of consecutive timeouts.
#   $1 - return code of the command
#   $2 - name of the private attribute holding the counter (timeouts of
#        different operations are tracked separately)
#   $3 - operation name, used in log messages only
# Returns:
#   0 - no timeout; the counter is reset to 0
#   1 - timeout, but fewer than $OCF_RESKEY_max_rabbitmqctl_timeouts in a
#       row so far: the caller should ignore it
#   2 - timeout and the limit is reached: the caller should fail
# Uses the caller's ${LH} as log prefix.
check_timeouts() {
    local op_rc=$1
    local timeouts_attr_name=$2
    local op_name=$3
    local count

    # 124 and 137 are treated as timeout/kill codes; 75 is EX_TEMPFAIL from
    # sysexits, which rabbitmqctl uses to signal a timeout.
    if [ $op_rc -ne 124 ] && [ $op_rc -ne 137 ] && [ $op_rc -ne 75 ]; then
        ocf_update_private_attr $timeouts_attr_name 0
        return 0
    fi

    count=$(ocf_get_private_attr $timeouts_attr_name 0)
    count=$((count+1))
    # There is a slight chance that this piece of code will be executed twice
    # simultaneously. As a result, the stored value will be one less than it
    # should be. But we don't need precise calculation here.
    ocf_update_private_attr $timeouts_attr_name $count

    if [ $count -lt $OCF_RESKEY_max_rabbitmqctl_timeouts ]; then
        ocf_log warn "${LH} 'rabbitmqctl $op_name' timed out $count of max. $OCF_RESKEY_max_rabbitmqctl_timeouts time(s) in a row. Doing nothing for now."
        return 1
    fi
    ocf_log err "${LH} 'rabbitmqctl $op_name' timed out $count of max. $OCF_RESKEY_max_rabbitmqctl_timeouts time(s) in a row and is not responding. The resource is failed."
    return 2
}
# Block until list_queues on the default vhost no longer shows a queue in
# 'syncing' state, polling every 2 seconds.
#   $1 - value handed to su_rabbit_cmd -t for the whole wait
# Adds --local to list_queues when OCF_RESKEY_rmq_feature_local_list_queues
# is true. Returns the exit status of su_rabbit_cmd.
wait_sync() {
    local wait_time=$1
    local queues
    local opt_arg=""

    ocf_is_true "$OCF_RESKEY_rmq_feature_local_list_queues" && opt_arg="--local"

    queues="${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} -p ${OCF_RESKEY_default_vhost} list_queues $opt_arg name state"
    su_rabbit_cmd -t "${wait_time}" "sh -c \"while ${queues} | grep -q 'syncing,'; do sleep 2; done\""
}
# Core of the monitor action.
# Determines the state of the local node, recomputes its master score and,
# when this node is the running and healthy master, checks every other
# alive pacemaker node and orders misbehaving ones to restart.
# Returns $OCF_NOT_RUNNING, $OCF_SUCCESS, $OCF_RUNNING_MASTER or
# $OCF_ERR_GENERIC. Exits the whole script with $OCF_FAILED_MASTER when this
# node holds the master attribute but the rabbit app is not running.
get_monitor() {
local rc=$OCF_ERR_GENERIC
local LH="${LL} get_monitor():"
local status_master=1
local rabbit_running
local name
local node
local node_start_time
local nowtime
local partitions_report
local node_partitions
ocf_log info "${LH} CHECK LEVEL IS: ${OCF_CHECK_LEVEL}"
# Step 1: default get_status check (kernel app / beam process)
get_status
rc=$?
if [ $rc -eq $OCF_NOT_RUNNING ] ; then
ocf_log info "${LH} get_status() returns ${rc}."
ocf_log info "${LH} ensuring this slave does not get promoted."
master_score 0
return $OCF_NOT_RUNNING
elif [ $rc -eq $OCF_SUCCESS ] ; then
ocf_log info "${LH} get_status() returns ${rc}."
ocf_log info "${LH} also checking if we are master."
get_status rabbit
rabbit_running=$?
is_master $THIS_PCMK_NODE
status_master=$?
ocf_log info "${LH} master attribute is ${status_master}"
if [ $status_master -eq 0 ] && [ $rabbit_running -eq $OCF_SUCCESS ]
then
ocf_log info "${LH} We are the running master"
rc=$OCF_RUNNING_MASTER
# NOTE: the condition below tests the rabbit *app* status (get_status rabbit),
# although the log message talks about the beam runtime.
elif [ $status_master -eq 0 ] && [ $rabbit_running -ne $OCF_SUCCESS ] ; then
ocf_log err "${LH} We are the master and RMQ-runtime (beam) is not running. this is a failure"
exit $OCF_FAILED_MASTER
fi
fi
# Step 2: query the rabbit app status (again), so that rabbit_running is
# also set when the first get_status returned neither NOT_RUNNING nor SUCCESS.
get_status rabbit
rabbit_running=$?
ocf_log info "${LH} checking if rabbit app is running"
if [ $rc -eq $OCF_RUNNING_MASTER ]; then
if [ $rabbit_running -eq $OCF_SUCCESS ]; then
ocf_log info "${LH} rabbit app is running and is master of cluster"
else
ocf_log err "${LH} we are the master and rabbit app is not running. This is a failure"
exit $OCF_FAILED_MASTER
fi
else
# Not the running master: honour a restart order coming from the master.
# NOTE(review): start_time and restart_order_time are not declared local,
# so these assignments are visible outside this function.
start_time=$((180 + $(ocf_get_private_attr 'rabbit-start-phase-1-time' 0)))
restart_order_time=$((60 + $(ocf_get_private_attr 'rabbit-ordered-to-restart' 0)))
nowtime=$(now)
# If we started more than 3 minutes ago, and
# we got order to restart less than 1 minute ago
if [ $nowtime -lt $restart_order_time ]; then
if [ $nowtime -gt $start_time ]; then
ocf_log err "${LH} failing because we have received an order to restart from the master"
stop_server_process
rc=$OCF_ERR_GENERIC
else
ocf_log warn "${LH} received an order to restart from the master, ignoring it because we have just started"
fi
fi
fi
# Generic error, either from get_status or from the restart order above
if [ $rc -eq $OCF_ERR_GENERIC ]; then
ocf_log err "${LH} get_status() returns generic error ${rc}"
ocf_log info "${LH} ensuring this slave does not get promoted."
master_score 0
return $OCF_ERR_GENERIC
fi
# Recounting our master score
ocf_log info "${LH} preparing to update master score for node"
local our_start_time
local new_score
# (node_start_time was already declared local at the top of the function)
local node_start_time
local node_score
our_start_time=$(get_node_start_time $THIS_PCMK_NODE)
if [ $our_start_time -eq 0 ]; then
new_score=$MIN_MASTER_SCORE
else
# Start from the best score and lower it to (other score - 10) for every
# node with a non-zero start time and score that started before us, or at
# the same time as us while already being master.
new_score=$BEST_MASTER_SCORE
for node in $(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE)
do
node_start_time=$(get_node_start_time $node)
node_score=$(get_node_master_score $node)
ocf_log info "${LH} comparing us (start time: $our_start_time, score: $new_score) with $node (start time: $node_start_time, score: $node_score)"
if [ $node_start_time -ne 0 ] && [ $node_score -ne 0 ] && [ $node_start_time -lt $our_start_time ]; then
new_score=$((node_score - 10 < new_score ? node_score - 10 : new_score ))
elif [ $node_start_time -ne 0 ] && [ $node_score -ne 0 ] && [ $node_start_time -eq $our_start_time ]; then
# Do not get promoted if the other node is already master and we have the same start time
if is_master $node; then
new_score=$((node_score - 10 < new_score ? node_score - 10 : new_score ))
fi
fi
done
fi
# Only write the score when it differs from the stored one
if [ "$new_score" -ne "$(get_node_master_score $THIS_PCMK_NODE)" ]; then
master_score $new_score
fi
ocf_log info "${LH} our start time is $our_start_time and score is $new_score"
# Skip all other checks if rabbit app is not running
if [ $rabbit_running -ne $OCF_SUCCESS ]; then
ocf_log info "${LH} RabbitMQ is not running, get_monitor function ready to return ${rc}"
return $rc
fi
# rc can be SUCCESS or RUNNING_MASTER, don't touch it unless there
# is some error uncovered by node_health_check
if ! node_health_check; then
rc=$OCF_ERR_GENERIC
fi
if [ $rc -eq $OCF_RUNNING_MASTER ] ; then
# If we are the master and healthy, perform various
# connectivity checks for other nodes in the cluster.
# Order a member to restart if something fishy happens with it.
# All cross-node checks MUST happen only here.
partitions_report="$(partitions_report)"
for node in $(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE); do
# Restart node if we don't consider ourselves clustered with it
if ! is_clustered_with $node; then
ocf_log warn "${LH} node $node is not connected with us"
order_node_restart "$node"
continue
fi
# Restart node if it has any unresolved partitions
node_partitions=$(grep_partitions_report $node "$partitions_report")
if [ ! -z "$node_partitions" ]; then
ocf_log warn "${LH} Node $node thinks that it is partitoned with $node_partitions"
order_node_restart "$node"
continue
fi
done
fi
ocf_log info "${LH} get_monitor function ready to return ${rc}"
return $rc
}
# Ask the given node to restart by stamping the current time into its
# private 'rabbit-ordered-to-restart' attribute.
#   $1 - target node (mandatory)
# Uses the caller's ${LH} as log prefix.
order_node_restart() {
    local target=${1:?}

    ocf_log warn "${LH} Ordering node '$target' to restart"
    ocf_update_private_attr 'rabbit-ordered-to-restart' "$(now)" "$target"
}
# Extract from a partitions_report() result the partition list reported by
# one particular node.
#   $1 - node name (translated with rabbit_node_name)
#   $2 - report text as produced by partitions_report
# Prints the text following "PARTITIONED <node>: " for that node; prints
# nothing when the node reported no partitions.
grep_partitions_report() {
    local pcmk_node="${1:?}"
    local all_partitions="${2:?}"
    local erl_node

    erl_node=$(rabbit_node_name "$pcmk_node")
    echo "$all_partitions" \
        | grep "PARTITIONED $erl_node:" \
        | sed -e 's/^[^:]\+: //'
}
# Report partitions (if any) from viewpoint of every running node in cluster.
# It is parseable/grepable version of `rabbitmqctl cluster_status`.
#
# If node sees partition, report will contain the line like:
# PARTITIONED node-name: list-of-nodes, which-node-name-considers, itself-partitioned-with
#
# Nodes that report an empty partition list produce no output line.
# The exit status is the one of the $COMMAND_TIMEOUT/xargs pipeline below.
partitions_report() {
# The Erlang code is supplied on stdin via the here-document; xargs -0
# splits its input on NUL bytes only, so the text is handed to
# `${OCF_RESKEY_ctl} eval` without being split on whitespace.
$COMMAND_TIMEOUT xargs -0 ${OCF_RESKEY_ctl} eval <<EOF
RpcTimeout = 10,
Nodes = rabbit_mnesia:cluster_nodes(running),
{Replies, _BadNodes} = gen_server:multi_call(Nodes, rabbit_node_monitor, partitions, RpcTimeout * 1000),
lists:foreach(fun ({_, []}) -> ok;
({Node, Partitions}) ->
PartitionsStr = string:join([atom_to_list(Part) || Part <- Partitions],
", "),
io:format("PARTITIONED ~s: ~s~n",
[Node, PartitionsStr])
end, Replies),
ok.
EOF
}
# Check if the rabbitmqctl control plane is alive.
# Dispatches to node_health_check_local when
# OCF_RESKEY_rmq_feature_health_check equals "true", to
# node_health_check_legacy otherwise, and returns that check's status.
node_health_check() {
    # Keep `rc` declared here before dispatching: a callee that assigns
    # `rc` without its own `local` would otherwise write into the variable
    # of whoever called this function.
    local rc

    case "$OCF_RESKEY_rmq_feature_health_check" in
        true)
            node_health_check_local
            ;;
        *)
            node_health_check_legacy
            ;;
    esac
    rc=$?
    return $rc
}
# Health check based on `rabbitmqctl node_health_check`.
# Returns $OCF_SUCCESS when the check passes or when it timed out but the
# retry limit tracked by check_timeouts is not reached yet;
# $OCF_ERR_GENERIC when the check fails or the timeout limit is reached
# (in the latter case the master score is also set to 0).
node_health_check_local() {
    local LH="${LH} node_health_check_local():"
    local rc
    local rc_timeouts
    # Give node_health_check some time to handle timeout by itself.
    # By using internal rabbitmqctl timeouts, we allow it to print
    # more useful diagnostics
    local timeout=$((TIMEOUT_ARG - 2))

    su_rabbit_cmd "${OCF_RESKEY_ctl} node_health_check -t $timeout"
    rc=$?
    check_timeouts $rc "rabbit_node_health_check_timeouts" "node_health_check"
    rc_timeouts=$?

    case "$rc_timeouts" in
        2)
            master_score 0
            ocf_log info "${LH} node_health_check timed out, retry limit reached"
            return $OCF_ERR_GENERIC
            ;;
        1)
            ocf_log info "${LH} node_health_check timed out, going to retry"
            return $OCF_SUCCESS
            ;;
    esac

    if [ "$rc" -eq 0 ]; then
        return $OCF_SUCCESS
    fi
    ocf_log err "${LH} rabbitmqctl node_health_check exited with errors."
    return $OCF_ERR_GENERIC
}
# Legacy health check, built from several rabbitmqctl calls:
# list_channels, rabbit_alarm:get_alarms(), cluster_status and list_queues.
# Each call goes through check_timeouts, so a single timeout is tolerated
# and only a run of timeouts fails the check.
# Returns $OCF_SUCCESS when nothing is wrong, $OCF_ERR_GENERIC when a
# command exits with an error, the cluster status check fails, a memory
# alarm is raised for this node, or a timeout limit is reached (the master
# score is then set to 0 as well).
# Uses the caller's ${LH} as log prefix.
node_health_check_legacy() {
    # The result is tracked in a local that starts out as success, so the
    # healthy path does not depend on any `rc` set by the callers.
    local rc=$OCF_SUCCESS
    local rc_alive
    local timeout_alive
    # loop helpers of the alarm scan; kept local so they do not clobber
    # variables of the callers
    local node
    local name

    su_rabbit_cmd "${OCF_RESKEY_ctl} list_channels > /dev/null 2>&1"
    rc_alive=$?
    { [ $rc_alive -eq 137 ] || [ $rc_alive -eq 124 ] ; } && ocf_log err "${LH} 'rabbitmqctl list_channels' timed out, per-node explanation: $(enhanced_list_channels)"
    check_timeouts $rc_alive "rabbit_list_channels_timeouts" "list_channels"
    timeout_alive=$?

    if [ $timeout_alive -eq 2 ]; then
        master_score 0
        return $OCF_ERR_GENERIC
    elif [ $timeout_alive -eq 0 ]; then
        if [ $rc_alive -ne 0 ]; then
            ocf_log err "${LH} rabbitmqctl list_channels exited with errors."
            rc=$OCF_ERR_GENERIC
        fi
    fi

    # Check for memory alarms for this Master or Slave node.
    # If alert found, reset the alarm
    # and restart the resource as it likely means a dead end situation
    # when rabbitmq cluster is running with blocked publishing due
    # to high memory watermark exceeded.
    local alarms
    local rc_alarms
    local timeout_alarms
    alarms=`su_rabbit_cmd "${OCF_RESKEY_ctl} -q eval 'rabbit_alarm:get_alarms().'" 2>/dev/null`
    rc_alarms=$?
    check_timeouts $rc_alarms "rabbit_get_alarms_timeouts" "get_alarms"
    timeout_alarms=$?

    if [ $timeout_alarms -eq 2 ]; then
        master_score 0
        return $OCF_ERR_GENERIC
    elif [ $timeout_alarms -eq 0 ]; then
        if [ $rc_alarms -ne 0 ]; then
            ocf_log err "${LH} rabbitmqctl get_alarms exited with errors."
            rc=$OCF_ERR_GENERIC
        elif [ -n "${alarms}" ]; then
            # every whitespace separated chunk of the alarm list is scanned
            # for a memory alarm entry naming a node
            for node in ${alarms}; do
                name=`echo ${node} | perl -n -e "m/memory,'(?<n>\S+)+'/ && print \"$+{n}\n\""`
                if [ "${name}" = "${RABBITMQ_NODENAME}" ] ; then
                    ocf_log err "${LH} Found raised memory alarm. Erasing the alarm and restarting."
                    su_rabbit_cmd "${OCF_RESKEY_ctl} set_vm_memory_high_watermark 10 > /dev/null 2>&1"
                    rc=$OCF_ERR_GENERIC
                    break
                fi
            done
        fi
    fi

    if ! is_cluster_status_ok ; then
        rc=$OCF_ERR_GENERIC
    fi

    # Check if the list of all queues is available,
    # Also report some queues stats and total virtual memory.
    local queues
    local rc_queues
    local timeout_queues
    queues=`su_rabbit_cmd "${OCF_RESKEY_ctl} -q -p ${OCF_RESKEY_default_vhost} list_queues memory messages consumer_utilisation"`
    rc_queues=$?
    check_timeouts $rc_queues "rabbit_list_queues_timeouts" "list_queues"
    timeout_queues=$?

    if [ $timeout_queues -eq 2 ]; then
        master_score 0
        return $OCF_ERR_GENERIC
    elif [ $timeout_queues -eq 0 ]; then
        if [ $rc_queues -ne 0 ]; then
            ocf_log err "${LH} rabbitmqctl list_queues exited with errors."
            rc=$OCF_ERR_GENERIC
        elif [ -n "${queues}" ]; then
            local q_c
            q_c=`printf %b "${queues}\n" | wc -l`
            local mem
            mem=`printf %b "${queues}\n" | awk -v sum=0 '{sum+=$1} END {print (sum/1048576)}'`
            local mes
            mes=`printf %b "${queues}\n" | awk -v sum=0 '{sum+=$2} END {print sum}'`
            local c_u
            c_u=`printf %b "${queues}\n" | awk -v sum=0 -v cnt=${q_c} '{sum+=$3} END {print (sum+1)/(cnt+1)}'`
            local status
            status=`echo $(su_rabbit_cmd "${OCF_RESKEY_ctl} -q status")`
            ocf_log info "${LH} RabbitMQ is running ${q_c} queues consuming ${mem}m of ${TOTALVMEM}m total, with ${mes} queued messages, average consumer utilization ${c_u}"
            ocf_log info "${LH} RabbitMQ status: ${status}"
        fi
    fi

    return $rc
}
# Print the value of a private node attribute (attrd_updater -p).
#   $1 - attribute name (mandatory)
#   $2 - default value (mandatory), printed when the query fails or the
#        attribute has an empty value
#   $3 - node name, defaults to $THIS_PCMK_NODE
ocf_get_private_attr() {
    local attr_name="${1:?}"
    local attr_default_value="${2:?}"
    local nodename="${3:-$THIS_PCMK_NODE}"
    local count

    if count=$(attrd_updater -p --name "$attr_name" --node "$nodename" --query) ; then
        # the third field of the reply carries key="value"; strip the
        # quotes and print what follows the '='
        echo "$count" | awk -vdef_val="$attr_default_value" '{ gsub(/"/, "", $3); split($3, vals, "="); if (vals[2] != "") print vals[2]; else print def_val }'
    else
        echo $attr_default_value
    fi
}
# Set a private node attribute (attrd_updater -p) through ocf_run.
#   $1 - attribute name (mandatory)
#   $2 - new value (mandatory, must not be empty)
#   $3 - node name, defaults to $THIS_PCMK_NODE
ocf_update_private_attr() {
    local key="${1:?}"
    local value="${2:?}"
    local target="${3:-$THIS_PCMK_NODE}"

    ocf_run attrd_updater -p --name "$key" --node "$target" --update "$value"
}
# Run a rabbitmqctl command and interpret its result through
# check_timeouts.
#   $1 - rabbitmqctl command line (mandatory)
#   $2 - name of the private attribute counting timeouts (mandatory)
# Returns the command's own status when it did not time out, 0 when it
# timed out but the timeout is to be ignored, 1 when the timeout limit was
# reached.
rabbitmqctl_with_timeout_check() {
    local command="${1:?}"
    local timeout_attr_name="${2:?}"

    su_rabbit_cmd "${OCF_RESKEY_ctl} $command"
    local rc=$?

    check_timeouts $rc $timeout_attr_name "$command"
    local has_timed_out=$?

    if [ "$has_timed_out" = 0 ]; then
        return $rc
    elif [ "$has_timed_out" = 1 ]; then
        return 0
    elif [ "$has_timed_out" = 2 ]; then
        return 1
    fi
}
# Succeeds when `rabbitmqctl cluster_status`, run through
# rabbitmqctl_with_timeout_check, is considered fine. All output of the
# check is discarded.
is_cluster_status_ok() {
    local LH="${LH}: is_cluster_status_ok:"

    rabbitmqctl_with_timeout_check cluster_status rabbit_cluster_status_timeouts >/dev/null 2>&1
    return $?
}
# OCF "monitor" action: thin wrapper around get_monitor that adds debug
# logging. With OCF_RESKEY_debug enabled, a timestamp, the environment and
# the notification node lists are appended to files under /tmp.
# Returns the status of get_monitor.
action_monitor() {
    local rc=$OCF_ERR_GENERIC
    local LH="${LL} monitor:"

    ocf_log debug "${LH} action start."
    if ocf_is_true "${OCF_RESKEY_debug}"; then
        d=$(date '+%Y%m%d %H:%M:%S')
        {
            echo $d
            env
        } >> /tmp/rmq-monitor.log
        echo "$d [monitor] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
    fi

    get_monitor
    rc=$?

    ocf_log debug "${LH} role: ${OCF_RESKEY_CRM_meta_role}"
    ocf_log debug "${LH} result: $rc"
    ocf_log debug "${LH} action end."
    return $rc
}
# OCF "start" action.
# Returns $OCF_SUCCESS right away when get_status already reports success.
# Otherwise resets the timeout counters, records the phase 1 start time,
# deletes the 'rabbit-start-time' and 'rabbit-master' attributes and runs
# start_rmq_server_app, whose status is returned.
action_start() {
    local rc=$OCF_ERR_GENERIC
    local LH="${LL} start:"
    local nowtime
    local attr_name_to_reset

    if ocf_is_true "${OCF_RESKEY_debug}"; then
        d=$(date '+%Y%m%d %H:%M:%S')
        {
            echo $d
            env
        } >> /tmp/rmq-start.log
        echo "$d [start] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
    fi

    ocf_log info "${LH} action begin."

    get_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ] ; then
        ocf_log warn "${LH} RMQ-runtime (beam) already started."
        return $OCF_SUCCESS
    fi

    # start every consecutive-timeout counter from zero
    for attr_name_to_reset in \
        rabbit_list_channels_timeouts \
        rabbit_get_alarms_timeouts \
        rabbit_list_queues_timeouts \
        rabbit_cluster_status_timeouts \
        rabbit_node_health_check_timeouts
    do
        ocf_update_private_attr $attr_name_to_reset 0
    done

    nowtime=$(now)
    ocf_log info "${LH} Setting phase 1 one start time to $nowtime"
    ocf_update_private_attr 'rabbit-start-phase-1-time' "$nowtime"

    ocf_log info "${LH} Deleting start time attribute"
    ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
    ocf_log info "${LH} Deleting master attribute"
    ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete

    ocf_log info "${LH} RMQ going to start."
    start_rmq_server_app
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ] ; then
        ocf_log info "${LH} RMQ prepared for start succesfully."
    fi

    ocf_log info "${LH} action end."
    return $rc
}
# OCF "stop" action.
# Drops the master and start time attributes, zeroes the master score,
# waits up to half of OCF_RESKEY_stop_time for queue syncing to finish and
# stops the server process.
# Returns $OCF_SUCCESS; when the server process cannot be stopped the
# script EXITS with $OCF_ERR_GENERIC instead of returning.
action_stop() {
    local rc=$OCF_ERR_GENERIC
    local LH="${LL} stop:"

    if ocf_is_true "${OCF_RESKEY_debug}"; then
        d=$(date '+%Y%m%d %H:%M:%S')
        {
            echo $d
            env
        } >> /tmp/rmq-stop.log
        echo "$d [stop] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
    fi

    ocf_log info "${LH} action begin."

    ocf_log info "${LH} Deleting master attribute"
    ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
    master_score 0
    ocf_log info "${LH} Deleting start time attribute"
    ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete

    # Wait for synced state first
    ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync"
    wait_sync $((OCF_RESKEY_stop_time/2))

    ocf_log info "${LH} RMQ-runtime (beam) going to down."
    stop_server_process
    if [ $? -ne $OCF_SUCCESS ] ; then
        ocf_log err "RMQ-runtime (beam) couldn't be stopped and will likely became unmanaged. Take care of it manually!"
        ocf_log info "${LH} action end."
        exit $OCF_ERR_GENERIC
    fi

    ocf_log info "${LH} RMQ-runtime (beam) not running."
    ocf_log info "${LH} action end."
    return $OCF_SUCCESS
}
#######################################################################
# Enhanced list_channels:
# - nodes are processed in parallel
# - report contains information about which nodes timed out
#
# 'list_channels' is used as a health-check for current node, but it
# actually checks overall health of all nodes in cluster. And there were
# some bugs where only one (non-local) channel became stuck, but OCF
# script was wrongfully killing local node.
#
# Hopefully all such bugs are fixed, but if not - it will allow to
# detect such conditions.
#
# Somewhat strange implementation is due to the following reasons:
# - ability to support older versions of RabbitMQ which have reached
# end-of-life with single version of the script
# - zero dependencies - for older versions this functionality could be
# implemented as a plugin, but it'll require this plugin installation
enhanced_list_channels() {
# Runs the Erlang snippet below through "${OCF_RESKEY_ctl} eval": the
# here-document is fed on stdin to xargs, which hands it over as the
# argument of the eval command (all wrapped in su_rabbit_cmd).
# The here-document delimiter is unquoted, so $timeout is substituted by
# the shell before the Erlang code is evaluated.
# The snippet evaluates to ExpandedResult: one {Node, Status, Count}
# tuple per node returned by rabbit_mnesia:cluster_nodes(running), with
# {Node, no_data_collected, 0} for nodes that did not report in time.
# One second less than timeout of su_rabbit_cmd
# (TIMEOUT_ARG is used when set and non-empty, otherwise 5 is assumed)
local timeout=$((${TIMEOUT_ARG:-5} - 1))
su_rabbit_cmd "xargs -0 ${OCF_RESKEY_ctl} eval" <<EOF
SecondsToCompletion = $timeout,
%% Milliseconds since unix epoch
Now = fun() ->
{Mega, Secs, Micro} = os:timestamp(),
Mili = Micro div 1000,
Mili + 1000 * (Secs + 1000000 * Mega)
end,
%% We shouldn't continue execution past this time
ShouldEndAt = Now() + SecondsToCompletion * 1000,
%% How many milliseconds we still have
Timeout = fun() ->
case ShouldEndAt - Now() of
Past when Past =< 0 ->
0;
Timeout ->
Timeout
end
end,
%% Lambda combinator - for defining anonymous recursive functions
Y = fun(F) ->
(fun (X) -> F(fun(Y) -> (X(X))(Y) end) end)(
fun (X) -> F(fun(Y) -> (X(X))(Y) end) end)
end,
Parent = self(),
ListChannels = Y(fun(Rec) ->
fun (({Node, [], OkChannelsCount})) ->
Parent ! {Node, ok, OkChannelsCount};
({Node, [Chan|Rest], OkChannelsCount}) ->
case catch rpc:call(Node, rabbit_channel, info, [Chan], Timeout()) of
Infos when is_list(Infos) ->
Rec({Node, Rest, OkChannelsCount + 1});
{badrpc, {'EXIT', {noproc, _}}} ->
%% Channel became dead before we could request it's status, don't care
Rec({Node, Rest, OkChannelsCount});
Err ->
Parent ! {Node, Err, OkChannelsCount}
end
end
end),
SingleNodeListing = fun(Node) ->
case catch rpc:call(Node, pg_local, get_members, [rabbit_channels], Timeout()) of
LocalChannels when is_list(LocalChannels) ->
ListChannels({Node, LocalChannels, 0});
Err ->
Parent ! {Node, Err, 0}
end
end,
AllNodes = rabbit_mnesia:cluster_nodes(running),
[ spawn(fun() -> SingleNodeListing(Node) end) || Node <- AllNodes ],
WaitForNodes = Y(fun(Rec) ->
fun ({[], Acc}) ->
Acc;
({RemainingNodes, Acc}) ->
receive
{Node, _Status, _ChannelCount} = Smth ->
RemainingNodes1 = lists:delete(Node, RemainingNodes),
Rec({RemainingNodes1, [Smth|Acc]})
after Timeout() + 100 ->
Acc
end
end
end),
Result = WaitForNodes({AllNodes, []}),
ExpandedResult = [ case lists:keysearch(Node, 1, Result) of
{value, NodeResult} ->
NodeResult;
false ->
{Node, no_data_collected, 0}
end || Node <- AllNodes ],
ExpandedResult.
EOF
}
#######################################################################
# Join the cluster through the node given as first argument.
# - Returns OCF_SUCCESS when join_to_cluster succeeds.
# - Returns OCF_ERR_GENERIC when join_to_cluster fails; the mnesia is
#   reset in that case.
# - When no join is attempted (my_host reports 0 for the node, or the
#   destination is empty) the exit status of my_host is returned as is.
#   NOTE(review): this comment used to promise a return value of 10 for
#   that case; nothing in this function produces it - confirm against
#   my_host.
jjj_join () {
    local join_to="$1"
    local rc=$OCF_ERR_GENERIC
    local LH="${LL} jjj_join:"

    my_host ${join_to}
    rc=$?
    ocf_log debug "${LH} node='${join_to}' rc='${rc}'"

    # Nothing to join when the target is ourselves
    # or no master host was given
    if [ $rc -eq 0 ] || [ -z "${join_to}" ] ; then
        return $rc
    fi

    ocf_log info "${LH} Joining to cluster by node '${join_to}'"
    join_to_cluster "${join_to}"
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ] ; then
        return $rc
    fi

    ocf_log err "${LH} Failed to join the cluster. The mnesia will be reset."
    reset_mnesia
    return $OCF_ERR_GENERIC
}
action_notify() {
# Handles cluster notifications. Only notifications whose
# OCF_RESKEY_CRM_meta_notify_type is 'post' are acted upon, and of those
# only the promote, start and stop operations; everything else falls
# through to the final "return $OCF_SUCCESS".
local rc_join=$OCF_SUCCESS
local rc=$OCF_ERR_GENERIC
local rc2=$OCF_ERR_GENERIC
local LH="${LL} notify:"
# NOTE(review): nodelist is declared but not referenced in this function
local nodelist
# With the debug parameter enabled, dump a timestamp, the environment and
# the notify node lists into files under /tmp.
if ocf_is_true "${OCF_RESKEY_debug}"; then
d=`date '+%Y%m%d %H:%M:%S'`
echo $d >> /tmp/rmq-notify.log
env >> /tmp/rmq-notify.log
echo "$d [notify] ${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation} promote='${OCF_RESKEY_CRM_meta_notify_promote_uname}' demote='${OCF_RESKEY_CRM_meta_notify_demote_uname}' master='${OCF_RESKEY_CRM_meta_notify_master_uname}' slave='${OCF_RESKEY_CRM_meta_notify_slave_uname}' start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
fi
if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'post' ] ; then
# POST- anything notify section
case "$OCF_RESKEY_CRM_meta_notify_operation" in
promote)
# post-promote: returns OCF_SUCCESS unless the jjj_join branch below
# is taken, in which case the status of jjj_join is returned.
ocf_log info "${LH} post-promote begin."
rc=$OCF_SUCCESS
# Do nothing, if the list of nodes being promoted reported empty.
# Delegate recovery, if needed, to the "running out of the cluster" monitor's logic
if [ -z "${OCF_RESKEY_CRM_meta_notify_promote_uname}" ] ; then
ocf_log warn "${LH} there are no nodes to join to reported on post-promote. Nothing to do."
elif my_host "${OCF_RESKEY_CRM_meta_notify_promote_uname}"; then
ocf_log info "${LH} ignoring post-promote of self"
elif is_clustered_with "${OCF_RESKEY_CRM_meta_notify_promote_uname}"; then
if get_status rabbit; then
ocf_log info "${LH} we are already clustered with master - ${OCF_RESKEY_CRM_meta_notify_promote_uname}. Nothing to do."
else
ocf_log info "${LH} we are already clustered with master - ${OCF_RESKEY_CRM_meta_notify_promote_uname}. We only need to start the app."
# The result of the app start is only handed to
# update_rabbit_start_time_if_rc; rc (the return value) stays OCF_SUCCESS.
try_to_start_rmq_app
rc2=$?
update_rabbit_start_time_if_rc $rc2
fi
else
# Note, this should fail when the mnesia is inconsistent.
# For example, when the "old" master processing the promition of the new one.
# Later this ex-master node will rejoin the cluster at post-start.
jjj_join "${OCF_RESKEY_CRM_meta_notify_promote_uname}"
rc=$?
if [ $rc -eq $OCF_ERR_GENERIC ] ; then
ocf_log err "${LH} Failed to join the cluster on post-promote. The resource will be restarted."
fi
fi
ocf_log info "${LH} post-promote end."
return $rc
;;
start)
# post-start: returns OCF_ERR_GENERIC only when the join / app start
# result (rc2) equals OCF_ERR_GENERIC; all other paths end in OCF_SUCCESS.
ocf_log info "${LH} post-start begin."
local nodes_list="${OCF_RESKEY_CRM_meta_notify_start_uname} ${OCF_RESKEY_CRM_meta_notify_active_uname}"
# Do nothing, if the list of nodes being started or running reported empty
# Delegate recovery, if needed, to the "running out of the cluster" monitor's logic
if [ -z "${OCF_RESKEY_CRM_meta_notify_start_uname}" ] && [ -z "${OCF_RESKEY_CRM_meta_notify_active_uname}" ] ; then
ocf_log warn "${LH} I'm a last man standing and I must survive!"
ocf_log info "${LH} post-start end."
return $OCF_SUCCESS
fi
# check did this event from this host
my_host "${nodes_list}"
rc=$?
# Do nothing, if there is no master reported
# Delegate recovery, if needed, to the "running out of the cluster" monitor's logic
if [ -z "${OCF_RESKEY_CRM_meta_notify_master_uname}" ] ; then
ocf_log warn "${LH} there are no nodes to join to reported on post-start. Nothing to do."
ocf_log info "${LH} post-start end."
return $OCF_SUCCESS
fi
if [ $rc -eq $OCF_SUCCESS ] ; then
# Now we need to:
# a. join to the cluster if we are not joined yet
# b. start the RabbitMQ application, which is always
# stopped after start action finishes
check_need_join_to ${OCF_RESKEY_CRM_meta_notify_master_uname}
rc_join=$?
if [ $rc_join -eq $OCF_SUCCESS ]; then
ocf_log warn "${LH} Going to join node ${OCF_RESKEY_CRM_meta_notify_master_uname}"
jjj_join "${OCF_RESKEY_CRM_meta_notify_master_uname}"
rc2=$?
else
ocf_log warn "${LH} We are already clustered with node ${OCF_RESKEY_CRM_meta_notify_master_uname}"
try_to_start_rmq_app
rc2=$?
update_rabbit_start_time_if_rc $rc2
fi
# If a non-empty definitions dump file is configured, POST it to the
# /api/definitions endpoint on port 15672 of OCF_RESKEY_host_ip.
# The curl status is only logged; it does not influence the return value.
if [ -s "${OCF_RESKEY_definitions_dump_file}" ] ; then
ocf_log info "File ${OCF_RESKEY_definitions_dump_file} exists"
ocf_run curl --silent --show-error --request POST --user $OCF_RESKEY_admin_user:$OCF_RESKEY_admin_password $OCF_RESKEY_host_ip:15672/api/definitions --header "Content-Type:application/json" --data @$OCF_RESKEY_definitions_dump_file
rc=$?
if [ $rc -eq $OCF_SUCCESS ] ; then
ocf_log info "RMQ definitions have imported succesfully."
else
ocf_log err "RMQ definitions have not imported."
fi
fi
if [ $rc2 -eq $OCF_ERR_GENERIC ] ; then
ocf_log warn "${LH} Failed to join the cluster on post-start. The resource will be restarted."
ocf_log info "${LH} post-start end."
return $OCF_ERR_GENERIC
fi
fi
ocf_log info "${LH} post-start end."
;;
stop)
# if rabbitmq-server stops on any another node, we should remove it from cluster (as ordinary operation)
ocf_log info "${LH} post-stop begin."
# Report not running, if there are no nodes being stopped reported
if [ -z "${OCF_RESKEY_CRM_meta_notify_stop_uname}" ] ; then
ocf_log warn "${LH} there are no nodes being stopped reported on post-stop. The resource will be restarted."
ocf_log info "${LH} post-stop end."
return $OCF_ERR_GENERIC
fi
# my_host returning OCF_SUCCESS means this node is among the nodes
# being stopped.
my_host "${OCF_RESKEY_CRM_meta_notify_stop_uname}"
rc=$?
if [ $rc -ne $OCF_SUCCESS ] ; then
# Wait for synced state first
ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync"
wait_sync $((OCF_RESKEY_stop_time/2))
# On other nodes processing the post-stop, make sure the stopped node will be forgotten
unjoin_nodes_from_cluster "${OCF_RESKEY_CRM_meta_notify_stop_uname}"
else
# On the nodes being stopped, reset the master score
ocf_log info "${LH} resetting the master score."
master_score 0
fi
# always returns OCF_SUCCESS
ocf_log info "${LH} post-stop end."
;;
*) ;;
esac
fi
return $OCF_SUCCESS
}
action_promote() {
    local rc=$OCF_ERR_GENERIC
    local LH="${LL} promote:"

    # With the debug parameter enabled, dump a timestamp, the whole
    # environment and the notify node lists into files under /tmp.
    if ocf_is_true "${OCF_RESKEY_debug}"; then
        d=$(date '+%Y%m%d %H:%M:%S')
        echo $d >> /tmp/rmq-promote.log
        env >> /tmp/rmq-promote.log
        echo "$d [promote] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
    fi

    ocf_log info "${LH} action begin."

    # The current monitor result decides what promotion has to do.
    get_monitor
    rc=$?
    ocf_log info "${LH} get_monitor returns ${rc}"

    case "$rc" in
        "$OCF_SUCCESS")
            # Running as slave. Normal, expected behavior.
            ocf_log info "${LH} Resource is currently running as Slave"

            # rabbitmqctl start_app if need
            get_status rabbit
            rc=$?
            ocf_log info "${LH} Updating cluster master attribute"
            ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --update 'true'

            # The app is already up: marking the node as master was
            # all that had to be done.
            if [ $rc -eq $OCF_SUCCESS ] ; then
                return $rc
            fi

            ocf_log info "${LH} RMQ app is not started. Starting..."
            start_rmq_server_app
            rc=$?
            if [ $rc -ne 0 ] ; then
                ocf_log err "${LH} Can't start RMQ-runtime."
                return $OCF_ERR_GENERIC
            fi

            try_to_start_rmq_app
            rc=$?
            if [ $rc -ne 0 ] ; then
                ocf_log err "${LH} Can't start RMQ app. Master resource is failed."
                ocf_log info "${LH} action end."
                exit $OCF_FAILED_MASTER
            fi

            # Source the policy file, if one is present
            if [ -f "${OCF_RESKEY_policy_file}" ] ; then
                . "${OCF_RESKEY_policy_file}"
            fi
            update_rabbit_start_time_if_rc $rc

            # Promotion only counts when the monitor now reports a
            # running master.
            ocf_log info "${LH} Checking master status"
            get_monitor
            rc=$?
            ocf_log info "${LH} Master status is $rc"
            if [ $rc != $OCF_RUNNING_MASTER ] ; then
                ocf_log err "${LH} Master resource is failed."
                ocf_log info "${LH} action end."
                exit $OCF_FAILED_MASTER
            fi
            return $OCF_SUCCESS
            ;;
        "$OCF_RUNNING_MASTER")
            # Already a master. Unexpected, but not a problem.
            ocf_log warn "${LH} Resource is already running as Master"
            rc=$OCF_SUCCESS
            ;;
        "$OCF_FAILED_MASTER")
            # Master failed.
            ocf_log err "${LH} Master resource is failed and not running"
            ocf_log info "${LH} action end."
            exit $OCF_FAILED_MASTER
            ;;
        "$OCF_NOT_RUNNING")
            # Currently not running.
            ocf_log err "${LH} Resource is currently not running"
            rc=$OCF_NOT_RUNNING
            ;;
        *)
            # Failed resource. Let the cluster manager recover.
            ocf_log err "${LH} Unexpected error, cannot promote"
            ocf_log info "${LH} action end."
            exit $rc
            ;;
    esac

    # transform slave RMQ-server to master
    ocf_log info "${LH} action end."
    return $rc
}
action_demote() {
    local LH="${LL} demote:"

    ocf_log info "${LH} action begin."
    # Demotion only removes the 'rabbit-master' node attribute of this
    # node; the result of the removal does not change the return value.
    ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
    ocf_log info "${LH} action end."

    return $OCF_SUCCESS
}
#######################################################################
# meta-data and usage/help are answered right away, before the
# environment is set up and before any validation takes place.
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

rmq_setup_env

# Anything except meta-data and help must pass validation
action_validate || exit $?

# What kind of method was invoked?
# The exit code of the agent is the status of the selected action.
case "$1" in
    start)        action_start;;
    stop)         action_stop;;
    status)       action_status;;
    monitor)      action_monitor;;
    validate)     action_validate;;
    promote)      action_promote;;
    demote)       action_demote;;
    notify)       action_notify;;
    validate-all) action_validate;;
    *)
        # An action this agent does not implement must not be reported
        # as a success to the cluster manager.
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
###
diff --git a/heartbeat/sapdb.sh b/heartbeat/sapdb.sh
index 66e9854b6..dccd36e17 100755
--- a/heartbeat/sapdb.sh
+++ b/heartbeat/sapdb.sh
@@ -1,367 +1,367 @@
#
# sapdb.sh - for systems having SAPHostAgent installed
# (sourced by SAPDatabase)
#
# Description: This code is separated from the SAPDatabase agent to
# introduce new functions for systems which have
# SAPHostAgent installed.
# Someday it might be merged back into SAPDatabase again.
#
# Author: Alexander Krauth, September 2010
# Support: linux@sap.com
# License: GNU General Public License (GPL)
# Copyright: (c) 2010, 2012 Alexander Krauth
#
#
# background_check_saphostexec : Run a request to saphostexec in a separate task, to be able to react to a hanging process
#
background_check_saphostexec() {
  # 600 polls with a 0.1 second pause in between
  timeout=600
  count=0

  # Fire the request in the background so that a hanging saphostexec
  # cannot block the agent.
  $SAPHOSTCTRL -function ListDatabases >/dev/null 2>&1 &
  pid=$!

  # Poll as long as the background request is still alive
  while kill -0 $pid > /dev/null 2>&1; do
    sleep 0.1
    count=$(( count + 1 ))
    if [ $count -lt $timeout ]; then
      continue
    fi

    # Poll budget used up: kill the request and report the timeout
    kill -9 $pid >/dev/null 2>&1
    ocf_log warn "saphostexec did not respond to the method 'ListDatabases' within 60 seconds"
    return $OCF_ERR_GENERIC # Timeout
  done

  # child has already finished, its returncode becomes our returncode
  wait $pid
}
#
# cleanup_saphostexec : make sure to cleanup the SAPHostAgent in case of any
# misbehavior
#
cleanup_saphostexec() {
  # Kill whatever matches the host agent executables
  pkill -9 -f "$SAPHOSTEXEC"
  pkill -9 -f "$SAPHOSTSRV"

  # saposcol is looked up separately, because it might not run under
  # control of saphostexec
  oscolpid=$(pgrep -f "$SAPHOSTOSCOL")

  if [ -n "$oscolpid" ]; then
    kill -9 $oscolpid

    # cleanup saposcol shared memory, otherwise it will not start again
    oscolipc=$(ipcs -m | grep "4dbe " | awk '{print $2}')
    if [ -n "$oscolipc" ]; then
      ipcrm -m $oscolipc
    fi
  fi

  # The unix domain socket file might have wrong permissions or
  # ownership; remove it - it will be recreated by saphostexec during
  # next start.
  test -r /tmp/.sapstream1128 && rm -f /tmp/.sapstream1128
}
#
# check_saphostexec : Before using saphostctrl we make sure that the
# saphostexec is running on the current node.
#
check_saphostexec() {
  # Returns OCF_SUCCESS when saphostexec answers a ListDatabases request
  # (possibly after having been restarted), OCF_ERR_GENERIC otherwise.
  chkrc=$OCF_SUCCESS
  running=$(pgrep -f "$SAPHOSTEXEC" | wc -l)

  if [ $running -gt 0 ]; then
    if background_check_saphostexec; then
      return $OCF_SUCCESS
    else
      # $? still holds the status of background_check_saphostexec here
      ocf_log warn "saphostexec did not respond to the method 'ListDatabases' correctly (rc=$?), it will be killed now"
      running=0
    fi
  fi

  if [ $running -eq 0 ]; then
    ocf_log warn "saphostexec is not running on node $(hostname), it will be started now"
    cleanup_saphostexec
    output=$($SAPHOSTEXEC -restart 2>&1)

    # now make sure the daemon has been started and is able to respond;
    # keep asking as long as a saphostexec process exists and the last
    # request did not succeed
    srvrc=1
    while [ $srvrc -ne 0 ] && [ "$(pgrep -f "$SAPHOSTEXEC" | wc -l)" -gt 0 ]
    do
      sleep 1
      background_check_saphostexec
      srvrc=$?
    done

    if [ $srvrc -eq 0 ]
    then
      ocf_log info "saphostexec on node $(hostname) was restarted !"
      chkrc=$OCF_SUCCESS
    else
      # "err" is the log level used by the other functions of this file
      ocf_log err "saphostexec on node $(hostname) could not be started! - $output"
      chkrc=$OCF_ERR_GENERIC
    fi
  fi

  return $chkrc
}
#
# sapdatabase_start : Start the SAP database
#
sapdatabase_start() {
  # Starts the database through saphostctrl and verifies the result with
  # a strict monitor run. Returns OCF_SUCCESS when the monitor succeeds,
  # OCF_ERR_GENERIC when it does not, or the status of check_saphostexec
  # when the host agent is not usable.
  check_saphostexec
  rc=$?

  if [ $rc -eq $OCF_SUCCESS ]
  then
    sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT"

    DBINST=""
    if [ -n "$OCF_RESKEY_DBINSTANCE" ]
    then
      DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE "
    fi
    FORCE=""
    if ocf_is_true $OCF_RESKEY_AUTOMATIC_RECOVER
    then
      FORCE="-force"
    fi
    DBOSUSER=""
    if [ -n "$OCF_RESKEY_DBOSUSER" ]
    then
      DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER "
    fi

    output=`$SAPHOSTCTRL -function StartDatabase -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER $FORCE -service`
    # sapdatabase_monitor stores its own saphostctrl output in the global
    # $output, so keep a copy of the start output for the log messages.
    start_output=$output

    sapdatabase_monitor 1
    rc=$?

    if [ $rc -eq 0 ]
    then
      ocf_log info "SAP database $SID started: $start_output"
      rc=$OCF_SUCCESS

      sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT"
    else
      ocf_log err "SAP database $SID start failed: $start_output"
      rc=$OCF_ERR_GENERIC
    fi
  fi

  return $rc
}
#
# sapdatabase_stop: Stop the SAP database
#
sapdatabase_stop() {
  # The stop request is only sent when the host agent is usable;
  # otherwise the status of check_saphostexec is handed back.
  check_saphostexec
  rc=$?

  if [ $rc -eq $OCF_SUCCESS ]; then
    sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT"

    # Optional saphostctrl arguments, empty unless configured
    DBINST=""
    [ -n "$OCF_RESKEY_DBINSTANCE" ] && DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE "
    DBOSUSER=""
    [ -n "$OCF_RESKEY_DBOSUSER" ] && DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER "

    # The exit status of saphostctrl decides between success and failure
    if output=$($SAPHOSTCTRL -function StopDatabase -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER -force -service); then
      ocf_log info "SAP database $SID stopped: $output"
      rc=$OCF_SUCCESS
    else
      ocf_log err "SAP database $SID stop failed: $output"
      rc=$OCF_ERR_GENERIC
    fi
  fi

  # The post-stop user exit runs no matter how the stop went
  sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT"

  return $rc
}
#
# sapdatabase_monitor: Can the given database instance do anything useful?
#
sapdatabase_monitor() {
# $1 (strict): when it is not true only the process based check of
# sapdatabase_status is done; otherwise the database status is requested
# from saphostctrl and the reported services are evaluated.
# Returns the status of sapdatabase_status or check_saphostexec, the
# OCF_NOT_RUNNING state of a monitored service that is not "Running", or
# OCF_ERR_ARGS when none of the reported services is monitored.
strict=$1
rc=$OCF_SUCCESS
if ! ocf_is_true $strict
then
sapdatabase_status
rc=$?
else
check_saphostexec
rc=$?
if [ $rc -eq $OCF_SUCCESS ]
then
# count is set to 1 as soon as one reported service matches
# OCF_RESKEY_MONITOR_SERVICES
count=0
DBINST=""
if [ -n "$OCF_RESKEY_DBINSTANCE" ]
then
DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE "
fi
if [ -n "$OCF_RESKEY_DBOSUSER" ]
then
DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER "
fi
output=`$SAPHOSTCTRL -function GetDatabaseStatus -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER`
# we have to parse the output, because the returncode doesn't tell anything about the instance status
# (the loop runs over the component names extracted from that output)
for SERVICE in `echo "$output" | grep -i 'Component[ ]*Name *[:=] [A-Za-z][A-Za-z0-9_]* (' | sed 's/^.*Component[ ]*Name *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i'`
do
# status text reported on the line(s) of this component
COLOR=`echo "$output" | grep -i "Component[ ]*Name *[:=] *$SERVICE (" | sed 's/^.*Status *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i' | uniq`
STATE=0
# only the status "Running" counts as success
case $COLOR in
Running) STATE=$OCF_SUCCESS;;
*) STATE=$OCF_NOT_RUNNING;;
esac
# escape '+' and '.' in the configured service list before it is used
# as an extended regular expression
SEARCH=`echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'`
- if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ]
+ if [ `echo "$SERVICE" | $EGREP -c "$SEARCH"` -eq 1 ]
then
if [ $STATE -eq $OCF_NOT_RUNNING ]
then
ocf_log err "SAP database service $SERVICE is not running with status $COLOR !"
rc=$STATE
fi
count=1
fi
done
# no monitored service was found in the output at all
if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ]
then
ocf_log err "The resource does not run any services which this RA could monitor!"
rc=$OCF_ERR_ARGS
fi
if [ $rc -ne $OCF_SUCCESS ]
then
ocf_log err "The SAP database $SID is not running: $output"
fi
fi
fi
return $rc
}
#
# sapdatabase_status: Are there any database processes on this host ?
#
sapdatabase_status() {
  # Process based check: counts the processes of the database OS user
  # whose command line matches a DBTYPE specific pattern.
  # Returns OCF_SUCCESS when at least SNUM matching processes exist,
  # OCF_NOT_RUNNING when there are fewer, and OCF_ERR_INSTALLED when no
  # OS user could be determined.
  sid=`echo $SID | tr '[:upper:]' '[:lower:]'`

  # An explicitly configured OS user always wins over the defaults below
  SUSER=${OCF_RESKEY_DBOSUSER:-""}

  case $DBTYPE in
    ADA) SEARCH="$SID/db/pgm/kernel"
         [ -z "$SUSER" ] && SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'`
         SNUM=2
         ;;
    ORA) DBINST=${OCF_RESKEY_DBINSTANCE:-${SID}}
         SEARCH="ora_[a-z][a-z][a-z][a-z]_$DBINST"

         if [ -z "$SUSER" ]; then
           # use every existing user out of "oracle" and "ora<sid>",
           # joined with a comma
           id "oracle" > /dev/null 2> /dev/null && SUSER="oracle"
           id "ora${sid}" > /dev/null 2> /dev/null && SUSER="${SUSER:+${SUSER},}ora${sid}"
         fi

         SNUM=4
         ;;
    DB6) SEARCH="db2[a-z][a-z][a-z]"
         [ -z "$SUSER" ] && SUSER="db2${sid}"
         SNUM=2
         ;;
    SYB) SEARCH="dataserver"
         [ -z "$SUSER" ] && SUSER="syb${sid}"
         SNUM=1
         ;;
    HDB) SEARCH="hdb[a-z]*server"
         [ -z "$SUSER" ] && SUSER="${sid}adm"
         SNUM=1
         ;;
  esac

  [ -z "$SUSER" ] && return $OCF_ERR_INSTALLED

  # $SEARCH contains bracket expressions; it has to be quoted, otherwise
  # the shell replaces it with matching file names from the current
  # directory before grep gets to see it.
  cnt=`ps -u $SUSER -o args 2> /dev/null | grep -v grep | grep -c "$SEARCH"`
  [ $cnt -ge $SNUM ] && return $OCF_SUCCESS
  return $OCF_NOT_RUNNING
}
#
# sapdatabase_recover:
#
sapdatabase_recover() {
  # Switch automatic recovery on for the start that follows, then run a
  # stop followed by a start. The status of the start is the status of
  # this function; the status of the stop is not evaluated.
  OCF_RESKEY_AUTOMATIC_RECOVER=1

  sapdatabase_stop
  sapdatabase_start
}
#
# sapdatabase_validate: Check the semantics of the input parameters
#
sapdatabase_validate() {
  # Returns OCF_SUCCESS when both SID and DBTYPE are acceptable,
  # OCF_ERR_ARGS as soon as one of them is not.
  rc=$OCF_SUCCESS

  # SID: one upper case letter followed by two upper case letters or digits
  if [ $(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$') -ne 1 ]; then
    ocf_log err "Parsing parameter SID: '$SID' is not a valid system ID!"
    rc=$OCF_ERR_ARGS
  fi

  # DBTYPE: one of the database types known to this agent
  case "$DBTYPE" in
    ORA|ADA|DB6|SYB|HDB)
      ;;
    *)
      ocf_log err "Parsing parameter DBTYPE: '$DBTYPE' is not a supported database type!"
      rc=$OCF_ERR_ARGS
      ;;
  esac

  return $rc
}
#
# sapdatabase_init: initialize global variables at the beginning
#
sapdatabase_init() {
  # Automatic recovery is off unless the parameter has been set
  OCF_RESKEY_AUTOMATIC_RECOVER_default=0
  : ${OCF_RESKEY_AUTOMATIC_RECOVER=${OCF_RESKEY_AUTOMATIC_RECOVER_default}}

  # A configured list of monitored services is left untouched
  if [ -n "$OCF_RESKEY_MONITOR_SERVICES" ]; then
    return 0
  fi

  # Otherwise export the default list that belongs to the database type
  case $DBTYPE in
    ORA)
      OCF_RESKEY_MONITOR_SERVICES="Instance|Database|Listener"
      export OCF_RESKEY_MONITOR_SERVICES
      ;;
    ADA)
      OCF_RESKEY_MONITOR_SERVICES="Database"
      export OCF_RESKEY_MONITOR_SERVICES
      ;;
    DB6)
      # the SID itself plus "db2" followed by the lower case SID
      db2sid="db2$(echo $SID | tr '[:upper:]' '[:lower:]')"
      OCF_RESKEY_MONITOR_SERVICES="${SID}|${db2sid}"
      export OCF_RESKEY_MONITOR_SERVICES
      ;;
    SYB)
      OCF_RESKEY_MONITOR_SERVICES="Server"
      export OCF_RESKEY_MONITOR_SERVICES
      ;;
    HDB)
      OCF_RESKEY_MONITOR_SERVICES="hdbindexserver|hdbnameserver"
      export OCF_RESKEY_MONITOR_SERVICES
      ;;
  esac
}
diff --git a/heartbeat/smb-share.in b/heartbeat/smb-share.in
index 8a1a0a860..3e5bf60bf 100755
--- a/heartbeat/smb-share.in
+++ b/heartbeat/smb-share.in
@@ -1,494 +1,494 @@
#!@BASH_SHELL@
#
# OCF Resource Agent for creating samba config snippets.
#
#
#
# Copyright (c) 2021 B1 Systems GmbH <info@b1-systems.de>
# Author:
# Tobias D. Oestreicher <oestreicher@b1-systems.de>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#
#
# OCF parameters are as below:
# OCF_RESKEY_outfile
# OCF_RESKEY_includesfile
# OCF_RESKEY_confd
# OCF_RESKEY_share
# OCF_RESKEY_path
# OCF_RESKEY_hosts_allow
# OCF_RESKEY_browseable
# OCF_RESKEY_writeable
# OCF_RESKEY_read_only
# OCF_RESKEY_guest_ok
# OCF_RESKEY_directory_mask
# OCF_RESKEY_create_mask
# OCF_RESKEY_printable
# OCF_RESKEY_valid_users
# OCF_RESKEY_force_user
# OCF_RESKEY_force_group
# OCF_RESKEY_extraopt
# OCF_RESKEY_extraopt_list
#
#######################################################################
#######################################################################
#
# Purpose:
# --------
# This RA is used to control samba shares on the fly.
# For adding and removing samba shares no restart of the samba daemon
# is needed. This is the equivalent of the exportfs RA which is used
# for nfs shares.
#
# How to use:
# -----------
# For this RA to work as expected you need a cloned samba daemon which
# has to be started before.
# Because this RA manages config snippets placed in the filesystem, and
# after a fence of that node these snippets will still be located there,
# you will have to use a tmpfs mount for this.
# Also you need a basic smb.conf file in which all global parameters and
# permanent shares should be placed.
# Within this smb.conf also you must put a line in the global section
# like this:
#
# include = /etc/samba/conf.d/pacemaker-includes.conf
#
# The filename can be changed by setting the parameter "includesfile".
# Every share created by this RA will create a new file located in:
#
# /etc/samba/conf.d/
#
# This directory also can be changed by setting the RA parameter "confd".
#
#######################################################################
# Initialization: source the OCF shell functions
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Parameter defaults: every default is declared right in front of the
# assignment that applies it when the parameter has not been set.

# Locations of the generated files
OCF_RESKEY_outfile_default=""
: ${OCF_RESKEY_outfile=${OCF_RESKEY_outfile_default}}
OCF_RESKEY_includesfile_default="/etc/samba/conf.d/pacemaker-includes.conf"
: ${OCF_RESKEY_includesfile=${OCF_RESKEY_includesfile_default}}
OCF_RESKEY_confd_default="/etc/samba/conf.d"
: ${OCF_RESKEY_confd=${OCF_RESKEY_confd_default}}

# Share name and exported path
OCF_RESKEY_share_default=""
: ${OCF_RESKEY_share=${OCF_RESKEY_share_default}}
OCF_RESKEY_path_default=""
: ${OCF_RESKEY_path=${OCF_RESKEY_path_default}}

# Share options
OCF_RESKEY_hosts_allow_default=""
: ${OCF_RESKEY_hosts_allow=${OCF_RESKEY_hosts_allow_default}}
OCF_RESKEY_browseable_default=""
: ${OCF_RESKEY_browseable=${OCF_RESKEY_browseable_default}}
OCF_RESKEY_writeable_default=""
: ${OCF_RESKEY_writeable=${OCF_RESKEY_writeable_default}}
OCF_RESKEY_read_only_default=""
: ${OCF_RESKEY_read_only=${OCF_RESKEY_read_only_default}}
OCF_RESKEY_guest_ok_default=""
: ${OCF_RESKEY_guest_ok=${OCF_RESKEY_guest_ok_default}}
OCF_RESKEY_directory_mask_default=""
: ${OCF_RESKEY_directory_mask=${OCF_RESKEY_directory_mask_default}}
OCF_RESKEY_create_mask_default=""
: ${OCF_RESKEY_create_mask=${OCF_RESKEY_create_mask_default}}
OCF_RESKEY_printable_default=""
: ${OCF_RESKEY_printable=${OCF_RESKEY_printable_default}}
OCF_RESKEY_valid_users_default=""
: ${OCF_RESKEY_valid_users=${OCF_RESKEY_valid_users_default}}
OCF_RESKEY_force_user_default=""
: ${OCF_RESKEY_force_user=${OCF_RESKEY_force_user_default}}
OCF_RESKEY_force_group_default=""
: ${OCF_RESKEY_force_group=${OCF_RESKEY_force_group_default}}

# Free-form additional options
OCF_RESKEY_extraopt_default=""
: ${OCF_RESKEY_extraopt=${OCF_RESKEY_extraopt_default}}
OCF_RESKEY_extraopt_list_default=""
: ${OCF_RESKEY_extraopt_list=${OCF_RESKEY_extraopt_list_default}}
#######################################################################
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="smb-share" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This RA is used to control samba shares on the fly.
For adding and removing samba shares no restart of the samba daemon
is needed. This is the equivalent of the exportfs RA which is used
for nfs shares.
For this RA to work as expected you need a cloned samba daemon which
have to be startet before.
After this RA manages config snippets placed in the filesystem and
after a fence of that node these snippets will still located there
you will have to use a tmpfs mount for this.
Also you need a basic smb.conf file in which all global parameters an
permanent shares should be placed.
Within this smb.conf also you must put a line in the global section
like this:
include = /etc/samba/conf.d/pacemaker-includes.conf
The filename can be changed by setting the parameter includesfile.
Every share created by this RA will create a new file located in:
/etc/samba/conf.d/
This directory also can be changed by setting the RA parameter confd.
</longdesc>
<shortdesc lang="en">Manages samba shares on the fly</shortdesc>
<parameters>
<parameter name="share" unique="1" required="1">
<longdesc lang="en">
Set the name of a windows share which should be added to Samba
example name "myshare" resulting in [myshare].
</longdesc>
<shortdesc lang="en">sharename</shortdesc>
<content type="string" default="${OCF_RESKEY_share_default}" />
</parameter>
<parameter name="path" unique="1" required="0">
<longdesc lang="en">
Set the path to share for cifs clients.
example path "/srv/data/myshare".
</longdesc>
<shortdesc lang="en">path to share</shortdesc>
<content type="string" default="${OCF_RESKEY_path_default}" />
</parameter>
<parameter name="hosts_allow" unique="0" required="0">
<longdesc lang="en">
This parameter is a comma, space, or tab delimited set of hosts which are permitted to access a service.
</longdesc>
<shortdesc lang="en">hosts allow parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_hosts_allow_default}" />
</parameter>
<parameter name="browseable" unique="0" required="0">
<longdesc lang="en">
This controls whether this share is seen in the list of available shares in a net view and in the browse list.
</longdesc>
<shortdesc lang="en">browseable parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_browseable_default}" />
</parameter>
<parameter name="writeable" unique="0" required="0">
<longdesc lang="en">
Inverted synonym for read only.
</longdesc>
<shortdesc lang="en">writeable parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_writeable_default}" />
</parameter>
<parameter name="read_only" unique="0" required="0">
<longdesc lang="en">
This option can be used to turn the writing backends tdb, tdb2, and ldap into read only mode.
This can be useful e.g. in cases where a pre-filled database exists that should not be extended automatically.
</longdesc>
<shortdesc lang="en">read only parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_read_only_default}" />
</parameter>
<parameter name="guest_ok" unique="0" required="0">
<longdesc lang="en">
If this parameter is yes for a service, then no password is required to connect to the service. Privileges will be those of the guest account.
</longdesc>
<shortdesc lang="en">guest ok parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_guest_ok_default}" />
</parameter>
<parameter name="directory_mask" unique="0" required="0">
<longdesc lang="en">
This parameter is the octal modes which are used when converting DOS modes to UNIX modes when creating UNIX directories.
</longdesc>
<shortdesc lang="en">directory mask parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_directory_mask_default}" />
</parameter>
<parameter name="create_mask" unique="0" required="0">
<longdesc lang="en">
When a file is created, the necessary permissions are calculated according to the mapping from DOS modes to UNIX permissions,
and the resulting UNIX mode is then bit-wise ANDed with this parameter. This parameter may be thought of as a bit-wise MASK for the UNIX modes of a file.
</longdesc>
<shortdesc lang="en">create mask parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_create_mask_default}" />
</parameter>
<parameter name="printable" unique="0" required="0">
<longdesc lang="en">
If this parameter is yes, then clients may open, write to and submit spool files on the directory specified for the service.
</longdesc>
<shortdesc lang="en">printable parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_printable_default}" />
</parameter>
<parameter name="valid_users" unique="0" required="0">
<longdesc lang="en">
This is a list of users that should be allowed to login to this service. Names starting with @, + and &amp; are interpreted
using the same rules as described in the invalid users parameter.
</longdesc>
<shortdesc lang="en">valid users parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_valid_users_default}" />
</parameter>
<parameter name="force_user" unique="0" required="0">
<longdesc lang="en">
This specifies a UNIX user name that will be assigned as the default user for all users connecting to this service. This is useful for sharing files.
</longdesc>
<shortdesc lang="en">force user parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_force_user_default}" />
</parameter>
<parameter name="force_group" unique="0" required="0">
<longdesc lang="en">
This specifies a UNIX group name that will be assigned as the default primary group for all users connecting to this service.
This is useful for sharing files by ensuring that all access to files on service will use the named group for their permissions checking.
</longdesc>
<shortdesc lang="en">force group parameter</shortdesc>
<content type="string" default="${OCF_RESKEY_force_group_default}" />
</parameter>
<parameter name="extraopt" unique="0" required="0">
<longdesc lang="en">
This option can be used to define an additional key = value pair.
In this parameter also a semicolon could be placed.
Need to set e.g somthinspecial = value
</longdesc>
<shortdesc lang="en">additional key value pair</shortdesc>
<content type="string" default="${OCF_RESKEY_extraopt_default}" />
</parameter>
<parameter name="extraopt_list" unique="0" required="0">
<longdesc lang="en">
This option can be used to define multiple additional key = value pairs.
Define the list of element semicolon separated.
e.g somethingspecial = value;one more = value2
</longdesc>
<shortdesc lang="en">additional key value pairs as semicolon separated list</shortdesc>
<content type="string" default="${OCF_RESKEY_extraopt_list_default}" />
</parameter>
<parameter name="outfile" unique="1" required="1">
<longdesc lang="en">
Set the path and filename where the snipped should be written.
example "/etc/samba/conf.d/myshare.inc".
</longdesc>
<shortdesc lang="en">outputfile</shortdesc>
<content type="string" default="${OCF_RESKEY_outfile_default}" />
</parameter>
<parameter name="confd" unique="0" required="0">
<longdesc lang="en">
Set the path where the includes will be written. This folder have to be a tmpfs mount
This defaults to "/etc/samba/conf.d".
</longdesc>
<shortdesc lang="en">confd directory</shortdesc>
<content type="string" default="${OCF_RESKEY_confd_default}" />
</parameter>
<parameter name="includesfile" unique="0" required="0">
<longdesc lang="en">
Set the path and filename in which the include should be placed.
example includesfile "/etc/samba/conf.d/pacemaker-includes.conf".
This option manages the include= parameter within this file
</longdesc>
<shortdesc lang="en">includesfile for smb.conf</shortdesc>
<content type="string" default="${OCF_RESKEY_includesfile_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="status" depth="0" timeout="20s" interval="10s" />
<action name="monitor" depth="0" timeout="20s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
exit $OCF_SUCCESS
}
# Register the share snippet in the Samba includes file.
#
# Creates ${OCF_RESKEY_includesfile} with a "[global]" header when it does not
# exist yet, then appends "include = ${OCF_RESKEY_outfile}" unless exactly that
# line is already present.
smb_share_addinclude() {
    if [ ! -e "${OCF_RESKEY_includesfile}" ]; then
        echo '[global]' > "${OCF_RESKEY_includesfile}"
    fi
    # -F: the path is literal text, not a regular expression.
    # -x: whole-line match, so "/a/share.inc" is not considered present just
    #     because "/a/share.inc2" is listed.
    if ! grep -qxF -- "include = $OCF_RESKEY_outfile" "${OCF_RESKEY_includesfile}"; then
        echo "include = $OCF_RESKEY_outfile" >> "${OCF_RESKEY_includesfile}"
    fi
}
# Remove the "include = ${OCF_RESKEY_outfile}" line from the includes file.
#
# Only a line naming exactly this snippet is deleted; include lines of other
# snippets whose path merely starts with, or resembles, this path are kept.
smb_share_delinclude() {
    # Backslash-escape every character that is special in a sed address
    # (regex metacharacters and the "/" delimiter) so the path matches literally.
    ESCAPED=$(printf '%s\n' "$OCF_RESKEY_outfile" | sed 's|[][\\/.*^$]|\\&|g')
    # Anchored on both sides (optional surrounding whitespace tolerated).
    sed -i "/^[[:space:]]*include = ${ESCAPED}[[:space:]]*\$/d" "${OCF_RESKEY_includesfile}"
}
# Append one " key = value" line to the share snippet; empty values are skipped.
#   $1 - smb.conf option name
#   $2 - option value
smb_share_write_option() {
    if [ -n "$2" ]; then
        echo " $1 = $2" >> "$OCF_RESKEY_outfile"
    fi
}

# Write the share definition to ${OCF_RESKEY_outfile} and register it in the
# includes file.
#
# The file is truncated and started with the "[share]" section header, followed
# by one line per non-empty OCF_RESKEY_* option, the free-form extraopt line
# and every ";"-separated element of extraopt_list. The output path is quoted
# throughout so that paths containing whitespace work.
smb_share_create() {
    echo "[${OCF_RESKEY_share}]" > "$OCF_RESKEY_outfile"
    smb_share_write_option "path" "$OCF_RESKEY_path"
    smb_share_write_option "hosts allow" "$OCF_RESKEY_hosts_allow"
    smb_share_write_option "browseable" "$OCF_RESKEY_browseable"
    smb_share_write_option "writeable" "$OCF_RESKEY_writeable"
    smb_share_write_option "read only" "$OCF_RESKEY_read_only"
    smb_share_write_option "guest ok" "$OCF_RESKEY_guest_ok"
    smb_share_write_option "directory mask" "$OCF_RESKEY_directory_mask"
    smb_share_write_option "create mask" "$OCF_RESKEY_create_mask"
    smb_share_write_option "printable" "$OCF_RESKEY_printable"
    smb_share_write_option "valid users" "$OCF_RESKEY_valid_users"
    smb_share_write_option "force user" "$OCF_RESKEY_force_user"
    smb_share_write_option "force group" "$OCF_RESKEY_force_group"
    # extraopt already is a complete "key = value" text and is written verbatim.
    if [ -n "$OCF_RESKEY_extraopt" ]; then
        echo " $OCF_RESKEY_extraopt" >> "$OCF_RESKEY_outfile"
    fi
    if [ -n "$OCF_RESKEY_extraopt_list" ]; then
        # Split the list on ";" into an array, one output line per element.
        IFS=';' read -r -a array <<< "$OCF_RESKEY_extraopt_list"
        for i in "${array[@]}"; do
            echo " $i" >> "$OCF_RESKEY_outfile"
        done
    fi
    smb_share_addinclude
}
# Remove the share snippet and its include line.
#
# When ${OCF_RESKEY_outfile} exists it is deleted, its include line is removed
# and the agent terminates with OCF_SUCCESS. When the file does not exist the
# function simply returns to the caller.
smb_share_delete() {
    # Quoted so that a path containing whitespace is tested/removed as one file.
    if [ -e "$OCF_RESKEY_outfile" ]; then
        rm -f "$OCF_RESKEY_outfile" 2>/dev/null
        smb_share_delinclude
        # NOTE(review): this ends the whole agent, so anything a caller runs
        # after smb_share_delete is skipped whenever the snippet existed.
        # Kept as-is to preserve the current exit code; confirm it is intended.
        exit $OCF_SUCCESS
    fi
}
# Ask smbd to re-read its configuration and terminate the agent.
#
# Exits with OCF_SUCCESS when smbcontrol succeeds; otherwise logs an error and
# exits with OCF_ERR_CONFIGURED. This function never returns to its caller.
smb_share_reloadconfig() {
    if ! smbcontrol smbd reload-config 2>/dev/null; then
        ocf_log err "Seems you have an error in your samba configuration"
        exit $OCF_ERR_CONFIGURED
    fi
    exit $OCF_SUCCESS
}
# Verify that the configured conf.d directory shows up as a tmpfs mount.
#
# Counts the lines of /proc/mounts that contain ${OCF_RESKEY_confd} and
# "tmpfs". If there is none, an error naming the configured directory is
# logged and the agent exits with OCF_ERR_CONFIGURED.
smb_share_checktmpmount() {
    # -F/quoting: treat the directory as one literal string, not as a regex
    # or as several words.
    ISMOUNT=$(grep -F -- "$OCF_RESKEY_confd" /proc/mounts | grep -c tmpfs)
    if [ "$ISMOUNT" -eq 0 ]; then
        ocf_log err "The directory $OCF_RESKEY_confd needs to be a tmpfs mount"
        exit $OCF_ERR_CONFIGURED
    fi
}
######################################################################
# Print the list of supported actions on stdout and return OCF_SUCCESS.
smb_share_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|status|monitor|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
    return $OCF_SUCCESS
}
# Start action: write the share snippet and register its include line
# (smb_share_create), then have smbd reload its configuration.
# smb_share_reloadconfig ends the agent itself, so its exit code is the
# result of the start action.
smb_share_start() {
smb_share_create
smb_share_reloadconfig
}
# Stop action: close the share in smbd, remove the snippet and reload smbd.
#
# smb_share_delete terminates the agent with OCF_SUCCESS when the snippet file
# existed, so the reload below is only reached when the file was already gone.
smb_share_stop() {
    # Quoted: a share name containing whitespace must stay one argument.
    smbcontrol smbd close-share "$OCF_RESKEY_share" > /dev/null 2>&1
    smb_share_delete
    smb_share_reloadconfig
}
# Monitor action.
#
# Returns OCF_SUCCESS when smbd answers a ping and the testparm output
# contains exactly one line matching "[<share>]"; returns OCF_NOT_RUNNING in
# every other case.
smb_share_monitor() {
# All output of the ping is discarded, so RES stays empty; only the exit
# status tested on the next line matters.
RES=$(smbcontrol smbd ping > /dev/null 2>&1)
if [ $? -eq 0 ];then
- if [ $(testparm -s 2>/dev/null| egrep -c \\[$OCF_RESKEY_share\\]) -eq 1 ];then
+ if [ $(testparm -s 2>/dev/null| $EGREP -c \\[$OCF_RESKEY_share\\]) -eq 1 ];then
return $OCF_SUCCESS
else
return $OCF_NOT_RUNNING
fi
else
return $OCF_NOT_RUNNING
fi
}
# Status action.
#
# First checks the tmpfs requirement (smb_share_checktmpmount exits the agent
# on failure). Then returns OCF_SUCCESS when smbd answers a ping and the
# testparm output contains exactly one line matching "[<share>]", otherwise
# OCF_NOT_RUNNING. The outcome is logged at info level in both cases.
smb_share_state() {
smb_share_checktmpmount
# All output of the ping is discarded, so RES stays empty; only the exit
# status tested on the next line matters.
RES=$(smbcontrol smbd ping > /dev/null 2>&1)
if [ $? -eq 0 ];then
- if [ $(testparm -s 2>/dev/null| egrep -c \\[$OCF_RESKEY_share\\]) -eq 1 ];then
+ if [ $(testparm -s 2>/dev/null| $EGREP -c \\[$OCF_RESKEY_share\\]) -eq 1 ];then
ocf_log info "Samba share $OCF_RESKEY_share is active"
return $OCF_SUCCESS
else
ocf_log info "Samba share $OCF_RESKEY_share is not active"
return $OCF_NOT_RUNNING
fi
else
ocf_log info "Samba share $OCF_RESKEY_share is not active"
return $OCF_NOT_RUNNING
fi
}
# Validate-all action: performs no checks and always returns OCF_SUCCESS.
smb_share_validate() {
return $OCF_SUCCESS
}
# Informational actions are handled first and always end the agent here.
# Without the explicit exit, "usage"/"help" fell through to the dispatcher
# below, printed the usage text a second time and exited with
# OCF_ERR_UNIMPLEMENTED.
case $__OCF_ACTION in
meta-data)
    meta_data
    exit $OCF_SUCCESS
    ;;
usage|help)
    smb_share_usage
    exit $OCF_SUCCESS
    ;;
esac

# Resource actions; the agent exits with the status of the selected handler.
case $__OCF_ACTION in
start)
    smb_share_start
    ;;
stop)
    smb_share_stop
    ;;
status)
    smb_share_state
    ;;
monitor)
    smb_share_monitor
    ;;
validate-all)
    smb_share_validate
    ;;
*)
    smb_share_usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
exit $?
# vi:sw=4:ts=8:
diff --git a/heartbeat/symlink b/heartbeat/symlink
index decd9f74e..82a667a01 100755
--- a/heartbeat/symlink
+++ b/heartbeat/symlink
@@ -1,245 +1,245 @@
#!/bin/sh
#
#
# An OCF RA that manages a symlink
#
# Copyright (c) 2011 Dominik Klein
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Print the agent's OCF meta-data (XML) on stdout.
# The function only prints; it does not exit or set an explicit return code.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="symlink" version="1.1">
<version>1.0</version>
<longdesc lang="en">
This resource agent that manages a symbolic link (symlink).
It is primarily intended to manage configuration files which should be
enabled or disabled based on where the resource is running, such as
cron job definitions and the like.
</longdesc>
<shortdesc lang="en">Manages a symbolic link</shortdesc>
<parameters>
<parameter name="link" required="1">
<longdesc lang="en">
Full path of the symbolic link to be managed. This must obviously be
in a filesystem that supports symbolic links.
</longdesc>
<shortdesc lang="en">Full path of the symlink</shortdesc>
<content type="string"/>
</parameter>
<parameter name="target" required="1">
<longdesc lang="en">
Full path to the link target (the file or directory which the symlink points to).
</longdesc>
<shortdesc lang="en">Full path to the link target</shortdesc>
<content type="string" />
</parameter>
<parameter name="backup_suffix">
<longdesc lang="en">
A suffix to append to any files that the resource agent moves out of
the way because they clash with "link".
If this is unset (the default), then the resource agent will simply
refuse to create a symlink if it clashes with an existing file.
</longdesc>
<shortdesc lang="en">Suffix to append to backup files</shortdesc>
<content type="string" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="15s" />
<action name="stop" timeout="15s" />
<action name="monitor" depth="0" timeout="15s" interval="60s"/>
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="10s" />
</actions>
</resource-agent>
END
}
# Determine the state of the managed symlink.
#
# Returns OCF_SUCCESS when $OCF_RESKEY_link is a symlink resolving to the same
# path as $OCF_RESKEY_target, and OCF_NOT_RUNNING when the link is absent or
# (with backup_suffix set) is something that start will move out of the way.
# Exits the agent with OCF_ERR_INSTALLED when something else occupies the
# link path and no backup_suffix is set.
symlink_monitor() {
# This applies the following logic:
#
# * If $OCF_RESKEY_link does not exist, then the resource is
#   definitely stopped.
#
# * If $OCF_RESKEY_link exists and is a symlink that points to
#   ${OCF_RESKEY_target}, then the resource is definitely started.
#
# * If $OCF_RESKEY_link exists, but is anything other than a
#   symlink to ${OCF_RESKEY_target}, then the status depends on whether
#   ${OCF_RESKEY_backup_suffix} is set:
#
#   - if ${OCF_RESKEY_backup_suffix} is set, then the resource is
#     simply not running. The existing file will be moved out of
#     the way, to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix},
#     when the resource starts.
#
#   - if ${OCF_RESKEY_backup_suffix} is not set, then an existing
#     file ${OCF_RESKEY_link} is an error condition, and the
#     resource can't start here.
# Default result; every branch below either overwrites it or exits.
rc=$OCF_ERR_GENERIC
# Using ls here instead of "test -e", as "test -e" returns false
# if the file does exist, but is a symlink to a file that doesn't exist.
if ! ls "$OCF_RESKEY_link" >/dev/null 2>&1; then
ocf_log debug "$OCF_RESKEY_link does not exist"
rc=$OCF_NOT_RUNNING
elif [ ! -L "$OCF_RESKEY_link" ]; then
if [ -z "$OCF_RESKEY_backup_suffix" ]; then
ocf_exit_reason "$OCF_RESKEY_link exists but is not a symbolic link!"
exit $OCF_ERR_INSTALLED
else
ocf_log debug "$OCF_RESKEY_link exists but is not a symbolic link, will be moved to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} on start"
rc=$OCF_NOT_RUNNING
fi
# Compare the canonicalized link with the canonicalized target; the target
# path is used as an anchored regular expression here.
- elif readlink -m "$OCF_RESKEY_link" | egrep -q "^$(readlink -m ${OCF_RESKEY_target})$"; then
+ elif readlink -m "$OCF_RESKEY_link" | $EGREP -q "^$(readlink -m ${OCF_RESKEY_target})$"; then
ocf_log debug "$OCF_RESKEY_link exists and is a symbolic link to ${OCF_RESKEY_target}."
rc=$OCF_SUCCESS
else
if [ -z "$OCF_RESKEY_backup_suffix" ]; then
ocf_exit_reason "$OCF_RESKEY_link does not point to ${OCF_RESKEY_target}!"
exit $OCF_ERR_INSTALLED
else
ocf_log debug "$OCF_RESKEY_link does not point to ${OCF_RESKEY_target}, will be moved to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} on start"
rc=$OCF_NOT_RUNNING
fi
fi
return $rc
}
# Start action: make $OCF_RESKEY_link a symlink to $OCF_RESKEY_target.
#
# Returns OCF_SUCCESS right away when the link is already in place. Otherwise
# anything occupying the link path is moved to
# ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} (the agent exits with
# OCF_ERR_GENERIC if no backup_suffix is set or the move fails), the symlink
# is created and the result of a final symlink_monitor run is returned.
symlink_start() {
    if symlink_monitor; then
        return $OCF_SUCCESS
    fi
    # "test -e" follows symlinks and is false for a dangling one, so -L is
    # tested as well; otherwise a stale dangling link would be left in place
    # and the ln below would fail because the path is still occupied.
    if [ -e "$OCF_RESKEY_link" ] || [ -L "$OCF_RESKEY_link" ]; then
        if [ -z "$OCF_RESKEY_backup_suffix" ]; then
            # Shouldn't happen, because symlink_monitor should
            # have errored out. But there is a chance that
            # something else put that file there after
            # symlink_monitor ran.
            ocf_exit_reason "$OCF_RESKEY_link exists and no backup_suffix is set, won't overwrite."
            exit $OCF_ERR_GENERIC
        fi
        ocf_log debug "Found $OCF_RESKEY_link, moving to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}"
        ocf_run mv -v "$OCF_RESKEY_link" "${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" \
            || exit $OCF_ERR_GENERIC
    fi
    ocf_run ln -sv "$OCF_RESKEY_target" "$OCF_RESKEY_link"
    # The monitor result decides whether the start succeeded.
    symlink_monitor
    return $?
}
# Stop action: remove the managed symlink and restore a backup, if any.
#
# Returns OCF_SUCCESS when the link is not active to begin with or was
# removed; returns OCF_ERR_GENERIC when the link is still reported active
# after removal. Exits with OCF_ERR_GENERIC if rm itself fails.
symlink_stop() {
    # Nothing to do when the link is not in place.
    symlink_monitor || return $OCF_SUCCESS

    ocf_run rm -vf "$OCF_RESKEY_link" || exit $OCF_ERR_GENERIC

    if symlink_monitor; then
        ocf_exit_reason "Removing $OCF_RESKEY_link failed."
        return $OCF_ERR_GENERIC
    fi

    saved="${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}"
    if [ -e "$saved" ]; then
        ocf_log debug "Found backup ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}, moving to $OCF_RESKEY_link"
        # A failed restore is only logged as a warning; the stop itself
        # is still reported as successful.
        ocf_run -warn mv "$saved" "$OCF_RESKEY_link"
    fi
    return $OCF_SUCCESS
}
# Validate the agent's parameters.
#
# Exits with OCF_ERR_CONFIGURED when the mandatory "link" or "target"
# parameter is empty; otherwise returns OCF_SUCCESS. A missing target only
# produces a warning, and only outside of probe operations.
symlink_validate_all() {
    if [ -z "${OCF_RESKEY_link}" ]; then
        ocf_exit_reason "Mandatory parameter link is unset"
        exit $OCF_ERR_CONFIGURED
    fi
    if [ -z "${OCF_RESKEY_target}" ]; then
        ocf_exit_reason "Mandatory parameter target is unset"
        exit $OCF_ERR_CONFIGURED
    fi
    # Having a nonexistent target is technically not an error, as
    # symlinks are allowed to point to nonexistent paths. But it
    # still doesn't hurt to warn people if the target does not exist
    # (but only during non-probes).
    # NOTE(review): ocf_is_probe is assumed to be provided by the sourced
    # ocf-shellfuncs library - confirm.
    if ! ocf_is_probe && [ ! -e "${OCF_RESKEY_target}" ]; then
        ocf_log warn "${OCF_RESKEY_target} does not exist!"
    fi
    return $OCF_SUCCESS
}
# Print the list of supported actions on stdout.
symlink_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|monitor|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}
# The agent takes exactly one argument; anything else is a usage error.
[ $# -eq 1 ] || { symlink_usage; exit $OCF_ERR_ARGS; }

# Informational actions need no parameter validation and end the agent here.
case $__OCF_ACTION in
    meta-data) meta_data; exit $OCF_SUCCESS ;;
    usage)     symlink_usage; exit $OCF_SUCCESS ;;
esac

# Everything except usage and meta-data must pass the validate test
symlink_validate_all || exit

# Dispatch the resource action; validate-all has nothing left to do.
case $__OCF_ACTION in
    start)          symlink_start ;;
    stop)           symlink_stop ;;
    status|monitor) symlink_monitor ;;
    validate-all)   ;;
    *)              symlink_usage; exit $OCF_ERR_UNIMPLEMENTED ;;
esac
# exit code is the exit code (return code) of the last command (shell function)

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jan 25, 12:21 PM (12 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1317572
Default Alt Text
(408 KB)

Event Timeline