diff --git a/configure.ac b/configure.ac index 951a05430..bb2bbaded 100644 --- a/configure.ac +++ b/configure.ac @@ -1,1110 +1,1110 @@ dnl dnl autoconf for Agents dnl dnl License: GNU General Public License (GPL) dnl =============================================== dnl Bootstrap dnl =============================================== AC_PREREQ(2.63) dnl Suggested structure: dnl information on the package dnl checks for programs dnl checks for libraries dnl checks for header files dnl checks for types dnl checks for structures dnl checks for compiler characteristics dnl checks for library functions dnl checks for system services AC_INIT([resource-agents], m4_esyscmd([make/git-version-gen .tarball-version]), [developers@clusterlabs.org]) AC_USE_SYSTEM_EXTENSIONS CRM_DTD_VERSION="1.0" AC_CONFIG_AUX_DIR(.) AC_CONFIG_MACRO_DIR([m4]) AC_CANONICAL_HOST dnl Where #defines go (e.g. `AC_CHECK_HEADERS' below) dnl dnl Internal header: include/config.h dnl - Contains ALL defines dnl - include/config.h.in is generated automatically by autoheader dnl - NOT to be included in any header files except lha_internal.h dnl (which is also not to be included in any other header files) dnl dnl External header: include/agent_config.h dnl - Contains a subset of defines checked here dnl - Manually edit include/agent_config.h.in to have configure include new defines dnl - Should not include HAVE_* defines dnl - Safe to include anywhere AM_CONFIG_HEADER(include/config.h include/agent_config.h) ALL_LINGUAS="en fr" AC_ARG_WITH(version, [ --with-version=version Override package version (if you're a packager needing to pretend) ], [ PACKAGE_VERSION="$withval" ]) AC_ARG_WITH(pkg-name, [ --with-pkg-name=name Override package name (if you're a packager needing to pretend) ], [ PACKAGE_NAME="$withval" ]) dnl dnl AM_INIT_AUTOMAKE([1.11.1 foreign dist-bzip2 dist-xz]) dnl AM_INIT_AUTOMAKE([1.10.1 foreign dist-bzip2]) AC_DEFINE_UNQUOTED(AGENTS_VERSION, "$PACKAGE_VERSION", Current agents version) CC_IN_CONFIGURE=yes export CC_IN_CONFIGURE LDD=ldd dnl ======================================================================== dnl Compiler characteristics dnl ======================================================================== # check stolen from gnulib/m4/gnu-make.m4 if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then AC_MSG_ERROR([you don't seem to have GNU make; it is required]) fi AC_PROG_CC dnl Can force other with environment variable "CC". AM_PROG_CC_C_O AC_PROG_CC_STDC AC_PROG_CPP AC_PROG_AWK AC_PROG_LN_S AC_PROG_INSTALL AC_PROG_MAKE_SET AC_C_STRINGIZE AC_C_INLINE AC_TYPE_SIZE_T AC_TYPE_SSIZE_T AC_TYPE_UID_T AC_TYPE_UINT16_T AC_TYPE_UINT8_T AC_TYPE_UINT32_T AC_CHECK_SIZEOF(char) AC_CHECK_SIZEOF(short) AC_CHECK_SIZEOF(int) AC_CHECK_SIZEOF(long) AC_CHECK_SIZEOF(long long) AC_STRUCT_TIMEZONE dnl =============================================== dnl Helpers dnl =============================================== cc_supports_flag() { local CPPFLAGS="$@" AC_MSG_CHECKING(whether $CC supports "$@") AC_PREPROC_IFELSE([AC_LANG_PROGRAM([])], [RC=0; AC_MSG_RESULT([yes])], [RC=1; AC_MSG_RESULT([no])]) return $RC } extract_header_define() { AC_MSG_CHECKING(for $2 in $1) Cfile=$srcdir/extract_define.$2.${$} printf "#include \n" > ${Cfile}.c printf "#include <%s>\n" $1 >> ${Cfile}.c printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" $2 >> ${Cfile}.c $CC $CFLAGS ${Cfile}.c -o ${Cfile} value=`${Cfile}` AC_MSG_RESULT($value) printf $value rm -f ${Cfile}.c ${Cfile} } AC_MSG_NOTICE(Sanitizing prefix: ${prefix}) case $prefix in NONE) prefix=/usr dnl Fix default variables - "prefix" variable if not specified if test "$localstatedir" = "\${prefix}/var"; then localstatedir="/var" fi if test "$sysconfdir" = "\${prefix}/etc"; then sysconfdir="/etc" fi ;; esac # ordering is important, PKG_PROG_PKG_CONFIG is to be invoked before any other PKG_* related stuff PKG_PROG_PKG_CONFIG(0.18) # PKG_CHECK_MODULES will fail if systemd is not found by default, so make sure # we set the proper vars and deal with it PKG_CHECK_MODULES([systemd], [systemd], [HAS_SYSTEMD=yes], [HAS_SYSTEMD=no]) if test "x$HAS_SYSTEMD" = "xyes"; then PKG_CHECK_VAR([SYSTEMD_UNIT_DIR], [systemd], [systemdsystemunitdir]) if test "x$SYSTEMD_UNIT_DIR" = "x"; then AC_MSG_ERROR([Unable to detect systemd unit dir automatically]) fi PKG_CHECK_VAR([SYSTEMD_TMPFILES_DIR], [systemd], [tmpfilesdir]) if test "x$SYSTEMD_TMPFILES_DIR" = "x"; then AC_MSG_ERROR([Unable to detect systemd tmpfiles directory automatically]) fi # sanitize systed vars when using non standard prefix if test "$prefix" != "/usr"; then SYSTEMD_UNIT_DIR="$prefix/$SYSTEMD_UNIT_DIR" AC_SUBST([SYSTEMD_UNIT_DIR]) SYSTEMD_TMPFILES_DIR="$prefix/$SYSTEMD_TMPFILES_DIR" AC_SUBST([SYSTEMD_TMPFILES_DIR]) fi fi AM_CONDITIONAL(HAVE_SYSTEMD, [test "x$HAS_SYSTEMD" = xyes ]) dnl =============================================== dnl Configure Options dnl =============================================== dnl Some systems, like Solaris require a custom package name AC_ARG_WITH(pkgname, [ --with-pkgname=name name for pkg (typically for Solaris) ], [ PKGNAME="$withval" ], [ PKGNAME="LXHAhb" ], ) AC_SUBST(PKGNAME) AC_ARG_ENABLE([ansi], [ --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers. [default=no]]) AC_ARG_ENABLE([fatal-warnings], [ --enable-fatal-warnings very pedantic and fatal warnings for gcc [default=yes]]) INITDIR="" AC_ARG_WITH(initdir, [ --with-initdir=DIR directory for init (rc) scripts [${INITDIR}]], [ INITDIR="$withval" ]) OCF_ROOT_DIR="${prefix}/lib/ocf" AC_ARG_WITH(ocf-root, [ --with-ocf-root=DIR directory for OCF scripts [${OCF_ROOT_DIR}]], [ OCF_ROOT_DIR="$withval" ]) HA_RSCTMPDIR=${localstatedir}/run/resource-agents AC_ARG_WITH(rsctmpdir, [ --with-rsctmpdir=DIR directory for resource agents state files [${HA_RSCTMPDIR}]], [ HA_RSCTMPDIR="$withval" ]) AC_ARG_ENABLE([libnet], [ --enable-libnet Use libnet for ARP based functionality, [default=try]], [enable_libnet="$enableval"], [enable_libnet=try]) BUILD_RGMANAGER=0 BUILD_LINUX_HA=0 RASSET=linux-ha AC_ARG_WITH(ras-set, [ --with-ras-set=SET build/install only linux-ha, rgmanager or all resource-agents [default: linux-ha]], [ RASSET="$withval" ]) if test x$RASSET = xyes || test x$RASSET = xall ; then BUILD_RGMANAGER=1 BUILD_LINUX_HA=1 fi if test x$RASSET = xlinux-ha; then BUILD_LINUX_HA=1 fi if test x$RASSET = xrgmanager; then BUILD_RGMANAGER=1 fi if test $BUILD_LINUX_HA -eq 0 && test $BUILD_RGMANAGER -eq 0; then AC_MSG_ERROR([Are you really sure you want this package?]) exit 1 fi AM_CONDITIONAL(BUILD_LINUX_HA, test $BUILD_LINUX_HA -eq 1) AM_CONDITIONAL(BUILD_RGMANAGER, test $BUILD_RGMANAGER -eq 1) AC_ARG_WITH(compat-habindir, [ --with-compat-habindir use HA_BIN directory with compatibility for the Heartbeat stack [${libexecdir}]], [], [with_compat_habindir=no]) AM_CONDITIONAL(WITH_COMPAT_HABINDIR, test "x$with_compat_habindir" != "xno") dnl =============================================== dnl General Processing dnl =============================================== echo Our Host OS: $host_os/$host AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix}) case $exec_prefix in dnl For consistency with Heartbeat, map NONE->$prefix NONE) exec_prefix=$prefix;; prefix) exec_prefix=$prefix;; esac AC_MSG_NOTICE(Sanitizing INITDIR: ${INITDIR}) case $INITDIR in prefix) INITDIR=$prefix;; "") AC_MSG_CHECKING(which init (rc) directory to use) for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \ /usr/local/etc/rc.d /etc/rc.d do if test -d $initdir then INITDIR=$initdir break fi done if test -z $INITDIR then INITDIR=${sysconfdir}/init.d fi AC_MSG_RESULT($INITDIR);; esac AC_SUBST(INITDIR) if test "${prefix}" = "/usr"; then INITDIRPREFIX="$INITDIR" else INITDIRPREFIX="${prefix}/$INITDIR" fi AC_SUBST(INITDIRPREFIX) AC_MSG_NOTICE(Sanitizing libdir: ${libdir}) case $libdir in dnl For consistency with Heartbeat, map NONE->$prefix *prefix*|NONE) AC_MSG_CHECKING(which lib directory to use) for aDir in lib64 lib do trydir="${exec_prefix}/${aDir}" if test -d ${trydir} then libdir=${trydir} break fi done AC_MSG_RESULT($libdir); ;; esac if test "x$with_compat_habindir" != "xno" ; then libexecdir=${libdir} fi dnl Expand autoconf variables so that we dont end up with '${prefix}' dnl in #defines and python scripts dnl NOTE: Autoconf deliberately leaves them unexpanded to allow dnl make exec_prefix=/foo install dnl No longer being able to do this seems like no great loss to me... eval prefix="`eval echo ${prefix}`" eval exec_prefix="`eval echo ${exec_prefix}`" eval bindir="`eval echo ${bindir}`" eval sbindir="`eval echo ${sbindir}`" eval libexecdir="`eval echo ${libexecdir}`" eval datadir="`eval echo ${datadir}`" eval sysconfdir="`eval echo ${sysconfdir}`" eval sharedstatedir="`eval echo ${sharedstatedir}`" eval localstatedir="`eval echo ${localstatedir}`" eval libdir="`eval echo ${libdir}`" eval includedir="`eval echo ${includedir}`" eval oldincludedir="`eval echo ${oldincludedir}`" eval infodir="`eval echo ${infodir}`" eval mandir="`eval echo ${mandir}`" dnl docdir is a recent addition to autotools eval docdir="`eval echo ${docdir}`" if test "x$docdir" = "x"; then docdir="`eval echo ${datadir}/doc`" fi AC_SUBST(docdir) dnl Home-grown variables eval INITDIR="${INITDIR}" for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \ sharedstatedir localstatedir libdir includedir oldincludedir infodir \ mandir INITDIR docdir do dirname=`eval echo '${'${j}'}'` if test ! -d "$dirname" then AC_MSG_WARN([$j directory ($dirname) does not exist!]) fi done dnl This OS-based decision-making is poor autotools practice; dnl feature-based mechanisms are strongly preferred. dnl dnl So keep this section to a bare minimum; regard as a "necessary evil". REBOOT_OPTIONS="-f" POWEROFF_OPTIONS="-f" case "$host_os" in *bsd*) LIBS="-L/usr/local/lib" CPPFLAGS="$CPPFLAGS -I/usr/local/include" ;; *solaris*) REBOOT_OPTIONS="-n" POWEROFF_OPTIONS="-n" LDFLAGS+=" -lssp -lssp_nonshared" ;; *linux*) AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform) POWEROFF_OPTIONS="-nf" REBOOT_OPTIONS="-nf" ;; darwin*) AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform) LIBS="$LIBS -L${prefix}/lib" CFLAGS="$CFLAGS -I${prefix}/include" ;; esac AC_DEFINE_UNQUOTED(HA_LOG_FACILITY, LOG_DAEMON, Default logging facility) AC_MSG_NOTICE(Host CPU: $host_cpu) case "$host_cpu" in ppc64|powerpc64) case $CFLAGS in *powerpc64*) ;; *) if test "$GCC" = yes; then CFLAGS="$CFLAGS -m64" fi ;; esac esac AC_MSG_CHECKING(which format is needed to print uint64_t) case "$host_cpu" in s390x)U64T="%lu";; *64*) U64T="%lu";; *) U64T="%llu";; esac AC_MSG_RESULT($U64T) AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t) dnl Variables needed for substitution AC_CHECK_HEADERS(heartbeat/glue_config.h) if test "$ac_cv_header_heartbeat_glue_config_h" != "yes"; then enable_libnet=no fi AC_DEFINE_UNQUOTED(OCF_ROOT_DIR,"$OCF_ROOT_DIR", OCF root directory - specified by the OCF standard) AC_SUBST(OCF_ROOT_DIR) GLUE_STATE_DIR=${localstatedir}/run AC_DEFINE_UNQUOTED(GLUE_STATE_DIR,"$GLUE_STATE_DIR", Where to keep state files and sockets) AC_SUBST(GLUE_STATE_DIR) AC_DEFINE_UNQUOTED(HA_VARRUNDIR,"$GLUE_STATE_DIR", Where Heartbeat keeps state files and sockets - old name) HA_VARRUNDIR="$GLUE_STATE_DIR" AC_SUBST(HA_VARRUNDIR) # Expand $prefix eval HA_RSCTMPDIR="`eval echo ${HA_RSCTMPDIR}`" AC_DEFINE_UNQUOTED(HA_RSCTMPDIR,"$HA_RSCTMPDIR", Where Resource agents keep state files) AC_SUBST(HA_RSCTMPDIR) dnl Eventually move out of the heartbeat dir tree and create symlinks when needed HA_VARLIBHBDIR=${localstatedir}/lib/heartbeat AC_DEFINE_UNQUOTED(HA_VARLIBHBDIR,"$HA_VARLIBHBDIR", Whatever this used to mean) AC_SUBST(HA_VARLIBHBDIR) OCF_RA_DIR="${OCF_ROOT_DIR}/resource.d" AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs) AC_SUBST(OCF_RA_DIR) OCF_RA_DIR_PREFIX="$OCF_RA_DIR" AC_SUBST(OCF_RA_DIR_PREFIX) OCF_LIB_DIR="${OCF_ROOT_DIR}/lib" AC_DEFINE_UNQUOTED(OCF_LIB_DIR,"$OCF_LIB_DIR", Location for shared code for OCF RAs) AC_SUBST(OCF_LIB_DIR) OCF_LIB_DIR_PREFIX="$OCF_LIB_DIR" AC_SUBST(OCF_LIB_DIR_PREFIX) dnl =============================================== dnl rgmanager ras bits dnl =============================================== LOGDIR=${localstatedir}/log/cluster CLUSTERDATA=${datadir}/cluster AC_SUBST([LOGDIR]) AC_SUBST([CLUSTERDATA]) dnl =============================================== dnl Program Paths dnl =============================================== PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin" export PATH AC_CHECK_PROGS(MAKE, gmake make) AC_CHECK_PROGS(SHELLCHECK, shellcheck) AM_CONDITIONAL(CI_CHECKS, test "x$SHELLCHECK" != "x" ) AC_PATH_PROGS(BASH_SHELL, bash) if test x"${BASH_SHELL}" = x""; then AC_MSG_ERROR(You need bash installed in order to build ${PACKAGE}) fi AC_PATH_PROGS(XSLTPROC, xsltproc) AM_CONDITIONAL(BUILD_DOC, test "x$XSLTPROC" != "x" ) if test "x$XSLTPROC" = "x"; then AC_MSG_WARN([xsltproc not installed, unable to (re-)build manual pages]) fi AC_SUBST(XSLTPROC) AC_PATH_PROGS(XMLCATALOG, xmlcatalog) AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh) AC_PATH_PROGS(SCP, scp, /usr/bin/scp) AC_PATH_PROGS(TAR, tar) AC_PATH_PROGS(MD5, md5) AC_PATH_PROGS(TEST, test) AC_PATH_PROGS(PING, ping, /bin/ping) AC_PATH_PROGS(IFCONFIG, ifconfig, /sbin/ifconfig) AC_PATH_PROGS(MAILCMD, mailx mail, mail) -AC_PATH_PROGS(EGREP, egrep) AC_PATH_PROGS(RM, rm) +AC_PROG_EGREP +AC_PROG_FGREP AC_SUBST(BASH_SHELL) AC_SUBST(MAILCMD) -AC_SUBST(EGREP) AC_SUBST(SHELL) AC_SUBST(PING) AC_SUBST(RM) AC_SUBST(TEST) AM_PATH_PYTHON([3.6]) if test -z "$PYTHON"; then echo "*** Essential program python not found" 1>&2 exit 1 fi dnl Ensure PYTHON is an absolute path AC_PATH_PROG([PYTHON], [$PYTHON]) AM_PATH_PYTHON if test -z "$PYTHON"; then echo "*** Essential program python not found" 1>&2 fi AC_PYTHON_MODULE(json) AC_PYTHON_MODULE(pyroute2) AC_PYTHON_MODULE(requests) AC_PYTHON_MODULE(urllib3) AC_PYTHON_MODULE(ibm_cloud_fail_over) AS_VERSION_COMPARE([$PYTHON_VERSION], [3.6], [BUILD_OCF_PY=0], [BUILD_OCF_PY=1], [BUILD_OCF_PY=1]) BUILD_AZURE_EVENTS=1 if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then BUILD_AZURE_EVENTS=0 AC_MSG_WARN("Not building azure-events") fi AM_CONDITIONAL(BUILD_AZURE_EVENTS, test $BUILD_AZURE_EVENTS -eq 1) BUILD_AZURE_EVENTS_AZ=1 if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then BUILD_AZURE_EVENTS_AZ=0 AC_MSG_WARN("Not building azure-events-az") fi AM_CONDITIONAL(BUILD_AZURE_EVENTS_AZ, test $BUILD_AZURE_EVENTS_AZ -eq 1) BUILD_GCP_PD_MOVE=1 if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then BUILD_GCP_PD_MOVE=0 AC_MSG_WARN("Not building gcp-pd-move") fi AM_CONDITIONAL(BUILD_GCP_PD_MOVE, test $BUILD_GCP_PD_MOVE -eq 1) BUILD_GCP_VPC_MOVE_ROUTE=1 if test -z "$PYTHON" || test "x${HAVE_PYMOD_PYROUTE2}" != xyes || test $BUILD_OCF_PY -eq 0; then BUILD_GCP_VPC_MOVE_ROUTE=0 AC_MSG_WARN("Not building gcp-vpc-move-route") fi AM_CONDITIONAL(BUILD_GCP_VPC_MOVE_ROUTE, test $BUILD_GCP_VPC_MOVE_ROUTE -eq 1) BUILD_GCP_VPC_MOVE_VIP=1 if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then BUILD_GCP_VPC_MOVE_VIP=0 AC_MSG_WARN("Not building gcp-vpc-move-vip") fi AM_CONDITIONAL(BUILD_GCP_VPC_MOVE_VIP, test $BUILD_GCP_VPC_MOVE_VIP -eq 1) BUILD_POWERVS_SUBNET=1 if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0 || test "x${HAVE_PYMOD_REQUESTS}" != xyes || test "x${HAVE_PYMOD_URLLIB3}" != xyes; then BUILD_POWERVS_SUBNET=0 AC_MSG_WARN("Not building powervs-subnet") fi AM_CONDITIONAL(BUILD_POWERVS_SUBNET, test $BUILD_POWERVS_SUBNET -eq 1) BUILD_IBM_CLOUD_VPC_MOVE_ROUTE=1 if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0 || test "x${HAVE_PYMOD_IBM_CLOUD_FAIL_OVER}" != xyes; then BUILD_IBM_CLOUD_VPC_MOVE_ROUTE=0 AC_MSG_WARN("Not building ibm-cloud-vpc-cr-vip") fi AM_CONDITIONAL(BUILD_IBM_CLOUD_VPC_MOVE_ROUTE, test $BUILD_IBM_CLOUD_VPC_MOVE_ROUTE -eq 1) AC_PATH_PROGS(ROUTE, route) AC_DEFINE_UNQUOTED(ROUTE, "$ROUTE", path to route command) AC_MSG_CHECKING(ifconfig option to list interfaces) for IFCONFIG_A_OPT in "-A" "-a" "" do $IFCONFIG $IFCONFIG_A_OPT > /dev/null 2>&1 if test "$?" = 0 then AC_DEFINE_UNQUOTED(IFCONFIG_A_OPT, "$IFCONFIG_A_OPT", option for ifconfig command) AC_MSG_RESULT($IFCONFIG_A_OPT) break fi done AC_SUBST(IFCONFIG_A_OPT) if test x"${MAKE}" = x""; then AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE}) fi STYLESHEET_PREFIX="" if test x"${XSLTPROC}" != x""; then AC_MSG_CHECKING(docbook to manpage transform) # first try to figure out correct template using xmlcatalog query, # resort to extensive (semi-deterministic) file search if that fails DOCBOOK_XSL_URI='http://docbook.sourceforge.net/release/xsl/current' DOCBOOK_XSL_PATH='manpages/docbook.xsl' STYLESHEET_PREFIX=$(${XMLCATALOG} "" ${DOCBOOK_XSL_URI} \ | sed -n 's|^file://||p;q') if test x"${STYLESHEET_PREFIX}" = x""; then DIRS=$(find "${datadir}" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \ -type d | LC_ALL=C sort) if test x"${DIRS}" = x""; then # when datadir is not standard OS path, we cannot find docbook.xsl # use standard OS path as backup DIRS=$(find "/usr/share" "/usr/local/share" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \ -type d | LC_ALL=C sort) fi XSLT=$(basename ${DOCBOOK_XSL_PATH}) for d in ${DIRS}; do if test -f "${d}/${XSLT}"; then STYLESHEET_PREFIX=$(echo "${d}" | sed 's/\/manpages//') break fi done fi if test x"${STYLESHEET_PREFIX}" = x""; then AC_MSG_ERROR(You need docbook-style-xsl installed in order to build ${PACKAGE}) fi fi AC_MSG_RESULT($STYLESHEET_PREFIX) AC_SUBST(STYLESHEET_PREFIX) dnl =============================================== dnl Libraries dnl =============================================== AC_CHECK_LIB(socket, socket) AC_CHECK_LIB(gnugetopt, getopt_long) dnl if available if test "x${enable_thread_safe}" = "xyes"; then GPKGNAME="gthread-2.0" else GPKGNAME="glib-2.0" fi PKG_CHECK_MODULES([GLIB], [$GPKGNAME]) CPPFLAGS="$CPPFLAGS $GLIB_CFLAGS" LIBS="$LIBS $GLIB_LIBS" PKG_CHECK_MODULES([LIBQB], "libqb") dnl ======================================================================== dnl Headers dnl ======================================================================== AC_HEADER_STDC AC_CHECK_HEADERS(sys/socket.h) AC_CHECK_HEADERS(sys/sockio.h) AC_CHECK_HEADERS([arpa/inet.h]) AC_CHECK_HEADERS([fcntl.h]) AC_CHECK_HEADERS([limits.h]) AC_CHECK_HEADERS([malloc.h]) AC_CHECK_HEADERS([netdb.h]) AC_CHECK_HEADERS([netinet/in.h]) AC_CHECK_HEADERS([sys/file.h]) AC_CHECK_HEADERS([sys/ioctl.h]) AC_CHECK_HEADERS([sys/param.h]) AC_CHECK_HEADERS([sys/time.h]) AC_CHECK_HEADERS([syslog.h]) dnl ======================================================================== dnl Functions dnl ======================================================================== AC_FUNC_FORK AC_FUNC_STRNLEN AC_CHECK_FUNCS([alarm gettimeofday inet_ntoa memset mkdir socket uname]) AC_CHECK_FUNCS([strcasecmp strchr strdup strerror strrchr strspn strstr strtol strtoul]) AC_PATH_PROGS(REBOOT, reboot, /sbin/reboot) AC_SUBST(REBOOT) AC_SUBST(REBOOT_OPTIONS) AC_DEFINE_UNQUOTED(REBOOT, "$REBOOT", path to the reboot command) AC_DEFINE_UNQUOTED(REBOOT_OPTIONS, "$REBOOT_OPTIONS", reboot options) AC_PATH_PROGS(POWEROFF_CMD, poweroff, /sbin/poweroff) AC_SUBST(POWEROFF_CMD) AC_SUBST(POWEROFF_OPTIONS) AC_DEFINE_UNQUOTED(POWEROFF_CMD, "$POWEROFF_CMD", path to the poweroff command) AC_DEFINE_UNQUOTED(POWEROFF_OPTIONS, "$POWEROFF_OPTIONS", poweroff options) AC_PATH_PROGS(POD2MAN, pod2man) AM_CONDITIONAL(BUILD_POD_DOC, test "x$POD2MAN" != "x" ) if test "x$POD2MAN" = "x"; then AC_MSG_WARN([pod2man not installed, unable to (re-)build ldirector manual page]) fi AC_SUBST(POD2MAN) dnl ======================================================================== dnl Functions dnl ======================================================================== AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT, 1, [Have getopt function])) dnl ======================================================================== dnl sfex dnl ======================================================================== build_sfex=no case $host_os in *Linux*|*linux*) if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then build_sfex=yes fi ;; esac AM_CONDITIONAL(BUILD_SFEX, test "$build_sfex" = "yes" ) dnl ======================================================================== dnl tickle (needs port to BSD platforms) dnl ======================================================================== AC_CHECK_MEMBERS([struct iphdr.saddr],,,[[#include ]]) AM_CONDITIONAL(BUILD_TICKLE, test "$ac_cv_member_struct_iphdr_saddr" = "yes" ) dnl ======================================================================== dnl libnet dnl ======================================================================== libnet="" libnet_version="none" LIBNETLIBS="" LIBNETDEFINES="" AC_MSG_CHECKING(if libnet is required) libnet_fatal=$enable_libnet case $enable_libnet in no) ;; yes|libnet10|libnet11|10|11) libnet_fatal=yes;; try) case $host_os in *Linux*|*linux*) libnet_fatal=no;; *) libnet_fatal=yes;; dnl legacy behavior esac ;; *) libnet_fatal=yes; enable_libnet=try;; esac AC_MSG_RESULT($libnet_fatal) if test "x$enable_libnet" != "xno"; then AC_PATH_PROGS(LIBNETCONFIG, libnet-config) AC_CHECK_LIB(nsl, t_open) dnl -lnsl AC_CHECK_LIB(socket, socket) dnl -lsocket AC_CHECK_LIB(net, libnet_get_hwaddr, LIBNETLIBS=" -lnet", []) fi AC_MSG_CHECKING(for libnet) if test "x$LIBNETLIBS" != "x" -o "x$enable_libnet" = "xlibnet11"; then LIBNETDEFINES="" if test "$ac_cv_lib_nsl_t_open" = yes; then LIBNETLIBS="-lnsl $LIBNETLIBS" fi if test "$ac_cv_lib_socket_socket" = yes; then LIBNETLIBS="-lsocket $LIBNETLIBS" fi libnet=net libnet_version="libnet1.1" fi if test "x$enable_libnet" = "xtry" -o "x$enable_libnet" = "xlibnet10"; then if test "x$LIBNETLIBS" = x -a "x${LIBNETCONFIG}" != "x" ; then LIBNETDEFINES="`$LIBNETCONFIG --defines` `$LIBNETCONFIG --cflags`"; LIBNETLIBS="`$LIBNETCONFIG --libs`"; libnet_version="libnet1.0 (old)" case $LIBNETLIBS in *-l*) libnet=`echo $LIBNETLIBS | sed 's%.*-l%%'`;; *) libnet_version=none;; esac CPPFLAGS="$CPPFLAGS $LIBNETDEFINES" AC_CHECK_HEADERS(libnet.h) if test "$ac_cv_header_libnet_h" = no; then libnet_version=none fi fi fi AC_MSG_RESULT(found $libnet_version) if test "$libnet_version" = none; then LIBNETLIBS="" LIBNETDEFINES="" if test $libnet_fatal = yes; then AC_MSG_ERROR(libnet not found) fi else AC_CHECK_LIB($libnet,libnet_init, [new_libnet=yes; AC_DEFINE(HAVE_LIBNET_1_1_API, 1, Libnet 1.1 API)], [new_libnet=no; AC_DEFINE(HAVE_LIBNET_1_0_API, 1, Libnet 1.0 API)],$LIBNETLIBS) AC_SUBST(LIBNETLIBS) fi if test "$new_libnet" = yes; then AC_MSG_CHECKING(for libnet API 1.1.4: ) save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -fgnu89-inline -Wall -Werror" AC_COMPILE_IFELSE([ AC_LANG_SOURCE(#include int main(){libnet_t *l=NULL; libnet_pblock_record_ip_offset(l, l->total_size); return(0); })], [AC_MSG_RESULT(no)], [AC_DEFINE(HAVE_LIBNET_1_1_4_API, 1, Libnet 1.1.4 API) AC_MSG_RESULT(yes)]) CFLAGS="$save_CFLAGS" fi sendarp_linux=0 case $host_os in *Linux*|*linux*) sendarp_linux=1;; esac redhat_based=0 AC_CHECK_FILE(/etc/redhat-release, [redhat_based=1]) AC_SUBST(LIBNETLIBS) AC_SUBST(LIBNETDEFINES) AM_CONDITIONAL(SENDARP_LINUX, test $sendarp_linux = 1 ) AM_CONDITIONAL(USE_LIBNET, test "x$libnet_version" != "xnone" ) AM_CONDITIONAL(NFSCONVERT, test $redhat_based = 1 ) dnl ************************************************************************ dnl * Check for netinet/icmp6.h to enable the IPv6addr resource agent AC_CHECK_HEADERS(netinet/icmp6.h,[],[],[#include ]) AM_CONDITIONAL(USE_IPV6ADDR_AGENT, test "$ac_cv_header_netinet_icmp6_h" = yes && test "$ac_cv_header_heartbeat_glue_config_h" = yes) AM_CONDITIONAL(IPV6ADDR_COMPATIBLE, test "$ac_cv_header_netinet_icmp6_h" = yes) dnl ======================================================================== dnl Compiler flags dnl ======================================================================== dnl Make sure that CFLAGS is not exported. If the user did dnl not have CFLAGS in their environment then this should have dnl no effect. However if CFLAGS was exported from the user's dnl environment, then the new CFLAGS will also be exported dnl to sub processes. CC_ERRORS="" CC_EXTRAS="" if export -p | fgrep " CFLAGS=" > /dev/null; then SAVED_CFLAGS="$CFLAGS" unset CFLAGS CFLAGS="$SAVED_CFLAGS" unset SAVED_CFLAGS fi if test "$GCC" != yes; then CFLAGS="$CFLAGS -g" enable_fatal_warnings=no else CFLAGS="$CFLAGS -ggdb3" # We had to eliminate -Wnested-externs because of libtool changes # Also remove -Waggregate-return because we use one libnet # call which returns a struct EXTRA_FLAGS="-fgnu89-inline -fstack-protector-all -Wall -Wbad-function-cast -Wcast-qual -Wdeclaration-after-statement -Wendif-labels -Wfloat-equal -Wformat=2 -Wformat-security -Wformat-nonliteral -Winline -Wmissing-prototypes -Wmissing-declarations -Wmissing-format-attribute -Wnested-externs -Wno-long-long -Wno-strict-aliasing -Wpointer-arith -Wstrict-prototypes -Wunsigned-char -Wwrite-strings -Wno-maybe-uninitialized" # Additional warnings it might be nice to enable one day # -Wshadow # -Wunreachable-code for j in $EXTRA_FLAGS do if cc_supports_flag $j then CC_EXTRAS="$CC_EXTRAS $j" fi done dnl In lib/ais/Makefile.am there's a gcc option available as of v4.x GCC_MAJOR=`gcc -v 2>&1 | awk 'END{print $3}' | sed 's/[.].*//'` AM_CONDITIONAL(GCC_4, test "${GCC_MAJOR}" = 4) dnl System specific options case "$host_os" in *linux*|*bsd*) if test "${enable_fatal_warnings}" = "unknown"; then enable_fatal_warnings=yes fi ;; esac if test "x${enable_fatal_warnings}" != xno && cc_supports_flag -Werror ; then enable_fatal_warnings=yes else enable_fatal_warnings=no fi if test "x${enable_ansi}" = xyes && cc_supports_flag -std=iso9899:199409; then AC_MSG_NOTICE(Enabling ANSI Compatibility) CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY" fi AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS}) fi CFLAGS="$CFLAGS $CC_EXTRAS" NON_FATAL_CFLAGS="$CFLAGS" AC_SUBST(NON_FATAL_CFLAGS) dnl dnl We reset CFLAGS to include our warnings *after* all function dnl checking goes on, so that our warning flags don't keep the dnl AC_*FUNCS() calls above from working. In particular, -Werror will dnl *always* cause us troubles if we set it before here. dnl dnl if test "x${enable_fatal_warnings}" = xyes ; then AC_MSG_NOTICE(Enabling Fatal Warnings) CFLAGS="$CFLAGS -Werror" fi AC_SUBST(CFLAGS) dnl This is useful for use in Makefiles that need to remove one specific flag CFLAGS_COPY="$CFLAGS" AC_SUBST(CFLAGS_COPY) AC_SUBST(LOCALE) AC_SUBST(CC) AC_SUBST(MAKE) dnl The Makefiles and shell scripts we output AC_CONFIG_FILES(Makefile \ resource-agents.pc \ include/Makefile \ heartbeat/Makefile \ heartbeat/ocf-binaries \ heartbeat/ocf-directories \ heartbeat/ocf-shellfuncs \ heartbeat/shellfuncs \ systemd/Makefile \ systemd/resource-agents.conf \ tools/Makefile \ tools/nfsconvert \ tools/ocf-tester \ tools/ocft/Makefile \ tools/ocft/ocft \ tools/ocft/caselib \ tools/ocft/README \ tools/ocft/README.zh_CN \ ldirectord/Makefile \ ldirectord/ldirectord \ ldirectord/init.d/Makefile \ ldirectord/init.d/ldirectord \ ldirectord/init.d/ldirectord.debian \ ldirectord/init.d/ldirectord.debian.default \ ldirectord/systemd/Makefile \ ldirectord/systemd/ldirectord.service \ ldirectord/logrotate.d/Makefile \ ldirectord/OCF/Makefile \ ldirectord/OCF/ldirectord \ doc/Makefile \ doc/man/Makefile \ rgmanager/Makefile \ rgmanager/src/Makefile \ rgmanager/src/resources/Makefile \ rgmanager/src/resources/ocf-shellfuncs \ rgmanager/src/resources/svclib_nfslock \ rgmanager/src/resources/lvm_by_lv.sh \ rgmanager/src/resources/lvm_by_vg.sh \ rgmanager/src/resources/utils/Makefile \ rgmanager/src/resources/utils/fs-lib.sh \ rgmanager/src/resources/utils/messages.sh \ rgmanager/src/resources/utils/config-utils.sh \ rgmanager/src/resources/utils/member_util.sh \ rgmanager/src/resources/utils/ra-skelet.sh \ ) dnl Files we output that need to be executable AC_CONFIG_FILES([heartbeat/azure-events], [chmod +x heartbeat/azure-events]) AC_CONFIG_FILES([heartbeat/azure-events-az], [chmod +x heartbeat/azure-events-az]) AC_CONFIG_FILES([heartbeat/AoEtarget], [chmod +x heartbeat/AoEtarget]) AC_CONFIG_FILES([heartbeat/ManageRAID], [chmod +x heartbeat/ManageRAID]) AC_CONFIG_FILES([heartbeat/ManageVE], [chmod +x heartbeat/ManageVE]) AC_CONFIG_FILES([heartbeat/Squid], [chmod +x heartbeat/Squid]) AC_CONFIG_FILES([heartbeat/SysInfo], [chmod +x heartbeat/SysInfo]) AC_CONFIG_FILES([heartbeat/aws-vpc-route53], [chmod +x heartbeat/aws-vpc-route53]) AC_CONFIG_FILES([heartbeat/clvm], [chmod +x heartbeat/clvm]) AC_CONFIG_FILES([heartbeat/conntrackd], [chmod +x heartbeat/conntrackd]) AC_CONFIG_FILES([heartbeat/dnsupdate], [chmod +x heartbeat/dnsupdate]) AC_CONFIG_FILES([heartbeat/dummypy], [chmod +x heartbeat/dummypy]) AC_CONFIG_FILES([heartbeat/eDir88], [chmod +x heartbeat/eDir88]) AC_CONFIG_FILES([heartbeat/fio], [chmod +x heartbeat/fio]) AC_CONFIG_FILES([heartbeat/galera], [chmod +x heartbeat/galera]) AC_CONFIG_FILES([heartbeat/gcp-pd-move], [chmod +x heartbeat/gcp-pd-move]) AC_CONFIG_FILES([heartbeat/gcp-vpc-move-ip], [chmod +x heartbeat/gcp-vpc-move-ip]) AC_CONFIG_FILES([heartbeat/gcp-vpc-move-vip], [chmod +x heartbeat/gcp-vpc-move-vip]) AC_CONFIG_FILES([heartbeat/gcp-vpc-move-route], [chmod +x heartbeat/gcp-vpc-move-route]) AC_CONFIG_FILES([heartbeat/ibm-cloud-vpc-cr-vip], [chmod +x heartbeat/ibm-cloud-vpc-cr-vip]) AC_CONFIG_FILES([heartbeat/iSCSILogicalUnit], [chmod +x heartbeat/iSCSILogicalUnit]) AC_CONFIG_FILES([heartbeat/iSCSITarget], [chmod +x heartbeat/iSCSITarget]) AC_CONFIG_FILES([heartbeat/jira], [chmod +x heartbeat/jira]) AC_CONFIG_FILES([heartbeat/kamailio], [chmod +x heartbeat/kamailio]) AC_CONFIG_FILES([heartbeat/lxc], [chmod +x heartbeat/lxc]) AC_CONFIG_FILES([heartbeat/lxd-info], [chmod +x heartbeat/lxd-info]) AC_CONFIG_FILES([heartbeat/machine-info], [chmod +x heartbeat/machine-info]) AC_CONFIG_FILES([heartbeat/mariadb], [chmod +x heartbeat/mariadb]) AC_CONFIG_FILES([heartbeat/mpathpersist], [chmod +x heartbeat/mpathpersist]) AC_CONFIG_FILES([heartbeat/nfsnotify], [chmod +x heartbeat/nfsnotify]) AC_CONFIG_FILES([heartbeat/openstack-info], [chmod +x heartbeat/openstack-info]) AC_CONFIG_FILES([heartbeat/powervs-subnet], [chmod +x heartbeat/powervs-subnet]) AC_CONFIG_FILES([heartbeat/rabbitmq-cluster], [chmod +x heartbeat/rabbitmq-cluster]) AC_CONFIG_FILES([heartbeat/redis], [chmod +x heartbeat/redis]) AC_CONFIG_FILES([heartbeat/rsyslog], [chmod +x heartbeat/rsyslog]) AC_CONFIG_FILES([heartbeat/smb-share], [chmod +x heartbeat/smb-share]) AC_CONFIG_FILES([heartbeat/sg_persist], [chmod +x heartbeat/sg_persist]) AC_CONFIG_FILES([heartbeat/slapd], [chmod +x heartbeat/slapd]) AC_CONFIG_FILES([heartbeat/storage-mon], [chmod +x heartbeat/storage-mon]) AC_CONFIG_FILES([heartbeat/sybaseASE], [chmod +x heartbeat/sybaseASE]) AC_CONFIG_FILES([heartbeat/syslog-ng], [chmod +x heartbeat/syslog-ng]) AC_CONFIG_FILES([heartbeat/vsftpd], [chmod +x heartbeat/vsftpd]) AC_CONFIG_FILES([heartbeat/CTDB], [chmod +x heartbeat/CTDB]) AC_CONFIG_FILES([rgmanager/src/resources/ASEHAagent.sh], [chmod +x rgmanager/src/resources/ASEHAagent.sh]) AC_CONFIG_FILES([rgmanager/src/resources/apache.sh], [chmod +x rgmanager/src/resources/apache.sh]) AC_CONFIG_FILES([rgmanager/src/resources/bind-mount.sh], [chmod +x rgmanager/src/resources/bind-mount.sh]) AC_CONFIG_FILES([rgmanager/src/resources/clusterfs.sh], [chmod +x rgmanager/src/resources/clusterfs.sh]) AC_CONFIG_FILES([rgmanager/src/resources/db2.sh], [chmod +x rgmanager/src/resources/db2.sh]) AC_CONFIG_FILES([rgmanager/src/resources/drbd.sh], [chmod +x rgmanager/src/resources/drbd.sh]) AC_CONFIG_FILES([rgmanager/src/resources/fs.sh], [chmod +x rgmanager/src/resources/fs.sh]) AC_CONFIG_FILES([rgmanager/src/resources/ip.sh], [chmod +x rgmanager/src/resources/ip.sh]) AC_CONFIG_FILES([rgmanager/src/resources/lvm.sh], [chmod +x rgmanager/src/resources/lvm.sh]) AC_CONFIG_FILES([rgmanager/src/resources/mysql.sh], [chmod +x rgmanager/src/resources/mysql.sh]) AC_CONFIG_FILES([rgmanager/src/resources/named.sh], [chmod +x rgmanager/src/resources/named.sh]) AC_CONFIG_FILES([rgmanager/src/resources/netfs.sh], [chmod +x rgmanager/src/resources/netfs.sh]) AC_CONFIG_FILES([rgmanager/src/resources/nfsclient.sh], [chmod +x rgmanager/src/resources/nfsclient.sh]) AC_CONFIG_FILES([rgmanager/src/resources/nfsexport.sh], [chmod +x rgmanager/src/resources/nfsexport.sh]) AC_CONFIG_FILES([rgmanager/src/resources/nfsserver.sh], [chmod +x rgmanager/src/resources/nfsserver.sh]) AC_CONFIG_FILES([rgmanager/src/resources/openldap.sh], [chmod +x rgmanager/src/resources/openldap.sh]) AC_CONFIG_FILES([rgmanager/src/resources/oracledb.sh], [chmod +x rgmanager/src/resources/oracledb.sh]) AC_CONFIG_FILES([rgmanager/src/resources/oradg.sh], [chmod +x rgmanager/src/resources/oradg.sh]) AC_CONFIG_FILES([rgmanager/src/resources/orainstance.sh], [chmod +x rgmanager/src/resources/orainstance.sh]) AC_CONFIG_FILES([rgmanager/src/resources/oralistener.sh], [chmod +x rgmanager/src/resources/oralistener.sh]) AC_CONFIG_FILES([rgmanager/src/resources/postgres-8.sh], [chmod +x rgmanager/src/resources/postgres-8.sh]) AC_CONFIG_FILES([rgmanager/src/resources/samba.sh], [chmod +x rgmanager/src/resources/samba.sh]) AC_CONFIG_FILES([rgmanager/src/resources/script.sh], [chmod +x rgmanager/src/resources/script.sh]) AC_CONFIG_FILES([rgmanager/src/resources/service.sh], [chmod +x rgmanager/src/resources/service.sh]) AC_CONFIG_FILES([rgmanager/src/resources/smb.sh], [chmod +x rgmanager/src/resources/smb.sh]) AC_CONFIG_FILES([rgmanager/src/resources/tomcat-5.sh], [chmod +x rgmanager/src/resources/tomcat-5.sh]) AC_CONFIG_FILES([rgmanager/src/resources/tomcat-6.sh], [chmod +x rgmanager/src/resources/tomcat-6.sh]) AC_CONFIG_FILES([rgmanager/src/resources/vm.sh], [chmod +x rgmanager/src/resources/vm.sh]) dnl Now process the entire list of files added by previous dnl calls to AC_CONFIG_FILES() AC_OUTPUT() dnl ***************** dnl Configure summary dnl ***************** AC_MSG_RESULT([]) AC_MSG_RESULT([$PACKAGE configuration:]) AC_MSG_RESULT([ Version = ${VERSION}]) AC_MSG_RESULT([ Build Version = $Format:%H$]) AC_MSG_RESULT([]) AC_MSG_RESULT([ Prefix = ${prefix}]) AC_MSG_RESULT([ Executables = ${sbindir}]) AC_MSG_RESULT([ Man pages = ${mandir}]) AC_MSG_RESULT([ Libraries = ${libdir}]) AC_MSG_RESULT([ Header files = ${includedir}]) AC_MSG_RESULT([ Arch-independent files = ${datadir}]) AC_MSG_RESULT([ Documentation = ${docdir}]) AC_MSG_RESULT([ State information = ${localstatedir}]) AC_MSG_RESULT([ System configuration = ${sysconfdir}]) AC_MSG_RESULT([ HA_BIN directory prefix = ${libexecdir}]) AC_MSG_RESULT([ RA state files = ${HA_RSCTMPDIR}]) AC_MSG_RESULT([ AIS Plugins = ${LCRSODIR}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ CPPFLAGS = ${CPPFLAGS}]) AC_MSG_RESULT([ CFLAGS = ${CFLAGS}]) AC_MSG_RESULT([ Libraries = ${LIBS}]) AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}]) diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 index 27cae2d11..489826b81 100755 --- a/heartbeat/IPaddr2 +++ b/heartbeat/IPaddr2 @@ -1,1386 +1,1386 @@ #!/bin/sh # # $Id: IPaddr2.in,v 1.24 2006/08/09 13:01:54 lars Exp $ # # OCF Resource Agent compliant IPaddr2 script. # # Based on work by Tuomo Soini, ported to the OCF RA API by Lars # Marowsky-Brée. Implements Cluster Alias IP functionality too. # # Cluster Alias IP cleanup, fixes and testing by Michael Schwartzkopff # # # Copyright (c) 2003 Tuomo Soini # Copyright (c) 2004-2006 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # TODO: # - There ought to be an ocf_run_cmd function which does all logging, # timeout handling etc for us # - Make this the standard IP address agent on Linux; the other # platforms simply should ignore the additional parameters OR can use # the legacy heartbeat resource script... # - Check LVS <-> clusterip incompatibilities. # # OCF parameters are as below # OCF_RESKEY_ip # OCF_RESKEY_broadcast # OCF_RESKEY_nic # OCF_RESKEY_cidr_netmask # OCF_RESKEY_iflabel # OCF_RESKEY_mac # OCF_RESKEY_clusterip_hash # OCF_RESKEY_arp_interval # OCF_RESKEY_arp_count # OCF_RESKEY_arp_bg # OCF_RESKEY_preferred_lft # # OCF_RESKEY_CRM_meta_clone # OCF_RESKEY_CRM_meta_clone_max ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/findif.sh # Defaults OCF_RESKEY_ip_default="" OCF_RESKEY_cidr_netmask_default="" OCF_RESKEY_broadcast_default="" OCF_RESKEY_proto_default="" OCF_RESKEY_iflabel_default="" OCF_RESKEY_lvs_support_default=false OCF_RESKEY_lvs_ipv6_addrlabel_default=true OCF_RESKEY_lvs_ipv6_addrlabel_value_default=99 OCF_RESKEY_clusterip_hash_default="sourceip-sourceport" OCF_RESKEY_mac_default="" OCF_RESKEY_unique_clone_address_default=false OCF_RESKEY_arp_interval_default=200 OCF_RESKEY_arp_count_default=5 OCF_RESKEY_arp_count_refresh_default=0 OCF_RESKEY_arp_bg_default="" OCF_RESKEY_arp_sender_default="" OCF_RESKEY_send_arp_opts_default="" OCF_RESKEY_flush_routes_default="false" OCF_RESKEY_run_arping_default=false OCF_RESKEY_nodad_default=false OCF_RESKEY_noprefixroute_default="false" OCF_RESKEY_preferred_lft_default="forever" OCF_RESKEY_network_namespace_default="" : ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} : ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}} : ${OCF_RESKEY_broadcast=${OCF_RESKEY_broadcast_default}} : ${OCF_RESKEY_proto=${OCF_RESKEY_proto_default}} : ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}} : ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} : ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}} : ${OCF_RESKEY_lvs_ipv6_addrlabel_value=${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}} : ${OCF_RESKEY_clusterip_hash=${OCF_RESKEY_clusterip_hash_default}} : ${OCF_RESKEY_mac=${OCF_RESKEY_mac_default}} : ${OCF_RESKEY_unique_clone_address=${OCF_RESKEY_unique_clone_address_default}} : ${OCF_RESKEY_arp_interval=${OCF_RESKEY_arp_interval_default}} : ${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default}} : ${OCF_RESKEY_arp_count_refresh=${OCF_RESKEY_arp_count_refresh_default}} : ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} : ${OCF_RESKEY_arp_sender=${OCF_RESKEY_arp_sender_default}} : ${OCF_RESKEY_send_arp_opts=${OCF_RESKEY_send_arp_opts_default}} : ${OCF_RESKEY_flush_routes=${OCF_RESKEY_flush_routes_default}} : ${OCF_RESKEY_run_arping=${OCF_RESKEY_run_arping_default}} : ${OCF_RESKEY_nodad=${OCF_RESKEY_nodad_default}} : ${OCF_RESKEY_noprefixroute=${OCF_RESKEY_noprefixroute_default}} : ${OCF_RESKEY_preferred_lft=${OCF_RESKEY_preferred_lft_default}} : ${OCF_RESKEY_network_namespace=${OCF_RESKEY_network_namespace_default}} ####################################################################### [ -z "$OCF_RESKEY_proto" ] && proto="" || proto="proto $OCF_RESKEY_proto" SENDARP=$HA_BIN/send_arp SENDUA=$HA_BIN/send_ua FINDIF=findif VLDIR=$HA_RSCTMP SENDARPPIDDIR=$HA_RSCTMP CIP_lockfile=$HA_RSCTMP/IPaddr2-CIP-${OCF_RESKEY_ip} IPADDR2_CIP_IPTABLES=$IPTABLES ####################################################################### meta_data() { cat < 1.0 This Linux-specific resource manages IP alias IP addresses. It can add an IP alias, or remove one. In addition, it can implement Cluster Alias IP functionality if invoked as a clone resource. If used as a clone, "shared address with a trivial, stateless (autonomous) load-balancing/mutual exclusion on ingress" mode gets applied (as opposed to "assume resource uniqueness" mode otherwise). For that, Linux firewall (kernel and userspace) is assumed, and since recent distributions are ambivalent in plain "iptables" command to particular back-end resolution, "iptables-legacy" (when present) gets prioritized so as to avoid incompatibilities (note that respective ipt_CLUSTERIP firewall extension in use here is, at the same time, marked deprecated, yet said "legacy" layer can make it workable, literally, to this day) with "netfilter" one (as in "iptables-nft"). In that case, you should explicitly set clone-node-max >= 2, and/or clone-max < number of nodes. In case of node failure, clone instances need to be re-allocated on surviving nodes. This would not be possible if there is already an instance on those nodes, and clone-node-max=1 (which is the default). When the specified IP address gets assigned to a respective interface, the resource agent sends unsolicited ARP (Address Resolution Protocol, IPv4) or NA (Neighbor Advertisement, IPv6) packets to inform neighboring machines about the change. This functionality is controlled for both IPv4 and IPv6 by shared 'arp_*' parameters. Manages virtual IPv4 and IPv6 addresses (Linux specific version) The IPv4 (dotted quad notation) or IPv6 address (colon hexadecimal notation) example IPv4 "192.168.1.1". example IPv6 "2001:db8:DC28:0:0:FC57:D4C8:1FFF". IPv4 or IPv6 address The base network interface on which the IP address will be brought online. If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. If you want a label, see the iflabel parameter. Prerequisite: There must be at least one static IP address, which is not managed by the cluster, assigned to the network interface. If you can not assign any static IP address on the interface, modify this kernel parameter: sysctl -w net.ipv4.conf.all.promote_secondaries=1 # (or per device) Network interface The netmask for the interface in CIDR format (e.g., 24 and not 255.255.255.0) If unspecified, the script will also try to determine this from the routing table. CIDR netmask Broadcast address associated with the IP. It is possible to use the special symbols '+' and '-' instead of the broadcast address. In this case, the broadcast address is derived by setting/resetting the host bits of the interface prefix. Broadcast address Proto to match when finding network. E.g. "kernel". Proto You can specify an additional label for your IP address here. This label is appended to your interface name. The kernel allows alphanumeric labels up to a maximum length of 15 characters including the interface name and colon (e.g. eth0:foobar1234) A label can be specified in nic parameter but it is deprecated. If a label is specified in nic name, this parameter has no effect. Interface label Enable support for LVS Direct Routing configurations. In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Notes for IPv6: It is not necessary to enable this option on IPv6. Instead, enable 'lvs_ipv6_addrlabel' option for LVS-DR usage on IPv6. Enable support for LVS DR Enable adding IPv6 address label so IPv6 traffic originating from the address's interface does not use this address as the source. This is necessary for LVS-DR health checks to realservers to work. Without it, the most recently added IPv6 address (probably the address added by IPaddr2) will be used as the source address for IPv6 traffic from that interface and since that address exists on loopback on the realservers, the realserver response to pings/connections will never leave its loopback. See RFC3484 for the detail of the source address selection. See also 'lvs_ipv6_addrlabel_value' parameter. Enable adding IPv6 address label. Specify IPv6 address label value used when 'lvs_ipv6_addrlabel' is enabled. The value should be an unused label in the policy table which is shown by 'ip addrlabel list' command. You would rarely need to change this parameter. IPv6 address label value. Set the interface MAC address explicitly. Currently only used in case of the Cluster IP Alias. Leave empty to chose automatically. Cluster IP MAC address Specify the hashing algorithm used for the Cluster IP functionality. Cluster IP hashing function If true, add the clone ID to the supplied value of IP to create a unique address to manage Create a unique address for cloned instances Specify the interval between unsolicited ARP (IPv4) or NA (IPv6) packets in milliseconds. This parameter is deprecated and used for the backward compatibility only. It is effective only for the send_arp binary which is built with libnet, and send_ua for IPv6. It has no effect for other arp_sender. ARP/NA packet interval in ms (deprecated) Number of unsolicited ARP (IPv4) or NA (IPv6) packets to send at resource initialization. ARP/NA packet count sent during initialization For IPv4, number of unsolicited ARP packets to send during resource monitoring. Doing so helps mitigate issues of stuck ARP caches resulting from split-brain situations. ARP packet count sent during monitoring Whether or not to send the ARP (IPv4) or NA (IPv6) packets in the background. The default is true for IPv4 and false for IPv6. ARP/NA from background For IPv4, the program to send ARP packets with on start. Available options are: - send_arp: default - ipoibarping: default for infiniband interfaces if ipoibarping is available - iputils_arping: use arping in iputils package - libnet_arping: use another variant of arping based on libnet ARP sender For IPv4, extra options to pass to the arp_sender program. Available options are vary depending on which arp_sender is used. A typical use case is specifying '-A' for iputils_arping to use ARP REPLY instead of ARP REQUEST as Gratuitous ARPs. Options for ARP sender Flush the routing table on stop. This is for applications which use the cluster IP address and which run on the same physical host that the IP address lives on. The Linux kernel may force that application to take a shortcut to the local loopback interface, instead of the interface the address is really bound to. Under those circumstances, an application may, somewhat unexpectedly, continue to use connections for some time even after the IP address is deconfigured. Set this parameter in order to immediately disable said shortcut when the IP address goes away. Flush kernel routing table on stop For IPv4, whether or not to run arping for collision detection check. Run arping for IPv4 collision detection check For IPv6, do not perform Duplicate Address Detection when adding the address. Use nodad flag Use noprefixroute flag (see 'man ip-address'). Use noprefixroute flag For IPv6, set the preferred lifetime of the IP address. This can be used to ensure that the created IP address will not be used as a source address for routing. Expects a value as specified in section 5.5.4 of RFC 4862. IPv6 preferred lifetime Specifies the network namespace to operate within. The namespace must already exist, and the interface to be used must be within the namespace. Network namespace to use END exit $OCF_SUCCESS } ip_init() { local rc if [ X`uname -s` != "XLinux" ]; then ocf_exit_reason "IPaddr2 only supported Linux." exit $OCF_ERR_INSTALLED fi if [ X"$OCF_RESKEY_ip" = "X" ] && [ "$__OCF_ACTION" != "stop" ]; then ocf_exit_reason "IP address (the ip parameter) is mandatory" exit $OCF_ERR_CONFIGURED fi if case $__OCF_ACTION in start|stop) ocf_is_root;; *) true;; esac then : YAY! else ocf_exit_reason "You must be root for $__OCF_ACTION operation." exit $OCF_ERR_PERM fi BASEIP="$OCF_RESKEY_ip" BRDCAST="$OCF_RESKEY_broadcast" NIC="$OCF_RESKEY_nic" # Note: We had a version out there for a while which used # netmask instead of cidr_netmask. Don't remove this aliasing code! if [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ] then OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask export OCF_RESKEY_cidr_netmask fi NETMASK="$OCF_RESKEY_cidr_netmask" IFLABEL="$OCF_RESKEY_iflabel" IF_MAC="$OCF_RESKEY_mac" IP_INC_GLOBAL=${OCF_RESKEY_CRM_meta_clone_max:-1} IP_INC_NO=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + 1` if ocf_is_true ${OCF_RESKEY_lvs_support} && [ $IP_INC_GLOBAL -gt 1 ]; then ocf_exit_reason "LVS and load sharing do not go together well" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$IP_INC_GLOBAL" && [ $IP_INC_GLOBAL -gt 0 ]; then : else ocf_exit_reason "Invalid meta-attribute clone_max [$IP_INC_GLOBAL], should be positive integer" exit $OCF_ERR_CONFIGURED fi echo $OCF_RESKEY_ip | grep -qs ":" if [ $? -ne 0 ];then FAMILY=inet if [ -z "$OCF_RESKEY_arp_bg" ]; then OCF_RESKEY_arp_bg=true fi else FAMILY=inet6 # address sanitization defined in RFC5952 SANITIZED_IP=$($IP2UTIL route get $OCF_RESKEY_ip 2> /dev/null | awk '$1~/:/ {print $1} $2~/:/ {print $2}') if [ -n "$SANITIZED_IP" ]; then OCF_RESKEY_ip="$SANITIZED_IP" fi if ocf_is_true $OCF_RESKEY_lvs_support ;then ocf_exit_reason "The IPv6 does not support lvs_support" exit $OCF_ERR_CONFIGURED fi if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then if ocf_is_decimal "$OCF_RESKEY_lvs_ipv6_addrlabel_value" && [ $OCF_RESKEY_lvs_ipv6_addrlabel_value -ge 0 ]; then : else ocf_exit_reason "Invalid lvs_ipv6_addrlabel_value [$OCF_RESKEY_lvs_ipv6_addrlabel_value], should be positive integer" exit $OCF_ERR_CONFIGURED fi fi if [ -z "$OCF_RESKEY_arp_bg" ]; then OCF_RESKEY_arp_bg=false fi fi # support nic:iflabel format in nic parameter case $NIC in *:*) IFLABEL=`echo $NIC | sed 's/[^:]*://'` NIC=`echo $NIC | sed 's/:.*//'` # only the base name should be passed to findif OCF_RESKEY_nic=$NIC ;; esac # $FINDIF takes its parameters from the environment # NICINFO=`$FINDIF` rc=$? if [ $rc -eq 0 ] then NICINFO=`echo "$NICINFO" | sed -e 's/netmask\ //;s/broadcast\ //;s/metric\ //'` NIC=`echo "$NICINFO" | cut -d" " -f1` NETMASK=`echo "$NICINFO" | cut -d" " -f2` BRDCAST=`echo "$NICINFO" | cut -d" " -f3` METRIC=`echo "$NICINFO" | cut -d" " -f4` else # findif couldn't find the interface if ocf_is_probe; then ocf_log info "[$FINDIF] failed" exit $OCF_NOT_RUNNING elif [ "$__OCF_ACTION" = stop ]; then ocf_log warn "[$FINDIF] failed" exit $OCF_SUCCESS else ocf_exit_reason "[$FINDIF] failed" exit $rc fi fi SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" if [ -n "$IFLABEL" ]; then IFLABEL=${NIC}:${IFLABEL} if [ ${#IFLABEL} -gt 15 ]; then ocf_exit_reason "Interface label [$IFLABEL] exceeds maximum character limit of 15" exit $OCF_ERR_CONFIGURED fi fi if [ "$IP_INC_GLOBAL" -gt 1 ] && ! ocf_is_true "$OCF_RESKEY_unique_clone_address"; then IP_CIP="yes" IP_CIP_HASH="${OCF_RESKEY_clusterip_hash}" if [ -z "$IF_MAC" ]; then # Choose a MAC # 1. Concatenate some input together # 2. This doesn't need to be a cryptographically # secure hash. # 3. Drop everything after the first 6 octets (12 chars) # 4. Delimit the octets with ':' # 5. Make sure the first octet is odd, # so the result is a multicast MAC IF_MAC=`echo $OCF_RESKEY_ip $NETMASK $BRDCAST | \ md5sum | \ sed -e 's#\(............\).*#\1#' \ -e 's#..#&:#g; s#:$##' \ -e 's#^\(.\)[02468aAcCeE]#\11#'` fi IP_CIP_FILE="/proc/net/ipt_CLUSTERIP/$OCF_RESKEY_ip" fi } # # Find out which interfaces serve the given IP address and netmask. # The arguments are an IP address and a netmask. # Its output are interface names devided by spaces (e.g., "eth0 eth1"). # find_interface() { local ipaddr="$1" local netmask="$2" # # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces # local iface="`$IP2UTIL -o -f $FAMILY addr show \ | grep " $ipaddr/$netmask" \ | cut -d ' ' -f2 \ | grep -v '^ipsec[0-9][0-9]*$'`" echo "$iface" return 0 } # # Delete an interface # delete_interface () { ipaddr="$1" iface="$2" netmask="$3" CMD="$IP2UTIL -f $FAMILY addr delete $ipaddr/$netmask dev $iface" ocf_run $CMD || return $OCF_ERR_GENERIC if ocf_is_true $OCF_RESKEY_flush_routes; then ocf_run $IP2UTIL route flush cache fi if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then delete_ipv6_addrlabel $ipaddr fi return $OCF_SUCCESS } # # Add an interface # add_interface () { local cmd msg extra_opts ipaddr netmask broadcast iface label metric ipaddr="$1" netmask="$2" broadcast="$3" iface="$4" label="$5" metric="$6" if [ "$FAMILY" = "inet" ] && ocf_is_true $OCF_RESKEY_run_arping && check_binary arping; then arping -q -c 2 -w 3 -D -I $iface $ipaddr if [ $? = 1 ]; then ocf_log err "IPv4 address collision $ipaddr [DAD]" return $OCF_ERR_GENERIC fi fi if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then add_ipv6_addrlabel $ipaddr fi cmd="$IP2UTIL -f $FAMILY addr add $ipaddr/$netmask dev $iface" msg="Adding $FAMILY address $ipaddr/$netmask to device $iface" if [ "$broadcast" != "none" ]; then cmd="$IP2UTIL -f $FAMILY addr add $ipaddr/$netmask brd $broadcast dev $iface" msg="Adding $FAMILY address $ipaddr/$netmask with broadcast address $broadcast to device $iface" fi extra_opts="" if [ "$FAMILY" = "inet6" ] && [ -n "$metric" ]; then extra_opts="$extra_opts metric $metric" fi if [ "$FAMILY" = "inet6" ] && ocf_is_true "${OCF_RESKEY_nodad}"; then extra_opts="$extra_opts nodad" fi if ocf_is_true "${OCF_RESKEY_noprefixroute}"; then extra_opts="$extra_opts noprefixroute" fi if [ ! -z "$label" ]; then extra_opts="$extra_opts label $label" fi if [ "$FAMILY" = "inet6" ] ;then extra_opts="$extra_opts preferred_lft $OCF_RESKEY_preferred_lft" fi if [ -n "$extra_opts" ]; then cmd="$cmd$extra_opts" msg="$msg (with$extra_opts)" fi ocf_log info "$msg" ocf_run $cmd || return $OCF_ERR_GENERIC msg="Bringing device $iface up" cmd="$IP2UTIL link set dev $iface up" ocf_log info "$msg" ocf_run $cmd || return $OCF_ERR_GENERIC return $OCF_SUCCESS } # # Delete a route # delete_route () { prefix="$1" iface="$2" CMD="$IP2UTIL route delete $prefix dev $iface" ocf_log info "$CMD" $CMD return $? } # On Linux systems the (hidden) loopback interface may # conflict with the requested IP address. If so, this # unoriginal code will remove the offending loopback address # and save it in VLDIR so it can be added back in later # when the IPaddr is released. # # TODO: This is very ugly and should be controlled by an additional # instance parameter. Or even: multi-state, with the IP only being # "active" on the master!? # remove_conflicting_loopback() { ipaddr="$1" netmask="$2" broadcast="$3" ifname="$4" ocf_log info "Removing conflicting loopback $ifname." if echo "$ipaddr $netmask $broadcast $ifname" > "$VLDIR/$ipaddr" then : Saved loopback information in $VLDIR/$ipaddr else ocf_log err "Could not save conflicting loopback $ifname." \ "it will not be restored." fi delete_interface "$ipaddr" "$ifname" "$netmask" # Forcibly remove the route (if it exists) to the loopback. delete_route "$ipaddr" "$ifname" } # # On Linux systems the (hidden) loopback interface may # need to be restored if it has been taken down previously # by remove_conflicting_loopback() # restore_loopback() { ipaddr="$1" if [ -s "$VLDIR/$ipaddr" ]; then ifinfo=`cat "$VLDIR/$ipaddr"` ocf_log info "Restoring loopback IP Address " \ "$ifinfo." add_interface $ifinfo rm -f "$VLDIR/$ipaddr" fi } add_ipv6_addrlabel() { local cmd ipaddr value ipaddr="$1" value="$OCF_RESKEY_lvs_ipv6_addrlabel_value" cmd="$IP2UTIL addrlabel add prefix $ipaddr label $value" ocf_log info "Adding IPv6 address label prefix $ipaddr label $value" ocf_run $cmd || ocf_log warn "$cmd failed." } delete_ipv6_addrlabel() { local cmd ipaddr value ipaddr="$1" value="$OCF_RESKEY_lvs_ipv6_addrlabel_value" cmd="$IP2UTIL addrlabel del prefix $ipaddr label $value" ocf_run $cmd # an error can be ignored } is_infiniband() { $IP2UTIL link show $NIC | grep link/infiniband >/dev/null } log_arp_sender() { local cmdline local output local rc cmdline="$@" output=$($cmdline 2>&1) rc=$? if [ $rc -ne 0 ] && \ [ "$ARP_SENDER" != "libnet_arping" ] ; then # libnet_arping always return an error as no answers ocf_log err "Could not send gratuitous arps: rc=$rc" fi ocf_log $LOGLEVEL "$output" } # wrapper function to manage PID file to run arping in background run_with_pidfile() { local cmdline local pid local rc cmdline="$@" $cmdline & pid=$! echo "$pid" > $SENDARPPIDFILE wait $pid rc=$? rm -f $SENDARPPIDFILE return $rc } build_arp_sender_cmd() { case "$ARP_SENDER" in send_arp) if [ "x$IP_CIP" = "xyes" ] ; then if [ x = "x$IF_MAC" ] ; then MY_MAC=auto else # send_arp.linux should return without doing anything in this case MY_MAC=`echo ${IF_MAC} | sed -e 's/://g'` fi else MY_MAC=auto fi ARGS="$OCF_RESKEY_send_arp_opts -i $OCF_RESKEY_arp_interval -r $ARP_COUNT -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" ARP_SENDER_CMD="$SENDARP $ARGS" ;; iputils_arping) ARGS="$OCF_RESKEY_send_arp_opts -U -c $ARP_COUNT -I $NIC $OCF_RESKEY_ip" ARP_SENDER_CMD="run_with_pidfile arping $ARGS" ;; libnet_arping) ARGS="$OCF_RESKEY_send_arp_opts -U -c $ARP_COUNT -i $NIC -S $OCF_RESKEY_ip $OCF_RESKEY_ip" ARP_SENDER_CMD="run_with_pidfile arping $ARGS" ;; ipoibarping) ARGS="-q -c $ARP_COUNT -U -I $NIC $OCF_RESKEY_ip" ARP_SENDER_CMD="ipoibarping $ARGS" ;; *) # should not occur ocf_exit_reason "unrecognized arp_sender value: $ARP_SENDER" exit $OCF_ERR_GENERIC ;; esac } # # Send Unsolicited ARPs to update neighbor's ARP cache # run_arp_sender() { if [ "x$1" = "xrefresh" ] ; then ARP_COUNT=$OCF_RESKEY_arp_count_refresh LOGLEVEL=debug else ARP_COUNT=$OCF_RESKEY_arp_count LOGLEVEL=info fi if [ $ARP_COUNT -eq 0 ] ; then return fi # do not need to send Gratuitous ARPs in the Cluster IP configuration # except send_arp.libnet binary to retain the old behavior if [ "x$IP_CIP" = "xyes" ] && \ [ "x$ARP_SENDER" != "xsend_arp" ] ; then ocf_log info "Gratuitous ARPs are not sent in the Cluster IP configuration" return fi # prepare arguments for each arp sender program # $ARP_SENDER_CMD should be set build_arp_sender_cmd ocf_log $LOGLEVEL "$ARP_SENDER_CMD" if ocf_is_true $OCF_RESKEY_arp_bg; then log_arp_sender $ARP_SENDER_CMD & else log_arp_sender $ARP_SENDER_CMD fi } log_send_ua() { local cmdline local output local rc cmdline="$@" output=$($cmdline 2>&1) rc=$? if [ $rc -ne 0 ] ; then ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements: rc=$rc" fi ocf_log info "$output" return $rc } # # Run send_ua to note send ICMPv6 Unsolicited Neighbor Advertisements. # run_send_ua() { local i # Duplicate Address Detection [DAD] # Kernel will flag the IP as 'tentative' until it ensured that # there is no duplicates. # If there is, it will flag it as 'dadfailed' for i in $(seq 1 10); do ipstatus=$($IP2UTIL -o -f $FAMILY addr show dev $NIC to $OCF_RESKEY_ip/$NETMASK) case "$ipstatus" in *dadfailed*) ocf_log err "IPv6 address collision $OCF_RESKEY_ip [DAD]" $IP2UTIL -f $FAMILY addr del dev $NIC $OCF_RESKEY_ip/$NETMASK if [ $? -ne 0 ]; then ocf_log err "Could not delete IPv6 address" fi return $OCF_ERR_GENERIC ;; *tentative*) if [ $i -eq 10 ]; then ocf_log warn "IPv6 address : DAD is still in tentative" fi ;; *) break ;; esac sleep 1 done # Now the address should be usable ARGS="-i $OCF_RESKEY_arp_interval -c $OCF_RESKEY_arp_count $OCF_RESKEY_ip $NETMASK $NIC" ocf_log info "$SENDUA $ARGS" if ocf_is_true $OCF_RESKEY_arp_bg; then log_send_ua $SENDUA $ARGS & else log_send_ua $SENDUA $ARGS fi } # Do we already serve this IP address on the given $NIC? # # returns: # ok = served (for CIP: + hash bucket) # partial = served and no hash bucket (CIP only) # partial2 = served and no CIP iptables rule # partial3 = served with no label # no = nothing # ip_served() { if [ -z "$NIC" ]; then # no nic found or specified echo "no" return 0 fi cur_nic="`find_interface $OCF_RESKEY_ip $NETMASK`" if [ -z "$cur_nic" ]; then echo "no" return 0 fi if [ -z "$IP_CIP" ]; then for i in $cur_nic; do # check address label if [ -n "$IFLABEL" ] && [ -z "`$IP2UTIL -o -f $FAMILY addr show $nic label $IFLABEL`" ]; then echo partial3 return 0 fi # only mark as served when on the same interfaces as $NIC [ "$i" = "$NIC" ] || continue echo "ok" return 0 done # There used to be logic here to pretend "not served", # if ${OCF_RESKEY_lvs_support} was enabled, and the IP was # found active on "lo*" only. With lvs_support on, you should # have NIC != lo, so thats already filtered # by the continue above. echo "no" return 0 fi # Special handling for the CIP: if [ ! -e $IP_CIP_FILE ]; then echo "partial2" return 0 fi - if egrep -q "(^|,)${IP_INC_NO}(,|$)" $IP_CIP_FILE ; then + if $EGREP -q "(^|,)${IP_INC_NO}(,|$)" $IP_CIP_FILE ; then echo "ok" return 0 else echo "partial" return 0 fi exit $OCF_ERR_GENERIC } ####################################################################### ip_usage() { cat <$IP_CIP_FILE fi if [ "$ip_status" = "no" ]; then if ocf_is_true ${OCF_RESKEY_lvs_support}; then for i in `find_interface $OCF_RESKEY_ip 32`; do case $i in lo*) remove_conflicting_loopback $OCF_RESKEY_ip 32 255.255.255.255 lo ;; esac done fi add_interface "$OCF_RESKEY_ip" "$NETMASK" "${BRDCAST:-none}" "$NIC" "$IFLABEL" "$METRIC" rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "Failed to add $OCF_RESKEY_ip" exit $rc fi ip_status=`ip_served` if [ "$ip_status" != "ok" ]; then ocf_exit_reason "Failed to add $OCF_RESKEY_ip with error $ip_status" exit $OCF_ERR_GENERIC fi fi case $NIC in lo*) : no need to run send_arp on loopback ;; *) if [ $FAMILY = "inet" ];then run_arp_sender else if [ -x $SENDUA ]; then run_send_ua if [ $? -ne 0 ]; then ocf_exit_reason "run_send_ua failed." exit $OCF_ERR_GENERIC fi fi fi ;; esac exit $OCF_SUCCESS } ip_stop() { local ip_del_if="yes" if [ -n "$IP_CIP" ]; then # Cluster IPs need special processing when the last bucket # is removed from the node... take a lock to make sure only one # process executes that code ocf_take_lock $CIP_lockfile ocf_release_lock_on_exit $CIP_lockfile fi if [ -f "$SENDARPPIDFILE" ] ; then kill `cat "$SENDARPPIDFILE"` if [ $? -ne 0 ]; then ocf_log warn "Could not kill previously running send_arp for $OCF_RESKEY_ip" else ocf_log info "killed previously running send_arp for $OCF_RESKEY_ip" fi rm -f "$SENDARPPIDFILE" fi local ip_status=`ip_served` ocf_log info "IP status = $ip_status, IP_CIP=$IP_CIP" if [ $ip_status = "no" ]; then : Requested interface not in use exit $OCF_SUCCESS fi if [ -n "$IP_CIP" ] && [ $ip_status != "partial2" ]; then if [ $ip_status = "partial" ]; then exit $OCF_SUCCESS fi echo "-$IP_INC_NO" >$IP_CIP_FILE if [ "x$(cat $IP_CIP_FILE)" = "x" ]; then ocf_log info $OCF_RESKEY_ip, $IP_CIP_HASH i=1 while [ $i -le $IP_INC_GLOBAL ]; do ocf_log info $i $IPADDR2_CIP_IPTABLES -D INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \ --new \ --clustermac $IF_MAC \ --total-nodes $IP_INC_GLOBAL \ --local-node $i \ --hashmode $IP_CIP_HASH i=`expr $i + 1` done else ip_del_if="no" fi fi if [ "$ip_del_if" = "yes" ]; then delete_interface $OCF_RESKEY_ip $NIC $NETMASK if [ $? -ne 0 ]; then ocf_exit_reason "Unable to remove IP [${OCF_RESKEY_ip} from interface [ $NIC ]" exit $OCF_ERR_GENERIC fi if ocf_is_true ${OCF_RESKEY_lvs_support}; then restore_loopback "$OCF_RESKEY_ip" fi fi exit $OCF_SUCCESS } ip_monitor() { # TODO: Implement more elaborate monitoring like checking for # interface health maybe via a daemon like FailSafe etc... local ip_status=`ip_served` case $ip_status in ok) run_arp_sender refresh return $OCF_SUCCESS ;; no) exit $OCF_NOT_RUNNING ;; *) # Errors on this interface? return $OCF_ERR_GENERIC ;; esac } # make sure that we have something to send ARPs with set_send_arp_program() { ARP_SENDER=send_arp if [ -n "$OCF_RESKEY_arp_sender" ]; then case "$OCF_RESKEY_arp_sender" in send_arp) check_binary $SENDARP ;; iputils_arping) check_binary arping ;; libnet_arping) check_binary arping ;; ipoibarping) check_binary ipoibarping ;; *) ocf_exit_reason "unrecognized arp_sender value: $OCF_RESKEY_arp_sender" exit $OCF_ERR_CONFIGURED ;; esac ARP_SENDER="$OCF_RESKEY_arp_sender" else if is_infiniband; then ARP_SENDER=ipoibarping if ! have_binary ipoibarping; then [ "$__OCF_ACTION" = start ] && ocf_log warn "using send_arp for infiniband because ipoibarping is not available (set arp_sender to \"send_arp\" to suppress this message)" check_binary $SENDARP ARP_SENDER=send_arp fi fi fi } ip_validate() { check_binary $IP2UTIL IP_CIP= if [ -n "$OCF_RESKEY_network_namespace" ]; then OCF_RESKEY_network_namespace= exec $IP2UTIL netns exec "$OCF_RESKEY_network_namespace" "$0" "$__OCF_ACTION" fi ip_init set_send_arp_program if [ -n "$IP_CIP" ]; then if have_binary "$IPTABLES_LEGACY"; then IPADDR2_CIP_IPTABLES="$IPTABLES_LEGACY" fi check_binary "$IPADDR2_CIP_IPTABLES" check_binary $MODPROBE fi # $BASEIP, $NETMASK, $NIC , $IP_INC_GLOBAL, and $BRDCAST have been checked within ip_init, # do not bother here. if ocf_is_true "$OCF_RESKEY_unique_clone_address" && ! ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then ocf_exit_reason "unique_clone_address makes sense only with meta globally_unique set" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$OCF_RESKEY_arp_interval" && [ $OCF_RESKEY_arp_interval -gt 0 ]; then : else ocf_exit_reason "Invalid OCF_RESKEY_arp_interval [$OCF_RESKEY_arp_interval]" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$OCF_RESKEY_arp_count" && [ $OCF_RESKEY_arp_count -gt 0 ]; then : else ocf_exit_reason "Invalid OCF_RESKEY_arp_count [$OCF_RESKEY_arp_count]" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_preferred_lft" ]; then ocf_exit_reason "Empty value is invalid for OCF_RESKEY_preferred_lft" exit $OCF_ERR_CONFIGURED fi if [ -n "$IP_CIP" ]; then local valid=1 case $IP_CIP_HASH in sourceip|sourceip-sourceport|sourceip-sourceport-destport) ;; *) ocf_exit_reason "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]" exit $OCF_ERR_CONFIGURED ;; esac if ocf_is_true ${OCF_RESKEY_lvs_support}; then ocf_exit_reason "LVS and load sharing not advised to try" exit $OCF_ERR_CONFIGURED fi case $IF_MAC in [0-9a-zA-Z][13579bBdDfF][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]) ;; *) valid=0 ;; esac if [ $valid -eq 0 ]; then ocf_exit_reason "Invalid IF_MAC [$IF_MAC]" exit $OCF_ERR_CONFIGURED fi fi } if ocf_is_true "$OCF_RESKEY_unique_clone_address"; then prefix=`echo $OCF_RESKEY_ip | awk -F. '{print $1"."$2"."$3}'` suffix=`echo $OCF_RESKEY_ip | awk -F. '{print $4}'` suffix=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + $suffix` OCF_RESKEY_ip="$prefix.$suffix" fi case $__OCF_ACTION in meta-data) meta_data ;; usage|help) ip_usage exit $OCF_SUCCESS ;; esac ip_validate case $__OCF_ACTION in start) ip_start ;; stop) ip_stop ;; status) ip_status=`ip_served` if [ $ip_status = "ok" ]; then echo "running" exit $OCF_SUCCESS else echo "stopped" exit $OCF_NOT_RUNNING fi ;; monitor) ip_monitor ;; validate-all) ;; *) ip_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac # vi:sw=4:ts=8: diff --git a/heartbeat/ManageVE.in b/heartbeat/ManageVE.in index f07ca5bdc..540addd94 100644 --- a/heartbeat/ManageVE.in +++ b/heartbeat/ManageVE.in @@ -1,320 +1,320 @@ #!@BASH_SHELL@ # # ManageVE OCF RA. Manages OpenVZ Virtual Environments (VEs) # # (c) 2006-2010 Matthias Dahl, Florian Haas, # and Linux-HA contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # This OCF compliant resource agent manages OpenVZ VEs and thus requires # a proper OpenVZ installation including a recent vzctl util. # # rev. 1.00.4 # # Changelog # # 21/Oct/10 1.00.4 implement migrate_from/migrate_to # 12/Sep/06 1.00.3 more cleanup # 12/Sep/06 1.00.2 fixed some logic in start_ve # general cleanup all over the place # 11/Sep/06 1.00.1 fixed some typos # 07/Sep/06 1.00.0 it's alive... muahaha... ALIVE... :-) # ### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Parameter defaults OCF_RESKEY_veid_default="" : ${OCF_RESKEY_veid=${OCF_RESKEY_veid_default}} ### # required utilities VZCTL=/usr/sbin/vzctl # # usage() # usage() { cat <<-EOF usage: $0 {start|stop|status|monitor|migrate_from|migrate_to|validate-all|usage|meta-data} EOF } # # meta_data() # meta_data() { cat < 1.0 This OCF compliant resource agent manages OpenVZ VEs and thus requires a proper OpenVZ installation including a recent vzctl util. Manages an OpenVZ Virtual Environment (VE) OpenVZ ID of virtual environment (see output of vzlist -a for all assigned IDs) OpenVZ ID of VE END } # # start_ve() # # Starts a VE, or simply logs a message if the VE is already running. # start_ve() { if status_ve; then ocf_log info "VE $VEID already running." return $OCF_SUCCESS fi ocf_run $VZCTL start $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # stop_ve() # # ATTENTION: The following code relies on vzctl's exit codes, especially: # # 0 : success # # In case any of those exit codes change, this function will need fixing. # stop_ve() { status_ve if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log info "VE $VEID already stopped." return $OCF_SUCCESS fi ocf_run $VZCTL stop $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # migrate_to_ve() # # In the process of a resource migration, checkpoints the VE. For this # to work, vzctl must obviously create the dump file in a place which # the migration target has access to (an NFS mount, a DRBD device, # etc.). # migrate_to_ve() { if ! status_ve; then ocf_log err "VE $VEID is not running, aborting" exit $OCF_ERR_GENERIC fi ocf_run $VZCTL chkpnt $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # migrate_to_ve() # # In the process of a resource migration, restores the VE. For this to # work, vzctl must obviously have access to the dump file which was # created on the migration source (on an NFS mount, a DRBD device, # etc.). # migrate_from_ve() { ocf_run $VZCTL restore $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # status_ve() # # ATTENTION: The following code relies on vzctl's status output. The fifth # column is interpreted as the VE status (either up or down). # # In case the output format should change, this function will need fixing. # status_ve() { declare -i retcode veexists=`$VZCTL status $VEID 2>/dev/null | $AWK '{print $3}'` vestatus=`$VZCTL status $VEID 2>/dev/null | $AWK '{print $5}'` retcode=$? if [[ $retcode != 0 ]]; then # log error only if expected to find running if [ "$__OCF_ACTION" = "monitor" ] && ! ocf_is_probe; then ocf_log err "vzctl status $VEID returned: $retcode" fi exit $OCF_ERR_GENERIC fi if [[ $veexists != "exist" ]]; then ocf_log err "vzctl status $VEID returned: $VEID does not exist." return $OCF_NOT_RUNNING fi case "$vestatus" in running) return $OCF_SUCCESS ;; down) return $OCF_NOT_RUNNING ;; *) ocf_log err "vzctl status $VEID, wrong output format. (5th column: $vestatus)" exit $OCF_ERR_GENERIC ;; esac } # # validate_all_ve() # # ATTENTION: The following code relies on vzctl's status output. The fifth # column is interpreted as the VE status (either up or down). # # In case the output format should change, this function will need fixing. # validate_all_ve() { declare -i retcode # VEID should be a valid VE `status_ve` retcode=$? if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then return $retcode fi return $OCF_SUCCESS } if [[ $# != 1 ]]; then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac # # check relevant environment variables for sanity and security # # empty string? `test -z "$OCF_RESKEY_veid"` declare -i veidtest1=$? # really a number? -`echo "$OCF_RESKEY_veid" | egrep -q '^[[:digit:]]+$'` +`echo "$OCF_RESKEY_veid" | $EGREP -q '^[[:digit:]]+$'` if [[ $veidtest1 != 1 || $? != 0 ]]; then ocf_log err "OCF_RESKEY_veid not set or not a number." exit $OCF_ERR_ARGS fi declare -i VEID=$OCF_RESKEY_veid # # check that all relevant utilities are available # check_binary $VZCTL check_binary $AWK # # finally... let's see what we are ordered to do :-) # case "$1" in start) start_ve ;; stop) stop_ve ;; status|monitor) status_ve ;; migrate_to) migrate_to_ve ;; migrate_from) migrate_from_ve ;; validate-all) validate_all_ve ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance index 26fd54136..95140e9c4 100755 --- a/heartbeat/SAPInstance +++ b/heartbeat/SAPInstance @@ -1,1076 +1,1076 @@ #!/bin/sh # # SAPInstance # # Description: Manages a single SAP Instance as a High-Availability # resource. One SAP Instance is defined by one # SAP Instance-Profile. start/stop handles all services # of the START-Profile, status and monitor care only # about essential services. # # Author: Alexander Krauth, June 2006 # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2006-2008 Alexander Krauth # # An example usage: # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_InstanceName # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) # OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) # OCF_RESKEY_START_PROFILE (optional, well known directories will be searched by default) # OCF_RESKEY_START_WAITTIME (optional, to solve timing problems during J2EE-Addin start) # OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery using cleanipc, default is false) # OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor critical services only) # OCF_RESKEY_SHUTDOWN_METHOD (optional, defaults to NORMAL, KILL: terminate the SAP instance with OS commands - faster, at your own risk) # OCF_RESKEY_ERS_InstanceName (optional, InstanceName of the ERS instance in a Promotable configuration) # OCF_RESKEY_ERS_START_PROFILE (optional, START_PROFILE of the ERS instance in a Promotable configuration) # OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) # OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) # OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) # OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) # OCF_RESKEY_IS_ERS (needed for ENQ/REPL NW 740) # OCF_RESKEY_MINIMAL_PROBE (optional but needed for simple mount structure architecure) # # TODO: - Option to shutdown sapstartsrv for non-active instances -> that means: do probes only with OS tools (sapinstance_status) # - Option for better standalone enqueue server monitoring, using ensmon (test enque-deque) # - Option for cleanup abandoned enqueue replication tables # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Parameter defaults OCF_RESKEY_InstanceName_default="" OCF_RESKEY_DIR_EXECUTABLE_default="" OCF_RESKEY_DIR_PROFILE_default="" OCF_RESKEY_START_PROFILE_default="" OCF_RESKEY_START_WAITTIME_default="3600" OCF_RESKEY_AUTOMATIC_RECOVER_default="false" OCF_RESKEY_MONITOR_SERVICES_default="disp+work|msg_server|enserver|enrepserver|jcontrol|jstart|enq_server|enq_replicator" OCF_RESKEY_SHUTDOWN_METHOD_default="normal" OCF_RESKEY_ERS_InstanceName_default="" OCF_RESKEY_ERS_START_PROFILE_default="" OCF_RESKEY_PRE_START_USEREXIT_default="" OCF_RESKEY_POST_START_USEREXIT_default="" OCF_RESKEY_PRE_STOP_USEREXIT_default="" OCF_RESKEY_POST_STOP_USEREXIT_default="" OCF_RESKEY_IS_ERS_default="false" OCF_RESKEY_MINIMAL_PROBE_default="false" : ${OCF_RESKEY_InstanceName=${OCF_RESKEY_InstanceName_default}} : ${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}} : ${OCF_RESKEY_DIR_PROFILE=${OCF_RESKEY_DIR_PROFILE_default}} : ${OCF_RESKEY_START_PROFILE=${OCF_RESKEY_START_PROFILE_default}} : ${OCF_RESKEY_START_WAITTIME=${OCF_RESKEY_START_WAITTIME_default}} : ${OCF_RESKEY_AUTOMATIC_RECOVER=${OCF_RESKEY_AUTOMATIC_RECOVER_default}} : ${OCF_RESKEY_MONITOR_SERVICES=${OCF_RESKEY_MONITOR_SERVICES_default}} : ${OCF_RESKEY_SHUTDOWN_METHOD=${OCF_RESKEY_SHUTDOWN_METHOD_default}} : ${OCF_RESKEY_ERS_InstanceName=${OCF_RESKEY_ERS_InstanceName_default}} : ${OCF_RESKEY_ERS_START_PROFILE=${OCF_RESKEY_ERS_START_PROFILE_default}} : ${OCF_RESKEY_PRE_START_USEREXIT=${OCF_RESKEY_PRE_START_USEREXIT_default}} : ${OCF_RESKEY_POST_START_USEREXIT=${OCF_RESKEY_POST_START_USEREXIT_default}} : ${OCF_RESKEY_PRE_STOP_USEREXIT=${OCF_RESKEY_PRE_STOP_USEREXIT_default}} : ${OCF_RESKEY_POST_STOP_USEREXIT=${OCF_RESKEY_POST_STOP_USEREXIT_default}} : ${OCF_RESKEY_IS_ERS=${OCF_RESKEY_IS_ERS_default}} : ${OCF_RESKEY_IS_MINIMAL_PROBE=${OCF_RESKEY_IS_MINIMAL_PROBE_default}} ####################################################################### SH=/bin/sh sapinstance_usage() { methods=`sapinstance_methods` methods=`echo $methods | tr ' ' '|'` cat <<-EOF usage: $0 ($methods) $0 manages a SAP Instance as an HA resource. The 'start' operation starts the instance or the ERS instance in a Promotable configuration The 'stop' operation stops the instance The 'status' operation reports whether the instance is running The 'monitor' operation reports whether the instance seems to be working The 'promote' operation starts the primary instance in a Promotable configuration The 'demote' operation stops the primary instance and starts the ERS instance The 'reload' operation allows changed parameters (non-unique only) without restarting the service The 'notify' operation always returns SUCCESS The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports EOF } sapinstance_meta_data() { cat < 1.0 Usually a SAP system consists of one database and at least one or more SAP instances (sometimes called application servers). One SAP Instance is defined by having exactly one instance profile. The instance profiles can usually be found in the directory /sapmnt/SID/profile. Each instance must be configured as it's own resource in the cluster configuration. The resource agent supports the following SAP versions: - SAP WebAS ABAP Release 6.20 - 7.40 - SAP WebAS Java Release 6.40 - 7.40 - SAP WebAS ABAP + Java Add-In Release 6.20 - 7.40 (Java is not monitored by the cluster in that case) When using a SAP Kernel 6.40 please check and implement the actions from the section "Manual postprocessing" from SAP note 995116 (http://sdn.sap.com). Other versions may also work with this agent, but have not been verified. All operations of the SAPInstance resource agent are done by using the startup framework called SAP Management Console or sapstartsrv that was introduced with SAP kernel release 6.40. Find more information about the SAP Management Console in SAP note 1014480. Using this framework defines a clear interface for the Heartbeat cluster, how it sees the SAP system. The options for monitoring the SAP system are also much better than other methods like just watching the ps command for running processes or doing some pings to the application. sapstartsrv uses SOAP messages to request the status of running SAP processes. Therefore it can actually ask a process itself what it's status is, independent from other problems that might exist at the same time. sapstartsrv knows 4 status colours: - GREEN = everything is fine - YELLOW = something is wrong, but the service is still working - RED = the service does not work - GRAY = the service has not been started The SAPInstance resource agent will interpret GREEN and YELLOW as OK. That means that minor problems will not be reported to the Heartbeat cluster. This prevents the cluster from doing an unwanted failover. The statuses RED and GRAY are reported as NOT_RUNNING to the cluster. Depending on the status the cluster expects from the resource, it will do a restart, failover or just nothing. Manages a SAP instance as an HA resource. The full qualified SAP instance name. e.g. P01_DVEBMGS00_sapp01ci. Usually this is the name of the SAP instance profile. Instance name: SID_INSTANCE_VIR-HOSTNAME The full qualified path where to find sapstartsrv and sapcontrol. Specify this parameter, if you have changed the SAP kernel directory location after the default SAP installation. Path of sapstartsrv and sapcontrol The full qualified path where to find the SAP START profile. Specify this parameter, if you have changed the SAP profile directory location after the default SAP installation. Path of start profile The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than. Start profile name After that time in seconds a monitor operation is executed by the resource agent. Does the monitor return SUCCESS, the start ishandled as SUCCESS. This is useful to resolve timing problems with e.g. the J2EE-Addin instance.Usually the resource agent waits until all services are started and the SAP Management Console reports a GREEN status. A double stack installation (ABAP + Java AddIn) consists of an ABAP dispatcher and a JAVA instance. Normally the start of the JAVA instance takes much longer than the start of the ABAP instance. For a JAVA Instance you may need to configure a much higher timeout for the start operation of the resource in Heartbeat. The disadvantage here is, that the discovery of a failed start by the cluster takes longer. Somebody might say: For me it is important, that the ABAP instance is up and running. A failure of the JAVA instance shall not cause a failover of the SAP instance. Actually the SAP MC reports a YELLOW status, if the JAVA instance of a double stack system fails. From the resource agent point of view YELLOW means:everything is OK. Setting START_WAITTIME to a lower value determines the resource agent to check the status of the instance during a start operation after that time. As it would wait normally for a GREEN status, now it reports SUCCESS to the cluster in case of a YELLOW status already after the specified time. That is only useful for double stack systems. Check the successful start after that time (do not wait for J2EE-Addin) The SAPInstance resource agent tries to recover a failed start attempt automatically one time. This is done by killing running instance processes, removing the kill.sap file and executing cleanipc. Sometimes a crashed SAP instance leaves some processes and/or shared memory segments behind. Setting this option to true will try to remove those leftovers during a start operation. That is to reduce manual work for the administrator. Enable or disable automatic startup recovery Within a SAP instance there can be several services. Usually you will find the defined services in the START profile of the related instance (Attention: with SAP Release 7.10 the START profile content was moved to the instance profile). Not all of those services are worth to monitor by the cluster. For example you properly do not like to failover your SAP instance, if the central syslog collector daemon fails. Those services are monitored within the SAPInstance resource agent: - disp+work - msg_server - enserver (ENSA1) - enq_server (ENSA2) - enrepserver (ENSA1) - enq_replicator (ENSA2) - jcontrol - jstart Some other services could be monitored as well. They have to be given with the parameter MONITOR_SERVICES, e.g.: - sapwebdisp - TREXDaemon.x That names match the strings used in the output of the command 'sapcontrol -nr [Instance-Nr] -function GetProcessList'. The default should fit most cases where you want to manage a SAP Instance from the cluster. You may change this with this parameter, if you like to monitor more/less or other services that sapstartsrv supports. You may specify multiple services separated by a | (pipe) sign in this parameter: disp+work|msg_server|enserver Services to monitor Usually a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the graceful stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !! Shutdown graceful or kill a SAP instance by terminating the processes. (normal|KILL) Only used in a Promotable resource configuration: The full qualified SAP enqueue replication instance name. e.g. P01_ERS02_sapp01ers. Usually this is the name of the SAP instance profile. The enqueue replication instance must be installed, before you want to configure a promotable cluster resource. The promotable configuration in the cluster must use this properties: clone_max = 2 clone_node_max = 1 master_node_max = 1 master_max = 1 Enqueue replication instance name: SID_INSTANCE_VIR-HOSTNAME Only used in a Promotable resource configuration: The parameter ERS_InstanceName must also be set in this configuration. The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than. Enqueue replication start profile name The full qualified path where to find a script or program which should be executed before this resource gets started. Path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got started. Path to a post-start script The full qualified path where to find a script or program which should be executed before this resource gets stopped. Path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got stopped. Path to a post-start script Only used for ASCS/ERS SAP Netweaver installations without implementing a promotable resource to allow the ASCS to 'find' the ERS running on another cluster node after a resource failure. This parameter should be set to true 'only' for the ERS instance for implementations following the SAP NetWeaver 7.40 HA certification (NW-HA-CLU-740). This includes also systems for NetWeaver less than 7.40, if you like to implement the NW-HA-CLU-740 scenario. Mark SAPInstance as ERS instance Setting MINIMAL_PROBE=true forces the resource agent to do only minimal check during a probe. This is needed for special file system setups. The MINIMAL_PROBE=true is only supported, if requested either by your vendor's support or if described in an architecture document from your HA vendor. Switch probe action from full to minimal check END } # # methods: What methods/operations do we support? # sapinstance_methods() { cat <<-EOF start stop status monitor promote demote reload notify validate-all methods meta-data usage EOF } # # is_clone : find out if we are configured to run in a Master/Slave configuration # is_clone() { if [ -n "$OCF_RESKEY_CRM_meta_clone_max" ] \ && [ "$OCF_RESKEY_CRM_meta_clone_max" -gt 0 ] then if [ "$OCF_RESKEY_CRM_meta_clone_max" -ne 2 ] || \ [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] || \ [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] || \ [ "$OCF_RESKEY_CRM_meta_master_max" -ne 1 ] then ocf_log err "Clone options misconfigured. (expect: clone_max=2,clone_node_max=1,master_node_max=1,master_max=1)" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_ERS_InstanceName" ] then ocf_log err "In a Master/Slave configuration the ERS_InstanceName parameter is mandatory." exit $OCF_ERR_ARGS fi else return 0 fi return 1 } # # abnormal_end : essential things are missing, but in the natur of a SAP installation - which can be very different # from customer to customer - we cannot handle this always as an error # This would be the case, if the software is installed on shared disks and not visible # to all cluster nodes at all times. # abnormal_end() { local err_msg=$1 ocf_is_probe && { sapinstance_status exit $? } ocf_log err $err_msg if [ "$ACTION" = "stop" ] then cleanup_instance exit $OCF_SUCCESS fi exit $OCF_ERR_CONFIGURED } # # sapinstance_init : Define global variables with default values, if optional parameters are not set # # sapinstance_init() { local myInstanceName="$1" SID=`echo "$myInstanceName" | cut -d_ -f1` InstanceName=`echo "$myInstanceName" | cut -d_ -f2` InstanceNr=`echo "$InstanceName" | sed 's/.*\([0-9][0-9]\)$/\1/'` SAPVIRHOST=`echo "$myInstanceName" | cut -d_ -f3` # make sure that we don't care the content of variable from previous run of sapinstance_init DIR_EXECUTABLE="" SYSTEMCTL="systemctl" # optional OCF parameters, we try to guess which directories are correct if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ] then if have_binary /usr/sap/$SID/$InstanceName/exe/sapstartsrv && have_binary /usr/sap/$SID/$InstanceName/exe/sapcontrol then DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe" SAPSTARTSRV="/usr/sap/$SID/$InstanceName/exe/sapstartsrv" SAPCONTROL="/usr/sap/$SID/$InstanceName/exe/sapcontrol" elif have_binary /usr/sap/$SID/SYS/exe/run/sapstartsrv && have_binary /usr/sap/$SID/SYS/exe/run/sapcontrol then DIR_EXECUTABLE="/usr/sap/$SID/SYS/exe/run" SAPSTARTSRV="/usr/sap/$SID/SYS/exe/run/sapstartsrv" SAPCONTROL="/usr/sap/$SID/SYS/exe/run/sapcontrol" fi else if have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" && have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol" then DIR_EXECUTABLE="$OCF_RESKEY_DIR_EXECUTABLE" SAPSTARTSRV="$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" SAPCONTROL="$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol" fi fi sidadm="`echo $SID | tr '[:upper:]' '[:lower:]'`adm" [ -z "$DIR_EXECUTABLE" ] && abnormal_end "Cannot find sapstartsrv and sapcontrol executable, please set DIR_EXECUTABLE parameter!" if [ -z "$OCF_RESKEY_DIR_PROFILE" ] then DIR_PROFILE="/usr/sap/$SID/SYS/profile" else DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE" fi if [ "$myInstanceName" != "$OCF_RESKEY_InstanceName" ] then currentSTART_PROFILE=$OCF_RESKEY_ERS_START_PROFILE else currentSTART_PROFILE=$OCF_RESKEY_START_PROFILE fi if [ -z "$OCF_RESKEY_IS_ERS" ]; then is_ers="no" else is_ers="$OCF_RESKEY_IS_ERS" fi if [ -z "$currentSTART_PROFILE" ] then if [ ! -r "$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" -a -r "$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}" ]; then SAPSTARTPROFILE="$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}" else SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" fi else SAPSTARTPROFILE="$currentSTART_PROFILE" fi if [ -z "$OCF_RESKEY_START_WAITTIME" ] then export OCF_RESKEY_START_WAITTIME="${OCF_RESKEY_START_WAITTIME_default}" fi if [ -z "$OCF_RESKEY_MONITOR_SERVICES" ] then export OCF_RESKEY_MONITOR_SERVICES="${OCF_RESKEY_MONITOR_SERVICES_default}" fi # as root user we need the library path to the SAP kernel to be able to call sapcontrol if [ `echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>"` -eq 0 ]; then LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH export LD_LIBRARY_PATH fi return $OCF_SUCCESS } # # check_systemd_integration : Check, if SAP instance is controlled by systemd unit file SAP_.service # rc == 0 : sap instance is controlled by the unit file (file at least exists) # rc == 1 : sap instance is NOT controlled by the unit file (file does not exist) # check_systemd_integration() { local systemd_unit_name="SAP${SID}_${InstanceNr}" local rc=1 if which "$SYSTEMCTL" 1>/dev/null 2>/dev/null; then if $SYSTEMCTL list-unit-files | \ awk '$1 == service { found=1 } END { if (! found) {exit 1}}' service="${systemd_unit_name}.service"; then rc=0 else rc=1 fi fi return "$rc" } # # check_sapstartsrv : Before using sapcontrol we make sure that the sapstartsrv is running for the correct instance. # We cannot use sapinit and the /usr/sap/sapservices file in case of an enquerep instance, # because then we have two instances with the same instance number. # check_sapstartsrv() { local restart=0 local runninginst="" local chkrc=$OCF_SUCCESS local output="" # check for sapstartsrv/systemd integration if check_systemd_integration; then # do it the systemd way local systemd_unit_name="SAP${SID}_${InstanceNr}" if $SYSTEMCTL status "$systemd_unit_name" 1>/dev/null 2>/dev/null; then ocf_log info "systemd service $systemd_unit_name is active" else ocf_log warn "systemd service $systemd_unit_name is not active, it will be started using systemd" $SYSTEMCTL start "$systemd_unit_name" 1>/dev/null 2>/dev/null # use start, because restart does also stop sap instance fi return 0 else # otherwise continue with old code... if [ ! -S /tmp/.sapstream5${InstanceNr}13 ]; then ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName (no UDS), it will be started now" restart=1 else output=`$SAPCONTROL -nr $InstanceNr -function ParameterValue INSTANCE_NAME -format script` if [ $? -eq 0 ] then runninginst=`echo "$output" | grep '^0 : ' | cut -d' ' -f3` if [ "$runninginst" != "$InstanceName" ] then ocf_log warn "sapstartsrv is running for instance $runninginst, that service will be killed" restart=1 else output=`$SAPCONTROL -nr $InstanceNr -function AccessCheck Start` if [ $? -ne 0 ]; then ocf_log warn "FAILED : sapcontrol -nr $InstanceNr -function AccessCheck Start (`ls -ld1 /tmp/.sapstream5${InstanceNr}13`)" ocf_log warn "sapstartsrv will be restarted to try to solve this situation, otherwise please check sapstsartsrv setup (SAP Note 927637)" restart=1 fi fi else ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName, it will be started now" restart=1 fi fi if [ -z "$runninginst" ]; then runninginst=$InstanceName; fi if [ $restart -eq 1 ] then if [ -d /usr/sap/$SID/SYS/profile/ ] then DIR_PROFILE="/usr/sap/$SID/SYS/profile" else abnormal_end "Expected /usr/sap/$SID/SYS/profile/ to be a directory, please set DIR_PROFILE parameter!" fi [ ! -r $SAPSTARTPROFILE ] && abnormal_end "Expected $SAPSTARTPROFILE to be the instance START profile, please set START_PROFILE parameter!" pkill -9 -f "sapstartsrv.*$runninginst" # removing the unix domain socket files as they might have wrong permissions # or ownership - they will be recreated by sapstartsrv during next start rm -f /tmp/.sapstream5${InstanceNr}13 rm -f /tmp/.sapstream5${InstanceNr}14 $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm # now make sure the daemon has been started and is able to respond local srvrc=1 while [ $srvrc -eq 1 -a `pgrep -f "sapstartsrv.*$runninginst" | wc -l` -gt 0 ] do sleep 1 $SAPCONTROL -nr $InstanceNr -function GetProcessList > /dev/null 2>&1 srvrc=$? done if [ $srvrc -ne 1 ] then ocf_log info "sapstartsrv for instance $SID-$InstanceName was restarted !" chkrc=$OCF_SUCCESS else ocf_log error "sapstartsrv for instance $SID-$InstanceName could not be started!" chkrc=$OCF_ERR_GENERIC ocf_is_probe && chkrc=$OCF_NOT_RUNNING fi fi return $chkrc fi } # # sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. # This specialties do not allow a totally generic SAP cluster resource agent. # Someone should write a resource agent for each additional process you need, if it # is required to monitor that process within the cluster manager. To enable # you to extent this resource agent without developing a new one, this user exit # was introduced. # sapuserexit() { local NAME="$1" local VALUE="$2" if [ -n "$VALUE" ] then if have_binary "$VALUE" then ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}" "$VALUE" >/dev/null 2>&1 ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?" else ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable" fi fi return 0 } # # cleanup_instance : remove resources (processes and shared memory) from a crashed instance) # cleanup_instance() { pkill -9 -f -U $sidadm $InstanceName ocf_log info "Terminated instance using 'pkill -9 -f -U $sidadm $InstanceName'" # it is necessary to call cleanipc as user sidadm if the system has 'vmcj/enable = ON' set - otherwise SHM-segments in /dev/shm/SAP_ES2* cannot be removed su - $sidadm -c "cleanipc $InstanceNr remove" ocf_log info "Tried to remove shared memory resources using 'cleanipc $InstanceNr remove' as user $sidadm" ocf_run rm -fv /usr/sap/$SID/$InstanceName/work/kill.sap ocf_run rm -fv /usr/sap/$SID/$InstanceName/work/shutdown.sap ocf_run rm -fv /usr/sap/$SID/$InstanceName/data/rslgcpid ocf_run rm -fv /usr/sap/$SID/$InstanceName/data/rslgspid return 0 } # # sapinstance_start : Start the SAP instance # sapinstance_start() { sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT" local rc=$OCF_NOT_RUNNING local output="" local loopcount=0 while [ $loopcount -lt 2 ] do loopcount=$(($loopcount + 1)) check_sapstartsrv rc=$? if [ $rc -eq $OCF_SUCCESS ]; then output=`$SAPCONTROL -nr $InstanceNr -function Start` rc=$? ocf_log info "Starting SAP Instance $SID-$InstanceName: $output" fi if [ $rc -ne 0 ] then ocf_log err "SAP Instance $SID-$InstanceName start failed." return $OCF_ERR_GENERIC fi local startrc=1 while [ $startrc -gt 0 ] do local waittime_start=`date +%s` output=`$SAPCONTROL -nr $InstanceNr -function WaitforStarted $OCF_RESKEY_START_WAITTIME 10` startrc=$? local waittime_stop=`date +%s` if [ $startrc -ne 0 ] then if [ $(($waittime_stop - $waittime_start)) -ge $OCF_RESKEY_START_WAITTIME ] then sapinstance_monitor NOLOG if [ $? -eq $OCF_SUCCESS ] then output="START_WAITTIME ($OCF_RESKEY_START_WAITTIME) has elapsed, but instance monitor returned SUCCESS. Instance considered running." startrc=0; loopcount=2 fi else if [ $loopcount -eq 1 ] && ocf_is_true $OCF_RESKEY_AUTOMATIC_RECOVER then ocf_log warn "SAP Instance $SID-$InstanceName start failed: $output" ocf_log warn "Try to recover $SID-$InstanceName" cleanup_instance else loopcount=2 fi startrc=-1 fi else loopcount=2 fi done done if [ $startrc -eq 0 ] then ocf_log info "SAP Instance $SID-$InstanceName started: $output" rc=$OCF_SUCCESS sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT" if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 1 -l reboot; fi else ocf_log err "SAP Instance $SID-$InstanceName start failed: $output" rc=$OCF_NOT_RUNNING if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 0 -l reboot; fi fi return $rc } # # sapinstance_recover: Try startup of failed instance by cleaning up resources # sapinstance_recover() { cleanup_instance sapinstance_start return $? } # # sapinstance_stop: Stop the SAP instance # sapinstance_stop() { local output="" local rc sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT" if [ "$OCF_RESKEY_SHUTDOWN_METHOD" = "KILL" ] then ocf_log info "Stopping SAP Instance $SID-$InstanceName with shutdown method KILL!" cleanup_instance return $OCF_SUCCESS fi check_sapstartsrv rc=$? if [ $rc -eq $OCF_SUCCESS ]; then output=`$SAPCONTROL -nr $InstanceNr -function Stop` rc=$? ocf_log info "Stopping SAP Instance $SID-$InstanceName: $output" fi if [ $rc -eq 0 ] then output=`$SAPCONTROL -nr $InstanceNr -function WaitforStopped 3600 1` if [ $? -eq 0 ] then ocf_log info "SAP Instance $SID-$InstanceName stopped: $output" rc=$OCF_SUCCESS else ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output" rc=$OCF_ERR_GENERIC fi else ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output" rc=$OCF_ERR_GENERIC fi sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT" if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 0 -l reboot; fi return $rc } # # sapinstance_monitor: Can the given SAP instance do anything useful? # sapinstance_monitor() { local MONLOG=$1 local rc if ocf_is_probe && ocf_is_true "$OCF_RESKEY_MINIMAL_PROBE"; then # code for minimal probe: # grep for sapstartsrv and maybe also for sapstart # TODO: Do we need to improve this minimal test? if pgrep -f -l "sapstartsrv .*pf=.*${SID}_${InstanceName}_${SAPVIRHOST}"; then rc="$OCF_SUCCESS" elif pgrep -f -l "sapstart .*pf=.*${SID}_${InstanceName}_${SAPVIRHOST}"; then rc="$OCF_SUCCESS" else rc="$OCF_NOT_RUNNING" fi else # standard probe and monitoring code check_sapstartsrv rc=$? fi if [ $rc -eq $OCF_SUCCESS ] then local count=0 local SERVNO local output output=`$SAPCONTROL -nr $InstanceNr -function GetProcessList -format script` # we have to parse the output, because the returncode doesn't tell anything about the instance status for SERVNO in `echo "$output" | grep '^[0-9] ' | cut -d' ' -f1 | sort -u` do local COLOR=`echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3` local SERVICE=`echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3` local STATE=0 local SEARCH case $COLOR in GREEN|YELLOW) STATE=$OCF_SUCCESS;; *) STATE=$OCF_NOT_RUNNING;; esac SEARCH=`echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'` - if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ] + if [ `echo "$SERVICE" | $EGREP -c "$SEARCH"` -eq 1 ] then if [ $STATE -eq $OCF_NOT_RUNNING ] then [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !" rc=$STATE fi count=1 fi done if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ] then if ocf_is_probe then rc=$OCF_NOT_RUNNING else [ "$MONLOG" != "NOLOG" ] && ocf_log err "The SAP instance does not run any services which this RA could monitor!" rc=$OCF_ERR_GENERIC fi fi fi return $rc } # # sapinstance_status: Lightweight check of SAP instance only with OS tools # sapinstance_status() { local pid local pids [ ! -f "/usr/sap/$SID/$InstanceName/work/kill.sap" ] && return $OCF_NOT_RUNNING pids=$(awk '$3 ~ "^[0-9]+$" { print $3 }' /usr/sap/$SID/$InstanceName/work/kill.sap) for pid in $pids do [ `pgrep -f -U $sidadm $InstanceName | grep -c $pid` -gt 0 ] && return $OCF_SUCCESS done return $OCF_NOT_RUNNING } # # sapinstance_validate: Check the semantics of the input parameters # sapinstance_validate() { local rc=$OCF_SUCCESS if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$SID' is not a valid system ID!" rc=$OCF_ERR_ARGS fi if [ `echo "$InstanceName" | grep -c '^[A-Z].*[0-9][0-9]$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$InstanceName' is not a valid instance name!" rc=$OCF_ERR_ARGS fi if [ `echo "$InstanceNr" | grep -c '^[0-9][0-9]$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$InstanceNr' is not a valid instance number!" rc=$OCF_ERR_ARGS fi if [ `echo "$SAPVIRHOST" | grep -c '^[A-Za-z][A-Za-z0-9_-]*$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$SAPVIRHOST' is not a valid hostname!" rc=$OCF_ERR_ARGS fi return $rc } # # sapinstance_start_clone # sapinstance_start_clone() { sapinstance_init $OCF_RESKEY_ERS_InstanceName ${HA_SBIN_DIR}/crm_master -v 50 -l reboot sapinstance_start return $? } # # sapinstance_stop_clone # sapinstance_stop_clone() { sapinstance_init $OCF_RESKEY_ERS_InstanceName ${HA_SBIN_DIR}/crm_master -v 0 -l reboot sapinstance_stop return $? } # # sapinstance_monitor_clone # sapinstance_monitor_clone() { # first check with the status function (OS tools) if there could be something like a SAP instance running # as we do not know here, if we are in master or slave state we do not want to start our monitoring # agents (sapstartsrv) on the wrong host local rc sapinstance_init $OCF_RESKEY_InstanceName if sapinstance_status; then if sapinstance_monitor; then ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot return $OCF_RUNNING_MASTER fi # by nature of the SAP enqueue server we have to make sure # that we do a failover to the slave (enqueue replication server) # in case the enqueue process has failed. We signal this to the # cluster by setting our master preference to a lower value than the slave. ${HA_SBIN_DIR}/crm_master -v 10 -l reboot return $OCF_FAILED_MASTER fi sapinstance_init $OCF_RESKEY_ERS_InstanceName sapinstance_status && sapinstance_monitor rc=$? if [ $rc -eq $OCF_SUCCESS ]; then ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot fi return $rc } # # sapinstance_promote_clone: In a Master/Slave configuration get Master by starting the SCS instance and stopping the ERS instance # The order is important here to behave correct from the application levels view # sapinstance_promote_clone() { local rc sapinstance_init $OCF_RESKEY_InstanceName ocf_log info "Promoting $SID-$InstanceName to running Master." sapinstance_start rc=$? if [ $rc -eq $OCF_SUCCESS ]; then sapinstance_init $OCF_RESKEY_ERS_InstanceName sapinstance_stop rc=$? fi return $rc } # # sapinstance_demote_clone: In a Master/Slave configuration get Slave by stopping the SCS instance and starting the ERS instance # sapinstance_demote_clone() { local rc sapinstance_init $OCF_RESKEY_InstanceName ocf_log info "Demoting $SID-$InstanceName to a slave." sapinstance_stop rc=$? if [ $rc -eq $OCF_SUCCESS ]; then sapinstance_init $OCF_RESKEY_ERS_InstanceName sapinstance_start rc=$? fi return $rc } # # sapinstance_notify: Handle master scoring - to make sure a slave gets the next master # sapinstance_notify() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" if [ "${n_type}_${n_op}" = "post_promote" ]; then # After promotion of one master in the cluster, we make sure that all clones reset their master # value back to 100. This is because a failed monitor on a master might have degree one clone # instance to score 10. ${HA_SBIN_DIR}/crm_master -v 100 -l reboot elif [ "${n_type}_${n_op}" = "pre_demote" ]; then # if we are a slave and a demote event is announced, make sure we are highest on the list to become master # that is, when a slave resource was started after the promote event of an already running master (e.g. node of slave was down) # We also have to make sure to overrule the globally set resource_stickiness or any fail-count factors => INFINITY local n_uname="$OCF_RESKEY_CRM_meta_notify_demote_uname" if [ ${n_uname} != ${NODENAME} ]; then ${HA_SBIN_DIR}/crm_master -v INFINITY -l reboot fi fi } # # 'main' starts here... # ## GLOBALS SID="" sidadm="" InstanceName="" InstanceNr="" SAPVIRHOST="" DIR_EXECUTABLE="" SAPSTARTSRV="" SAPCONTROL="" DIR_PROFILE="" SAPSTARTPROFILE="" CLONE=0 NODENAME=$(ocf_local_nodename) if ( [ $# -ne 1 ] ) then sapinstance_usage exit $OCF_ERR_ARGS fi ACTION=$1 if [ "$ACTION" = "status" ]; then ACTION=monitor fi # These operations don't require OCF instance parameters to be set case "$ACTION" in usage|methods) sapinstance_$ACTION exit $OCF_SUCCESS;; meta-data) sapinstance_meta_data exit $OCF_SUCCESS;; notify) sapinstance_notify exit $OCF_SUCCESS;; *);; esac if ! ocf_is_root then ocf_log err "$0 must be run as root" exit $OCF_ERR_PERM fi # parameter check if [ -z "$OCF_RESKEY_InstanceName" ] then ocf_log err "Please set OCF_RESKEY_InstanceName to the name to the SAP instance profile!" exit $OCF_ERR_ARGS fi is_clone; CLONE=$? if [ ${CLONE} -eq 1 ] then CLACT=_clone else if [ "$ACTION" = "promote" -o "$ACTION" = "demote" ] then ocf_log err "$ACTION called in a non master/slave environment" exit $OCF_ERR_ARGS fi sapinstance_init $OCF_RESKEY_InstanceName fi # What kind of method was invoked? case "$ACTION" in start|stop|monitor|promote|demote) sapinstance_$ACTION$CLACT exit $?;; validate-all) sapinstance_validate exit $?;; reload ) ocf_log info "reloading SAPInstance parameters" exit $OCF_SUCCESS;; *) sapinstance_methods exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain index 3905695ae..7db42bd12 100755 --- a/heartbeat/VirtualDomain +++ b/heartbeat/VirtualDomain @@ -1,1158 +1,1158 @@ #!/bin/sh # # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # Resource Agent for domains managed by the libvirt API. # Requires a running libvirt daemon (libvirtd). # # (c) 2008-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all} # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_config_default="" OCF_RESKEY_migration_transport_default="" OCF_RESKEY_migration_downtime_default=0 OCF_RESKEY_migration_speed_default=0 OCF_RESKEY_migration_network_suffix_default="" OCF_RESKEY_force_stop_default=0 OCF_RESKEY_monitor_scripts_default="" OCF_RESKEY_autoset_utilization_cpu_default="true" OCF_RESKEY_autoset_utilization_host_memory_default="true" OCF_RESKEY_autoset_utilization_hv_memory_default="true" OCF_RESKEY_unset_utilization_cpu_default="false" OCF_RESKEY_unset_utilization_host_memory_default="false" OCF_RESKEY_unset_utilization_hv_memory_default="false" OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 )) OCF_RESKEY_CRM_meta_timeout_default=90000 OCF_RESKEY_save_config_on_stop_default=false OCF_RESKEY_sync_config_on_stop_default=false OCF_RESKEY_snapshot_default="" OCF_RESKEY_backingfile_default="" OCF_RESKEY_stateless_default="false" OCF_RESKEY_copyindirs_default="" OCF_RESKEY_shutdown_mode_default="" OCF_RESKEY_start_resources_default="false" : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_migration_transport=${OCF_RESKEY_migration_transport_default}} : ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}} : ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}} : ${OCF_RESKEY_migration_network_suffix=${OCF_RESKEY_migration_network_suffix_default}} : ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} : ${OCF_RESKEY_monitor_scripts=${OCF_RESKEY_monitor_scripts_default}} : ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} : ${OCF_RESKEY_autoset_utilization_host_memory=${OCF_RESKEY_autoset_utilization_host_memory_default}} : ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} : ${OCF_RESKEY_unset_utilization_cpu=${OCF_RESKEY_unset_utilization_cpu_default}} : ${OCF_RESKEY_unset_utilization_host_memory=${OCF_RESKEY_unset_utilization_host_memory_default}} : ${OCF_RESKEY_unset_utilization_hv_memory=${OCF_RESKEY_unset_utilization_hv_memory_default}} : ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}} : ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}} : ${OCF_RESKEY_save_config_on_stop=${OCF_RESKEY_save_config_on_stop_default}} : ${OCF_RESKEY_sync_config_on_stop=${OCF_RESKEY_sync_config_on_stop_default}} : ${OCF_RESKEY_snapshot=${OCF_RESKEY_snapshot_default}} : ${OCF_RESKEY_backingfile=${OCF_RESKEY_backingfile_default}} : ${OCF_RESKEY_stateless=${OCF_RESKEY_stateless_default}} : ${OCF_RESKEY_copyindirs=${OCF_RESKEY_copyindirs_default}} : ${OCF_RESKEY_shutdown_mode=${OCF_RESKEY_shutdown_mode_default}} : ${OCF_RESKEY_start_resources=${OCF_RESKEY_start_resources_default}} if ocf_is_true ${OCF_RESKEY_sync_config_on_stop}; then OCF_RESKEY_save_config_on_stop="true" fi ####################################################################### ## I'd very much suggest to make this RA use bash, ## and then use magic $SECONDS. ## But for now: NOW=$(date +%s) usage() { echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}" } VirtualDomain_meta_data() { cat < 1.0 Resource agent for a virtual domain (a.k.a. domU, virtual machine, virtual environment etc., depending on context) managed by libvirtd. Manages virtual domains through the libvirt virtualization framework Absolute path to the libvirt configuration file, for this virtual domain. Virtual domain configuration file Hypervisor URI to connect to. See the libvirt documentation for details on supported URI formats. The default is system dependent. Determine the system's default uri by running 'virsh --quiet uri'. Hypervisor URI Always forcefully shut down ("destroy") the domain on stop. The default behavior is to resort to a forceful shutdown only after a graceful shutdown attempt has failed. You should only set this to true if your virtual domain (or your virtualization backend) does not support graceful shutdown. Always force shutdown on stop Transport used to connect to the remote hypervisor while migrating. Please refer to the libvirt documentation for details on transports available. If this parameter is omitted, the resource will use libvirt's default transport to connect to the remote hypervisor. Remote hypervisor transport The username will be used in the remote libvirt remoteuri/migrateuri. No user will be given (which means root) in the username if omitted If remoteuri is set, migration_user will be ignored. Remote username for the remoteuri Define max downtime during live migration in milliseconds Live migration downtime Define live migration speed per resource in MiB/s Live migration speed Use a dedicated migration network. The migration URI is composed by adding this parameters value to the end of the node name. If the node name happens to be an FQDN (as opposed to an unqualified host name), insert the suffix immediately prior to the first period (.) in the FQDN. At the moment Qemu/KVM and Xen migration via a dedicated network is supported. Note: Be sure this composed host name is locally resolvable and the associated IP is reachable through the favored network. This suffix will be added to the remoteuri and migrateuri parameters. See also the migrate_options parameter below. Migration network host name suffix You can also specify here if the calculated migrate URI is unsuitable for your environment. If migrateuri is set then migration_network_suffix, migrateport and --migrateuri in migrate_options are effectively ignored. Use "%n" as the placeholder for the target node name. Please refer to the libvirt documentation for details on guest migration. Custom migrateuri for migration state transfer Extra virsh options for the guest live migration. You can also specify here --migrateuri if the calculated migrate URI is unsuitable for your environment. If --migrateuri is set then migration_network_suffix and migrateport are effectively ignored. Use "%n" as the placeholder for the target node name. Please refer to the libvirt documentation for details on guest migration. live migrate options To additionally monitor services within the virtual domain, add this parameter with a list of scripts to monitor. Note: when monitor scripts are used, the start and migrate_from operations will complete only when all monitor scripts have completed successfully. Be sure to set the timeout of these operations to accommodate this delay. space-separated list of monitor scripts If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it into the CPU utilization of the resource when the monitor is executed. Enable auto-setting the CPU utilization of the resource If set true, the agent will detect the number of *Max memory* from virsh, and put it into the host_memory utilization of the resource when the monitor is executed. Enable auto-setting the host_memory utilization of the resource If set true, the agent will detect the number of *Max memory* from virsh, and put it into the hv_memory utilization of the resource when the monitor is executed. Enable auto-setting the hv_memory utilization of the resource If set true then the agent will remove the cpu utilization resource when the monitor is executed. Enable auto-removing the CPU utilization of the resource If set true then the agent will remove the host_memory utilization resource when the monitor is executed. Enable auto-removing the host_memory utilization of the resource If set true then the agent will remove the hv_memory utilization resource when the monitor is executed. Enable auto-removing the hv_memory utilization of the resource This port will be used in the qemu migrateuri. If unset, the port will be a random highport. Port for migrateuri Use this URI as virsh connection URI to commuicate with a remote hypervisor. If remoteuri is set then migration_user and migration_network_suffix are effectively ignored. Use "%n" as the placeholder for the target node name. Please refer to the libvirt documentation for details on guest migration. Custom remoteuri to communicate with a remote hypervisor Changes to a running VM's config are normally lost on stop. This parameter instructs the RA to save the configuration back to the xml file provided in the "config" parameter. Save running VM's config back to its config file Setting this automatically enables save_config_on_stop. When enabled this parameter instructs the RA to call csync2 -x to synchronize the file to all nodes. csync2 must be properly set up for this to work. Save running VM's config back to its config file Path to the snapshot directory where the virtual machine image will be stored. When this parameter is set, the virtual machine's RAM state will be saved to a file in the snapshot directory when stopped. If on start a state file is present for the domain, the domain will be restored to the same state it was in right before it stopped last. This option is incompatible with the 'force_stop' option. Restore state on start/stop When the VM is used in Copy-On-Write mode, this is the backing file to use (with its full path). The VMs image will be created based on this backing file. This backing file will never be changed during the life of the VM. If the VM is wanted to work with Copy-On-Write mode, this is the backing file to use (with its full path) If set to true and backingfile is defined, the start of the VM will systematically create a new qcow2 based on the backing file, therefore the VM will always be stateless. If set to false, the start of the VM will use the COW (<vmname>.qcow2) file if it exists, otherwise the first start will create a new qcow2 based on the backing file given as backingfile. If set to true, the (<vmname>.qcow2) file will be re-created at each start, based on the backing file (if defined) List of directories for the virt-copy-in before booting the VM. Used only in stateless mode. List of directories for the virt-copy-in before booting the VM stateless mode. virsh shutdown method to use. Please verify that it is supported by your virsh toolsed with 'virsh help shutdown' When this parameter is set --mode shutdown_mode is passed as an additional argument to the 'virsh shutdown' command. One can use this option in case default acpi method does not work. Verify that this mode is supported by your VM. By default --mode is not passed. Instruct virsh to use specific shutdown mode Start the virtual storage pools and networks used by the virtual machine before starting it or before live migrating it. Ensure the needed virtual storage pools and networks are started EOF } set_util_attr() { local attr=$1 val=$2 local cval outp cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) if [ $? -ne 0 ] && [ -z "$cval" ]; then crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 | grep -e "not connected" > /dev/null 2>&1 if [ $? -eq 0 ]; then ocf_log debug "Unable to set utilization attribute, cib is not available" return fi fi if [ "$cval" != "$val" ]; then outp=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) || ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp" fi } unset_util_attr() { local attr=$1 local cval outp outp=$(crm_resource --resource=$OCF_RESOURCE_INSTANCE --utilization --delete-parameter=$attr 2>&1) || ocf_log warn "crm_resource failed to unset utilization attribute $attr: $outp" } update_utilization() { local dom_cpu dom_mem if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then dom_cpu=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/CPU\(s\)/{print $2}') test -n "$dom_cpu" && set_util_attr cpu $dom_cpu elif ocf_is_true "$OCF_RESKEY_unset_utilization_cpu"; then unset_util_attr cpu fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_host_memory"; then dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/Max memory/{printf("%d", $3/1024)}') test -n "$dom_mem" && set_util_attr host_memory "$dom_mem" elif ocf_is_true "$OCF_RESKEY_unset_utilization_host_memory"; then unset_util_attr host_memory fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/Max memory/{printf("%d", $3/1024)}') test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem" elif ocf_is_true "$OCF_RESKEY_unset_utilization_hv_memory"; then unset_util_attr hv_memory fi } get_emulator() { local emulator="" emulator=$(virsh $VIRSH_OPTIONS dumpxml $DOMAIN_NAME 2>/dev/null | sed -n -e 's/^.*\(.*\)<\/emulator>.*$/\1/p') if [ -z "$emulator" ] && [ -e "$EMULATOR_STATE" ]; then emulator=$(cat $EMULATOR_STATE) fi if [ -z "$emulator" ]; then emulator=$(cat ${OCF_RESKEY_config} | sed -n -e 's/^.*\(.*\)<\/emulator>.*$/\1/p') fi if [ -n "$emulator" ]; then basename $emulator fi } update_emulator_cache() { local emulator emulator=$(get_emulator) if [ -n "$emulator" ]; then echo $emulator > $EMULATOR_STATE fi } # attempt to check domain status outside of libvirt using the emulator process pid_status() { local rc=$OCF_ERR_GENERIC local emulator=$(get_emulator) # An emulator is not required, so only report message in debug mode local loglevel="debug" if ocf_is_probe; then loglevel="notice" fi case "$emulator" in qemu-kvm|qemu-dm|qemu-system-*) rc=$OCF_NOT_RUNNING ps awx | grep -E "[q]emu-(kvm|dm|system).*-name ($DOMAIN_NAME|[^ ]*guest=$DOMAIN_NAME(,[^ ]*)?) " > /dev/null 2>&1 if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi ;; libvirt_lxc) rc=$OCF_NOT_RUNNING ps awx | grep -E "[l]ibvirt_lxc.*-name ($DOMAIN_NAME|[^ ]*guest=$DOMAIN_NAME(,[^ ]*)?) " > /dev/null 2>&1 if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi ;; # This can be expanded to check for additional emulators *) # We may be running xen with PV domains, they don't # have an emulator set. try xl list or xen-lists if have_binary xl; then rc=$OCF_NOT_RUNNING xl list $DOMAIN_NAME >/dev/null 2>&1 if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi elif have_binary xen-list; then rc=$OCF_NOT_RUNNING xen-list $DOMAIN_NAME 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi else ocf_log $loglevel "Unable to determine emulator for $DOMAIN_NAME" fi ;; esac if [ $rc -eq $OCF_SUCCESS ]; then ocf_log debug "Virtual domain $DOMAIN_NAME is currently running." elif [ $rc -eq $OCF_NOT_RUNNING ]; then ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running." fi return $rc } VirtualDomain_status() { local try=0 rc=$OCF_ERR_GENERIC status="no state" while [ "$status" = "no state" ]; do try=$(($try + 1 )) status=$(LANG=C virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z') case "$status" in *"error:"*"domain not found"|*"error:"*"failed to get domain"*|"shut off") # shut off: domain is defined, but not started, will not happen if # domain is created but not defined # "Domain not found" or "failed to get domain": domain is not defined # and thus not started ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)" rc=$OCF_NOT_RUNNING ;; running|paused|idle|blocked|"in shutdown") # running: domain is currently actively consuming cycles # paused: domain is paused (suspended) # idle: domain is running but idle # blocked: synonym for idle used by legacy Xen versions # in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed. ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." rc=$OCF_SUCCESS ;; ""|*"failed to "*"connect to the hypervisor"*|"no state") # Empty string may be returned when virsh does not # receive a reply from libvirtd. # "no state" may occur when the domain is currently # being migrated (on the migration target only), or # whenever virsh can't reliably obtain the domain # state. status="no state" if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then # During the stop operation, we want to bail out # quickly, so as to be able to force-stop (destroy) # the domain if necessary. ocf_exit_reason "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out." return $OCF_ERR_GENERIC; elif [ "$__OCF_ACTION" = "monitor" ]; then pid_status rc=$? if [ $rc -ne $OCF_ERR_GENERIC ]; then # we've successfully determined the domains status outside of libvirt return $rc fi else # During all other actions, we just wait and try # again, relying on the CRM/LRM to time us out if # this takes too long. ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying." fi sleep 1 ;; *) # any other output is unexpected. ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!" sleep 1 ;; esac done return $rc } # virsh undefine removes configuration files if they are in # directories which are managed by libvirt. such directories # include also subdirectories of /etc (for instance # /etc/libvirt/*) which may be surprising. VirtualDomain didn't # include the undefine call before, hence this wasn't an issue # before. # # There seems to be no way to find out which directories are # managed by libvirt. # verify_undefined() { local tmpf if virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null | grep -wqs "$DOMAIN_NAME" then tmpf=$(mktemp -t vmcfgsave.XXXXXX) if [ ! -r "$tmpf" ]; then ocf_log warn "unable to create temp file, disk full?" # we must undefine the domain virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 else cp -p $OCF_RESKEY_config $tmpf virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 [ -f $OCF_RESKEY_config ] || cp -f $tmpf $OCF_RESKEY_config rm -f $tmpf fi fi } start_resources() { local virsh_opts="--connect=$1 --quiet" local pool_state net_state for pool in `sed -n "s/^.*pool=['\"]\([^'\"]\+\)['\"].*\$/\1/gp" ${OCF_RESKEY_config} | sort | uniq`; do pool_state=`LANG=C virsh ${virsh_opts} pool-info ${pool} | sed -n 's/^State: \+\(.*\)$/\1/gp'` if [ "$pool_state" != "running" ]; then virsh ${virsh_opts} pool-start $pool if [ $? -ne 0 ]; then ocf_exit_reason "Failed to start required virtual storage pool ${pool}." return $OCF_ERR_GENERIC fi else virsh ${virsh_opts} pool-refresh $pool fi done for net in `sed -n "s/^.*network=['\"]\([^'\"]\+\)['\"].*\$/\1/gp" ${OCF_RESKEY_config} | sort | uniq`; do net_state=`LANG=C virsh ${virsh_opts} net-info ${net} | sed -n 's/^Active: \+\(.*\)$/\1/gp'` if [ "$net_state" != "yes" ]; then virsh ${virsh_opts} net-start $net if [ $? -ne 0 ]; then ocf_exit_reason "Failed to start required virtual network ${net}." return $OCF_ERR_GENERIC fi fi done return $OCF_SUCCESS } VirtualDomain_start() { local snapshotimage if VirtualDomain_status; then ocf_log info "Virtual domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi # systemd drop-in to stop domain before libvirtd terminates services # during shutdown/reboot if systemd_is_running ; then systemd_drop_in "99-VirtualDomain-libvirt" "After" "libvirtd.service" systemd_drop_in "99-VirtualDomain-machines" "Wants" "virt-guest-shutdown.target" systemctl start virt-guest-shutdown.target fi snapshotimage="$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state" if [ -n "$OCF_RESKEY_snapshot" -a -f "$snapshotimage" ]; then virsh restore $snapshotimage if [ $? -eq 0 ]; then rm -f $snapshotimage return $OCF_SUCCESS fi ocf_exit_reason "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory." return $OCF_ERR_GENERIC fi # Make sure domain is undefined before creating. # The 'create' command guarantees that the domain will be # undefined on shutdown, but requires the domain to be undefined. # if a user defines the domain # outside of this agent, we have to ensure that the domain # is restored to an 'undefined' state before creating. verify_undefined if ocf_is_true "${OCF_RESKEY_start_resources}"; then start_resources ${OCF_RESKEY_hypervisor} rc=$? if [ $rc -eq $OCF_ERR_GENERIC ]; then return $rc fi fi if [ -z "${OCF_RESKEY_backingfile}" ]; then virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config} if [ $? -ne 0 ]; then ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi else if ocf_is_true "${OCF_RESKEY_stateless}" || [ ! -s "${OCF_RESKEY_config%%.*}.qcow2" ]; then # Create the Stateless image dirconfig=`dirname ${OCF_RESKEY_config}` qemu-img create -f qcow2 -b ${OCF_RESKEY_backingfile} ${OCF_RESKEY_config%%.*}.qcow2 if [ $? -ne 0 ]; then ocf_exit_reason "Failed qemu-img create ${DOMAIN_NAME} with backing file ${OCF_RESKEY_backingfile}." return $OCF_ERR_GENERIC fi virsh define ${OCF_RESKEY_config} if [ $? -ne 0 ]; then ocf_exit_reason "Failed to define virtual domain ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi if [ -n "${OCF_RESKEY_copyindirs}" ]; then # Inject copyindirs directories and files virt-copy-in -d ${DOMAIN_NAME} ${OCF_RESKEY_copyindirs} / if [ $? -ne 0 ]; then ocf_exit_reason "Failed on virt-copy-in command ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi fi else virsh define ${OCF_RESKEY_config} if [ $? -ne 0 ]; then ocf_exit_reason "Failed to define virtual domain ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi fi virsh $VIRSH_OPTIONS start ${DOMAIN_NAME} if [ $? -ne 0 ]; then ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi fi while ! VirtualDomain_monitor; do sleep 1 done return $OCF_SUCCESS } force_stop() { local out ex translate local status=0 ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}." out=$(LANG=C virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1) ex=$? translate=$(echo $out|tr 'A-Z' 'a-z') echo >&2 "$translate" case $ex$translate in *"error:"*"domain is not running"*|*"error:"*"domain not found"*|\ *"error:"*"failed to get domain"*) : ;; # unexpected path to the intended outcome, all is well [!0]*) ocf_exit_reason "forced stop failed" return $OCF_ERR_GENERIC ;; 0*) while [ $status != $OCF_NOT_RUNNING ]; do VirtualDomain_status status=$? done ;; esac return $OCF_SUCCESS } sync_config(){ ocf_log info "Syncing $DOMAIN_NAME config file with csync2 -x ${OCF_RESKEY_config}" if ! csync2 -x ${OCF_RESKEY_config}; then ocf_log warn "Syncing ${OCF_RESKEY_config} failed."; fi } save_config(){ CFGTMP=$(mktemp -t vmcfgsave.XXX) virsh $VIRSH_OPTIONS dumpxml --inactive --security-info ${DOMAIN_NAME} > ${CFGTMP} if [ -s ${CFGTMP} ]; then if ! cmp -s ${CFGTMP} ${OCF_RESKEY_config}; then if virt-xml-validate ${CFGTMP} domain 2>/dev/null ; then ocf_log info "Saving domain $DOMAIN_NAME to ${OCF_RESKEY_config}. Please make sure it's present on all nodes or sync_config_on_stop is on." if cat ${CFGTMP} > ${OCF_RESKEY_config} ; then ocf_log info "Saved $DOMAIN_NAME domain's configuration to ${OCF_RESKEY_config}." if ocf_is_true "$OCF_RESKEY_sync_config_on_stop"; then sync_config fi else ocf_log warn "Moving ${CFGTMP} to ${OCF_RESKEY_config} failed." fi else ocf_log warn "Domain $DOMAIN_NAME config failed to validate after dump. Skipping config update." fi fi else ocf_log warn "Domain $DOMAIN_NAME config has 0 size. Skipping config update." fi rm -f ${CFGTMP} } VirtualDomain_stop() { local i local status local shutdown_timeout local needshutdown=1 VirtualDomain_status status=$? case $status in $OCF_SUCCESS) if ocf_is_true $OCF_RESKEY_force_stop; then # if force stop, don't bother attempting graceful shutdown. force_stop return $? fi ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}." if [ -n "$OCF_RESKEY_snapshot" ]; then virsh save $DOMAIN_NAME "$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state" if [ $? -eq 0 ]; then needshutdown=0 else ocf_log error "Failed to save snapshot state of ${DOMAIN_NAME} on stop" fi fi # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi # issue the shutdown if save state didn't shutdown for us if [ $needshutdown -eq 1 ]; then # Issue a graceful shutdown request if [ -n "${OCF_RESKEY_CRM_shutdown_mode}" ]; then shutdown_opts="--mode ${OCF_RESKEY_CRM_shutdown_mode}" fi virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME} $shutdown_opts fi # The "shutdown_timeout" we use here is the operation # timeout specified in the CIB, minus 5 seconds shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) # Loop on status until we reach $shutdown_timeout while [ $NOW -lt $shutdown_timeout ]; do VirtualDomain_status status=$? case $status in $OCF_NOT_RUNNING) # This was a graceful shutdown. return $OCF_SUCCESS ;; $OCF_SUCCESS) # Domain is still running, keep # waiting (until shutdown_timeout # expires) sleep 1 ;; *) # Something went wrong. Bail out and # resort to forced stop (destroy). break; esac NOW=$(date +%s) done ;; $OCF_NOT_RUNNING) ocf_log info "Domain $DOMAIN_NAME already stopped." return $OCF_SUCCESS esac # OK. Now if the above graceful shutdown hasn't worked, kill # off the domain with destroy. If that too does not work, # have the LRM time us out. force_stop } mk_migrateuri() { local target_node local migrate_target local hypervisor target_node="$OCF_RESKEY_CRM_meta_migrate_target" # A typical migration URI via a special migration network looks # like "tcp://bar-mig:49152". The port would be randomly chosen # by libvirt from the range 49152-49215 if omitted, at least since # version 0.7.4 ... if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}" # Hostname might be a FQDN migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},") case $hypervisor in qemu) # For quiet ancient libvirt versions a migration port is needed # and the URI must not contain the "//". Newer versions can handle # the "bad" URI. echo "tcp:${migrate_target}:${OCF_RESKEY_migrateport}" ;; xen) echo "${migrate_target}" ;; *) ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}." ;; esac fi } VirtualDomain_migrate_to() { local rc local target_node local remoteuri local transport_suffix local migrateuri local migrate_opts local migrate_pid target_node="$OCF_RESKEY_CRM_meta_migrate_target" if VirtualDomain_status; then # Find out the remote hypervisor to connect to. That is, turn # something like "qemu://foo:9999/system" into # "qemu+tcp://bar:9999/system" if [ -n "${OCF_RESKEY_remoteuri}" ]; then remoteuri=`echo "${OCF_RESKEY_remoteuri}" | sed "s/%n/$target_node/g"` else if [ -n "${OCF_RESKEY_migration_transport}" ]; then transport_suffix="+${OCF_RESKEY_migration_transport}" fi # append user defined suffix if virsh target should differ from cluster node name if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then # Hostname might be a FQDN target_node=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},") fi # a remote user has been defined to connect to target_node if echo ${OCF_RESKEY_migration_user} | grep -q "^[a-z][-a-z0-9]*$" ; then target_node="${OCF_RESKEY_migration_user}@${target_node}" fi # Scared of that sed expression? So am I. :-) remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,") fi # User defined migrateuri or do we make one? migrate_opts="$OCF_RESKEY_migrate_options" # migration_uri is directly set if [ -n "${OCF_RESKEY_migrateuri}" ]; then migrateuri=`echo "${OCF_RESKEY_migrateuri}" | sed "s/%n/$target_node/g"` # extract migrationuri from options - elif echo "$migrate_opts" | fgrep -qs -- "--migrateuri="; then + elif echo "$migrate_opts" | $FGREP -qs -- "--migrateuri="; then migrateuri=`echo "$migrate_opts" | sed "s/.*--migrateuri=\([^ ]*\).*/\1/;s/%n/$target_node/g"` # auto generate else migrateuri=`mk_migrateuri` fi # remove --migrateuri from migration_opts migrate_opts=`echo "$migrate_opts" | sed "s/\(.*\)--migrateuri=[^ ]*\(.*\)/\1\2/"` # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi if ocf_is_true "${OCF_RESKEY_start_resources}"; then start_resources $remoteuri rc=$? if [ $rc -eq $OCF_ERR_GENERIC ]; then return $rc fi fi # Live migration speed limit if [ ${OCF_RESKEY_migration_speed} -ne 0 ]; then ocf_log info "$DOMAIN_NAME: Setting live migration speed limit for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed})." virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed} fi # OK, we know where to connect to. Now do the actual migration. ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using: virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)." virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri & migrate_pid=${!} # Live migration downtime interval # Note: You can set downtime only while live migration is in progress if [ ${OCF_RESKEY_migration_downtime} -ne 0 ]; then sleep 2 ocf_log info "$DOMAIN_NAME: Setting live migration downtime for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime})." virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime} fi wait ${migrate_pid} rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc" return $OCF_ERR_GENERIC else ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded." return $OCF_SUCCESS fi else ocf_exit_reason "$DOMAIN_NAME: migrate_to: Not active locally!" return $OCF_ERR_GENERIC fi } VirtualDomain_migrate_from() { # systemd drop-in to stop domain before libvirtd terminates services # during shutdown/reboot if systemd_is_running ; then systemd_drop_in "99-VirtualDomain-libvirt" "After" "libvirtd.service" systemd_drop_in "99-VirtualDomain-machines" "Wants" "virt-guest-shutdown.target" systemctl start virt-guest-shutdown.target fi while ! VirtualDomain_monitor; do sleep 1 done ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded." # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi return $OCF_SUCCESS } VirtualDomain_monitor() { # First, check the domain status. If that returns anything other # than $OCF_SUCCESS, something is definitely wrong. VirtualDomain_status rc=$? if [ ${rc} -eq ${OCF_SUCCESS} ]; then # OK, the generic status check turned out fine. Now, if we # have monitor scripts defined, run them one after another. for script in ${OCF_RESKEY_monitor_scripts}; do script_output="$($script 2>&1)" script_rc=$? if [ ${script_rc} -ne ${OCF_SUCCESS} ]; then # A monitor script returned a non-success exit # code. Stop iterating over the list of scripts, log a # warning message, and propagate $OCF_ERR_GENERIC. ocf_exit_reason "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}" rc=$OCF_ERR_GENERIC break else ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}" fi done fi update_emulator_cache update_utilization # Save configuration on monitor as well, so we will have a better chance of # having fresh and up to date config files on all nodes. if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi return ${rc} } VirtualDomain_validate_all() { if ocf_is_true $OCF_RESKEY_force_stop && [ -n "$OCF_RESKEY_snapshot" ]; then ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together." return $OCF_ERR_CONFIGURED fi # check if we can read the config file (otherwise we're unable to # deduce $DOMAIN_NAME from it, see below) if [ ! -r $OCF_RESKEY_config ]; then if ocf_is_probe; then ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe." elif [ "$__OCF_ACTION" = "stop" ]; then ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped." else ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or not readable." fi return $OCF_ERR_INSTALLED fi if [ -z $DOMAIN_NAME ]; then ocf_exit_reason "Unable to determine domain name." return $OCF_ERR_INSTALLED fi # Check if csync2 is available when config tells us we might need it. if ocf_is_true $OCF_RESKEY_sync_config_on_stop; then check_binary csync2 fi # Check if migration_speed is a decimal value if ! ocf_is_decimal ${OCF_RESKEY_migration_speed}; then ocf_exit_reason "migration_speed has to be a decimal value" return $OCF_ERR_CONFIGURED fi # Check if migration_downtime is a decimal value if ! ocf_is_decimal ${OCF_RESKEY_migration_downtime}; then ocf_exit_reason "migration_downtime has to be a decimal value" return $OCF_ERR_CONFIGURED fi if ocf_is_true "${OCF_RESKEY_stateless}" && [ -z "${OCF_RESKEY_backingfile}" ]; then ocf_exit_reason "Stateless functionality can't be achieved without a backing file." return $OCF_ERR_CONFIGURED fi } VirtualDomain_getconfig() { # Grab the virsh uri default, but only if hypervisor isn't set : ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)} # Set options to be passed to virsh: VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet" # Retrieve the domain name from the xml file. - DOMAIN_NAME=`egrep '[[:space:]]*.*[[:space:]]*$' ${OCF_RESKEY_config} 2>/dev/null | sed -e 's/[[:space:]]*\(.*\)<\/name>[[:space:]]*$/\1/'` + DOMAIN_NAME=`$EGREP '[[:space:]]*.*[[:space:]]*$' ${OCF_RESKEY_config} 2>/dev/null | sed -e 's/[[:space:]]*\(.*\)<\/name>[[:space:]]*$/\1/'` EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state" } OCF_REQUIRED_PARAMS="config" OCF_REQUIRED_BINARIES="virsh sed" ocf_rarun $* diff --git a/heartbeat/WAS b/heartbeat/WAS index 15b56e99e..44aa83e20 100755 --- a/heartbeat/WAS +++ b/heartbeat/WAS @@ -1,572 +1,572 @@ #!/bin/sh # # # WAS # # Description: Manages a Websphere Application Server as an HA resource # # # Author: Alan Robertson # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 International Business Machines, Inc. # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 WAS::/opt/WebSphere/ApplicationServer/config/server-cfg.xml # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_config # (WAS-configuration file, used for the single server edition of WAS) # OCF_RESKEY_port # (WAS--port-number, used for the advanced edition of WAS) ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### WASDIR=/opt/WebSphere/AppServer if [ ! -d $WASDIR ] then WASDIR=/usr/WebSphere/AppServer fi STARTTIME=300 # 5 minutes DEFAULT_WASPORTS="9080" # # WASBIN=$WASDIR/bin DEFAULT=$WASDIR/config/server-cfg.xml # # Print usage message # usage() { methods=`WAS_methods | grep -v methods` methods=`echo $methods | tr ' ' '|'` cat <<-END usage: $0 ($methods) For the single server edition of WAS, you have to set the following enviroment virable: OCF_RESKEY_config (WAS-configuration file) For the advanced edition of WAS, you have to set the following enviroment virable: OCF_RESKEY_port (WAS--port-number) $0 manages a Websphere Application Server (WAS) as an HA resource The 'start' operation starts WAS. The 'stop' operation stops WAS. The 'status' operation reports whether WAS is running The 'monitor' operation reports whether the WAS seems to be working (httpd also needs to be working for this case) The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_config or OCF_RESKEY_port) is valid The 'methods' operation reports on the methods $0 supports This is known to work with the Single Server edition of Websphere, and is believed to work with the Advanced edition too. Since the Advanced Edition has no configuration file (it's in a the database) you need to give a port number instead of a configuration file for this config parameter. The default configuration file for the single server edition is: $DEFAULT The default snoop-port for the advanced edition is: $DEFAULT_WASPORTS The start and stop operations must be run as root. The status operation will report a pid of "-" for the WAS root process using unless it is run as root. If you don't have xmllint on your system, parsing of WAS configuration files is very primitive. In this case, the port specification we need from the XML config file has to be on the same line as the first part of the tag. We run servlet/snoop on the first transport port listed in the config file for the "monitor" operation. END } meta_data() { cat < 1.0 Resource script for WAS. It manages a Websphere Application Server (WAS) as an HA resource. Manages a WebSphere Application Server instance The WAS-configuration file. configration file The WAS-(snoop)-port-number. port END } # # Reformat the XML document in a sort of canonical form # if we can. If we don't have xmllint, we just cat it out # and hope for the best ;-) # xmlcat() { if [ "X$XMLcat" = X ] then XMLcat=`which xmllint 2>/dev/null` if [ "X${XMLcat}" = X -o ! -x "${XMLcat}" ] then XMLcat=cat else XMLcat="$XMLcat --recover --format" fi fi for j in "$@" do ${XMLcat} "$j" done } # #This is a bit skanky, but it works anyway... # # # # # # It's not really skanky if we can find xmllint on the system, because it # reformats tags so they are all on one line, which is all we we need... # # # Get the numbers of the ports WAS should be listening on... # # If we don't have xmllint around, then the applicationserver and the # port= specification have to be on the same line in the XML config file. # GetWASPorts() { case $1 in [0-9]*) echo "$1" | tr ',' '\012';; *) xmlcat $1 | grep -i 'transports.*applicationserver:HTTPTransport' | grep port= | sed -e 's%.*port= *"* *%%' \ -e 's%[^0-9][^0-9]*.*$%%' # Delete up to port=, throw away optional quote and optional # white space. # Throw away everything after the first non-digit. # This should leave us the port number all by itself... esac } # # We assume that the first port listed in the # is the one we should run servlet/snoop on. # GetWASSnoopPort() { GetWASPorts "$@" | head -n1 } # # Return information on the processname/id for the WAS ports # # pid/java is the expected output. Several lines, one per port... # # WASPortInfo() { pat="" once=yes PortCount=0 for j in $* do case $pat in "") pat="$j";; *) pat="$pat|$j";; esac PortCount=`expr $PortCount + 1` done - netstat -ltnp 2>/dev/null| egrep -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%' + netstat -ltnp 2>/dev/null| $EGREP -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%' } # # Return the number of WAS ports which are open # CheckWASPortsInUse() { count=`WASPortInfo "$@" | wc -l` echo $count } # # Return the pid(s) of the processes that have WAS ports open # WASPIDs() { WASPortInfo "$@" | sort -u | cut -f1 -d/ } # # The version of ps that returns all processes and their (long) args # It's only used by WAS_procs, which isn't used for anything ;-) # ps_long() { ps axww } # # The total set of WAS processes (single server only) # WAS_procs() { ps_long | grep -i "config=$1" | grep -i java | cut -d' ' -f1 } # # methods: What methods/operations do we support? # WAS_methods() { cat <<-! start stop status methods validate-all meta-data usage ! if have_binary $WGET then echo monitor fi } # # Return WAS status (silently) # WAS_status() { WASPorts=`GetWASPorts $1` PortsInUse=`CheckWASPortsInUse $WASPorts` case $PortsInUse in 0) false;; *) true;; esac } # # Report on WAS status to stdout... # WAS_report_status() { WASPorts=`GetWASPorts $1` PortCount=`echo $WASPorts | wc -w` PortCount=`echo $PortCount` PortsInUse=`CheckWASPortsInUse $WASPorts` case $PortsInUse in 0) ocf_log debug "WAS: server $1 is stopped."; return $OCF_NOT_RUNNING;; *) pids=`WASPIDs $WASPorts` if [ $PortsInUse -ge $PortCount ] then ocf_log debug "WAS: server $1 is running (pid" $pids "et al)." else ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports." fi return $OCF_SUCCESS;; esac } # # Monitor WAS - does it really seem to be working? # # For this we invoke the snoop applet via wget. # # This is actually faster than WAS_status above... # WAS_monitor() { trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0 tmpfile=`maketempfile` || return 1 SnoopPort=`GetWASSnoopPort $1` output=`$WGET -nv -O$tmpfile http://localhost:$SnoopPort/servlet/snoop 2>&1` rc=$? if [ $rc -eq 0 ] then if grep -i 'user-agent.*Wget' $tmpfile >/dev/null then : OK else ocf_log "err" "WAS: $1: no user-agent from snoop application" rc=$OCF_ERR_GENERIC fi else ocf_log "err" "WAS: $1: wget failure: $output" rc=$OCF_ERR_GENERIC fi return $rc } # # Start WAS instance # WAS_start() { # Launch Arguments: # # -configFile # -nodeName # -serverName # -oltEnabled # -oltHost # -oltPort # -debugEnabled # -jdwpPort # -debugSource # -serverTrace # -serverTraceFile # -script [] # -platform # -noExecute # -help if [ -x $WASBIN/startServer.sh ] then cmd="$WASBIN/startServer.sh -configFile $1" else cmd="$WASBIN/startupServer.sh" fi if ocf_run $cmd then if WAS_wait_4_start $STARTTIME "$@" then #true return $OCF_SUCCESS else ocf_log "err" "WAS server $1 did not start correctly" return $OCF_ERR_GENERIC fi else #false return $OCF_ERR_GENERIC fi } # # Wait for WAS to actually start up. # # It seems to take between 30 and 60 seconds for it to # start up on a trivial WAS instance. # WAS_wait_4_start() { max=$1 retries=0 shift while [ $retries -lt $max ] do if WAS_status "$@" then return $OCF_SUCCESS else sleep 1 fi retries=`expr $retries + 1` done WAS_status "$@" } # # Shut down WAS # WAS_stop() { # They don't return good return codes... # And, they seem to allow anyone to stop WAS (!) if [ -x $WASBIN/stopServer.sh ] then ocf_run $WASBIN/stopServer.sh -configFile $1 else WASPorts=`GetWASPorts $1` kill `WASPIDs $WASPorts` fi if WAS_status $1 then ocf_log "err" "WAS: $1 did not stop correctly" #false return $OCF_ERR_GENERIC else #true return $OCF_SUCCESS fi } # # Check if the port is valid # CheckPort() { ocf_is_decimal "$1" && [ $1 -gt 0 ] } WAS_validate_all() { if [ -x $WASBIN/startServer.sh ]; then # $arg should be config file if [ ! -f "$arg" ]; then ocf_log err "Configuration file [$arg] does not exist" exit $OCF_ERR_ARGS fi # $arg should specify a valid port number at the very least local WASPorts=`GetWASPorts $arg` if [ -z "$WASPorts" ]; then ocf_log err "No port number specified in configuration file [$arg]" exit $OCF_ERR_CONFIGURED fi local port local have_valid_port=false for port in $WASPorts; do if CheckPort $port; then have_valid_port=true break fi done if [ "false" = "$have_valid_port" ]; then ocf_log err "No valid port number specified in configuration file [$arg]" exit $OCF_ERR_CONFIGURED fi elif [ -x $WASBIN/startupServer.sh ]; then # $arg should be port number if CheckPort "$arg"; then ocf_log err "Port number is required but [$arg] is not valid port number" exit $OCF_ERR_ARGS fi else # Do not know hot to validate_all ocf_log warn "Do not know how to validate-all, assuming validation OK" return $OCF_SUCCESS fi } # # 'main' starts here... # if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # # Supply default configuration parameter(s) # if ( [ -z $OCF_RESKEY_config ] && [ -z $OCF_RESKEY_port ] ) then if [ -f $DEFAULT ] then arg=$DEFAULT else arg=$DEFAULT_WASPORTS fi elif [ ! -z $OCF_RESKEY_config ] then arg=$OCF_RESKEY_config else arg=$OCF_RESKEY_port fi if [ ! -f $arg ] then case $arg in [0-9]*) ;; # ignore port numbers... *) ocf_log "err" "WAS configuration file $arg does not exist!" usage exit $OCF_ERR_ARGS;; esac fi # What kind of method was invoked? case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; start) WAS_start $arg exit $?;; stop) WAS_stop $arg exit $?;; status) WAS_report_status $arg exit $?;; monitor) WAS_monitor $arg exit $?;; validate-all) WAS_validate_all $arg exit $?;; methods) WAS_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/WAS6 b/heartbeat/WAS6 index 9e18cd682..e71eec930 100755 --- a/heartbeat/WAS6 +++ b/heartbeat/WAS6 @@ -1,546 +1,546 @@ #!/bin/sh # WAS6 # # Description: Manages a Websphere Application Server as an HA resource # # # Author: Ru Xiang Min # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2006 International Business Machines China, Ltd., Inc. # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 WAS::/opt/IBM/WebSphere/AppServer/profiles/default/config/cells/Node01Cell/nodes/Node01/serverindex.xml # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_profile # (WAS profile name, used for the single server edition of WAS6) ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### WAS_DIR=/opt/IBM/WebSphere/AppServer if [ ! -d $WAS_DIR ] then WAS_DIR=/usr/IBM/WebSphere/AppServer fi STARTTIME=300 # 5 minutes DEFAULT_WASPORTS="9080" # # WAS_BIN=$WAS_DIR/bin DEFAULT=default # # Print usage message # usage() { methods=`WAS_methods | grep -v methods` methods=`echo $methods | tr ' ' '|'` cat <<-END usage: $0 ($methods) For the single server edition of WAS6, you have to set the following enviroment virable: OCF_RESKEY_profile (WAS profile name) $0 manages a Websphere Application Server 6(WAS6) as an HA resource The 'start' operation starts WAS6. The 'stop' operation stops WAS6. The 'status' operation reports whether WAS6 is running The 'monitor' operation reports whether the WAS6 seems to be working (httpd also needs to be working for this case) The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_profileName ) is valid The 'methods' operation reports on the methods $0 supports This is known to work with the Single Server edition of Websphere. The default profile name for the single server edition is: $DEFAULT The start and stop operations must be run as root. The status operation will report a pid of "-" for the WAS root process using unless it is run as root. If you don't have xmllint on your system, parsing of WAS configuration files is very primitive. We run servlet/snoop on the seventh transport port listed in the config file for the "monitor" operation. END } meta_data() { cat < 1.0 Resource script for WAS6. It manages a Websphere Application Server (WAS6) as an HA resource. Manages a WebSphere Application Server 6 instance The WAS profile name. profile name END } # # Reformat the XML document in a sort of canonical form # if we can. If we don't have xmllint, we just cat it out # and hope for the best ;-) # xmlcat() { if [ "X$XMLcat" = X ] then XMLcat=`which xmllint 2>/dev/null` if [ "X${XMLcat}" = X -o ! -x "${XMLcat}" ] then XMLcat=cat else XMLcat="$XMLcat --recover --format" fi fi for j in "$@" do ${XMLcat} "$j" done } # #This is a bit skanky, but it works anyway... # # It's not really skanky if we can find xmllint on the system, because it # reformats tags so they are all on one line, which is all we we need... # # # Get the numbers of the ports WAS should be listening on... # # If we don't have xmllint around, then the applicationserver and the # port= specification have to be on the same line in the XML config file. # GetWASPorts() { case $1 in [0-9]*) echo "$1" | tr ',' '\012';; *) xmlcat ${WAS_DIR}/profiles/${WAS_PROFILE_NAME}/config/cells/${WAS_CELL}/nodes/${WAS_NODE}/serverindex.xml | grep port= | sed -e 's%.*port= *"* *%%' \ -e 's%[^0-9][^0-9]*.*$%%' # Delete up to port=, throw away optional quote and optional # white space. # Throw away everything after the first non-digit. # This should leave us the port number all by itself... esac } # # We assume that the seventh port listed in the serverindex.xml # is the one we should run servlet/snoop on. # GetWASSnoopPort() { GetWASPorts "$@" | sed -n '7p' } # # Return information on the processname/id for the WAS ports # # pid/java is the expected output. Several lines, one per port... # # WASPortInfo() { pat="" once=yes PortCount=0 for j in $* do case $pat in "") pat="$j";; *) pat="$pat|$j";; esac PortCount=`expr $PortCount + 1` done - netstat -ltnp 2>/dev/null| egrep -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%' + netstat -ltnp 2>/dev/null| $EGREP -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%' } # # Return the number of WAS ports which are open # CheckWASPortsInUse() { count=`WASPortInfo "$@" | wc -l` echo $count } # # Return the pid(s) of the processes that have WAS ports open # WASPIDs() { WASPortInfo "$@" | sort -u | cut -f1 -d/ } # # The version of ps that returns all processes and their (long) args # It's only used by WAS_procs, which isn't used for anything ;-) # ps_long() { ps axww } # # The total set of WAS processes (single server only) # WAS_procs() { ps_long | grep -i "config=$1" | grep -i java | cut -d' ' -f1 } # # methods: What methods/operations do we support? # WAS_methods() { cat <<-! start stop status methods validate-all meta-data usage ! if have_binary $WGET then echo " monitor" fi } # # Return WAS status (silently) # WAS_status() { WASPorts=`GetWASPorts $1` PortsInUse=`CheckWASPortsInUse $WASPorts` case $PortsInUse in 0) false;; *) true;; esac } # # Report on WAS status to stdout... # WAS_report_status() { WASPorts=`GetWASPorts $1` PortCount=`echo $WASPorts | wc -w` PortCount=`echo $PortCount` PortsInUse=`CheckWASPortsInUse $WASPorts` case $PortsInUse in 0) ocf_log debug "WAS: server $1 is stopped."; return $OCF_NOT_RUNNING;; *) pids=`WASPIDs $WASPorts` if [ $PortsInUse -ge $PortCount ] then ocf_log debug "WAS: server $1 is running (pid" $pids "et al)." else ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports." fi return $OCF_SUCCESS;; esac } # # Monitor WAS - does it really seem to be working? # # For this we invoke the snoop applet via wget. # # This is actually faster than WAS_status above... # WAS_monitor() { trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0 tmpfile=`maketempfile` || exit 1 SnoopPort=`GetWASSnoopPort $1` output=`$WGET -nv -O$tmpfile http://localhost:$SnoopPort/snoop 2>&1` rc=$? if [ $rc -eq 0 ] then if grep -i 'user-agent.*Wget' $tmpfile >/dev/null then : OK else ocf_log "err" "WAS: $1: no user-agent from snoop application" rc=$OCF_ERR_GENERIC fi else ocf_log "err" "WAS: $1: wget failure: $output" rc=$OCF_ERR_GENERIC fi return $rc } # # Start WAS instance # WAS_start() { # Launch Arguments: # -nowait # -quiet # -logfile # -replacelog # -trace # -script [