diff --git a/README b/README index 2d46d3fc5..09d1f1c58 100644 --- a/README +++ b/README @@ -1,3 +1,12 @@ -Repository moved! Please use - https://github.com/ClusterLabs/resource-agents - git://github.com/ClusterLabs/resource-agents.git +This repository contains resource agents (RAs) compliant +with the Open Cluster Framework (OCF) specification. + +These resource agents are used by two cluster resource +management implementations: + +- Pacemaker +- rgmanager + +More information about OCF resource agents is available +in the OCF Resource Agent Developer's guide, in the +doc/dev-guides subdirectory. diff --git a/configure.ac b/configure.ac index f624e8e6d..97485a275 100644 --- a/configure.ac +++ b/configure.ac @@ -1,917 +1,917 @@ dnl dnl autoconf for Agents dnl dnl License: GNU General Public License (GPL) dnl =============================================== dnl Bootstrap dnl =============================================== AC_PREREQ(2.63) dnl Suggested structure: dnl information on the package dnl checks for programs dnl checks for libraries dnl checks for header files dnl checks for types dnl checks for structures dnl checks for compiler characteristics dnl checks for library functions dnl checks for system services AC_INIT([resource-agents], m4_esyscmd([make/git-version-gen .tarball-version]), [to_be_defined@foobar.org]) AC_USE_SYSTEM_EXTENSIONS CRM_DTD_VERSION="1.0" PKG_FEATURES="" AC_CONFIG_AUX_DIR(.) AC_CANONICAL_HOST dnl Where #defines go (e.g. `AC_CHECK_HEADERS' below) dnl dnl Internal header: include/config.h dnl - Contains ALL defines dnl - include/config.h.in is generated automatically by autoheader dnl - NOT to be included in any header files except lha_internal.h dnl (which is also not to be included in any other header files) dnl dnl External header: include/agent_config.h dnl - Contains a subset of defines checked here dnl - Manually edit include/agent_config.h.in to have configure include new defines dnl - Should not include HAVE_* defines dnl - Safe to include anywhere AM_CONFIG_HEADER(include/config.h include/agent_config.h) ALL_LINGUAS="en fr" AC_ARG_WITH(version, [ --with-version=version Override package version (if you're a packager needing to pretend) ], [ PACKAGE_VERSION="$withval" ]) AC_ARG_WITH(pkg-name, [ --with-pkg-name=name Override package name (if you're a packager needing to pretend) ], [ PACKAGE_NAME="$withval" ]) dnl dnl AM_INIT_AUTOMAKE([1.11.1 foreign dist-bzip2 dist-xz]) dnl AM_INIT_AUTOMAKE([1.10.1 foreign dist-bzip2]) AC_DEFINE_UNQUOTED(AGENTS_VERSION, "$PACKAGE_VERSION", Current agents version) CC_IN_CONFIGURE=yes export CC_IN_CONFIGURE LDD=ldd dnl ======================================================================== dnl Compiler characteristics dnl ======================================================================== # check stolen from gnulib/m4/gnu-make.m4 if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then AC_MSG_ERROR([you don't seem to have GNU make; it is required]) fi AC_PROG_CC dnl Can force other with environment variable "CC". AM_PROG_CC_C_O AC_PROG_CC_STDC AC_PROG_AWK AC_PROG_LN_S AC_PROG_INSTALL AC_PROG_MAKE_SET AC_C_STRINGIZE AC_C_INLINE AC_TYPE_SIZE_T AC_TYPE_SSIZE_T AC_TYPE_UID_T AC_TYPE_UINT16_T AC_TYPE_UINT8_T AC_TYPE_UINT32_T AC_CHECK_SIZEOF(char) AC_CHECK_SIZEOF(short) AC_CHECK_SIZEOF(int) AC_CHECK_SIZEOF(long) AC_CHECK_SIZEOF(long long) AC_STRUCT_TIMEZONE dnl =============================================== dnl Helpers dnl =============================================== cc_supports_flag() { local CFLAGS="$@" AC_MSG_CHECKING(whether $CC supports "$@") AC_COMPILE_IFELSE([int main(){return 0;}] ,[RC=0; AC_MSG_RESULT(yes)],[RC=1; AC_MSG_RESULT(no)]) return $RC } extract_header_define() { AC_MSG_CHECKING(for $2 in $1) - Cfile=/tmp/extract_define.$2.${$} + Cfile=$srcdir/extract_define.$2.${$} printf "#include \n" > ${Cfile}.c printf "#include <%s>\n" $1 >> ${Cfile}.c printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" $2 >> ${Cfile}.c $CC $CFLAGS ${Cfile}.c -o ${Cfile} value=`${Cfile}` AC_MSG_RESULT($value) printf $value rm -f ${Cfile}.c ${Cfile} } dnl =============================================== dnl Configure Options dnl =============================================== dnl Some systems, like Solaris require a custom package name AC_ARG_WITH(pkgname, [ --with-pkgname=name name for pkg (typically for Solaris) ], [ PKGNAME="$withval" ], [ PKGNAME="LXHAhb" ], ) AC_SUBST(PKGNAME) AC_ARG_ENABLE([ansi], [ --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers. [default=yes]]) AC_ARG_ENABLE([fatal-warnings], [ --enable-fatal-warnings very pedantic and fatal warnings for gcc [default=yes]]) INITDIR="" AC_ARG_WITH(initdir, [ --with-initdir=DIR directory for init (rc) scripts [${INITDIR}]], [ INITDIR="$withval" ]) OCF_ROOT_DIR="/usr/lib/ocf" AC_ARG_WITH(ocf-root, [ --with-ocf-root=DIR directory for OCF scripts [${OCF_ROOT_DIR}]], [ if test x"$withval" = xprefix; then OCF_ROOT_DIR=${prefix}; else OCF_ROOT_DIR="$withval"; fi ]) HA_RSCTMPDIR=${localstatedir}/run/resource-agents AC_ARG_WITH(rsctmpdir, [ --with-rsctmpdir=DIR directory for resource agents state files [${HA_RSCTMPDIR}]], [ if test x"$withval" = xprefix; then HA_RSCTMPDIR=${prefix}; else HA_RSCTMPDIR="$withval"; fi ]) AC_ARG_ENABLE([libnet], [ --enable-libnet Use libnet for ARP based funcationality, [default=try]], [enable_libnet="$enableval"], [enable_libnet=try]) BUILD_RGMANAGER=0 BUILD_LINUX_HA=0 RASSET=all AC_ARG_WITH(ras-set, [ --with-ras-set=SET build/install only linux-ha or rgmanager resource-agents [default: all]], [ RASSET="$withval" ]) if test x$RASSET = xyes || test x$RASSET = xall ; then BUILD_RGMANAGER=1 BUILD_LINUX_HA=1 fi if test x$RASSET = xlinux-ha; then BUILD_LINUX_HA=1 fi if test x$RASSET = xrgmanager; then BUILD_RGMANAGER=1 fi if test $BUILD_LINUX_HA -eq 0 && test $BUILD_RGMANAGER -eq 0; then AC_MSG_ERROR([Are you really sure you want this package?]) exit 1 fi AM_CONDITIONAL(BUILD_LINUX_HA, test $BUILD_LINUX_HA -eq 1) AM_CONDITIONAL(BUILD_RGMANAGER, test $BUILD_RGMANAGER -eq 1) dnl =============================================== dnl General Processing dnl =============================================== INIT_EXT="" echo Our Host OS: $host_os/$host AC_MSG_NOTICE(Sanitizing prefix: ${prefix}) case $prefix in NONE) prefix=/usr;; esac AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix}) case $exec_prefix in dnl For consistency with Heartbeat, map NONE->$prefix NONE) exec_prefix=$prefix;; prefix) exec_prefix=$prefix;; esac AC_MSG_NOTICE(Sanitizing INITDIR: ${INITDIR}) case $INITDIR in prefix) INITDIR=$prefix;; "") AC_MSG_CHECKING(which init (rc) directory to use) for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \ /usr/local/etc/rc.d /etc/rc.d do if test -d $initdir then INITDIR=$initdir break fi done if test -z $INITDIR then INITDIR=${sysconfdir}/init.d fi AC_MSG_RESULT($INITDIR);; esac AC_SUBST(INITDIR) if test "${prefix}" = "/usr"; then INITDIRPREFIX="$INITDIR" else INITDIRPREFIX="${prefix}/$INITDIR" fi AC_SUBST(INITDIRPREFIX) AC_MSG_NOTICE(Sanitizing libdir: ${libdir}) case $libdir in dnl For consistency with Heartbeat, map NONE->$prefix *prefix*|NONE) AC_MSG_CHECKING(which lib directory to use) for aDir in lib64 lib do trydir="${exec_prefix}/${aDir}" if test -d ${trydir} then libdir=${trydir} break fi done AC_MSG_RESULT($libdir); ;; esac dnl Expand autoconf variables so that we dont end up with '${prefix}' dnl in #defines and python scripts dnl NOTE: Autoconf deliberately leaves them unexpanded to allow dnl make exec_prefix=/foo install dnl No longer being able to do this seems like no great loss to me... eval prefix="`eval echo ${prefix}`" eval exec_prefix="`eval echo ${exec_prefix}`" eval bindir="`eval echo ${bindir}`" eval sbindir="`eval echo ${sbindir}`" eval libexecdir="`eval echo ${libexecdir}`" eval datadir="`eval echo ${datadir}`" eval sysconfdir="`eval echo ${sysconfdir}`" eval sharedstatedir="`eval echo ${sharedstatedir}`" eval localstatedir="`eval echo ${localstatedir}`" eval libdir="`eval echo ${libdir}`" eval includedir="`eval echo ${includedir}`" eval oldincludedir="`eval echo ${oldincludedir}`" eval infodir="`eval echo ${infodir}`" eval mandir="`eval echo ${mandir}`" dnl docdir is a recent addition to autotools eval docdir="`eval echo ${docdir}`" if test "x$docdir" = "x"; then docdir="`eval echo ${datadir}/doc`" fi AC_SUBST(docdir) dnl Home-grown variables eval INITDIR="${INITDIR}" for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \ sharedstatedir localstatedir libdir includedir oldincludedir infodir \ mandir INITDIR docdir do dirname=`eval echo '${'${j}'}'` if test ! -d "$dirname" then AC_MSG_WARN([$j directory ($dirname) does not exist!]) fi done dnl This OS-based decision-making is poor autotools practice; dnl feature-based mechanisms are strongly preferred. dnl dnl So keep this section to a bare minimum; regard as a "necessary evil". REBOOT_OPTIONS="-f" POWEROFF_OPTIONS="-f" case "$host_os" in *bsd*) LIBS="-L/usr/local/lib" CPPFLAGS="$CPPFLAGS -I/usr/local/include" INIT_EXT=".sh" ;; *solaris*) REBOOT_OPTIONS="-n" POWEROFF_OPTIONS="-n" ;; *linux*) AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform) POWEROFF_OPTIONS="-nf" REBOOT_OPTIONS="-nf" ;; darwin*) AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform) LIBS="$LIBS -L${prefix}/lib" CFLAGS="$CFLAGS -I${prefix}/include" ;; esac AC_SUBST(INIT_EXT) AC_DEFINE_UNQUOTED(HA_LOG_FACILITY, LOG_DAEMON, Default logging facility) AC_MSG_NOTICE(Host CPU: $host_cpu) case "$host_cpu" in ppc64|powerpc64) case $CFLAGS in *powerpc64*) ;; *) if test "$GCC" = yes; then CFLAGS="$CFLAGS -m64" fi ;; esac esac AC_MSG_CHECKING(which format is needed to print uint64_t) case "$host_cpu" in s390x)U64T="%lu";; *64*) U64T="%lu";; *) U64T="%llu";; esac AC_MSG_RESULT($U64T) AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t) dnl Variables needed for substitution AC_CHECK_HEADERS(heartbeat/glue_config.h) if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then OCF_ROOT_DIR=`extract_header_define heartbeat/glue_config.h OCF_ROOT_DIR` else enable_libnet=no fi AC_DEFINE_UNQUOTED(OCF_ROOT_DIR,"$OCF_ROOT_DIR", OCF root directory - specified by the OCF standard) AC_SUBST(OCF_ROOT_DIR) GLUE_STATE_DIR=${localstatedir}/run AC_DEFINE_UNQUOTED(GLUE_STATE_DIR,"$GLUE_STATE_DIR", Where to keep state files and sockets) AC_SUBST(GLUE_STATE_DIR) AC_DEFINE_UNQUOTED(HA_VARRUNDIR,"$GLUE_STATE_DIR", Where Heartbeat keeps state files and sockets - old name) HA_VARRUNDIR="$GLUE_STATE_DIR" AC_SUBST(HA_VARRUNDIR) # Expand $prefix eval HA_RSCTMPDIR="`eval echo ${HA_RSCTMPDIR}`" AC_DEFINE_UNQUOTED(HA_RSCTMPDIR,"$HA_RSCTMPDIR", Where Resouce agents keep state files) AC_SUBST(HA_RSCTMPDIR) dnl Eventually move out of the heartbeat dir tree and create symlinks when needed HA_VARLIBHBDIR=${localstatedir}/lib/heartbeat AC_DEFINE_UNQUOTED(HA_VARLIBHBDIR,"$HA_VARLIBHBDIR", Whatever this used to mean) AC_SUBST(HA_VARLIBHBDIR) OCF_RA_DIR="${OCF_ROOT_DIR}/resource.d/" AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs) AC_SUBST(OCF_RA_DIR) if test "${prefix}" = "/usr"; then OCF_RA_DIR_PREFIX="$OCF_RA_DIR" else OCF_RA_DIR_PREFIX="${prefix}/$OCF_RA_DIR" fi AC_SUBST(OCF_RA_DIR_PREFIX) OCF_LIB_DIR="${OCF_ROOT_DIR}/lib/" AC_DEFINE_UNQUOTED(OCF_LIB_DIR,"$OCF_LIB_DIR", Location for shared code for OCF RAs) AC_SUBST(OCF_LIB_DIR) if test "${prefix}" = "/usr"; then OCF_LIB_DIR_PREFIX="$OCF_LIB_DIR" else OCF_LIB_DIR_PREFIX="${prefix}/$OCF_LIB_DIR" fi AC_SUBST(OCF_LIB_DIR_PREFIX) dnl =============================================== dnl rgmanager ras bits dnl =============================================== LOGDIR=${localstatedir}/log/cluster CLUSTERDATA=${datadir}/cluster AC_SUBST([LOGDIR]) AC_SUBST([CLUSTERDATA]) dnl =============================================== dnl Program Paths dnl =============================================== PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin" export PATH AM_PATH_PYTHON AC_CHECK_PROGS(MAKE, gmake make) AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh) AC_PATH_PROGS(SCP, scp, /usr/bin/scp) AC_PATH_PROGS(TAR, tar) AC_PATH_PROGS(MD5, md5) AC_PATH_PROGS(TEST, test) AC_PATH_PROGS(PING, ping, /bin/ping) AC_PATH_PROGS(IFCONFIG, ifconfig, /sbin/ifconfig) AC_PATH_PROGS(MAILCMD, mailx mail, mail) AC_PATH_PROGS(EGREP, egrep) AC_PATH_PROGS(PKGCONFIG, pkg-config) AC_SUBST(MAILCMD) AC_SUBST(EGREP) AC_SUBST(SHELL) AC_SUBST(PING) AC_SUBST(TEST) AC_PATH_PROGS(ROUTE, route) AC_DEFINE_UNQUOTED(ROUTE, "$ROUTE", path to route command) AC_MSG_CHECKING(ifconfig option to list interfaces) for IFCONFIG_A_OPT in "-A" "-a" "" do $IFCONFIG $IFCONFIG_A_OPT > /dev/null 2>&1 if test "$?" = 0 then AC_DEFINE_UNQUOTED(IFCONFIG_A_OPT, "$IFCONFIG_A_OPT", option for ifconfig command) AC_MSG_RESULT($IFCONFIG_A_OPT) break fi done AC_SUBST(IFCONFIG_A_OPT) if test x"${MAKE}" = x""; then AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE}) fi dnl =============================================== dnl Libraries dnl =============================================== AC_CHECK_LIB(socket, socket) AC_CHECK_LIB(gnugetopt, getopt_long) dnl if available if test x"${PKGCONFIG}" = x""; then AC_MSG_ERROR(You need pkgconfig installed in order to build ${PACKAGE}) fi if test "x${enable_thread_safe}" = "xyes"; then GPKGNAME="gthread-2.0" else GPKGNAME="glib-2.0" fi if $PKGCONFIG --exists $GPKGNAME then GLIBCONFIG="$PKGCONFIG $GPKGNAME" else set -x echo PKG_CONFIG_PATH=$PKG_CONFIG_PATH $PKGCONFIG --exists $GPKGNAME; echo $? $PKGCONFIG --cflags $GPKGNAME; echo $? $PKGCONFIG $GPKGNAME; echo $? set +x AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE}) fi AC_MSG_RESULT(using $GLIBCONFIG) if test "X$GLIBCONFIG" != X; then AC_MSG_CHECKING(for special glib includes: ) GLIBHEAD=`$GLIBCONFIG --cflags` AC_MSG_RESULT($GLIBHEAD) CPPFLAGS="$CPPFLAGS $GLIBHEAD" AC_MSG_CHECKING(for glib library flags) GLIBLIB=`$GLIBCONFIG --libs` AC_MSG_RESULT($GLIBLIB) LIBS="$LIBS $GLIBLIB" fi dnl ======================================================================== dnl Headers dnl ======================================================================== AC_HEADER_STDC AC_CHECK_HEADERS(sys/socket.h) AC_CHECK_HEADERS(sys/sockio.h) AC_CHECK_HEADERS([arpa/inet.h]) AC_CHECK_HEADERS([fcntl.h]) AC_CHECK_HEADERS([limits.h]) AC_CHECK_HEADERS([malloc.h]) AC_CHECK_HEADERS([netdb.h]) AC_CHECK_HEADERS([netinet/in.h]) AC_CHECK_HEADERS([sys/file.h]) AC_CHECK_HEADERS([sys/ioctl.h]) AC_CHECK_HEADERS([sys/param.h]) AC_CHECK_HEADERS([sys/time.h]) AC_CHECK_HEADERS([syslog.h]) dnl ======================================================================== dnl Functions dnl ======================================================================== AC_FUNC_FORK AC_FUNC_STRNLEN AC_CHECK_FUNCS([alarm gettimeofday inet_ntoa memset mkdir socket uname]) AC_CHECK_FUNCS([strcasecmp strchr strdup strerror strrchr strspn strstr strtol strtoul]) dnl 'reboot()' system call: one argument (e.g. Linux) or two (e.g. Solaris)? dnl AC_CACHE_CHECK([number of arguments in reboot system call], ac_cv_REBOOT_ARGS,[ AC_TRY_COMPILE( [#include ], [(void)reboot(0);], ac_cv_REBOOT_ARGS=1, [AC_TRY_COMPILE( [#include ], [(void)reboot(0,(void *)0);], ac_cv_REBOOT_ARGS=2, ac_cv_REBOOT_ARGS=0 )], ac_cv_REBOOT_ARGS=0 ) ] ) dnl Argument count of 0 suggests no known 'reboot()' call. if test "$ac_cv_REBOOT_ARGS" -ge "1"; then AC_DEFINE_UNQUOTED(REBOOT_ARGS,$ac_cv_REBOOT_ARGS,[number of arguments for reboot system call]) fi AC_PATH_PROGS(REBOOT, reboot, /sbin/reboot) AC_SUBST(REBOOT) AC_SUBST(REBOOT_OPTIONS) AC_DEFINE_UNQUOTED(REBOOT, "$REBOOT", path to the reboot command) AC_DEFINE_UNQUOTED(REBOOT_OPTIONS, "$REBOOT_OPTIONS", reboot options) AC_PATH_PROGS(POWEROFF_CMD, poweroff, /sbin/poweroff) AC_SUBST(POWEROFF_CMD) AC_SUBST(POWEROFF_OPTIONS) AC_DEFINE_UNQUOTED(POWEROFF_CMD, "$POWEROFF_CMD", path to the poweroff command) AC_DEFINE_UNQUOTED(POWEROFF_OPTIONS, "$POWEROFF_OPTIONS", poweroff options) AC_PATH_PROGS(XSLTPROC, xsltproc) AM_CONDITIONAL(BUILD_DOC, test "x$XSLTPROC" != "x" ) if test "x$XSLTPROC" = "x"; then AC_MSG_WARN([xsltproc not installed, unable to (re-)build manual pages]) fi AC_SUBST(XSLTPROC) AC_PATH_PROGS(POD2MAN, pod2man) AM_CONDITIONAL(BUILD_POD_DOC, test "x$POD2MAN" != "x" ) if test "x$POD2MAN" = "x"; then AC_MSG_WARN([pod2man not installed, unable to (re-)build ldirector manual page]) fi AC_SUBST(POD2MAN) dnl ======================================================================== dnl Functions dnl ======================================================================== AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT, 1, [Have getopt function])) dnl ======================================================================== dnl sfex dnl ======================================================================== build_sfex=no case $host_os in *Linux*|*linux*) if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then build_sfex=yes fi ;; esac AM_CONDITIONAL(BUILD_SFEX, test "$build_sfex" = "yes" ) dnl ======================================================================== dnl tickle (needs port to BSD platforms) dnl ======================================================================== AC_CHECK_MEMBERS([struct iphdr.saddr],,,[[#include ]]) AM_CONDITIONAL(BUILD_TICKLE, test "$ac_cv_member_struct_iphdr_saddr" = "yes" ) dnl ======================================================================== dnl libnet dnl ======================================================================== libnet="" libnet_version="none" LIBNETLIBS="" LIBNETDEFINES="" AC_MSG_CHECKING(if libnet is required) libnet_fatal=$enable_libnet case $enable_libnet in no) ;; yes|libnet10|libnet11|10|11) libnet_fatal=yes;; try) case $host_os in *Linux*|*linux*) libnet_fatal=no;; *) libnet_fatal=yes;; dnl legacy behavior esac ;; *) libnet_fatal=yes; enable_libnet=try;; esac AC_MSG_RESULT($libnet_fatal) if test "x$enable_libnet" != "xno"; then AC_PATH_PROGS(LIBNETCONFIG, libnet-config) AC_CHECK_LIB(nsl, t_open) dnl -lnsl AC_CHECK_LIB(socket, socket) dnl -lsocket AC_CHECK_LIB(net, libnet_get_hwaddr, LIBNETLIBS=" -lnet", []) fi AC_MSG_CHECKING(for libnet) if test "x$LIBNETLIBS" != "x" -o "x$enable_libnet" = "xlibnet11"; then LIBNETDEFINES="" if test "$ac_cv_lib_nsl_t_open" = yes; then LIBNETLIBS="-lnsl $LIBNETLIBS" fi if test "$ac_cv_lib_socket_socket" = yes; then LIBNETLIBS="-lsocket $LIBNETLIBS" fi libnet=net libnet_version="libnet1.1" fi if test "x$enable_libnet" = "xtry" -o "x$enable_libnet" = "xlibnet10"; then if test "x$LIBNETLIBS" = x -a "x${LIBNETCONFIG}" != "x" ; then LIBNETDEFINES="`$LIBNETCONFIG --defines` `$LIBNETCONFIG --cflags`"; LIBNETLIBS="`$LIBNETCONFIG --libs`"; libnet_version="libnet1.0 (old)" case $LIBNETLIBS in *-l*) libnet=`echo $LIBNETLIBS | sed 's%.*-l%%'`;; *) libnet_version=none;; esac CPPFLAGS="$CPPFLAGS $LIBNETDEFINES" AC_CHECK_HEADERS(libnet.h) if test "$ac_cv_header_libnet_h" = no; then libnet_version=none fi fi fi AC_MSG_RESULT(found $libnet_version) if test "$libnet_version" = none; then LIBNETLIBS="" LIBNETDEFINES="" if test $libnet_fatal = yes; then AC_MSG_ERROR(libnet not found) fi else AC_CHECK_LIB($libnet,libnet_init, [new_libnet=yes; AC_DEFINE(HAVE_LIBNET_1_1_API, 1, Libnet 1.1 API)], [new_libnet=no; AC_DEFINE(HAVE_LIBNET_1_0_API, 1, Libnet 1.0 API)],$LIBNETLIBS) AC_SUBST(LIBNETLIBS) fi if test "$new_libnet" = yes; then AC_MSG_CHECKING(for libnet API 1.1.4: ) save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -fgnu89-inline -Wall -Werror" AC_COMPILE_IFELSE([#include int main(){libnet_t *l=NULL; libnet_pblock_record_ip_offset(l, l->total_size); return(0); }], [AC_MSG_RESULT(no)], [AC_DEFINE(HAVE_LIBNET_1_1_4_API, 1, Libnet 1.1.4 API) AC_MSG_RESULT(yes)]) CFLAGS="$save_CFLAGS" fi sendarp_linux=0 case $host_os in *Linux*|*linux*) sendarp_linux=1;; esac AC_SUBST(LIBNETLIBS) AC_SUBST(LIBNETDEFINES) AM_CONDITIONAL(SENDARP_LINUX, test $sendarp_linux = 1 ) AM_CONDITIONAL(USE_LIBNET, test "x$libnet_version" != "xnone" ) dnl ************************************************************************ dnl * Check for netinet/icmp6.h to enable the IPv6addr resource agent AC_CHECK_HEADERS(netinet/icmp6.h,[],[],[#include ]) AM_CONDITIONAL(USE_IPV6ADDR, test "$ac_cv_header_netinet_icmp6_h" = yes ) dnl ======================================================================== dnl Compiler flags dnl ======================================================================== dnl Make sure that CFLAGS is not exported. If the user did dnl not have CFLAGS in their environment then this should have dnl no effect. However if CFLAGS was exported from the user's dnl environment, then the new CFLAGS will also be exported dnl to sub processes. CC_ERRORS="" CC_EXTRAS="" if export | fgrep " CFLAGS=" > /dev/null; then export -n CFLAGS || true # We don't want to bomb out if this fails SAVED_CFLAGS="$CFLAGS" unset CFLAGS CFLAGS="$SAVED_CFLAGS" unset SAVED_CFLAGS fi if test "$GCC" != yes; then CFLAGS="$CFLAGS -g" enable_fatal_warnings=no else CFLAGS="$CFLAGS -ggdb3" # We had to eliminate -Wnested-externs because of libtool changes # Also remove -Waggregate-return because we use one libnet # call which returns a struct EXTRA_FLAGS="-fgnu89-inline -fstack-protector-all -Wall -Wbad-function-cast -Wcast-qual -Wcast-align -Wdeclaration-after-statement -Wendif-labels -Wfloat-equal -Wformat=2 -Wformat-security -Wformat-nonliteral -Winline -Wmissing-prototypes -Wmissing-declarations -Wmissing-format-attribute -Wnested-externs -Wno-long-long -Wno-strict-aliasing -Wpointer-arith -Wstrict-prototypes -Wunsigned-char -Wwrite-strings" # Additional warnings it might be nice to enable one day # -Wshadow # -Wunreachable-code for j in $EXTRA_FLAGS do if cc_supports_flag $j then CC_EXTRAS="$CC_EXTRAS $j" fi done dnl In lib/ais/Makefile.am there's a gcc option available as of v4.x GCC_MAJOR=`gcc -v 2>&1 | awk 'END{print $3}' | sed 's/[.].*//'` AM_CONDITIONAL(GCC_4, test "${GCC_MAJOR}" = 4) dnl System specific options case "$host_os" in *linux*|*bsd*) if test "${enable_fatal_warnings}" = "unknown"; then enable_fatal_warnings=yes fi ;; esac if test "x${enable_fatal_warnings}" != xno && cc_supports_flag -Werror ; then enable_fatal_warnings=yes else enable_fatal_warnings=no fi if test "x${enable_ansi}" != xno && cc_supports_flag -std=iso9899:199409 ; then AC_MSG_NOTICE(Enabling ANSI Compatibility) CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY" fi AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS}) fi CFLAGS="$CFLAGS $CC_EXTRAS" NON_FATAL_CFLAGS="$CFLAGS" AC_SUBST(NON_FATAL_CFLAGS) dnl dnl We reset CFLAGS to include our warnings *after* all function dnl checking goes on, so that our warning flags don't keep the dnl AC_*FUNCS() calls above from working. In particular, -Werror will dnl *always* cause us troubles if we set it before here. dnl dnl if test "x${enable_fatal_warnings}" = xyes ; then AC_MSG_NOTICE(Enabling Fatal Warnings) CFLAGS="$CFLAGS -Werror" fi AC_SUBST(CFLAGS) dnl This is useful for use in Makefiles that need to remove one specific flag CFLAGS_COPY="$CFLAGS" AC_SUBST(CFLAGS_COPY) AC_SUBST(LOCALE) AC_SUBST(CC) AC_SUBST(MAKE) dnl The Makefiles and shell scripts we output AC_CONFIG_FILES(Makefile \ include/Makefile \ heartbeat/Makefile \ heartbeat/ocf-binaries \ heartbeat/ocf-directories \ heartbeat/ocf-shellfuncs \ heartbeat/shellfuncs \ tools/Makefile \ tools/ocf-tester \ tools/ocft/Makefile \ tools/ocft/ocft \ tools/ocft/caselib \ tools/ocft/README \ tools/ocft/README.zh_CN \ ldirectord/Makefile \ ldirectord/ldirectord \ ldirectord/init.d/Makefile \ ldirectord/init.d/ldirectord \ ldirectord/init.d/ldirectord.debian \ ldirectord/init.d/ldirectord.debian.default \ ldirectord/logrotate.d/Makefile \ ldirectord/OCF/Makefile \ ldirectord/OCF/ldirectord \ doc/Makefile \ doc/man/Makefile \ rgmanager/Makefile \ rgmanager/src/Makefile \ rgmanager/src/resources/Makefile \ rgmanager/src/resources/utils/Makefile \ ) dnl Now process the entire list of files added by previous dnl calls to AC_CONFIG_FILES() AC_OUTPUT() dnl ***************** dnl Configure summary dnl ***************** AC_MSG_RESULT([]) AC_MSG_RESULT([$PACKAGE configuration:]) AC_MSG_RESULT([ Version = ${VERSION}]) AC_MSG_RESULT([ Build Version = $Format:%H$]) AC_MSG_RESULT([ Features =${PKG_FEATURES}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ Prefix = ${prefix}]) AC_MSG_RESULT([ Executables = ${sbindir}]) AC_MSG_RESULT([ Man pages = ${mandir}]) AC_MSG_RESULT([ Libraries = ${libdir}]) AC_MSG_RESULT([ Header files = ${includedir}]) AC_MSG_RESULT([ Arch-independent files = ${datadir}]) AC_MSG_RESULT([ Documentation = ${docdir}]) AC_MSG_RESULT([ State information = ${localstatedir}]) AC_MSG_RESULT([ System configuration = ${sysconfdir}]) AC_MSG_RESULT([ RA state files = ${HA_RSCTMPDIR}]) AC_MSG_RESULT([ AIS Plugins = ${LCRSODIR}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ CFLAGS = ${CFLAGS}]) AC_MSG_RESULT([ Libraries = ${LIBS}]) AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}]) diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am index 15de91603..a7dae80d9 100644 --- a/doc/man/Makefile.am +++ b/doc/man/Makefile.am @@ -1,161 +1,162 @@ # # doc: Linux-HA resource agents # # Copyright (C) 2009 Florian Haas # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(doc_DATA) $(REFENTRY_STYLESHEET) \ mkappendix.sh ralist.sh CLEANFILES = $(man_MANS) $(xmlfiles) metadata-*.xml STYLESHEET_PREFIX ?= http://docbook.sourceforge.net/release/xsl/current MANPAGES_STYLESHEET ?= $(STYLESHEET_PREFIX)/manpages/docbook.xsl HTML_STYLESHEET ?= $(STYLESHEET_PREFIX)/xhtml/docbook.xsl FO_STYLESHEET ?= $(STYLESHEET_PREFIX)/fo/docbook.xsl REFENTRY_STYLESHEET ?= ra2refentry.xsl XSLTPROC_OPTIONS ?= --xinclude XSLTPROC_MANPAGES_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_HTML_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_FO_OPTIONS ?= $(XSLTPROC_OPTIONS) radir = $(top_srcdir)/heartbeat # OCF_ROOT=. is necessary due to a sanity check in ocf-shellfuncs # (which tests whether $OCF_ROOT points to a directory metadata-%.xml: $(radir)/% OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ metadata-IPv6addr.xml: ../../heartbeat/IPv6addr OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ # Please note: we can't name the man pages # ocf:heartbeat:. Believe me, I've tried. It looks like it # works, but then it doesn't. While make can deal correctly with # colons in target names (when properly escaped), it royally messes up # when it is deals with _dependencies_ that contain colons. See Bug # 12126 on savannah.gnu.org. But, maybe it gets fixed soon, it was # first reported in 1995 and added to Savannah in in 2005... if BUILD_DOC man_MANS = ocf_heartbeat_AoEtarget.7 \ ocf_heartbeat_AudibleAlarm.7 \ ocf_heartbeat_ClusterMon.7 \ ocf_heartbeat_CTDB.7 \ ocf_heartbeat_Delay.7 \ ocf_heartbeat_Dummy.7 \ ocf_heartbeat_EvmsSCC.7 \ ocf_heartbeat_Evmsd.7 \ ocf_heartbeat_Filesystem.7 \ ocf_heartbeat_ICP.7 \ ocf_heartbeat_IPaddr.7 \ ocf_heartbeat_IPaddr2.7 \ ocf_heartbeat_IPsrcaddr.7 \ ocf_heartbeat_LVM.7 \ ocf_heartbeat_LinuxSCSI.7 \ ocf_heartbeat_MailTo.7 \ ocf_heartbeat_ManageRAID.7 \ ocf_heartbeat_ManageVE.7 \ ocf_heartbeat_Pure-FTPd.7 \ ocf_heartbeat_Raid1.7 \ ocf_heartbeat_Route.7 \ ocf_heartbeat_SAPDatabase.7 \ ocf_heartbeat_SAPInstance.7 \ ocf_heartbeat_SendArp.7 \ ocf_heartbeat_ServeRAID.7 \ ocf_heartbeat_SphinxSearchDaemon.7 \ ocf_heartbeat_Squid.7 \ ocf_heartbeat_Stateful.7 \ ocf_heartbeat_SysInfo.7 \ ocf_heartbeat_VIPArip.7 \ ocf_heartbeat_VirtualDomain.7 \ ocf_heartbeat_WAS.7 \ ocf_heartbeat_WAS6.7 \ ocf_heartbeat_WinPopup.7 \ ocf_heartbeat_Xen.7 \ ocf_heartbeat_Xinetd.7 \ ocf_heartbeat_anything.7 \ ocf_heartbeat_apache.7 \ ocf_heartbeat_asterisk.7 \ ocf_heartbeat_conntrackd.7 \ ocf_heartbeat_db2.7 \ ocf_heartbeat_drbd.7 \ ocf_heartbeat_eDir88.7 \ ocf_heartbeat_ethmonitor.7 \ ocf_heartbeat_exportfs.7 \ ocf_heartbeat_fio.7 \ ocf_heartbeat_iSCSILogicalUnit.7 \ ocf_heartbeat_iSCSITarget.7 \ ocf_heartbeat_ids.7 \ ocf_heartbeat_iscsi.7 \ ocf_heartbeat_jboss.7 \ ocf_heartbeat_lxc.7 \ ocf_heartbeat_mysql.7 \ ocf_heartbeat_mysql-proxy.7 \ ocf_heartbeat_named.7 \ ocf_heartbeat_nfsserver.7 \ ocf_heartbeat_nginx.7 \ ocf_heartbeat_oracle.7 \ ocf_heartbeat_oralsnr.7 \ ocf_heartbeat_pgsql.7 \ ocf_heartbeat_pingd.7 \ ocf_heartbeat_portblock.7 \ ocf_heartbeat_postfix.7 \ + ocf_heartbeat_pound.7 \ ocf_heartbeat_proftpd.7 \ ocf_heartbeat_rsyncd.7 \ ocf_heartbeat_rsyslog.7 \ ocf_heartbeat_scsi2reservation.7 \ ocf_heartbeat_sfex.7 \ ocf_heartbeat_slapd.7 \ ocf_heartbeat_symlink.7 \ ocf_heartbeat_syslog-ng.7 \ ocf_heartbeat_tomcat.7 \ ocf_heartbeat_varnish.7 \ ocf_heartbeat_vmware.7 if USE_IPV6ADDR man_MANS += ocf_heartbeat_IPv6addr.7 endif xmlfiles = $(man_MANS:.7=.xml) %.1 %.5 %.7 %.8: %.xml $(XSLTPROC) \ $(XSLTPROC_MANPAGES_OPTIONS) \ $(MANPAGES_STYLESHEET) $< ocf_heartbeat_%.xml: metadata-%.xml $(srcdir)/$(REFENTRY_STYLESHEET) $(XSLTPROC) --novalid \ --stringparam package $(PACKAGE_NAME) \ --stringparam version $(VERSION) \ --output $@ \ $(srcdir)/$(REFENTRY_STYLESHEET) $< ocf_resource_agents.xml: $(xmlfiles) mkappendix.sh ./mkappendix.sh $(xmlfiles) > $@ %.html: %.xml $(XSLTPROC) \ $(XSLTPROC_HTML_OPTIONS) \ --output $@ \ $(HTML_STYLESHEET) $< xml: ocf_resource_agents.xml endif diff --git a/doc/man/ra2refentry.xsl b/doc/man/ra2refentry.xsl index 9a1f69d5c..41a60aabe 100644 --- a/doc/man/ra2refentry.xsl +++ b/doc/man/ra2refentry.xsl @@ -1,513 +1,539 @@ resource-agents ocf heartbeat 7 | __ re-ra- - + Linux-HA contributors (see the resource agent source for information about individual authors) OCF resource agents - + + + + + + + + + + + + + + + + + - - - - - - - - - + + + + + + + + + + + + + + - + - + + + + - + + + + Description This resource agent may be configured for native migration if available in the cluster manager. For Pacemaker, the allow-migrate="true" meta attribute enables native migration. Supported Parameters This resource agent does not support any parameters. - - + + - + - + - + - + - + ( unique, required optional , , , default " " default false no default ) Supported Actions This resource agent does not advertise any supported actions. This resource agent supports the following actions (operations): Starts the resource. Stops the resource. Performs a status check. Performs a detailed status check. Promotes the resource to the Master role. Demotes the resource to the Slave role. Executes steps necessary for migrating the resource away from the node. Executes steps necessary for migrating the resource to the node. Performs a validation of the resource configuration. Retrieves resource agent metadata (internal use only). Suggested minimum timeout: . Suggested interval: . Example The following is an example configuration for a resource using the crm8 shell: primitive p_ : : \ params \ \ meta allow-migrate="true" \ ms ms_ p_ \ meta notify="true" interleave="true" - + = \ " " op \ =" " See also http://www.linux-ha.org/wiki/ _(resource_agent) diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem index a31931b3b..9c9a66483 100755 --- a/heartbeat/Filesystem +++ b/heartbeat/Filesystem @@ -1,1096 +1,1122 @@ #!/bin/sh # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # Filesystem # Description: Manages a Filesystem on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # # usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data} # # OCF parameters are as below: # OCF_RESKEY_device # OCF_RESKEY_directory # OCF_RESKEY_fstype # OCF_RESKEY_options # OCF_RESKEY_statusfile_prefix # OCF_RESKEY_run_fsck # OCF_RESKEY_fast_stop +# OCF_RESKEY_force_clones # #OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0 # Or a -U or -L option for mount, or an NFS mount specification #OCF_RESKEY_directory : the mount point for the filesystem #OCF_RESKEY_fstype : optional name of the filesystem type. e.g. ext2 #OCF_RESKEY_options : options to be given to the mount command via -o #OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring #OCF_RESKEY_run_fsck : fsck execution mode: auto(default)/force/no #OCF_RESKEY_fast_stop : fast stop: yes(default)/no +#OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts +# for each brick in a glusterfs setup # # # This assumes you want to manage a filesystem on a shared (SCSI) bus, # on a replicated device (such as DRBD), or a network filesystem (such # as NFS or Samba). # # Do not put this filesystem in /etc/fstab. This script manages all of # that for you. # # NOTE: If 2 or more nodes mount the same file system read-write, and # that file system is not designed for that specific purpose # (such as GFS or OCFS2), and is not a network file system like # NFS or Samba, then the filesystem is going to become # corrupted. # # As a result, you should use this together with the stonith # option and redundant, independent communications paths. # # If you don't do this, don't blame us when you scramble your # disk. ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults DFLT_STATUSDIR=".Filesystem_status/" # Variables used by multiple methods HOSTOS=`uname` # The status file is going to an extra directory, by default # prefix=${OCF_RESKEY_statusfile_prefix} : ${prefix:=$DFLT_STATUSDIR} suffix="${OCF_RESOURCE_INSTANCE}" [ "$OCF_RESKEY_CRM_meta_clone" ] && suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone" suffix="${suffix}_`uname -n`" STATUSFILE=${OCF_RESKEY_directory}/$prefix$suffix + ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|meta-data} EOT } meta_data() { cat < -1.0 +1.1 Resource script for Filesystem. It manages a Filesystem on a shared storage medium. The standard monitor operation of depth 0 (also known as probe) checks if the filesystem is mounted. If you want deeper tests, set OCF_CHECK_LEVEL to one of the following values: 10: read first 16 blocks of the device (raw read) This doesn't exercise the filesystem at all, but the device on which the filesystem lives. This is noop for non-block devices such as NFS, SMBFS, or bind mounts. 20: test if a status file can be written and read The status file must be writable by root. This is not always the case with an NFS mount, as NFS exports usually have the "root_squash" option set. In such a setup, you must either use read-only monitoring (depth=10), export with "no_root_squash" on your NFS server, or grant world write permissions on the directory where the status file is to be placed. Manages filesystem mounts The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification. block device The mount point for the filesystem. mount point The type of filesystem to be mounted. filesystem type Any extra options to be given as -o options to mount. For bind mounts, add "bind" here and set fstype to "none". We will do the right thing for options such as "bind,ro". options The prefix to be used for a status file for resource monitoring with depth 20. If you don't specify this parameter, all status files will be created in a separate directory. status file prefix Specify how to decide whether to run fsck or not. "auto" : decide to run fsck depending on the fstype(default) "force" : always run fsck regardless of the fstype "no" : do not run fsck ever. run_fsck Normally, we expect no users of the filesystem and the stop operation to finish quickly. If you cannot control the filesystem users easily and want to prevent the stop action from failing, then set this parameter to "no" and add an appropriate timeout for the stop operation. fast stop + + +The usage of a clone setup for local filesystems is forbidden +by default. For special setups like glusterfs, cloning a mount +of a local device with a filesystem like ext4 or xfs, independently +on several nodes is a valid use-case. + +Only set this to "true" if you know what you are doing! + +allow running as a clone, regardless of filesystem type + + + END } # # Make sure the kernel does the right thing with the FS buffers # This function should be called after unmounting and before mounting # It may not be necessary in 2.4 and later kernels, but it shouldn't hurt # anything either... # # It's really a bug that you have to do this at all... # flushbufs() { if have_binary $BLOCKDEV ; then if [ "$blockdevice" = "yes" ] ; then $BLOCKDEV --flushbufs $1 return $? fi fi return 0 } # Take advantage of /etc/mtab if present, use portable mount command # otherwise. Normalize format to "dev mountpoint fstype". is_bind_mount() { echo "$options" | grep -w bind >/dev/null 2>&1 } list_mounts() { local inpf="" if [ -e "/proc/mounts" ] && ! is_bind_mount; then inpf=/proc/mounts elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then inpf=/etc/mtab fi if [ "$inpf" ]; then cut -d' ' -f1,2,3 < $inpf else $MOUNT | cut -d' ' -f1,3,5 fi } determine_blockdevice() { if [ $blockdevice = "yes" ]; then return fi # Get the current real device name, if possible. # (specified devname could be -L or -U...) case "$FSTYPE" in - nfs|smbfs|cifs|glusterfs|tmpfs|none) ;; + nfs|smbfs|cifs|glusterfs|ceph|tmpfs|none) ;; *) DEVICE=`list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1` if [ -b "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Lists all filesystems potentially mounted under a given path, # excluding the path itself. list_submounts() { list_mounts | grep " $1/" | cut -d' ' -f2 | sort -r } ocfs2_del_cache() { if [ -e "$_OCFS2_uuid_cache" ]; then rm -f $_OCFS2_uuid_cache fi } ocfs2_cleanup() { # We'll never see the post-stop notification. We're gone now, # have unmounted, and thus should remove the membership. # # (Do so regardless of whether we were unmounted already, # because the admin might have manually unmounted but not # cleared up the membership directory. Bad admin, no cookie.) # if [ ! -d "$OCFS2_FS_ROOT" ]; then ocf_log info "$OCFS2_FS_ROOT: Filesystem membership already gone." else ocf_log info "$OCFS2_FS_ROOT: Removing membership directory." rm -rf $OCFS2_FS_ROOT/ fi ocfs2_del_cache } ocfs2_fetch_uuid() { mounted.ocfs2 -d $DEVICE|tail -1|awk '{print $3}'|tr -d -- -|tr '[a-z]' '[A-Z]' } ocfs2_set_uuid() { _OCFS2_uuid_cache="$HA_RSCTMP/Filesystem.ocfs2_uuid.$(echo $DEVICE|tr / .)" if [ "$OP" != "start" -a -e "$_OCFS2_uuid_cache" ]; then # Trust the cache. OCFS2_UUID=$(cat $_OCFS2_uuid_cache 2>/dev/null) return 0 fi OCFS2_UUID=$(ocfs2_fetch_uuid) if [ -n "$OCFS2_UUID" -a "$OCFS2_UUID" != "UUID" ]; then # UUID valid: echo $OCFS2_UUID > $_OCFS2_uuid_cache return 0 fi # Ok, no UUID still, but that's alright for stop, because it # very likely means we never got started - if [ "$OP" = "stop" ]; then ocf_log warn "$DEVICE: No UUID; assuming never started!" OCFS2_UUID="UUID_NOT_SET" return 0 fi # Everything else - wrong: ocf_log err "$DEVICE: Could not determine ocfs2 UUID for device." exit $OCF_ERR_GENERIC } ocfs2_init() { # Check & initialize the OCFS2 specific variables. # This check detects whether the special/legacy hooks to # integrate OCFS2 with user-space clustering on SLES10 need to # be activated. # Newer kernels >= 2.6.28, with OCFS2+openAIS+Pacemaker, do # not need this: OCFS2_SLES10="" if [ "X$HA_cluster_type" = "Xcman" ]; then return elif [ "X$HA_cluster_type" != "Xopenais" ]; then if grep -q "SUSE Linux Enterprise Server 10" /etc/SuSE-release >/dev/null 2>&1 ; then OCFS2_SLES10="yes" ocf_log info "$DEVICE: Enabling SLES10 compatibility mode for OCFS2." else ocf_log err "$DEVICE: ocfs2 is not compatible with your environment." exit $OCF_ERR_CONFIGURED fi else return fi if [ $OP != "stop" ]; then if [ -z "$OCF_RESKEY_CRM_meta_clone" ]; then ocf_log err "ocfs2 must be run as a clone." exit $OCF_ERR_GENERIC fi fi if [ $blockdevice = "no" ]; then ocf_log err "$DEVICE: ocfs2 needs a block device instead." exit $OCF_ERR_GENERIC fi for f in "$OCF_RESKEY_ocfs2_configfs" /sys/kernel/config/cluster /configfs/cluster ; do if [ -n "$f" -a -d "$f" ]; then OCFS2_CONFIGFS="$f" break fi done if [ ! -d "$OCFS2_CONFIGFS" ]; then ocf_log err "ocfs2 needs configfs mounted." exit $OCF_ERR_GENERIC fi ocfs2_set_uuid if [ -n "$OCF_RESKEY_ocfs2_cluster" ]; then OCFS2_CLUSTER=$(echo $OCF_RESKEY_ocfs2_cluster) else OCFS2_CLUSTER=$(find "$OCFS2_CONFIGFS" -maxdepth 1 -mindepth 1 -type d -printf %f 2>/dev/null) set -- $OCFS2_CLUSTER local n; n="$#" if [ $n -gt 1 ]; then ocf_log err "$OCFS2_CLUSTER: several clusters found." exit $OCF_ERR_GENERIC fi if [ $n -eq 0 ]; then ocf_log err "$OCFS2_CONFIGFS: no clusters found." exit $OCF_ERR_GENERIC fi fi OCFS2_CLUSTER_ROOT="$OCFS2_CONFIGFS/$OCFS2_CLUSTER" if [ ! -d "$OCFS2_CLUSTER_ROOT" ]; then ocf_log err "$OCFS2_CLUSTER: Cluster doesn't exist. Maybe o2cb hasn't been run?" exit $OCF_ERR_GENERIC fi OCFS2_FS_ROOT=$OCFS2_CLUSTER_ROOT/heartbeat/$OCFS2_UUID } # kernels < 2.6.26 can't handle bind remounts bind_kernel_check() { echo "$options" | grep -w ro >/dev/null 2>&1 || return uname -r | awk -F. ' $1==2 && $2==6 { sub("[^0-9].*","",$3); if ($3<26) exit(1); }' [ $? -ne 0 ] && ocf_log warn "kernel `uname -r` cannot handle read only bind mounts" } bind_mount() { if is_bind_mount && [ "$options" != "-o bind" ] then bind_kernel_check bind_opts=`echo $options | sed 's/bind/remount/'` $MOUNT $bind_opts $MOUNTPOINT else true # make sure to return OK fi } is_option() { echo $OCF_RESKEY_options | grep -w "$1" >/dev/null 2>&1 } is_fsck_needed() { case $OCF_RESKEY_run_fsck in force) true;; no) false;; ""|auto) case $FSTYPE in - ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|tmpfs) + ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs) false;; *) true;; esac;; *) ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'" OCF_RESKEY_run_fsck="auto" is_fsck_needed;; esac } # # START: Start up the filesystem # Filesystem_start() { if [ -n "$OCFS2_SLES10" ]; then # "start" now has the notification data available; that # we're being started means we didn't get the # pre-notification, because we weren't running, so # process the information now first. ocf_log info "$OCFS2_UUID: Faking pre-notification on start." OCF_RESKEY_CRM_meta_notify_type="pre" OCF_RESKEY_CRM_meta_notify_operation="start" Filesystem_notify fi # See if the device is already mounted. if Filesystem_status >/dev/null 2>&1 ; then ocf_log info "Filesystem $MOUNTPOINT is already mounted." return $OCF_SUCCESS fi if [ "X${HOSTOS}" != "XOpenBSD" ];then - # Insert SCSI module - # TODO: This probably should go away. Why should the filesystem - # RA magically load a kernel module? - $MODPROBE scsi_hostadapter >/dev/null - if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then : No FSTYPE specified, rely on the system has the right file-system support already else local support="$FSTYPE" # support fuse-filesystems (e.g. GlusterFS) case $FSTYPE in glusterfs) support="fuse";; esac grep -w "$support"'$' /proc/filesystems >/dev/null || $MODPROBE $support >/dev/null grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? -ne 0 ] ; then ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems" return $OCF_ERR_INSTALLED fi fi fi # Check the filesystem & auto repair. # NOTE: Some filesystem types don't need this step... Please modify # accordingly if [ $blockdevice = "yes" ]; then if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then ocf_log err "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" exit $OCF_ERR_INSTALLED fi if is_fsck_needed; then ocf_log info "Starting filesystem check on $DEVICE" if [ -z "$FSTYPE" ]; then $FSCK -p $DEVICE else $FSCK -t $FSTYPE -p $DEVICE fi # NOTE: if any errors at all are detected, it returns non-zero # if the error is >= 4 then there is a big problem if [ $? -ge 4 ]; then ocf_log err "Couldn't sucessfully fsck filesystem for $DEVICE" return $OCF_ERR_GENERIC fi fi fi if [ ! -d "$MOUNTPOINT" ] ; then ocf_log err "Couldn't find directory [$MOUNTPOINT] to use as a mount point" exit $OCF_ERR_INSTALLED fi flushbufs $DEVICE # Mount the filesystem. case "$FSTYPE" in none) $MOUNT $options $DEVICE $MOUNTPOINT && bind_mount ;; "") $MOUNT $options $DEVICE $MOUNTPOINT ;; *) $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT ;; esac if [ $? -ne 0 ]; then ocf_log err "Couldn't mount filesystem $DEVICE on $MOUNTPOINT" if [ -n "$OCFS2_SLES10" ]; then ocfs2_cleanup fi return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # end of Filesystem_start Filesystem_notify() { # Process notifications; this is the essential glue level for # giving user-space membership events to a cluster-aware # filesystem. Right now, only OCFS2 is supported. # # When we get a pre-start notification, we set up all the nodes # which will be active in our membership for the filesystem. # (For the resource to be started, this happens at the time of # the actual 'start' operation.) # # At a post-start, actually there's nothing to do for us really, # but no harm done in re-syncing either. # # pre-stop is meaningless; we can't remove any node yet, it # first needs to unmount. # # post-stop: the node is removed from the membership of the # other nodes. # # Note that this expects that the base cluster is already # active; ie o2cb has been started and populated # $OCFS2_CLUSTER_ROOT/node/ already. This can be achieved by # simply having o2cb run on all nodes by the CRM too. This # probably ought to be mentioned somewhere in the to be written # documentation. ;-) # if [ -z "$OCFS2_SLES10" ]; then # One of the cases which shouldn't occur; it should have # been caught much earlier. Still, you know ... ocf_log err "$DEVICE: Please only enable notifications for SLES10 OCFS2 mounts." # Yes, in theory this is a configuration error, but # simply discarding them allows users to switch from the # SLES10 stack to the new one w/o downtime. # Ignoring the notifications is harmless, afterall, and # they can simply disable them in their own time. return $OCF_SUCCESS fi local n_type; n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op; n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active; n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop; n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start; n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" ocf_log info "$OCFS2_UUID: notify: $n_type for $n_op" ocf_log info "$OCFS2_UUID: notify active: $n_active" ocf_log info "$OCFS2_UUID: notify stop: $n_stop" ocf_log info "$OCFS2_UUID: notify start: $n_start" case "$n_type" in pre) case "$n_op" in stop) ocf_log info "$OCFS2_UUID: ignoring pre-notify for stop." return $OCF_SUCCESS ;; start) # These are about to become active; prepare to # communicate with them. # Duplicate removal - start can contain nodes # already on the active list, confusing the # script later on: for UNAME in $n_active; do n_start=`echo ${n_start} | sed s/$UNAME//` done # Merge pruned lists again: n_active="$n_active $n_start" ;; esac ;; post) case "$n_op" in stop) # remove unames from notify_stop_uname; these have been # stopped and can no longer be considered active. for UNAME in $n_stop; do n_active=`echo ${n_active} | sed s/$UNAME//` done ;; start) if [ "$n_op" = "start" ]; then ocf_log info "$OCFS2_UUID: ignoring post-notify for start." return $OCF_SUCCESS fi ;; esac ;; esac ocf_log info "$OCFS2_UUID: post-processed active: $n_active" local n_myself; n_myself=${HA_CURHOST:-$(uname -n | tr '[A-Z]' '[a-z]')} ocf_log info "$OCFS2_UUID: I am node $n_myself." case " $n_active " in *" $n_myself "*) ;; *) ocf_log err "$OCFS2_UUID: $n_myself (local) not on active list!" return $OCF_ERR_GENERIC ;; esac if [ -d "$OCFS2_FS_ROOT" ]; then entry_prefix=$OCFS2_FS_ROOT/ for entry in $OCFS2_FS_ROOT/* ; do n_fs="${entry##$entry_prefix}" # ocf_log info "$OCFS2_UUID: Found current node $n_fs" case " $n_active " in *" $n_fs "*) # Construct a list of nodes which are present # already in the membership. n_exists="$n_exists $n_fs" ocf_log info "$OCFS2_UUID: Keeping node: $n_fs" ;; *) # Node is in the membership currently, but not on our # active list. Must be removed. if [ "$n_op" = "start" ]; then ocf_log warn "$OCFS2_UUID: Removing nodes on start" fi ocf_log info "$OCFS2_UUID: Removing dead node: $n_fs" if ! rm -f $entry ; then ocf_log err "$OCFS2_UUID: Removal of $n_fs failed!" fi ;; esac done else ocf_log info "$OCFS2_UUID: heartbeat directory doesn't exist yet, creating." mkdir -p $OCFS2_FS_ROOT fi ocf_log info "$OCFS2_UUID: Existing node list: $n_exists" # (2) for entry in $n_active ; do # ocf_log info "$OCFS2_UUID: Expected active node: $entry" case " $n_exists " in *" $entry "*) ocf_log info "$OCFS2_UUID: Already active: $entry" ;; *) if [ "$n_op" = "stop" ]; then ocf_log warn "$OCFS2_UUID: Adding nodes on stop" fi ocf_log info "$OCFS2_UUID: Activating node: $entry" if ! ln -s $OCFS2_CLUSTER_ROOT/node/$entry $OCFS2_FS_ROOT/$entry ; then ocf_log err "$OCFS2_CLUSTER_ROOT/node/$entry: failed to link" fi ;; esac done } signal_processes() { local dir=$1 local sig=$2 # fuser returns a non-zero return code if none of the # specified files is accessed or in case of a fatal # error. if [ "X${HOSTOS}" = "XOpenBSD" ];then PIDS=`fstat | grep $dir | awk '{print $3}'` for PID in ${PIDS};do kill -s $sig ${PID} ocf_log info "Sent signal $sig to ${PID}" done else if $FUSER -$sig -m -k $dir ; then ocf_log info "Some processes on $dir were signalled" else ocf_log info "No processes on $dir were signalled" fi fi } try_umount() { local SUB=$1 $UMOUNT $umount_force $SUB list_mounts | grep -q " $SUB " >/dev/null 2>&1 || { ocf_log info "unmounted $SUB successfully" return $OCF_SUCCESS } return $OCF_ERR_GENERIC } fs_stop() { local SUB=$1 timeout=$2 sig cnt for sig in TERM KILL; do cnt=$((timeout/2)) # try half time with TERM while [ $cnt -gt 0 ]; do try_umount $SUB && return $OCF_SUCCESS ocf_log err "Couldn't unmount $SUB; trying cleanup with $sig" signal_processes $SUB $sig cnt=$((cnt-1)) sleep 1 done done return $OCF_ERR_GENERIC } # # STOP: Unmount the filesystem # Filesystem_stop() { # See if the device is currently mounted Filesystem_status >/dev/null 2>&1 if [ $? -eq $OCF_NOT_RUNNING ]; then # Already unmounted, wonderful. rc=$OCF_SUCCESS else # Wipe the status file, but continue with a warning if # removal fails -- the file system might be read only if [ -f "$STATUSFILE" ]; then rm -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_log warn "Failed to remove status file ${STATUSFILE}." fi fi # Determine the real blockdevice this is mounted on (if # possible) prior to unmounting. determine_blockdevice # For networked filesystems, there's merit in trying -f: case "$FSTYPE" in nfs|cifs|smbfs) umount_force="-f" ;; esac # Umount all sub-filesystems mounted under $MOUNTPOINT/ too. local timeout for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do ocf_log info "Trying to unmount $SUB" if ocf_is_true "$FAST_STOP"; then timeout=6 else timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"} timeout=$((timeout/1000)) fi fs_stop $SUB $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_log err "Couldn't unmount $SUB, giving up!" fi done fi flushbufs $DEVICE # Yes I know the next blob is ugly, sorry. if [ $rc -eq $OCF_SUCCESS ]; then if [ "$FSTYPE" = "ocfs2" ]; then ocfs2_init if [ -n "$OCFS2_SLES10" ]; then ocfs2_cleanup fi fi fi return $rc } # end of Filesystem_stop # # STATUS: is the filesystem mounted or not? # Filesystem_status() { if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then rc=$OCF_SUCCESS msg="$MOUNTPOINT is mounted (running)" else rc=$OCF_NOT_RUNNING msg="$MOUNTPOINT is unmounted (stopped)" fi # TODO: For ocfs2, or other cluster filesystems, should we be # checking connectivity to other nodes here, or the IO path to # the storage? # Special case "monitor" to check whether the UUID cached and # on-disk still match? case "$OP" in status) ocf_log info "$msg";; esac return $rc } # end of Filesystem_status # Note: the read/write tests below will stall in case the # underlying block device (or in the case of a NAS mount, the # NAS server) has gone away. In that case, if I/O does not # return to normal in time, the operation hits its timeout # and it is up to the CRM to initiate appropriate recovery # actions (such as fencing the node). # # MONITOR 10: read the device # Filesystem_monitor_10() { if [ "$blockdevice" = "no" ] ; then ocf_log warn "$DEVICE is not a block device, monitor 10 is noop" return $OCF_SUCCESS fi - dd_opts="iflag=direct bs=512 count=16" + dd_opts="iflag=direct bs=4k count=1" err_output=`dd if=$DEVICE $dd_opts 2>&1 >/dev/null` if [ $? -ne 0 ]; then ocf_log err "Failed to read device $DEVICE" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # MONITOR 20: write and read a status file # Filesystem_monitor_20() { if [ "$blockdevice" = "no" ] ; then # O_DIRECT not supported on cifs/smbfs - dd_opts="oflag=sync bs=512 conv=fsync,sync" + dd_opts="oflag=sync bs=4k conv=fsync,sync" else # Writing to the device in O_DIRECT mode is imperative # to bypass caches. - dd_opts="oflag=direct,sync bs=512 conv=fsync,sync" + dd_opts="oflag=direct,sync bs=4k conv=fsync,sync" fi status_dir=`dirname $STATUSFILE` [ -d "$status_dir" ] || mkdir -p "$status_dir" err_output=` echo "${OCF_RESOURCE_INSTANCE}" | dd of=${STATUSFILE} $dd_opts 2>&1` if [ $? -ne 0 ]; then ocf_log err "Failed to write status file ${STATUSFILE}" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi test -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_log err "Cannot stat the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi cat ${STATUSFILE} > /dev/null if [ $? -ne 0 ]; then ocf_log err "Cannot read the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } Filesystem_monitor() { Filesystem_status rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then case "$OCF_CHECK_LEVEL" in 10) Filesystem_monitor_10; rc=$?;; 20) Filesystem_monitor_20; rc=$?;; *) ocf_log err "unsupported monitor level $OCF_CHECK_LEVEL" rc=$OCF_ERR_CONFIGURED ;; esac fi return $rc } # end of Filesystem_monitor # # VALIDATE_ALL: Are the instance parameters valid? # FIXME!! The only part that's useful is the return code. # This code always returns $OCF_SUCCESS (!) # Filesystem_validate_all() { if [ -n $MOUNTPOINT -a ! -d $MOUNTPOINT ]; then ocf_log warn "Mountpoint $MOUNTPOINT does not exist" fi # Check if the $FSTYPE is workable # NOTE: Without inserting the $FSTYPE module, this step may be imprecise # TODO: This is Linux specific crap. if [ ! -z "$FSTYPE" -a "$FSTYPE" != none ]; then cut -f2 /proc/filesystems |grep -q ^$FSTYPE$ if [ $? -ne 0 ]; then modpath=/lib/modules/`uname -r` moddep=$modpath/modules.dep # Do we have $FSTYPE in modules.dep? cut -d' ' -f1 $moddep |grep -q "^$modpath.*$FSTYPE\.k\?o:$" if [ $? -ne 0 ]; then ocf_log info "It seems we do not have $FSTYPE support" fi fi fi # If we are supposed to do monitoring with status files, then # we need a utility to write in O_DIRECT mode. if [ $OCF_CHECK_LEVEL -gt 0 ]; then check_binary dd # Note: really old coreutils version do not support # the "oflag" option for dd. We don't check for that # here. In case dd does not support oflag, monitor is # bound to fail, with dd spewing an error message to # the logs. On such systems, we must do without status # file monitoring. fi #TODO: How to check the $options ? return $OCF_SUCCESS } +# +# set the blockdevice variable to "no" or "yes" +# +set_blockdevice_var() { + blockdevice=no + + # these are definitely not block devices + case $FSTYPE in + nfs|smbfs|cifs|none|glusterfs) return;; + esac + + case $DEVICE in + -*) # Oh... An option to mount instead... Typically -U or -L + ;; + /dev/null) # Special case for BSC + blockdevice=yes + ;; + *) if [ ! -b "$DEVICE" -a ! -d "$DEVICE" -a "X$OP" != Xstart ] ; then + ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" + fi + if [ ! -d "$DEVICE" ]; then + blockdevice=yes + fi + ;; + esac +} + # Check the arguments passed to this script if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi # Check the OCF_RESKEY_ environment variables... DEVICE=$OCF_RESKEY_device FSTYPE=$OCF_RESKEY_fstype if [ ! -z "$OCF_RESKEY_options" ]; then options="-o $OCF_RESKEY_options" fi FAST_STOP=${OCF_RESKEY_fast_stop:="yes"} OP=$1 # These operations do not require instance parameters case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac -blockdevice=no -case $DEVICE in -"") ocf_log err "Please set OCF_RESKEY_device to the device to be managed" +if [ x = x"$DEVICE" ]; then + ocf_log err "Please set OCF_RESKEY_device to the device to be managed" exit $OCF_ERR_CONFIGURED - ;; --*) # Oh... An option to mount instead... Typically -U or -L - ;; -[!/]*:/*) # An NFS filesystem specification... - ;; -//[!/]*/*) # An SMB filesystem specification... - ;; -*:*) # An GlusterFS filesystem specification... - ;; -none) - ;; -/dev/null) # Special case for BSC - blockdevice=yes - ;; -*) if [ ! -b "$DEVICE" -a ! -d "$DEVICE" -a "X$OP" != Xstart ] ; then - ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" - fi - if [ ! -d "$DEVICE" ];then - blockdevice=yes - fi - ;; -esac +fi +set_blockdevice_var # Normalize instance parameters: # It is possible that OCF_RESKEY_directory has one or even multiple trailing "/". # But the output of `mount` and /proc/mounts do not. if [ -z "$OCF_RESKEY_directory" ]; then if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then ocf_log err "Please specify the directory" exit $OCF_ERR_CONFIGURED fi else MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//') : ${MOUNTPOINT:=/} # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/" # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll # kill the whole system. Is that a good idea? fi # Check to make sure the utilites are found if [ "X${HOSTOS}" != "XOpenBSD" ];then check_binary $MODPROBE check_binary $FUSER fi check_binary $FSCK check_binary $MOUNT check_binary $UMOUNT if [ "$OP" != "monitor" ]; then ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT" fi # These operations do not require the clone checking + OCFS2 # initialization. case $OP in status) Filesystem_status exit $? ;; monitor) Filesystem_monitor exit $? ;; validate-all) Filesystem_validate_all exit $? ;; stop) Filesystem_stop exit $? ;; esac CLUSTERSAFE=0 is_option "ro" && CLUSTERSAFE=2 case $FSTYPE in ocfs2) ocfs2_init CLUSTERSAFE=1 ;; -nfs|smbfs|cifs|none|gfs2|glusterfs) CLUSTERSAFE=1 # this is kind of safe too +nfs|smbfs|cifs|none|gfs2|glusterfs|ceph) CLUSTERSAFE=1 # this is kind of safe too ;; # add here CLUSTERSAFE=0 for all filesystems which are not # cluster aware and which, even if when mounted read-only, # could still modify parts of it such as journal/metadata ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs) - CLUSTERSAFE=0 # these are not allowed + if ocf_is_true "$OCF_RESKEY_force_clones"; then + CLUSTERSAFE=2 + else + CLUSTERSAFE=0 # these are not allowed + fi ;; esac if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then case $CLUSTERSAFE in 0) ocf_log err "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!" ocf_log err "DO NOT RUN IT AS A CLONE!" ocf_log err "Politely refusing to proceed to avoid data corruption." exit $OCF_ERR_CONFIGURED ;; 2) ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!" - ocf_log warn "But we'll let it run because it is mounted read-only." - ocf_log warn "Please make sure that it's meta data is read-only too!" + if ocf_is_true "$OCF_RESKEY_force_clones"; then + ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so." + else + ocf_log warn "But we'll let it run because it is mounted read-only." + ocf_log warn "Please make sure that it's meta data is read-only too!" + fi ;; esac fi case $OP in start) Filesystem_start ;; notify) Filesystem_notify ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/IPaddr b/heartbeat/IPaddr index 426a5a176..8ada6c4d2 100755 --- a/heartbeat/IPaddr +++ b/heartbeat/IPaddr @@ -1,892 +1,892 @@ #!/bin/sh # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # This script manages IP alias IP addresses # # It can add an IP alias, or remove one. # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds an IP alias. # # Surprisingly, the "stop" arg removes one. :-) # # OCF parameters are as below # OCF_RESKEY_ip # OCF_RESKEY_broadcast # OCF_RESKEY_nic # OCF_RESKEY_cidr_netmask # OCF_RESKEY_lvs_support ( e.g. true, on, 1 ) # OCF_RESKEY_ARP_INTERVAL_MS # OCF_RESKEY_ARP_REPEAT # OCF_RESKEY_ARP_BACKGROUND (e.g. yes ) # OCF_RESKEY_ARP_NETMASK # OCF_RESKEY_local_start_script # OCF_RESKEY_local_stop_script # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs SENDARP=$HA_BIN/send_arp FINDIF=$HA_BIN/findif VLDIR=$HA_RSCTMP SENDARPPIDDIR=$HA_RSCTMP SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ####################################################################### SYSTYPE="`uname -s`" case "$SYSTYPE" in SunOS) # `uname -r` = 5.9 -> SYSVERSION = 9 SYSVERSION="`uname -r | cut -d. -f 2`" ;; Darwin) # Treat Darwin the same as the other BSD variants (matched as *BSD) SYSTYPE="${SYSTYPE}BSD" ;; *) ;; esac meta_data() { cat < 1.0 This script manages IP alias IP addresses It can add an IP alias, or remove one. Manages virtual IPv4 addresses (portable version) The IPv4 address to be configured in dotted quad notation, for example "192.168.1.1". IPv4 address The base network interface on which the IP address will be brought online. If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. Prerequisite: There must be at least one static IP address, which is not managed by the cluster, assigned to the network interface. If you can not assign any static IP address on the interface, modify this kernel parameter: sysctl -w net.ipv4.conf.all.promote_secondaries=1 (or per device) Network interface The netmask for the interface in CIDR format. (ie, 24), or in dotted quad notation 255.255.255.0). If unspecified, the script will also try to determine this from the routing table. Netmask Broadcast address associated with the IP. If left empty, the script will determine this from the netmask. Broadcast address You can specify an additional label for your IP address here. Interface label Enable support for LVS Direct Routing configurations. In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Enable support for LVS DR Script called when the IP is released Script called when the IP is released Script called when the IP is added Script called when the IP is added milliseconds between ARPs milliseconds between gratuitous ARPs How many gratuitous ARPs to send out when bringing up a new address repeat count run in background (no longer any reason to do this) run in background netmask for ARP - in nonstandard hexadecimal format. netmask for ARP END exit $OCF_SUCCESS } # The 'ping' command takes highly OS-dependent arguments, so this # function creates a suitable argument list for the host OS's 'ping'. # We use a subset of its functionality: # 1. single packet # 2. reasonable timeout (say 1 second) # # arguments: # $1: IP address to ping # result string: # arguments for ping command # # If more flexibility is needed, they could be specified in the environment # to this function, to adjust the resulting 'ping' arguments. # David Lee May 2007 pingargs() { _baseip=$1 _timeout=1 # seconds _pktcount=1 _systype="`uname -s`" case $_systype in Linux) # Default is perpetual ping: need "-c $_pktcount". # -c count -t timetolive -q(uiet) -n(umeric) -W timeout _pingargs="-c $_pktcount -q -n $_baseip" ;; SunOS) # Default is immediate (or timeout) return. _pingargs="$_baseip $_timeout" ;; *) _pingargs="-c $_pktcount $_baseip" ;; esac echo "$_pingargs" } # On Linux systems the (hidden) loopback interface may # conflict with the requested IP address. If so, this # unoriginal code will remove the offending loopback address # and save it in VLDIR so it can be added back in later # when the IPaddr is released. # lvs_remove_conflicting_loopback() { ipaddr="$1" ifname="$2" ocf_log info "Removing conflicting loopback $ifname." if echo $ifname > "$VLDIR/$ipaddr" then : Saved loopback information in $VLDIR/$ipaddr else ocf_log err "Could not save conflicting loopback $ifname." \ "it will not be restored." fi if [ ! -z "${OCF_RESKEY_local_stop_script}" ]; then if [ -x "${OCF_RESKEY_local_stop_script}" ]; then ${OCF_RESKEY_local_stop_script} $* fi fi delete_interface "$ifname" "$ipaddr" # Forcibly remove the route (if it exists) to the loopback. delete_route "$ipaddr" } # # On Linux systems the (hidden) loopback interface may # need to be restored if it has been taken down previously # by lvs_remove_conflicting_loopback() # lvs_restore_loopback() { ipaddr="$1" if [ ! -s "$VLDIR/$ipaddr" ]; then return fi ifname=`cat "$VLDIR/$ipaddr"` ocf_log info "Restoring loopback IP Address $ipaddr on $ifname." CMD="OCF_RESKEY_cidr_netmask=32 OCF_RESKEY_ip=$1 OCF_RESKEY_nic=$ifname $FINDIF" if NICINFO=`eval $CMD` NICINFO=`echo $NICINFO | tr " " " " | tr -s " "` then netmask_text=`echo "$NICINFO" | cut -f3 -d " "` broadcast=`echo "$NICINFO" | cut -f5 -d " "` else echo "ERROR: $CMD failed (rc=$rc)" exit $OCF_ERR_GENERIC fi add_interface "$ipaddr" "$ifname" "$ifname" $netmask_text $broadcast rm -f "$VLDIR/$ipaddr" } # # Find out which alias serves the given IP address # The argument is an IP address, and its output # is an aliased interface name (e.g., "eth0:0"). # find_interface_solaris() { ipaddr="$1" $IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' | while read ifname linkstuff do : ifname = $ifname read inet addr junk : inet = $inet addr = $addr while read line && [ "X$line" != "X" ] do : Nothing done case $ifname in *:*) ;; *) continue;; esac # This doesn't look right for a box with multiple NICs. # It looks like it always selects the first interface on # a machine. Yet, we appear to use the results for this case too... ifname=`echo "$ifname" | sed s'%:$%%'` case $addr in addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;; $ipaddr) echo $ifname; return $OCF_SUCCESS;; esac done return $OCF_ERR_GENERIC } find_interface_bsd() { $IFCONFIG $IFCONFIG_A_OPT | awk -v ip_addr="$ipaddr" ' /UP,/ && $0 ~ /^[a-z]+[0-9]:/ { if_name=$1; sub(":$","",if_name); } $1 == "inet" && $2 == ip_addr { print if_name exit(0) }' } # # Find out which alias serves the given IP address # The argument is an IP address, and its output # is an aliased interface name (e.g., "eth0:0"). # find_interface_generic() { ipaddr="$1" $IFCONFIG $IFCONFIG_A_OPT | while read ifname linkstuff do : Read gave us ifname = $ifname read inet addr junk : Read gave us inet = $inet addr = $addr while read line && [ "X$line" != "X" ] do : Nothing done case $ifname in - *:*) ;; + *:*) ifname=`echo $ifname | sed 's/:$//'`;; *) continue;; esac : "comparing $ipaddr to $addr (from ifconfig)" case $addr in addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;; $ipaddr) echo $ifname; return $OCF_SUCCESS;; esac done return $OCF_ERR_GENERIC } # # Find out which alias serves the given IP address # The argument is an IP address, and its output # is an aliased interface name (e.g., "eth0:0"). # find_interface() { ipaddr="$1" case "$SYSTYPE" in SunOS) NIC=`find_interface_solaris $ipaddr`;; *BSD) NIC=`find_interface_bsd $ipaddr`;; *) NIC=`find_interface_generic $ipaddr`;; esac echo $NIC return $OCF_SUCCESS; } # # Find an unused interface/alias name for us to use for new IP alias # The argument is an IP address, and the output # is an aliased interface name (e.g., "eth0:0", "dc0", "le0:0"). # find_free_interface() { NIC="$1" if [ "X$NIC" = "X" ]; then ocf_log err "No free interface found for $OCF_RESKEY_ip" return $OCF_ERR_GENERIC; fi NICBASE="$VLDIR/IPaddr-$NIC" touch "$NICBASE" case "$SYSTYPE" in *BSD) echo $NIC; return $OCF_SUCCESS;; SunOS) j=1 IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \ grep "^$NIC:[0-9]" | sed 's%: .*%%'`;; *) j=0 IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \ grep "^$NIC:[0-9]" | sed 's% .*%%'` TRYADRCNT=`ls "${NICBASE}:"* 2>/dev/null | wc -w | tr -d ' '` if [ -f "${NICBASE}:${TRYADRCNT}" ]; then : OK else j="${TRYADRCNT}" fi ;; esac IFLIST=" `echo $IFLIST` " while [ $j -lt 512 ] do case $IFLIST in *" "$NIC:$j" "*) ;; *) NICLINK="$NICBASE:$j" if ln "$NICBASE" "$NICLINK" 2>/dev/null then echo "$NIC:$j" return $OCF_SUCCESS fi ;; esac j=`expr $j + 1` done return $OCF_ERR_GENERIC } delete_route () { ipaddr="$1" case "$SYSTYPE" in SunOS) return 0;; *BSD) CMD="$ROUTE -n delete -host $ipaddr";; *) CMD="$ROUTE -n del -host $ipaddr";; esac $CMD return $? } delete_interface () { ifname="$1" ipaddr="$2" case "$SYSTYPE" in SunOS) if [ "$SYSVERSION" -ge 8 ] ; then CMD="$IFCONFIG $ifname unplumb" else CMD="$IFCONFIG $ifname 0 down" fi;; Darwin*) CMD="$IFCONFIG $ifname $ipaddr delete";; *BSD) CMD="$IFCONFIG $ifname inet $ipaddr delete";; *) CMD="$IFCONFIG $ifname down";; esac ocf_log info "$CMD" $CMD return $? } add_interface () { ipaddr="$1" iface_base="$2" iface="$3" netmask="$4" broadcast="$5" if [ $# != 5 ]; then ocf_log err "Insufficient arguments to add_interface: $*" exit $OCF_ERR_ARGS fi case "$SYSTYPE" in SunOS) if [ "$SYSVERSION" -ge 8 ] ; then $IFCONFIG $iface plumb rc=$? if [ $rc -ne 0 ] ; then echo "ERROR: '$IFCONFIG $iface plumb' failed." return $rc fi fi # At Solaris 10, this single-command version sometimes broke. # Almost certainly an S10 bug. # CMD="$IFCONFIG $iface inet $ipaddr $text up" # So hack the following workaround: CMD="$IFCONFIG $iface inet $ipaddr" CMD="$CMD && $IFCONFIG $iface netmask $netmask" CMD="$CMD && $IFCONFIG $iface up" ;; *BSD) # netmask is always set to 255.255.255.255 for an alias CMD="$IFCONFIG $iface inet $ipaddr netmask 255.255.255.255 alias";; *) CMD="$IFCONFIG $iface $ipaddr netmask $netmask broadcast $broadcast";; esac # Use "eval $CMD" (not "$CMD"): it might be a chain of two or more commands. ocf_log info "eval $CMD" eval $CMD rc=$? if [ $rc != 0 ]; then echo "ERROR: eval $CMD failed (rc=$rc)" fi return $rc } # # Remove the IP alias for the requested IP address... # ip_stop() { SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" NIC=`find_interface $OCF_RESKEY_ip` if [ -f "$SENDARPPIDFILE" ]; then cat "$SENDARPPIDFILE" | xargs kill rm -f "$SENDARPPIDFILE" fi if [ -z "$NIC" ]; then : Requested interface not in use return $OCF_SUCCESS fi if [ ${OCF_RESKEY_lvs_support} = 1 ]; then case $NIC in lo*) : Requested interface is on loopback return $OCF_SUCCESS;; esac fi delete_route "$OCF_RESKEY_ip" delete_interface "$NIC" "$OCF_RESKEY_ip" rc=$? if [ ${OCF_RESKEY_lvs_support} = 1 ]; then lvs_restore_loopback "$OCF_RESKEY_ip" fi # remove lock file... rm -f "$VLDIR/IPaddr-$NIC" if [ $rc != 0 ]; then ocf_log warn "IP Address $OCF_RESKEY_ip NOT released: rc=$rc" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # Add an IP alias for the requested IP address... # # It could be that we already have taken it, in which case it should # do nothing. # ip_start() { # # Do we already service this IP address? # ip_status_internal if [ $? = $OCF_SUCCESS ]; then # Nothing to do, the IP is already active return $OCF_SUCCESS; fi NIC_unique=`find_free_interface $OCF_RESKEY_nic` if [ -n "$NIC_unique" ]; then : OK got interface [$NIC_unique] for $OCF_RESKEY_ip else return $OCF_ERR_GENERIC fi # This logic is mostly to support LVS (If I understand it correctly) if [ ${OCF_RESKEY_lvs_support} = 1 ]; then NIC_current=`find_interface $OCF_RESKEY_ip` case $NIC_unique in lo*) if [ x"$NIC_unique" = x"$NIC_current" ]; then # Its already "running" and not moving, nothing to do. ocf_log err "Could not find a non-loopback device to move $OCF_RESKEY_ip to" return $OCF_ERR_GENERIC fi;; *) lvs_remove_conflicting_loopback "$OCF_RESKEY_ip" "$NIC_current";; esac fi if [ ! -z "${OCF_RESKEY_local_start_script}" ]; then if [ -x "${OCF_RESKEY_local_start_script}" ]; then ${OCF_RESKEY_local_start_script} $* fi fi add_interface "$OCF_RESKEY_ip" "$OCF_RESKEY_nic" "$NIC_unique" \ "$OCF_RESKEY_cidr_netmask" "$OCF_RESKEY_broadcast" rc=$? if [ $rc != 0 ]; then ocf_log err "Could not add $OCF_RESKEY_ip to $OCF_RESKEY_nic: rc=$rc" return $rc fi # The address is active, now notify others about it using sendarp if [ "$SYSTYPE" = "DarwinBSD" -a "$NIC_unique" = "lo0" ]; then # Darwin can't send ARPs on loopback devices SENDARP="x$SENDARP" # Prevent the binary from being found fi if [ -x $SENDARP ]; then TARGET_INTERFACE=`echo $NIC_unique | sed 's%:.*%%'` SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" ARGS="-i $OCF_RESKEY_ARP_INTERVAL_MS -r $OCF_RESKEY_ARP_REPEAT" ARGS="$ARGS -p $SENDARPPIDFILE $TARGET_INTERFACE $OCF_RESKEY_ip" ARGS="$ARGS auto $OCF_RESKEY_ip $OCF_RESKEY_ARP_NETMASK" ocf_log debug "Sending Gratuitous Arp for $OCF_RESKEY_ip on $NIC_unique [$TARGET_INTERFACE]" case $OCF_RESKEY_ARP_BACKGROUND in yes) ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?" & ) >&2 ;; *) $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?";; esac fi ip_status_internal return $? } ip_status_internal() { NIC=`find_interface "$OCF_RESKEY_ip"` if [ "x$NIC" = x ]; then return $OCF_NOT_RUNNING elif [ "${OCF_RESKEY_lvs_support}" = "1" ]; then case $NIC in lo*) return $OCF_NOT_RUNNING;; *) return $OCF_SUCCESS;; esac else if [ x$OCF_RESKEY_nic != x ]; then simple_OCF_NIC=`echo $OCF_RESKEY_nic | awk -F: '{print $1}'` simple_NIC=`echo $NIC | awk -F: '{print $1}'` if [ $simple_OCF_NIC != $simple_NIC ]; then ocf_log err "$OCF_RESKEY_ip is running an interface ($simple_NIC) instead of the configured one ($simple_OCF_NIC)" return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS fi } ip_status() { ip_status_internal rc=$? if [ $rc = $OCF_SUCCESS ]; then echo "running" elif [ $rc = $OCF_NOT_RUNNING ]; then echo "stopped" else echo "unknown" fi return $rc; } # # Determine if this IP address is really being served, or not. # Note that we must distinguish if *we're* serving it locally... # ip_monitor() { ip_status_internal rc=$? if [ $OCF_CHECK_LEVEL = 0 -o $rc != 0 ]; then return $rc fi ocf_log info "Checking IP stack" PINGARGS="`pingargs $OCF_RESKEY_ip`" for j in 1 2 3 4 5 6 7 8 9 10; do MSG=`$PING $PINGARGS 2>&1` if [ $? = 0 ]; then return $OCF_SUCCESS fi done ocf_log err "$MSG" return $OCF_ERR_GENERIC } is_positive_integer() { ocf_is_decimal $1 && [ $1 -ge 1 ] if [ $? = 0 ]; then return 1 fi return 0 } ip_validate_all() { : ${OCF_RESKEY_ARP_BACKGROUND=yes} : ${OCF_RESKEY_ARP_NETMASK=ffffffffffff} : ${OCF_RESKEY_ARP_INTERVAL_MS=500} : ${OCF_RESKEY_ARP_REPEAT=10} check_binary $AWK check_binary $IFCONFIG check_binary $ROUTE check_binary $PING if is_positive_integer $OCF_RESKEY_ARP_INTERVAL_MS then ocf_log err "Invalid parameter value: ARP_INTERVAL_MS [$OCF_RESKEY_ARP_INTERVAL_MS]" return $OCF_ERR_ARGS fi if is_positive_integer $OCF_RESKEY_ARP_REPEAT then ocf_log err "Invalid parameter value: ARP_REPEAT [$OCF_RESKEY_ARP_REPEAT]" return $OCF_ERR_ARGS fi : ${OCF_RESKEY_lvs_support=0} if [ "$SYSTYPE" = "Linux" -o "$SYSTYPE" = "SunOS" ]; then : else if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then ocf_log err "$SYSTYPE does not support LVS" return $OCF_ERR_GENERIC fi fi case $OCF_RESKEY_ip in "") ocf_log err "Required parameter OCF_RESKEY_ip is missing" return $OCF_ERR_CONFIGURED;; [0-9]*.[0-9]*.[0-9]*.*[0-9]) : OK;; *) ocf_log err "Parameter OCF_RESKEY_ip [$OCF_RESKEY_ip] not an IP address" return $OCF_ERR_CONFIGURED;; esac # Unconditionally do this? case $OCF_RESKEY_nic in *:*) OCF_RESKEY_nic=`echo $OCF_RESKEY_nic | sed 's/:.*//'` ;; esac NICINFO=`$FINDIF` rc=$? if [ $rc != 0 ]; then ocf_log err "$FINDIF failed [rc=$rc]." return $OCF_ERR_GENERIC fi tmp=`echo "$NICINFO" | cut -f1` if [ "x$OCF_RESKEY_nic" = "x" ] then ocf_log info "Using calculated nic for ${OCF_RESKEY_ip}: $tmp" OCF_RESKEY_nic=$tmp elif [ x$tmp != x${OCF_RESKEY_nic} ] then ocf_log err "Invalid parameter value: nic [$OCF_RESKEY_nic] Calculated nic: [$tmp]" return $OCF_ERR_ARGS fi tmp=`echo "$NICINFO" | cut -f2 | cut -d ' ' -f2` if [ "x$OCF_RESKEY_cidr_netmask" != "x$tmp" ] then ocf_log info "Using calculated netmask for ${OCF_RESKEY_ip}: $tmp" fi # Always use the calculated version becuase it might have been specified # using CIDR notation which not every system accepts OCF_RESKEY_netmask=$tmp OCF_RESKEY_cidr_netmask=$tmp; export OCF_RESKEY_cidr_netmask tmp=`echo "$NICINFO" | cut -f3 | cut -d ' ' -f2` if [ "x$OCF_RESKEY_broadcast" = "x" ] then ocf_log debug "Using calculated broadcast for ${OCF_RESKEY_ip}: $tmp" OCF_RESKEY_broadcast=$tmp elif [ x$tmp != x${OCF_RESKEY_broadcast} ]; then ocf_log err "Invalid parameter value: broadcast [$OCF_RESKEY_broadcast] Calculated broadcast: [$tmp]" return $OCF_ERR_ARGS fi return $OCF_SUCCESS } usage() { echo $USAGE >&2 return $1 } if [ $# -ne 1 ]; then usage $OCF_ERR_ARGS fi : ${OCF_RESKEY_lvs_support=0} # Normalize the value of lvs_support if [ "${OCF_RESKEY_lvs_support}" = "true" \ -o "${OCF_RESKEY_lvs_support}" = "on" \ -o "${OCF_RESKEY_lvs_support}" = "yes" \ -o "${OCF_RESKEY_lvs_support}" = "1" ]; then OCF_RESKEY_lvs_support=1 else OCF_RESKEY_lvs_support=0 fi # Note: We had a version out there for a while which used # netmask instead of cidr_netmask. So, don't remove this aliasing code! if [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ] then OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask export OCF_RESKEY_cidr_netmask fi case $1 in meta-data) meta_data;; start) ip_validate_all && ip_start;; stop) ip_stop;; status) ip_status;; monitor) ip_monitor;; validate-all) ip_validate_all;; usage) usage $OCF_SUCCESS;; *) usage $OCF_ERR_UNIMPLEMENTED;; esac exit $? diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 index 79e19230f..a35e05838 100755 --- a/heartbeat/IPaddr2 +++ b/heartbeat/IPaddr2 @@ -1,889 +1,909 @@ #!/bin/sh # # $Id: IPaddr2.in,v 1.24 2006/08/09 13:01:54 lars Exp $ # # OCF Resource Agent compliant IPaddr2 script. # # Based on work by Tuomo Soini, ported to the OCF RA API by Lars -# Marowsky-Brée. Implements Cluster Alias IP functionality too. +# Marowsky-Brée. Implements Cluster Alias IP functionality too. # # Cluster Alias IP cleanup, fixes and testing by Michael Schwartzkopff # # # Copyright (c) 2003 Tuomo Soini -# Copyright (c) 2004-2006 SUSE LINUX AG, Lars Marowsky-Brée +# Copyright (c) 2004-2006 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # TODO: # - There ought to be an ocf_run_cmd function which does all logging, # timeout handling etc for us # - Make this the standard IP address agent on Linux; the other # platforms simply should ignore the additional parameters OR can use # the legacy heartbeat resource script... # - Check LVS <-> clusterip incompatibilities. # # OCF parameters are as below # OCF_RESKEY_ip # OCF_RESKEY_broadcast # OCF_RESKEY_nic # OCF_RESKEY_cidr_netmask # OCF_RESKEY_iflabel # OCF_RESKEY_mac # OCF_RESKEY_clusterip_hash # OCF_RESKEY_arp_interval # OCF_RESKEY_arp_count # OCF_RESKEY_arp_bg # OCF_RESKEY_arp_mac # # OCF_RESKEY_CRM_meta_clone # OCF_RESKEY_CRM_meta_clone_max ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_lvs_support_default=false OCF_RESKEY_clusterip_hash_default="sourceip-sourceport" OCF_RESKEY_unique_clone_address_default=false OCF_RESKEY_arp_interval_default=200 OCF_RESKEY_arp_count_default=5 OCF_RESKEY_arp_bg_default=true OCF_RESKEY_arp_mac_default="ffffffffffff" : ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} : ${OCF_RESKEY_clusterip_hash=${OCF_RESKEY_clusterip_hash_default}} : ${OCF_RESKEY_unique_clone_address=${OCF_RESKEY_unique_clone_address_default}} : ${OCF_RESKEY_arp_interval=${OCF_RESKEY_arp_interval_default}} : ${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default}} : ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} : ${OCF_RESKEY_arp_mac=${OCF_RESKEY_arp_mac_default}} ####################################################################### SENDARP=$HA_BIN/send_arp FINDIF=$HA_BIN/findif VLDIR=$HA_RSCTMP SENDARPPIDDIR=$HA_RSCTMP CIP_lockfile=$HA_RSCTMP/IPaddr2-CIP-${OCF_RESKEY_ip} ####################################################################### meta_data() { cat < 1.0 This Linux-specific resource manages IP alias IP addresses. It can add an IP alias, or remove one. In addition, it can implement Cluster Alias IP functionality if invoked as a clone resource. + +If used as a clone, you should explicitly set clone-node-max >= 2, +and/or clone-max < number of nodes. In case of node failure, +clone instances need to be re-allocated on surviving nodes. +Which would not be possible, if there is already an instance on those nodes, +and clone-node-max=1 (which is the default). Manages virtual IPv4 addresses (Linux specific version) The IPv4 address to be configured in dotted quad notation, for example "192.168.1.1". IPv4 address The base network interface on which the IP address will be brought online. - If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. +If you want a label, see the iflabel parameter. Prerequisite: There must be at least one static IP address, which is not managed by the cluster, assigned to the network interface. - If you can not assign any static IP address on the interface, modify this kernel parameter: -sysctl -w net.ipv4.conf.all.promote_secondaries=1 -(or per device) +sysctl -w net.ipv4.conf.all.promote_secondaries=1 # (or per device) Network interface The netmask for the interface in CIDR format (e.g., 24 and not 255.255.255.0) If unspecified, the script will also try to determine this from the routing table. CIDR netmask Broadcast address associated with the IP. If left empty, the script will determine this from the netmask. Broadcast address You can specify an additional label for your IP address here. This label is appended to your interface name. If a label is specified in nic name, this parameter has no effect. Interface label Enable support for LVS Direct Routing configurations. In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Enable support for LVS DR Set the interface MAC address explicitly. Currently only used in case of the Cluster IP Alias. Leave empty to chose automatically. Cluster IP MAC address Specify the hashing algorithm used for the Cluster IP functionality. Cluster IP hashing function If true, add the clone ID to the supplied value of ip to create a unique address to manage Create a unique address for cloned instances Specify the interval between unsolicited ARP packets in milliseconds. ARP packet interval in ms Number of unsolicited ARP packets to send. ARP packet count Whether or not to send the arp packets in the background. ARP from background -MAC address to send the ARP packets too. +MAC address to send the ARP packets to. You really shouldn't be touching this. ARP MAC Flush the routing table on stop. This is for applications which use the cluster IP address and which run on the same physical host that the IP address lives on. The Linux kernel may force that application to take a shortcut to the local loopback interface, instead of the interface the address is really bound to. Under those circumstances, an application may, somewhat unexpectedly, continue to use connections for some time even after the IP address is deconfigured. Set this parameter in order to immediately disable said shortcut when the IP address goes away. Flush kernel routing table on stop END exit $OCF_SUCCESS } ip_init() { local rc if [ X`uname -s` != "XLinux" ]; then ocf_log err "IPaddr2 only supported Linux." exit $OCF_ERR_INSTALLED fi if [ X"$OCF_RESKEY_ip" = "X" ]; then ocf_log err "IP address (the ip parameter) is mandatory" exit $OCF_ERR_CONFIGURED fi if case $__OCF_ACTION in start|stop) ocf_is_root;; *) true;; esac then : YAY! else ocf_log err "You must be root for $__OCF_ACTION operation." exit $OCF_ERR_PERM fi BASEIP="$OCF_RESKEY_ip" BRDCAST="$OCF_RESKEY_broadcast" NIC="$OCF_RESKEY_nic" # Note: We had a version out there for a while which used # netmask instead of cidr_netmask. Don't remove this aliasing code! if [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ] then OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask export OCF_RESKEY_cidr_netmask fi NETMASK="$OCF_RESKEY_cidr_netmask" IFLABEL="$OCF_RESKEY_iflabel" IF_MAC="$OCF_RESKEY_mac" IP_INC_GLOBAL=${OCF_RESKEY_CRM_meta_clone_max:-1} IP_INC_NO=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + 1` if ocf_is_true ${OCF_RESKEY_lvs_support} && [ $IP_INC_GLOBAL -gt 1 ]; then ocf_log err "LVS and load sharing do not go together well" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$IP_INC_GLOBAL" && [ $IP_INC_GLOBAL -gt 0 ]; then : else ocf_log err "Invalid OCF_RESKEY_incarnations_max_global [$IP_INC_GLOBAL], should be positive integer" exit $OCF_ERR_CONFIGURED fi # $FINDIF takes its parameters from the environment # NICINFO=`$FINDIF -C` rc=$? if [ $rc -eq 0 ] then NICINFO=`echo $NICINFO | sed -e 's/netmask\ //;s/broadcast\ //'` NIC=`echo "$NICINFO" | cut -d" " -f1` NETMASK=`echo "$NICINFO" | cut -d" " -f2` BRDCAST=`echo "$NICINFO" | cut -d" " -f3` else # findif couldn't find the interface if ocf_is_probe; then ocf_log info "[$FINDIF -C] failed" exit $OCF_NOT_RUNNING elif [ "$__OCF_ACTION" = stop ]; then ocf_log warn "[$FINDIF -C] failed" exit $OCF_SUCCESS else ocf_log err "[$FINDIF -C] failed" exit $rc fi fi SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" case $NIC in *:*) IFLABEL=$NIC NIC=`echo $NIC | sed 's/:.*//'` ;; *) if [ -n "$IFLABEL" ]; then IFLABEL=${NIC}:${IFLABEL} fi ;; esac if [ "$IP_INC_GLOBAL" -gt 1 ] && ! ocf_is_true "$OCF_RESKEY_unique_clone_address"; then IP_CIP="yes" IP_CIP_HASH="${OCF_RESKEY_clusterip_hash}" if [ -z "$IF_MAC" ]; then # Choose a MAC # 1. Concatenate some input together # 2. This doesn't need to be a cryptographically # secure hash. # 3. Drop everything after the first 6 octets (12 chars) # 4. Delimit the octets with ':' # 5. Make sure the first octet is odd, # so the result is a multicast MAC IF_MAC=`echo $OCF_RESKEY_ip $NETMASK $BRDCAST | \ md5sum | \ sed -e 's#\(............\).*#\1#' \ -e 's#..#&:#g; s#:$##' \ -e 's#^\(.\)[02468aAcCeE]#\11#'` fi IP_CIP_FILE="/proc/net/ipt_CLUSTERIP/$OCF_RESKEY_ip" fi } # -# Find out which interface serves the given IP address -# The argument is an IP address, and its output -# is an interface name (e.g., "eth0"). +# Find out which interfaces serve the given IP address and netmask. +# The arguments are an IP address and a netmask. +# Its output are interface names devided by spaces (e.g., "eth0 eth1"). # find_interface() { + local ipaddr="$1" + local netmask="$2" + # # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces # - local iface=`$IP2UTIL -o -f inet addr show | grep "\ $OCF_RESKEY_ip/$NETMASK" \ - | cut -d ' ' -f2 | grep -v '^ipsec[0-9][0-9]*$'` - echo $iface + local iface="`$IP2UTIL -o -f inet addr show \ + | grep "\ $ipaddr/$netmask" \ + | cut -d ' ' -f2 \ + | grep -v '^ipsec[0-9][0-9]*$'`" + + echo "$iface" return 0 } # # Delete an interface # delete_interface () { ipaddr="$1" iface="$2" netmask="$3" CMD="$IP2UTIL -f inet addr delete $ipaddr/$netmask dev $iface" ocf_run $CMD || return $OCF_ERR_GENERIC if ocf_is_true $OCF_RESKEY_flush_routes; then ocf_run $IP2UTIL route flush cache fi return $OCF_SUCCESS } # # Add an interface # add_interface () { local cmd msg ipaddr netmask broadcast iface label ipaddr="$1" netmask="$2" broadcast="$3" iface="$4" label="$5" cmd="$IP2UTIL -f inet addr add $ipaddr/$netmask brd $broadcast dev $iface" msg="Adding IPv4 address $ipaddr/$netmask with broadcast address $broadcast to device $iface" if [ ! -z "$label" ]; then cmd="$cmd label $label" msg="${msg} (with label $label)" fi ocf_log info "$msg" ocf_run $cmd || return $OCF_ERR_GENERIC msg="Bringing device $iface up" cmd="$IP2UTIL link set $iface up" ocf_log info "$msg" ocf_run $cmd || return $OCF_ERR_GENERIC return $OCF_SUCCESS } # # Delete a route # delete_route () { prefix="$1" iface="$2" CMD="$IP2UTIL route delete $prefix dev $iface" ocf_log info "$CMD" $CMD return $? } # On Linux systems the (hidden) loopback interface may # conflict with the requested IP address. If so, this # unoriginal code will remove the offending loopback address # and save it in VLDIR so it can be added back in later # when the IPaddr is released. # # TODO: This is very ugly and should be controlled by an additional # instance parameter. Or even: multi-state, with the IP only being # "active" on the master!? # remove_conflicting_loopback() { ipaddr="$1" netmask="$2" broadcast="$3" ifname="$4" ocf_log info "Removing conflicting loopback $ifname." if echo "$ipaddr $netmask $broadcast $ifname" > "$VLDIR/$ipaddr" then : Saved loopback information in $VLDIR/$ipaddr else ocf_log err "Could not save conflicting loopback $ifname." \ "it will not be restored." fi delete_interface "$ipaddr" "$ifname" "$netmask" # Forcibly remove the route (if it exists) to the loopback. delete_route "$ipaddr" "$ifname" } # # On Linux systems the (hidden) loopback interface may # need to be restored if it has been taken down previously # by remove_conflicting_loopback() # restore_loopback() { ipaddr="$1" if [ -s "$VLDIR/$ipaddr" ]; then ifinfo=`cat "$VLDIR/$ipaddr"` ocf_log info "Restoring loopback IP Address " \ "$ifinfo." add_interface $ifinfo rm -f "$VLDIR/$ipaddr" fi } # # Run send_arp to note peers about new mac address # run_send_arp() { ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip auto not_used not_used" if [ "x$IP_CIP" = "xyes" ] ; then if [ x = "x$IF_MAC" ] ; then MY_MAC=auto else MY_MAC=`echo ${IF_MAC} | sed -e 's/://g'` fi ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" fi ocf_log info "$SENDARP $ARGS" if ocf_is_true $OCF_RESKEY_arp_bg; then ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" &) >&2 else $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" fi } # # Run ipoibarping to note peers about new Infiniband address # run_send_ib_arp() { ARGS="-q -c $OCF_RESKEY_arp_count -U -I $NIC $OCF_RESKEY_ip" ocf_log info "ipoibarping $ARGS" if ocf_is_true $OCF_RESKEY_arp_bg; then (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps" &) >&2 else ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps" fi } -# Do we already serve this IP address? +# Do we already serve this IP address on the given $NIC? # # returns: # ok = served (for CIP: + hash bucket) # partial = served and no hash bucket (CIP only) # partial2 = served and no CIP iptables rule # no = nothing # ip_served() { if [ -z "$NIC" ]; then # no nic found or specified echo "no" return 0 fi - cur_nic="`find_interface $OCF_RESKEY_ip`" + cur_nic="`find_interface $OCF_RESKEY_ip $NETMASK`" if [ -z "$cur_nic" ]; then echo "no" return 0 fi if [ -z "$IP_CIP" ]; then - case $cur_nic in - lo*) if ocf_is_true ${OCF_RESKEY_lvs_support}; then - echo "no" + for i in $cur_nic; do + case $i in + lo*) + if ocf_is_true ${OCF_RESKEY_lvs_support}; then + echo "no" + return 0 + fi + ;; + $NIC) + # only mark as served when on the same interfaces as $NIC + echo "ok" return 0 - fi - ;; - esac + ;; + esac + done - echo "ok" + echo "no" return 0 fi # Special handling for the CIP: if [ ! -e $IP_CIP_FILE ]; then echo "partial2" return 0 fi if egrep -q "(^|,)${IP_INC_NO}(,|$)" $IP_CIP_FILE ; then echo "ok" return 0 else echo "partial" return 0 fi exit $OCF_ERR_GENERIC } ####################################################################### ip_usage() { cat <$IP_CIP_FILE fi if [ "$ip_status" = "no" ]; then if ocf_is_true ${OCF_RESKEY_lvs_support}; then - case `find_interface $OCF_RESKEY_ip` in - lo*) - remove_conflicting_loopback $OCF_RESKEY_ip 32 255.255.255.255 lo - ;; - esac + for i in `find_interface $OCF_RESKEY_ip $NETMASK`; do + case $i in + lo*) + remove_conflicting_loopback $OCF_RESKEY_ip 32 255.255.255.255 lo + ;; + esac + done fi add_interface $OCF_RESKEY_ip $NETMASK $BRDCAST $NIC $IFLABEL if [ $? -ne 0 ]; then ocf_log err "$CMD failed." exit $OCF_ERR_GENERIC fi fi case $NIC in lo*) : no need to run send_arp on loopback ;; ib*) run_send_ib_arp ;; *) if [ -x $SENDARP ]; then run_send_arp fi ;; esac exit $OCF_SUCCESS } ip_stop() { local ip_del_if="yes" if [ -n "$IP_CIP" ]; then # Cluster IPs need special processing when the last bucket # is removed from the node... take a lock to make sure only one # process executes that code ocf_take_lock $CIP_lockfile ocf_release_lock_on_exit $CIP_lockfile fi if [ -f "$SENDARPPIDFILE" ] ; then kill `cat "$SENDARPPIDFILE"` if [ $? -ne 0 ]; then ocf_log warn "Could not kill previously running send_arp for $OCF_RESKEY_ip" else ocf_log info "killed previously running send_arp for $OCF_RESKEY_ip" rm -f "$SENDARPPIDFILE" fi fi local ip_status=`ip_served` ocf_log info "IP status = $ip_status, IP_CIP=$IP_CIP" if [ $ip_status = "no" ]; then : Requested interface not in use exit $OCF_SUCCESS fi if [ -n "$IP_CIP" ] && [ $ip_status != "partial2" ]; then if [ $ip_status = "partial" ]; then exit $OCF_SUCCESS fi echo "-$IP_INC_NO" >$IP_CIP_FILE if [ "x$(cat $IP_CIP_FILE)" = "x" ]; then ocf_log info $OCF_RESKEY_ip, $IP_CIP_HASH i=1 while [ $i -le $IP_INC_GLOBAL ]; do ocf_log info $i $IPTABLES -D INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \ --new \ --clustermac $IF_MAC \ --total-nodes $IP_INC_GLOBAL \ --local-node $i \ --hashmode $IP_CIP_HASH i=`expr $i + 1` done else ip_del_if="no" fi fi if [ "$ip_del_if" = "yes" ]; then delete_interface $OCF_RESKEY_ip $NIC $NETMASK if [ $? -ne 0 ]; then exit $OCF_ERR_GENERIC fi if ocf_is_true ${OCF_RESKEY_lvs_support}; then restore_loopback "$OCF_RESKEY_ip" fi fi exit $OCF_SUCCESS } ip_monitor() { # TODO: Implement more elaborate monitoring like checking for # interface health maybe via a daemon like FailSafe etc... local ip_status=`ip_served` case $ip_status in ok) return $OCF_SUCCESS ;; partial|no|partial2) exit $OCF_NOT_RUNNING ;; *) # Errors on this interface? return $OCF_ERR_GENERIC ;; esac } ip_validate() { check_binary $IP2UTIL IP_CIP= ip_init case "$NIC" in ib*) check_binary ipoibarping ;; esac if [ -n "$IP_CIP" ]; then check_binary $IPTABLES check_binary $MODPROBE fi # $BASEIP, $NETMASK, $NIC , $IP_INC_GLOBAL, and $BRDCAST have been checked within ip_init, # do not bother here. if ocf_is_true "$OCF_RESKEY_unique_clone_address" && ! ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then ocf_log err "unique_clone_address makes sense only with meta globally_unique set" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$OCF_RESKEY_arp_interval" && [ $OCF_RESKEY_arp_interval -gt 0 ]; then : else ocf_log err "Invalid OCF_RESKEY_arp_interval [$OCF_RESKEY_arp_interval]" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$OCF_RESKEY_arp_count" && [ $OCF_RESKEY_arp_count -gt 0 ]; then : else ocf_log err "Invalid OCF_RESKEY_arp_count [$OCF_RESKEY_arp_count]" exit $OCF_ERR_CONFIGURED fi if [ -n "$IP_CIP" ]; then local valid=1 case $IP_CIP_HASH in sourceip|sourceip-sourceport|sourceip-sourceport-destport) ;; *) ocf_log err "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]" exit $OCF_ERR_CONFIGURED ;; esac if ocf_is_true ${OCF_RESKEY_lvs_support}; then ecf_log err "LVS and load sharing not advised to try" exit $OCF_ERR_CONFIGURED fi case $IF_MAC in [0-9a-zA-Z][13579bBdDfF][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]) ;; *) valid=0 ;; esac if [ $valid -eq 0 ]; then ocf_log err "Invalid IF_MAC [$IF_MAC]" exit $OCF_ERR_CONFIGURED fi fi } if ocf_is_true "$OCF_RESKEY_unique_clone_address"; then prefix=`echo $OCF_RESKEY_ip | awk -F. '{print $1"."$2"."$3}'` suffix=`echo $OCF_RESKEY_ip | awk -F. '{print $4}'` suffix=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + $suffix` OCF_RESKEY_ip="$prefix.$suffix" fi case $__OCF_ACTION in meta-data) meta_data ;; usage|help) ip_usage exit $OCF_SUCCESS ;; esac ip_validate case $__OCF_ACTION in start) ip_start ;; stop) ip_stop ;; status) ip_status=`ip_served` if [ $ip_status = "ok" ]; then echo "running" exit $OCF_SUCCESS else echo "stopped" exit $OCF_NOT_RUNNING fi ;; monitor) ip_monitor ;; validate-all) ;; *) ip_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/IPv6addr.c b/heartbeat/IPv6addr.c index d6a5b8d43..52ea2882d 100644 --- a/heartbeat/IPv6addr.c +++ b/heartbeat/IPv6addr.c @@ -1,980 +1,988 @@ /* * This program manages IPv6 address with OCF Resource Agent standard. * * Author: Huang Zhen * Copyright (c) 2004 International Business Machines * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* * It can add an IPv6 address, or remove one. * * Usage: IPv6addr {start|stop|status|monitor|meta-data} * * The "start" arg adds an IPv6 address. * The "stop" arg removes one. * The "status" arg shows whether the IPv6 address exists * The "monitor" arg shows whether the IPv6 address can be pinged (ICMPv6 ECHO) * The "meta_data" arg shows the meta data(XML) */ /* * ipv6-address: * * currently the following forms are legal: * address * address/prefix * * E.g. * 3ffe:ffff:0:f101::3 * 3ffe:ffff:0:f101::3/64 * * It should be passed by environment variant: * OCF_RESKEY_ipv6addr=3ffe:ffff:0:f101::3 * OCF_RESKEY_cidr_netmask=64 * OCF_RESKEY_nic=eth0 * */ /* * start: * 1.IPv6addr will choice a proper interface for the new address. * 2.Then assign the new address to the interface. * 3.Wait until the new address is available (reply ICMPv6 ECHO packet) * 4.Send out the unsolicited advertisements. * * return 0(OCF_SUCCESS) for success * return 1(OCF_ERR_GENERIC) for failure * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) * * * stop: * remove the address from the inferface. * * return 0(OCF_SUCCESS) for success * return 1(OCF_ERR_GENERIC) for failure * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) * * status: * return the status of the address. only check whether it exists. * * return 0(OCF_SUCCESS) for existing * return 1(OCF_NOT_RUNNING) for not existing * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) * * * monitor: * ping the address by ICMPv6 ECHO request. * * return 0(OCF_SUCCESS) for response correctly. * return 1(OCF_NOT_RUNNING) for no response. * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) */ #include #include #include #include #include #include #include #include #include /* for inet_pton */ #include /* for if_nametoindex */ #include #include #include #include #include #include #include #include #define PIDFILE_BASE HA_RSCTMPDIR "/IPv6addr-" /* 0 No error, action succeeded completely 1 generic or unspecified error (current practice) The "monitor" operation shall return this for a crashed, hung or otherwise non-functional resource. 2 invalid or excess argument(s) Likely error code for validate-all, if the instance parameters do not validate. Any other action is free to also return this exit status code for this case. 3 unimplemented feature (for example, "reload") 4 user had insufficient privilege 5 program is not installed 6 program is not configured 7 program is not running 8 resource is running in "master" mode and fully operational 9 resource is in "master" mode but in a failed state */ #define OCF_SUCCESS 0 #define OCF_ERR_GENERIC 1 #define OCF_ERR_ARGS 2 #define OCF_ERR_UNIMPLEMENTED 3 #define OCF_ERR_PERM 4 #define OCF_ERR_INSTALLED 5 #define OCF_ERR_CONFIGURED 6 #define OCF_NOT_RUNNING 7 const char* IF_INET6 = "/proc/net/if_inet6"; const char* APP_NAME = "IPv6addr"; const char* START_CMD = "start"; const char* STOP_CMD = "stop"; const char* STATUS_CMD = "status"; const char* MONITOR_CMD = "monitor"; const char* ADVT_CMD = "advt"; const char* RECOVER_CMD = "recover"; const char* RELOAD_CMD = "reload"; const char* META_DATA_CMD = "meta-data"; const char* VALIDATE_CMD = "validate-all"; char BCAST_ADDR[] = "ff02::1"; const int UA_REPEAT_COUNT = 5; const int QUERY_COUNT = 5; #define HWADDR_LEN 6 /* mac address length */ struct in6_ifreq { struct in6_addr ifr6_addr; uint32_t ifr6_prefixlen; unsigned int ifr6_ifindex; }; static int start_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); static int stop_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); static int status_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); static int monitor_addr6(struct in6_addr* addr6, int prefix_len); static int advt_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); static int meta_data_addr6(void); static void usage(const char* self); int write_pid_file(const char *pid_file); int create_pid_directory(const char *pid_file); static void byebye(int nsig); static char* scan_if(struct in6_addr* addr_target, int* plen_target, int use_mask, char* prov_ifname); static char* find_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname); static char* get_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname); static int assign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name); static int unassign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name); int is_addr6_available(struct in6_addr* addr6); static int send_ua(struct in6_addr* src_ip, char* if_name); int main(int argc, char* argv[]) { char pid_file[256]; char* ipv6addr; char* cidr_netmask; int ret; char* cp; char* prov_ifname = NULL; int prefix_len = -1; struct in6_addr addr6; /* Check the count of parameters first */ if (argc < 2) { usage(argv[0]); return OCF_ERR_ARGS; } /* set termination signal */ siginterrupt(SIGTERM, 1); signal(SIGTERM, byebye); /* open system log */ cl_log_set_entity(APP_NAME); cl_log_set_facility(LOG_DAEMON); /* the meta-data dont need any parameter */ if (0 == strncmp(META_DATA_CMD, argv[1], strlen(META_DATA_CMD))) { ret = meta_data_addr6(); return OCF_SUCCESS; } /* check the OCF_RESKEY_ipv6addr parameter, should be an IPv6 address */ ipv6addr = getenv("OCF_RESKEY_ipv6addr"); if (ipv6addr == NULL) { cl_log(LOG_ERR, "Please set OCF_RESKEY_ipv6addr to the IPv6 address you want to manage."); usage(argv[0]); return OCF_ERR_ARGS; } /* legacy option */ if ((cp = strchr(ipv6addr, '/'))) { prefix_len = atol(cp + 1); if ((prefix_len < 0) || (prefix_len > 128)) { cl_log(LOG_ERR, "Invalid prefix_len [%s], should be an integer in [0, 128]", cp+1); usage(argv[0]); return OCF_ERR_ARGS; } *cp=0; } /* get provided netmask (optional) */ cidr_netmask = getenv("OCF_RESKEY_cidr_netmask"); if (cidr_netmask != NULL) { if ((atol(cidr_netmask) < 0) || (atol(cidr_netmask) > 128)) { cl_log(LOG_ERR, "Invalid prefix_len [%s], " "should be an integer in [0, 128]", cidr_netmask); usage(argv[0]); return OCF_ERR_ARGS; } if (prefix_len != -1 && prefix_len != atol(cidr_netmask)) { cl_log(LOG_DEBUG, "prefix_len(%d) is overwritted by cidr_netmask(%s)", prefix_len, cidr_netmask); } prefix_len = atol(cidr_netmask); } else if (prefix_len == -1) { prefix_len = 0; } /* get provided interface name (optional) */ prov_ifname = getenv("OCF_RESKEY_nic"); if (inet_pton(AF_INET6, ipv6addr, &addr6) <= 0) { cl_log(LOG_ERR, "Invalid IPv6 address [%s]", ipv6addr); usage(argv[0]); return OCF_ERR_ARGS; } /* Check whether this system supports IPv6 */ if (access(IF_INET6, R_OK)) { cl_log(LOG_ERR, "No support for INET6 on this system."); return OCF_ERR_GENERIC; } /* create the pid file so we can make sure that only one IPv6addr * for this address is running */ if (snprintf(pid_file, sizeof(pid_file), "%s%s", PIDFILE_BASE, ipv6addr) >= (int)sizeof(pid_file)) { cl_log(LOG_ERR, "Pid file truncated"); return OCF_ERR_GENERIC; } if (write_pid_file(pid_file) < 0) { return OCF_ERR_GENERIC; } /* switch the command */ if (0 == strncmp(START_CMD,argv[1], strlen(START_CMD))) { ret = start_addr6(&addr6, prefix_len, prov_ifname); }else if (0 == strncmp(STOP_CMD,argv[1], strlen(STOP_CMD))) { ret = stop_addr6(&addr6, prefix_len, prov_ifname); }else if (0 == strncmp(STATUS_CMD,argv[1], strlen(STATUS_CMD))) { ret = status_addr6(&addr6, prefix_len, prov_ifname); }else if (0 ==strncmp(MONITOR_CMD,argv[1], strlen(MONITOR_CMD))) { ret = monitor_addr6(&addr6, prefix_len); }else if (0 ==strncmp(RELOAD_CMD,argv[1], strlen(RELOAD_CMD))) { ret = OCF_ERR_UNIMPLEMENTED; }else if (0 ==strncmp(RECOVER_CMD,argv[1], strlen(RECOVER_CMD))) { ret = OCF_ERR_UNIMPLEMENTED; }else if (0 ==strncmp(VALIDATE_CMD,argv[1], strlen(VALIDATE_CMD))) { /* ipv6addr has been validated by inet_pton, hence a valid IPv6 address */ ret = OCF_SUCCESS; }else if (0 ==strncmp(ADVT_CMD,argv[1], strlen(MONITOR_CMD))) { ret = advt_addr6(&addr6, prefix_len, prov_ifname); }else{ usage(argv[0]); ret = OCF_ERR_ARGS; } /* release the pid file */ unlink(pid_file); return ret; } int start_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) { int i; char* if_name; if(OCF_SUCCESS == status_addr6(addr6,prefix_len,prov_ifname)) { return OCF_SUCCESS; } /* we need to find a proper device to assign the address */ if_name = find_if(addr6, &prefix_len, prov_ifname); if (NULL == if_name) { cl_log(LOG_ERR, "no valid mecahnisms"); return OCF_ERR_GENERIC; } /* Assign the address */ if (0 != assign_addr6(addr6, prefix_len, if_name)) { cl_log(LOG_ERR, "failed to assign the address to %s", if_name); return OCF_ERR_GENERIC; } /* Check whether the address available */ for (i = 0; i < QUERY_COUNT; i++) { if (0 == is_addr6_available(addr6)) { break; } sleep(1); } if (i == QUERY_COUNT) { cl_log(LOG_ERR, "failed to ping the address"); return OCF_ERR_GENERIC; } /* Send unsolicited advertisement packet to neighbor */ for (i = 0; i < UA_REPEAT_COUNT; i++) { send_ua(addr6, if_name); sleep(1); } return OCF_SUCCESS; } int advt_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) { /* First, we need to find a proper device to assign the address */ char* if_name = get_if(addr6, &prefix_len, prov_ifname); int i; if (NULL == if_name) { cl_log(LOG_ERR, "no valid mecahnisms"); return OCF_ERR_GENERIC; } /* Send unsolicited advertisement packet to neighbor */ for (i = 0; i < UA_REPEAT_COUNT; i++) { send_ua(addr6, if_name); sleep(1); } return OCF_SUCCESS; } int stop_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) { char* if_name; if(OCF_NOT_RUNNING == status_addr6(addr6,prefix_len,prov_ifname)) { return OCF_SUCCESS; } if_name = get_if(addr6, &prefix_len, prov_ifname); if (NULL == if_name) { cl_log(LOG_ERR, "no valid mechanisms."); /* I think this should be a success exit according to LSB. */ return OCF_ERR_GENERIC; } /* Unassign the address */ if (0 != unassign_addr6(addr6, prefix_len, if_name)) { cl_log(LOG_ERR, "failed to assign the address to %s", if_name); return OCF_ERR_GENERIC; } return OCF_SUCCESS; } int status_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) { char* if_name = get_if(addr6, &prefix_len, prov_ifname); if (NULL == if_name) { return OCF_NOT_RUNNING; } return OCF_SUCCESS; } int monitor_addr6(struct in6_addr* addr6, int prefix_len) { if(0 == is_addr6_available(addr6)) { return OCF_SUCCESS; } return OCF_NOT_RUNNING; } /* Send an unsolicited advertisement packet * Please refer to rfc4861 / rfc3542 */ int send_ua(struct in6_addr* src_ip, char* if_name) { int status = -1; int fd; int ifindex; int hop; struct ifreq ifr; u_int8_t *payload = NULL; int payload_size; struct nd_neighbor_advert *na; struct nd_opt_hdr *opt; struct sockaddr_in6 src_sin6; struct sockaddr_in6 dst_sin6; if ((fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) == 0) { cl_log(LOG_ERR, "socket(IPPROTO_ICMPV6) failed: %s", strerror(errno)); goto err; } /* set the outgoing interface */ ifindex = if_nametoindex(if_name); if (setsockopt(fd, IPPROTO_IPV6, IPV6_MULTICAST_IF, &ifindex, sizeof(ifindex)) < 0) { cl_log(LOG_ERR, "setsockopt(IPV6_MULTICAST_IF) failed: %s", strerror(errno)); goto err; } /* set the hop limit */ hop = 255; /* 255 is required. see rfc4861 7.1.2 */ if (setsockopt(fd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &hop, sizeof(hop)) < 0) { cl_log(LOG_ERR, "setsockopt(IPV6_MULTICAST_HOPS) failed: %s", strerror(errno)); goto err; } /* set the source address */ memset(&src_sin6, 0, sizeof(src_sin6)); src_sin6.sin6_family = AF_INET6; src_sin6.sin6_addr = *src_ip; src_sin6.sin6_port = 0; if (IN6_IS_ADDR_LINKLOCAL(&src_sin6.sin6_addr) || IN6_IS_ADDR_MC_LINKLOCAL(&src_sin6.sin6_addr)) { src_sin6.sin6_scope_id = ifindex; } if (bind(fd, (struct sockaddr *)&src_sin6, sizeof(src_sin6)) < 0) { cl_log(LOG_ERR, "bind() failed: %s", strerror(errno)); goto err; } /* get the hardware address */ memset(&ifr, 0, sizeof(ifr)); strncpy(ifr.ifr_name, if_name, sizeof(ifr.ifr_name) - 1); if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) { cl_log(LOG_ERR, "ioctl(SIOCGIFHWADDR) failed: %s", strerror(errno)); goto err; } /* build a neighbor advertisement message */ payload_size = sizeof(struct nd_neighbor_advert) + sizeof(struct nd_opt_hdr) + HWADDR_LEN; payload = memalign(sysconf(_SC_PAGESIZE), payload_size); if (!payload) { cl_log(LOG_ERR, "malloc for payload failed"); goto err; } memset(payload, 0, payload_size); /* Ugly typecast from ia64 hell! */ na = (struct nd_neighbor_advert *)((void *)payload); na->nd_na_type = ND_NEIGHBOR_ADVERT; na->nd_na_code = 0; na->nd_na_cksum = 0; /* calculated by kernel */ na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE; na->nd_na_target = *src_ip; /* options field; set the target link-layer address */ opt = (struct nd_opt_hdr *)(payload + sizeof(struct nd_neighbor_advert)); opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; opt->nd_opt_len = 1; /* The length of the option in units of 8 octets */ memcpy(payload + sizeof(struct nd_neighbor_advert) + sizeof(struct nd_opt_hdr), &ifr.ifr_hwaddr.sa_data, HWADDR_LEN); /* sending an unsolicited neighbor advertisement to all */ memset(&dst_sin6, 0, sizeof(dst_sin6)); dst_sin6.sin6_family = AF_INET6; inet_pton(AF_INET6, BCAST_ADDR, &dst_sin6.sin6_addr); /* should not fail */ if (sendto(fd, payload, payload_size, 0, (struct sockaddr *)&dst_sin6, sizeof(dst_sin6)) != payload_size) { cl_log(LOG_ERR, "sendto(%s) failed: %s", if_name, strerror(errno)); goto err; } status = 0; err: close(fd); free(payload); return status; } /* find the network interface associated with an address */ char* scan_if(struct in6_addr* addr_target, int* plen_target, int use_mask, char* prov_ifname) { FILE *f; static char devname[21]=""; struct in6_addr addr; struct in6_addr mask; unsigned int plen, scope, dad_status, if_idx; unsigned int addr6p[4]; /* open /proc/net/if_inet6 file */ if ((f = fopen(IF_INET6, "r")) == NULL) { return NULL; } /* Loop for each entry */ while (1) { int i; int n; int s; gboolean same = TRUE; i = fscanf(f, "%08x%08x%08x%08x %x %02x %02x %02x %20s\n", &addr6p[0], &addr6p[1], &addr6p[2], &addr6p[3], &if_idx, &plen, &scope, &dad_status, devname); if (i == EOF) { break; } else if (i != 9) { cl_log(LOG_INFO, "Error parsing %s, " "perhaps the format has changed\n", IF_INET6); break; } /* Consider link-local addresses (scope == 0x20) only when * the inerface name is provided, and global addresses * (scope == 0). Skip everything else. */ if (scope != 0) { if (scope != 0x20 || prov_ifname == 0 || *prov_ifname == 0) continue; } /* If specified prefix, only same prefix entry * would be considered. */ if (*plen_target!=0 && plen != *plen_target) { continue; } /* If interface name provided, only same devname entry * would be considered */ if (prov_ifname!=0 && *prov_ifname!=0) { if (strcmp(devname, prov_ifname)) continue; } for (i = 0; i< 4; i++) { addr.s6_addr32[i] = htonl(addr6p[i]); } /* Make the mask based on prefix length */ memset(mask.s6_addr, 0xff, 16); if (use_mask && plen < 128) { n = plen / 32; memset(mask.s6_addr32 + n + 1, 0, (3 - n) * 4); s = 32 - plen % 32; if (s == 32) mask.s6_addr32[n] = 0x0; else mask.s6_addr32[n] = 0xffffffff << s; mask.s6_addr32[n] = htonl(mask.s6_addr32[n]); } /* compare addr and addr_target */ same = TRUE; for (i = 0; i < 4; i++) { if ((addr.s6_addr32[i]&mask.s6_addr32[i]) != (addr_target->s6_addr32[i]&mask.s6_addr32[i])) { same = FALSE; break; } } /* We found it! */ if (same) { fclose(f); *plen_target = plen; return devname; } } fclose(f); return NULL; } /* find a proper network interface to assign the address */ char* find_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname) { - return scan_if(addr_target, plen_target, 1, prov_ifname); + char *best_ifname = scan_if(addr_target, plen_target, 1, prov_ifname); + + /* use the provided ifname and prefix if the address did not match */ + if (best_ifname == NULL && + prov_ifname != 0 && *prov_ifname != 0 && *plen_target != 0) { + cl_log(LOG_INFO, "Could not find a proper interface by the ipv6addr. Using the specified nic:'%s' and cidr_netmask:'%d'", prov_ifname, *plen_target); + return prov_ifname; + } + return best_ifname; } /* get the device name and the plen_target of a special address */ char* get_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname) { return scan_if(addr_target, plen_target, 0, prov_ifname); } int assign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name) { struct in6_ifreq ifr6; /* Get socket first */ int fd; struct ifreq ifr; fd = socket(AF_INET6, SOCK_DGRAM, 0); if (fd < 0) { return 1; } /* Query the index of the if */ strcpy(ifr.ifr_name, if_name); if (ioctl(fd, SIOGIFINDEX, &ifr) < 0) { return -1; } /* Assign the address to the if */ ifr6.ifr6_addr = *addr6; ifr6.ifr6_ifindex = ifr.ifr_ifindex; ifr6.ifr6_prefixlen = prefix_len; if (ioctl(fd, SIOCSIFADDR, &ifr6) < 0) { return -1; } close (fd); return 0; } int unassign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name) { int fd; struct ifreq ifr; struct in6_ifreq ifr6; /* Get socket first */ fd = socket(AF_INET6, SOCK_DGRAM, 0); if (fd < 0) { return 1; } /* Query the index of the if */ strcpy(ifr.ifr_name, if_name); if (ioctl(fd, SIOGIFINDEX, &ifr) < 0) { return -1; } /* Unassign the address to the if */ ifr6.ifr6_addr = *addr6; ifr6.ifr6_ifindex = ifr.ifr_ifindex; ifr6.ifr6_prefixlen = prefix_len; if (ioctl(fd, SIOCDIFADDR, &ifr6) < 0) { return -1; } close (fd); return 0; } #define MINPACKSIZE 64 int is_addr6_available(struct in6_addr* addr6) { struct sockaddr_in6 addr; struct icmp6_hdr icmph; u_char outpack[MINPACKSIZE]; int icmp_sock; int ret; struct iovec iov; u_char packet[MINPACKSIZE]; struct msghdr msg; icmp_sock = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6); memset(&icmph, 0, sizeof(icmph)); icmph.icmp6_type = ICMP6_ECHO_REQUEST; icmph.icmp6_code = 0; icmph.icmp6_cksum = 0; icmph.icmp6_seq = htons(0); icmph.icmp6_id = 0; memset(&outpack, 0, sizeof(outpack)); memcpy(&outpack, &icmph, sizeof(icmph)); memset(&addr, 0, sizeof(struct sockaddr_in6)); addr.sin6_family = AF_INET6; addr.sin6_port = htons(IPPROTO_ICMPV6); memcpy(&addr.sin6_addr,addr6,sizeof(struct in6_addr)); /* Only the first 8 bytes of outpack are meaningful... */ ret = sendto(icmp_sock, (char *)outpack, sizeof(outpack), 0, (struct sockaddr *) &addr, sizeof(struct sockaddr_in6)); if (0 >= ret) { return -1; } iov.iov_base = (char *)packet; iov.iov_len = sizeof(packet); msg.msg_name = &addr; msg.msg_namelen = sizeof(addr); msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_controllen = 0; ret = recvmsg(icmp_sock, &msg, MSG_DONTWAIT); if (0 >= ret) { return -1; } return 0; } static void usage(const char* self) { printf("usage: %s {start|stop|status|monitor|validate-all|meta-data}\n",self); return; } /* Following code is copied from send_arp.c, linux-HA project. */ void byebye(int nsig) { (void)nsig; /* Avoid an "error exit" log message if we're killed */ exit(0); } int create_pid_directory(const char *pid_file) { int status; int return_status = -1; struct stat stat_buf; char* dir; dir = strdup(pid_file); if (!dir) { cl_log(LOG_INFO, "Memory allocation failure: %s", strerror(errno)); return -1; } dirname(dir); status = stat(dir, &stat_buf); if (status < 0 && errno != ENOENT && errno != ENOTDIR) { cl_log(LOG_INFO, "Could not stat pid-file directory " "[%s]: %s", dir, strerror(errno)); goto err; } if (!status) { if (S_ISDIR(stat_buf.st_mode)) { goto out; } cl_log(LOG_INFO, "Pid-File directory exists but is " "not a directory [%s]", dir); goto err; } if (mkdir(dir, S_IRUSR|S_IWUSR|S_IXUSR | S_IRGRP|S_IXGRP) < 0) { cl_log(LOG_INFO, "Could not create pid-file directory " "[%s]: %s", dir, strerror(errno)); goto err; } out: return_status = 0; err: free(dir); return return_status; } int write_pid_file(const char *pid_file) { int pidfilefd; char pidbuf[11]; unsigned long pid; ssize_t bytes; if (*pid_file != '/') { cl_log(LOG_INFO, "Invalid pid-file name, must begin with a " "'/' [%s]\n", pid_file); return -1; } if (create_pid_directory(pid_file) < 0) { return -1; } while (1) { pidfilefd = open(pid_file, O_CREAT|O_EXCL|O_RDWR, S_IRUSR|S_IWUSR); if (pidfilefd < 0) { if (errno != EEXIST) { /* Old PID file */ cl_log(LOG_INFO, "Could not open pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } } else { break; } pidfilefd = open(pid_file, O_RDONLY, S_IRUSR|S_IWUSR); if (pidfilefd < 0) { cl_log(LOG_INFO, "Could not open pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } while (1) { bytes = read(pidfilefd, pidbuf, sizeof(pidbuf)-1); if (bytes < 0) { if (errno == EINTR) { continue; } cl_log(LOG_INFO, "Could not read pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } pidbuf[bytes] = '\0'; break; } if(unlink(pid_file) < 0) { cl_log(LOG_INFO, "Could not delete pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } if (!bytes) { cl_log(LOG_INFO, "Invalid pid in pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } close(pidfilefd); pid = strtoul(pidbuf, NULL, 10); if (pid == ULONG_MAX && errno == ERANGE) { cl_log(LOG_INFO, "Invalid pid in pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } if (kill(pid, SIGKILL) < 0 && errno != ESRCH) { cl_log(LOG_INFO, "Error killing old proccess [%lu] " "from pid-file [%s]: %s", pid, pid_file, strerror(errno)); return -1; } cl_log(LOG_INFO, "Killed old send_arp process [%lu]", pid); } if (snprintf(pidbuf, sizeof(pidbuf), "%u" , getpid()) >= (int)sizeof(pidbuf)) { cl_log(LOG_INFO, "Pid too long for buffer [%u]", getpid()); return -1; } while (1) { bytes = write(pidfilefd, pidbuf, strlen(pidbuf)); if (bytes != strlen(pidbuf)) { if (bytes < 0 && errno == EINTR) { continue; } cl_log(LOG_INFO, "Could not write pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } break; } close(pidfilefd); return 0; } static int meta_data_addr6(void) { const char* meta_data= "\n" "\n" "\n" " 1.0\n" " \n" " This script manages IPv6 alias IPv6 addresses,It can add an IP6\n" " alias, or remove one.\n" " \n" " Manages IPv6 aliases\n" " \n" " \n" " \n" " The IPv6 address this RA will manage \n" " \n" " IPv6 address\n" " \n" " \n" " \n" " \n" " The netmask for the interface in CIDR format. (ie, 24).\n" " The value of this parameter overwrites the value of _prefix_\n" " of ipv6addr parameter.\n" " \n" " Netmask\n" " \n" " \n" " \n" " \n" " The base network interface on which the IPv6 address will\n" " be brought online.\n" " \n" " Network interface\n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" "\n"; printf("%s\n",meta_data); return OCF_SUCCESS; } diff --git a/heartbeat/LVM b/heartbeat/LVM index 584f97293..0423cf4b4 100755 --- a/heartbeat/LVM +++ b/heartbeat/LVM @@ -1,382 +1,364 @@ #!/bin/sh # # # LVM # # Description: Manages an LVM volume as an HA resource # # # Author: Alan Robertson # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 International Business Machines, Inc. # # This code significantly inspired by the LVM resource # in FailSafe by Lars Marowsky-Bree # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 ServeRAID::1::1 LVM::myvolname # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_volgrpname # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { methods=`LVM_methods` methods=`echo $methods | tr ' ' '|'` cat < 1.0 Resource script for LVM. It manages an Linux Volume Manager volume (LVM) as an HA resource. Controls the availability of an LVM Volume Group The name of volume group. Volume group name If set, the volume group will be activated exclusively. Exclusive activation If set, the volume group will be activated even only partial of the physical volumes available. It helps to set to true, when you are using mirroring logical volumes. Activate VG even with partial PV only EOF } # # methods: What methods/operations do we support? # LVM_methods() { cat <&1` - echo "$VGOUT" | grep -i 'Status.*available' >/dev/null - rc=$? - else - VGOUT=`vgdisplay -v $1 2>&1` - echo "$VGOUT" | grep -i 'Status[ \t]*available' >/dev/null + if [ -d /dev/$1 ]; then + test "`cd /dev/$1 && ls`" != "" rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "VG $1 with no logical volumes is not supported by this RA!" + fi fi if [ $rc -ne 0 ]; then - ocf_log $loglevel "LVM Volume $1 is not available (stopped). ${VGOUT}" + ocf_log $loglevel "LVM Volume $1 is not available (stopped)" fi - + if [ "X${2}" = "X" ]; then # status call return return $rc fi # Report on LVM volume status to stdout... if [ $rc -eq 0 ]; then - if - echo "$VGOUT" | grep -i 'Access.*read/write' >/dev/null - then - ocf_log debug "Volume $1 is available read/write (running)" - else - ocf_log debug "Volume $1 is available read-only (running)" - fi + echo "Volume $1 is available (running)" + else + echo "Volume $1 is not available (stopped)" fi - - return $OCF_SUCCESS - + return $rc } # # Monitor the volume - does it really seem to be working? # # LVM_monitor() { - local rc if LVM_status $1 then : OK else ocf_log info "LVM Volume $1 is offline" return $OCF_NOT_RUNNING fi - VGOUT=`vgck $1 2>&1` - rc=$? - if [ $rc -ne 0 ]; then - ocf_log err "LVM Volume $1 is not found. ${VGOUT}:${rc}" - return $OCF_ERR_GENERIC - fi - return $OCF_SUCCESS } # # Enable LVM volume # LVM_start() { local vgchange_options local active_mode # TODO: This MUST run vgimport as well ocf_log info "Activating volume group $1" if [ "$LVM_MAJOR" -eq "1" ]; then ocf_run vgscan $1 else ocf_run vgscan fi active_mode="ly" if ocf_is_true "$OCF_RESKEY_exclusive" ; then active_mode="ey" fi vgchange_options="-a $active_mode" if ocf_is_true "$OCF_RESKEY_partial_activation" ; then vgchange_options="$vgchange_options --partial" fi # for clones (clustered volume groups), we'll also have to force # monitoring, even if disabled in lvm.conf. if ocf_is_clone; then vgchange_options="$vgchange_options --monitor y" fi ocf_run vgchange $vgchange_options $1 || return $OCF_ERR_GENERIC if LVM_status $1; then : OK Volume $1 activated just fine! return $OCF_SUCCESS else ocf_log err "LVM: $1 did not activate correctly" return $OCF_NOT_RUNNING fi } # # Disable the LVM volume # LVM_stop() { vgdisplay "$1" 2>&1 | grep 'Volume group .* not found' >/dev/null && { ocf_log info "Volume group $1 not found" return $OCF_SUCCESS } ocf_log info "Deactivating volume group $1" ocf_run vgchange -a ln $1 || return $OCF_ERR_GENERIC if LVM_status $1 then ocf_log err "LVM: $1 did not stop correctly" return $OCF_ERR_GENERIC fi # TODO: This MUST run vgexport as well return $OCF_SUCCESS } # # Check whether the OCF instance parameters are valid # LVM_validate_all() { check_binary $AWK # Off-the-shelf tests... VGOUT=`vgck ${VOLUME} 2>&1` if [ $? -ne 0 ]; then ocf_log err "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" exit $OCF_ERR_GENERIC fi # Double-check if [ "$LVM_MAJOR" -eq "1" ] then VGOUT=`vgdisplay ${VOLUME} 2>&1` else VGOUT=`vgdisplay -v ${VOLUME} 2>&1` fi if [ $? -ne 0 ]; then ocf_log err "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" exit $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS;; methods) LVM_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) ;; esac if [ -z "$OCF_RESKEY_volgrpname" ] then ocf_log err "You must identify the volume group name!" exit $OCF_ERR_CONFIGURED fi # Get the LVM version number, for this to work we assume(thanks to panjiam): # # LVM1 outputs like this # # # vgchange --version # vgchange: Logical Volume Manager 1.0.3 # Heinz Mauelshagen, Sistina Software 19/02/2002 (IOP 10) # # LVM2 and higher versions output in this format # # # vgchange --version # LVM version: 2.00.15 (2004-04-19) # Library version: 1.00.09-ioctl (2004-03-31) # Driver version: 4.1.0 LVM_VERSION=`vgchange --version 2>&1 | \ $AWK '/Logical Volume Manager/ {print $5"\n"; exit; } /LVM version:/ {printf $3"\n"; exit;}'` rc=$? if ( [ $rc -ne 0 ] || [ -z "$LVM_VERSION" ] ) then ocf_log err "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?" exit $OCF_ERR_INSTALLED fi LVM_MAJOR="${LVM_VERSION%%.*}" VOLUME=$OCF_RESKEY_volgrpname OP_METHOD=$1 # What kind of method was invoked? case "$1" in start) LVM_start $VOLUME exit $?;; stop) LVM_stop $VOLUME exit $?;; status) LVM_status $VOLUME $1 exit $?;; monitor) LVM_monitor $VOLUME exit $?;; validate-all) LVM_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am index ed877f611..adfd97c68 100644 --- a/heartbeat/Makefile.am +++ b/heartbeat/Makefile.am @@ -1,128 +1,131 @@ # Makefile.am for OCF RAs # # Author: Sun Jing Dong # Copyright (C) 2004 IBM # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(ocf_SCRIPTS) $(ocfcommon_DATA) \ $(common_DATA) $(hb_DATA) $(dtd_DATA) \ README INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/linux-ha ocfdir = $(OCF_RA_DIR_PREFIX)/heartbeat dtddir = $(datadir)/$(PACKAGE_NAME) dtd_DATA = ra-api-1.dtd if USE_IPV6ADDR ocf_PROGRAMS = IPv6addr else ocf_PROGRAMS = endif IPv6addr_SOURCES = IPv6addr.c IPv6addr_LDADD = -lplumb $(LIBNETLIBS) ocf_SCRIPTS = ClusterMon \ CTDB \ Dummy \ IPaddr \ IPaddr2 \ drbd \ anything \ AoEtarget \ apache \ asterisk \ nginx \ AudibleAlarm \ conntrackd \ db2 \ Delay \ eDir88 \ EvmsSCC \ Evmsd \ ethmonitor \ exportfs \ Filesystem \ fio \ ids \ iscsi \ ICP \ IPsrcaddr \ iSCSITarget \ iSCSILogicalUnit \ jboss \ LinuxSCSI \ LVM \ lxc \ MailTo \ ManageRAID \ ManageVE \ mysql \ mysql-proxy \ named \ nfsserver \ oracle \ oralsnr \ pingd \ portblock \ postfix \ + pound \ pgsql \ proftpd \ Pure-FTPd \ Raid1 \ Route \ rsyncd \ rsyslog \ SAPDatabase \ SAPInstance \ SendArp \ ServeRAID \ slapd \ SphinxSearchDaemon \ Squid \ Stateful \ SysInfo \ scsi2reservation \ sfex \ symlink \ syslog-ng \ tomcat \ VIPArip \ VirtualDomain \ varnish \ vmware \ WAS \ WAS6 \ WinPopup \ Xen \ Xinetd ocfcommondir = $(OCF_LIB_DIR_PREFIX)/heartbeat ocfcommon_DATA = ocf-shellfuncs \ ocf-binaries \ ocf-directories \ ocf-returncodes \ apache-conf.sh \ - http-mon.sh + http-mon.sh \ + sapdb-nosha.sh \ + sapdb.sh # Legacy locations hbdir = $(sysconfdir)/ha.d hb_DATA = shellfuncs diff --git a/heartbeat/Raid1 b/heartbeat/Raid1 index c4ae50e18..f85f55a1b 100755 --- a/heartbeat/Raid1 +++ b/heartbeat/Raid1 @@ -1,418 +1,412 @@ #!/bin/sh # # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # Raid1 # Description: Manages a software Raid1 device on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # RAID patches: http://people.redhat.com/mingo/raid-patches/ # Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3 # Sympathetic Ear: mailto:linux-raid@vger.kernel.org # # usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} # # # EXAMPLE config file /etc/raidtab.md0 # This file must exist on both machines! # # raiddev /dev/md0 # raid-level 1 # nr-raid-disks 2 # chunk-size 64k # persistent-superblock 1 # #nr-spare-disks 0 # device /dev/sda1 # raid-disk 0 # device /dev/sdb1 # raid-disk 1 # # EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf) # # DEVICE /dev/sdb1 /dev/sdc1 # ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799 ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} EOT } meta_data() { cat < 1.0 Resource script for RAID1. It manages a software Raid1 device on a shared storage medium. Manages a software RAID1 device on shared storage The RAID configuration file. e.g. /etc/raidtab or /etc/mdadm.conf. RAID config file The block device to use. Alternatively, set to "auto" to manage all devices specified in raidconf. block device The value for the homehost directive; this is an mdadm feature to protect RAIDs against being activated by accident. It is recommended to create RAIDs managed by the cluster with "homehost" set to a special value, so they are not accidentially auto-assembled by nodes not supposed to own them. Homehost for mdadm END } list_conf_arrays() { test -f $RAIDCONF || { ocf_log err "$RAIDCONF gone missing!" exit $OCF_ERR_GENERIC } grep ^ARRAY $RAIDCONF | awk '{print $2}' } forall() { local func=$1 local checkall=$2 local mddev rc=0 for mddev in `list_conf_arrays`; do $func $mddev rc=$(($rc | $?)) [ "$checkall" = all ] && continue [ $rc -ne 0 ] && return $rc done return $rc } # # START: Start up the RAID device # raid1_start() { raid1_monitor rc=$? if [ $rc -eq $OCF_SUCCESS ]; then # md already online, nothing to do. return $OCF_SUCCESS fi if [ $rc -ne $OCF_NOT_RUNNING ]; then # If the array is in a broken state, this agent doesn't # know how to repair that. ocf_log err "$MDDEV in a broken state; cannot start (rc=$rc)" return $OCF_ERR_GENERIC fi - # Insert SCSI module - $MODPROBE scsi_hostadapter - if [ $? -ne 0 ] ; then - ocf_log warn "Couldn't insert SCSI module." - fi - # Insert raid personality module $MODPROBE raid1 if [ $? -ne 0 ] ; then # It is not fatal, chance is that we have raid1 builtin... ocf_log warn "Couldn't insert RAID1 module" fi grep -q "^Personalities.*\[raid1\]" /proc/mdstat 2>/dev/null if [ $? -ne 0 ] ; then ocf_log err "We don't have RAID1 support! Exiting" return $OCF_ERR_GENERIC fi if [ $HAVE_RAIDTOOLS = "true" ]; then # Run raidstart to start up the RAID array $RAIDSTART --configfile $RAIDCONF $MDDEV else # Run mdadm if [ "$MDDEV" = auto ]; then $MDADM --assemble --scan --config=$RAIDCONF $MDADM_HOMEHOST else $MDADM --assemble $MDDEV --config=$RAIDCONF $MDADM_HOMEHOST fi fi raid1_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS else ocf_log err "Couldn't start RAID for $MDDEV" return $OCF_ERR_GENERIC fi } # # STOP: stop the RAID device # mark_readonly() { local mddev=$1 local rc ocf_log info "Attempting to mark array $mddev readonly" $MDADM --readonly $mddev --config=$RAIDCONF rc=$? if [ $rc -ne 0 ]; then ocf_log err "Failed to set $mddev readonly (rc=$rc)" fi return $rc } raid1_stop_one() { ocf_log info "Stopping array $1" $MDADM --stop $1 --config=$RAIDCONF --wait-clean -W } raid1_stop() { local rc # See if the MD device is already cleanly stopped: if [ "$MDDEV" != auto ]; then raid1_monitor if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi fi # Turn off raid if [ $HAVE_RAIDTOOLS = "true" ]; then $RAIDSTOP --configfile $RAIDCONF $MDDEV else if [ "$MDDEV" = auto ]; then forall raid1_stop_one all else raid1_stop_one $MDDEV fi fi rc=$? if [ $rc -ne 0 ]; then ocf_log err "Couldn't stop RAID for $MDDEV (rc=$rc)" if [ $HAVE_RAIDTOOLS != "true" ]; then if [ "$MDDEV" = auto ]; then forall mark_readonly all else mark_readonly $MDDEV fi fi return $OCF_ERR_GENERIC fi if [ "$MDDEV" = auto ]; then local mddev for mddev in `list_conf_arrays`; do raid1_monitor_one $mddev rc=$? [ $rc -ne $OCF_NOT_RUNNING ] && break done else raid1_monitor_one $MDDEV rc=$? fi if [ $rc -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi ocf_log err "RAID $MDDEV still active after stop command!" return $OCF_ERR_GENERIC } # # monitor: a less noisy status # raid1_monitor_one() { local mddev=$1 local md=`echo $mddev | sed 's,/dev/,,'` local rc TRY_READD=0 # check if the md device exists first if [ ! -b $mddev ]; then ocf_log info "$mddev is not a block device" return $OCF_NOT_RUNNING fi if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then ocf_log info "$md not found in /proc/mdstat" return $OCF_NOT_RUNNING fi if [ $HAVE_RAIDTOOLS != "true" ]; then $MDADM --detail --test $mddev >/dev/null 2>&1 ; rc=$? case $rc in 0) ;; 1) ocf_log warn "$mddev has at least one failed device." TRY_READD=1 ;; 2) ocf_log err "$mddev has failed." return $OCF_ERR_GENERIC ;; 4) ocf_log err "mdadm failed on $mddev." return $OCF_ERR_GENERIC ;; *) ocf_log err "mdadm returned an unknown result ($rc)." return $OCF_ERR_GENERIC ;; esac fi if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \ -a $TRY_READD -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then ocf_log info "Attempting recovery sequence to re-add devices on $mddev:" $MDADM $mddev --fail detached $MDADM $mddev --remove failed $MDADM $mddev --re-add missing # TODO: At this stage, there's nothing to actually do # here. Either this worked or it did not. fi if ! dd if=$mddev count=1 bs=512 of=/dev/null \ iflag=direct >/dev/null 2>&1 ; then ocf_log err "$mddev: I/O error on read" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } raid1_monitor() { if [ "$MDDEV" = auto ]; then forall raid1_monitor_one else raid1_monitor_one $MDDEV fi } # # STATUS: is the raid device online or offline? # raid1_status() { # See if the MD device is online local rc raid1_monitor rc=$? if [ $rc -ne $OCF_SUCCESS ]; then echo "stopped" else echo "running" fi return $rc } raid1_validate_all() { return $OCF_SUCCESS } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac RAIDCONF="$OCF_RESKEY_raidconf" MDDEV="$OCF_RESKEY_raiddev" if [ -z "$RAIDCONF" ] ; then ocf_log err "Please set OCF_RESKEY_raidconf!" exit $OCF_ERR_CONFIGURED fi if [ ! -r "$RAIDCONF" ] ; then ocf_log err "Configuration file [$RAIDCONF] does not exist, or can not be opend!" exit $OCF_ERR_INSTALLED fi if [ -z "$MDDEV" ] ; then ocf_log err "Please set OCF_RESKEY_raiddev to the Raid device you want to control!" exit $OCF_ERR_CONFIGURED fi HAVE_RAIDTOOLS=false if have_binary $MDADM >/dev/null 2>&1 ; then if [ -n "$OCF_RESKEY_homehost" ]; then MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}" else MDADM_HOMEHOST="" fi else check_binary $RAIDSTART HAVE_RAIDTOOLS=true fi if [ "$MDDEV" = "auto" -a $HAVE_RAIDTOOLS = true ]; then ocf_log err "autoconf supported only with mdadm!" exit $OCF_ERR_INSTALLED fi # At this stage, # [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM, # otherwise we have raidtools (raidstart and raidstop) # Look for how we are called case "$1" in start) raid1_start ;; stop) raid1_stop ;; status) raid1_status ;; monitor) raid1_monitor ;; validate-all) raid1_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase index 9a695bf46..3b77206b4 100755 --- a/heartbeat/SAPDatabase +++ b/heartbeat/SAPDatabase @@ -1,1008 +1,331 @@ #!/bin/sh # # SAPDatabase # # Description: Manages any type of SAP supported database instance # as a High-Availability OCF compliant resource. # # Author: Alexander Krauth, October 2006 # Support: linux@sap.com # License: GNU General Public License (GPL) -# Copyright: (c) 2006, 2007 Alexander Krauth +# Copyright: (c) 2006, 2007, 2010, 2012 Alexander Krauth # # An example usage: # See usage() function below for more details... # # OCF instance parameters: -# OCF_RESKEY_SID -# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) -# OCF_RESKEY_DBTYPE -# OCF_RESKEY_NETSERVICENAME (optional, non standard name of Oracle Listener) -# OCF_RESKEY_DBJ2EE_ONLY (optional, default is false) -# OCF_RESKEY_JAVA_HOME (optional, only needed if DBJ2EE_ONLY is true and JAVA_HOME enviroment variable is not set) -# OCF_RESKEY_STRICT_MONITORING (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck) -# OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery, default is false) -# OCF_RESKEY_DIR_BOOTSTRAP (optional, if non standard J2EE server directory) -# OCF_RESKEY_DIR_SECSTORE (optional, if non standard J2EE secure store directory) -# OCF_RESKEY_DB_JARS (optional, if maintained in bootstrap.properties, mandatory for WebAS Java 7.10) -# OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) -# OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) -# OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) -# OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) -# -# ToDo: -# Remove all the database dependend stuff from the agent and use -# saphostcontrol daemon as soon as SAP will release it. +# OCF_RESKEY_SID +# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) +# OCF_RESKEY_DBTYPE (mandatory, one of the following values: ORA,ADA,DB6,SYB,HDB) +# OCF_RESKEY_DBINSTANCE (optional, Database instance name, if not equal to SID) +# OCF_RESKEY_STRICT_MONITORING (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck) +# OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery, default is false) +# OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor all database services) +# OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) +# OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) +# OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) +# OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) +# Deprecated parameters: +# OCF_RESKEY_NETSERVICENAME +# OCF_RESKEY_DBJ2EE_ONLY +# OCF_RESKEY_JAVA_HOME +# OCF_RESKEY_DIR_BOOTSTRAP +# OCF_RESKEY_DIR_SECSTORE +# OCF_RESKEY_DB_JARS # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### SH=/bin/sh usage() { methods=`sapdatabase_methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 ($methods) $0 manages a SAP database of any type as an HA resource. - Currently Oracle, MaxDB and DB/2 UDB are supported. + Currently Oracle, MaxDB, DB/2 UDB, Sybase ASE and SAP HANA Database are supported. ABAP databases as well as JAVA only databases are supported. The 'start' operation starts the instance. The 'stop' operation stops the instance. The 'status' operation reports whether the instance is running The 'monitor' operation reports whether the instance seems to be working The 'recover' operation tries to recover the instance after a crash (instance will be stopped first!) The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports ! } meta_data() { cat < -1.93 +2.06 +Manages a SAP database instance as an HA resource. Resource script for SAP databases. It manages a SAP database of any type as an HA resource. - -Manages any SAP database (based on Oracle, MaxDB, or DB2) +The purpose of the resource agent is to start, stop and monitor the database instance of a SAP system. Together with the RDBMS system it will also control the related network service for the database. Like the Oracle Listener and the xserver of MaxDB. +The resource agent expects a standard SAP installation of the database and therefore needs less parameters to configure. +The resource agent supports the following databases: +- Oracle 10.2 and 11.2 +- DB/2 UDB for Windows and Unix 9.x +- SAP-DB / MaxDB 7.x +- Sybase ASE 15.7 +- SAP HANA Database since 1.00 - with SAP node 1625203 (http://sdn.sap.com) + +In fact this resource agent does not run any database commands directly. It uses the SAP standard process SAPHostAgent to control the database. +The SAPHostAgent must be installed on each cluster node locally. It will not work, if you try to run the SAPHostAgent also as a HA resource. +Please follow SAP note 1031096 for the installation of SAPHostAgent. +The required minimum version of SAPHostAgent is: +Release: 7.20 +Patch Number: 90 +or compile time after: Dec 17 2011 + - The unique SAP system identifier. e.g. P01 - SAP system ID + The unique database system identifier. e.g. P01 + Database system ID - The full qualified path where to find sapstartsrv and sapcontrol. - path of sapstartsrv and sapcontrol - + The full qualified path where to find saphostexec and saphostctrl. +Usually you can leave this empty. Then the default: /usr/sap/hostctrl/exe is used. + + path of saphostexec and saphostctrl + - The name of the database vendor you use. Set either: ORA,DB6,ADA + The name of the database vendor you use. Set either: ADA, DB6, ORA, SYB, HDB database vendor + + Must be used for special database implementations, when database instance name is not equal to the SID (e.g. Oracle DataGuard) + Database instance name, if not equal to SID + + - The Oracle TNS listener name. - listener name + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore - If you do not have a ABAP stack installed in the SAP database, set this to TRUE - only JAVA stack installed - + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore + - This is only needed if the DBJ2EE_ONLY parameter is set to true. Enter the path to the Java SDK which is used by the SAP WebAS Java - Path to Java SDK + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore - This controls how the resource agent monitors the database. If set to true, it will use SAP tools to test the connect to the database. Do not use with Oracle, because it will result in unwanted failovers in case of an archiver stuck + This controls how the resource agent monitors the database. If set to true, it will use 'saphostctrl -function GetDatabaseStatus' to test the database state. If set to false, only operating system processes are monitored. Activates application level monitoring - The SAPDatabase resource agent tries to recover a failed start attempt automaticaly one time. This is done by running a forced abort of the RDBMS and/or executing recovery commands. + If you set this to true, 'saphostctrl -function StartDatabase' will always be called with the '-force' option. Enable or disable automatic startup recovery + + Defines which services are monitored by the SAPDatabase resource agent, if STRICT_MONITORING is set to true. Service names must correspond with the output of the 'saphostctrl -function GetDatabaseStatus' command. + Database services to monitor + + - The full qualified path where to find the J2EE instance bootstrap directory. e.g. /usr/sap/P01/J00/j2ee/cluster/bootstrap - path to j2ee bootstrap directory + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore - The full qualified path where to find the J2EE security store directory. e.g. /usr/sap/P01/SYS/global/security/lib/tools - path to j2ee secure store directory + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore - The full qualified filename of the jdbc driver for the database connection test. It will be automaticaly read from the bootstrap.properties file in Java engine 6.40 and 7.00. For Java engine 7.10 and higher the parameter is mandatory. - file name of the jdbc driver + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore The full qualified path where to find a script or program which should be executed before this resource gets started. path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got started. path to a post-start script The full qualified path where to find a script or program which should be executed before this resource gets stopped. path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got stopped. path to a post-start script END } -trap_handler() { - rm -f $TEMPFILE - exit $OCF_ERR_GENERIC -} - - -# -# listener_start: Start the given listener -# -listener_start() { - local orasid="ora`echo $SID | tr '[:upper:]' '[:lower:]'`" - local lrc=$OCF_SUCCESS - local output - output=`echo "lsnrctl start $NETSERVICENAME" | su - $orasid 2>&1` - if [ $? -eq 0 ] - then - ocf_log info "Oracle Listener $NETSERVICENAME started: $output" - lrc=$OCF_SUCCESS - else - ocf_log err "Oracle Listener $NETSERVICENAME start failed: $output" - lrc=$OCF_ERR_GENERIC - fi - return $lrc -} - -# -# listener_stop: Stop the given listener -# -listener_stop() { - local orasid="ora`echo $SID | tr '[:upper:]' '[:lower:]'`" - local lrc=$OCF_SUCCESS - if - listener_status - then - : listener is running, trying to stop it later... - else - return $OCF_SUCCESS - fi - local output - output=`echo "lsnrctl stop $NETSERVICENAME" | su - $orasid 2>&1` - if [ $? -eq 0 ] - then - ocf_log info "Oracle Listener $NETSERVICENAME stopped: $output" - else - ocf_log err "Oracle Listener $NETSERVICENAME stop failed: $output" - lrc=$OCF_ERR_GENERIC - fi - return $lrc -} - -# -# listener_status: is the given listener running? -# -listener_status() { - local lrc=$OCF_SUCCESS - local orasid="ora`echo $SID | tr '[:upper:]' '[:lower:]'`" - # Note: ps cuts off it's output at column $COLUMNS, so "ps -ef" can not be used here - # as the output might be to long. - local cnt=`ps efo args --user $orasid | grep $NETSERVICENAME | grep -c tnslsnr` - if [ $cnt -eq 1 ] - then - lrc=$OCF_SUCCESS - else - ocf_log info "listener process not running for $NETSERVICENAME for $SID" - lrc=$OCF_ERR_GENERIC - fi - return $lrc -} - -# -# x_server_start: Start the given x_server -# -x_server_start() { - local rc=$OCF_SUCCESS - local output - output=`echo "x_server start" | su - $sidadm 2>&1` - if [ $? -eq 0 ] - then - ocf_log info "MaxDB x_server start: $output" - lrc=$OCF_SUCCESS - else - ocf_log err "MaxDB x_server start failed: $output" - lrc=$OCF_ERR_GENERIC - fi - return $lrc -} - -# -# x_server_stop: Stop the x_server -# -x_server_stop() { - local lrc=$OCF_SUCCESS - local output - output=`echo "x_server stop" | su - $sidadm 2>&1` - if [ $? -eq 0 ] - then - ocf_log info "MaxDB x_server stop: $output" - else - ocf_log err "MaxDB x_server stop failed: $output" - lrc=$OCF_ERR_GENERIC - fi - return $lrc -} - -# -# x_server_status: is the x_server running? -# -x_server_status() { - local lrc=$OCF_SUCCESS - local sdbuser=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'` - # Note: ps cuts off it's output at column $COLUMNS, so "ps -ef" can not be used here - # as the output might be to long. - local cnt=`ps efo args --user $sdbuser | grep -c vserver` - if [ $cnt -ge 1 ] - then - lrc=$OCF_SUCCESS - else - ocf_log info "x_server process not running" - lrc=$OCF_ERR_GENERIC - fi - return $lrc -} - -# -# oracle_stop: Stop the Oracle database without any condition -# -oracle_stop() { -echo '#!/bin/sh -LOG=$HOME/stopdb.log -date > $LOG - -if [ -x "${ORACLE_HOME}/bin/sqlplus" ] -then - SRVMGRDBA_EXE="${ORACLE_HOME}/bin/sqlplus" -else - echo "Can not find executable sqlplus" >> $LOG - exit 1 -fi - -$SRVMGRDBA_EXE /NOLOG >> $LOG << ! -connect / as sysdba -shutdown immediate -exit -! -rc=$? -cat $LOG -exit $rc' > $TEMPFILE - -chmod 700 $TEMPFILE -chown $sidadm $TEMPFILE - -su - $sidadm -c $TEMPFILE -retcode=$? -rm -f $TEMPFILE - -if [ $retcode -eq 0 ]; then - sapdatabase_status - if [ $? -ne $OCF_NOT_RUNNING ]; then - retcode=1 - fi -fi - -return $retcode -} - -# -# maxdb_stop: Stop the MaxDB database without any condition -# -maxdb_stop() { - -# x_Server must be running to stop database -x_server_status -if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi - -if [ $DBJ2EE_ONLY -eq 1 ]; then - userkey=c_J2EE -else - userkey=c -fi - -echo "#!/bin/sh -LOG=\$HOME/stopdb.log -date > \$LOG -echo \"Stop database with xuserkey >$userkey<\" >> \$LOG -dbmcli -U ${userkey} db_offline >> \$LOG 2>&1 -exit \$?" > $TEMPFILE - -chmod 700 $TEMPFILE -chown $sidadm $TEMPFILE - -su - $sidadm -c $TEMPFILE -retcode=$? -rm -f $TEMPFILE - -if [ $retcode -eq 0 ]; then - sapdatabase_status - if [ $? -ne $OCF_NOT_RUNNING ]; then - retcode=1 - fi -fi - -return $retcode -} - -# -# db6udb_stop: Stop the DB2/UDB database without any condition -# -db6udb_stop() { -echo '#!/bin/sh -LOG=$HOME/stopdb.log -date > $LOG -echo "Shut down the database" >> $LOG -$INSTHOME/sqllib/bin/db2 deactivate database $DB2DBDFT |tee -a $LOG 2>&1 -$INSTHOME/sqllib/adm/db2stop force |tee -a $LOG 2>&1 -exit $?' > $TEMPFILE - -chmod 700 $TEMPFILE -chown $sidadm $TEMPFILE - -su - $sidadm -c $TEMPFILE -retcode=$? -rm -f $TEMPFILE - -if [ $retcode -eq 0 ]; then - sapdatabase_status - if [ $? -ne $OCF_NOT_RUNNING ]; then - retcode=1 - fi -fi - -return $retcode -} - -# -# oracle_recover: try to clean up oracle after a crash -# -oracle_recover() { -echo '#!/bin/sh -LOG=$HOME/recover.log -date > $LOG -echo "Logfile written by heartbeat SAPDatabase resource agent" >> $LOG - -if [ -x "${ORACLE_HOME}/bin/sqlplus" ] -then - SRVMGRDBA_EXE="${ORACLE_HOME}/bin/sqlplus" -else - echo "Can not find executable sqlplus" >> $LOG - exit 1 -fi - -$SRVMGRDBA_EXE /NOLOG >> $LOG << ! -connect / as sysdba -shutdown abort -startup mount -alter database end backup; -alter database open; -exit -! -rc=$? -cat $LOG -exit $rc' > $TEMPFILE - - chmod 700 $TEMPFILE - chown $sidadm $TEMPFILE - - su - $sidadm -c $TEMPFILE - retcode=$? - rm -f $TEMPFILE - - return $retcode -} - -# -# maxdb_recover: try to clean up MaxDB after a crash -# -maxdb_recover() { - # x_Server must be running to stop database - x_server_status - if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi - - if [ $DBJ2EE_ONLY -eq 1 ]; then - userkey=c_J2EE - else - userkey=c - fi - -echo "#!/bin/sh -LOG=\$HOME/recover.log -date > \$LOG -echo \"Logfile written by heartbeat SAPDatabase resource agent\" >> \$LOG -echo \"Cleanup database with xuserkey >$userkey<\" >> \$LOG -echo \"db_stop\" >> \$LOG 2>&1 -dbmcli -U ${userkey} db_stop >> \$LOG 2>&1 -echo \"db_clear\" >> \$LOG 2>&1 -dbmcli -U ${userkey} db_clear >> \$LOG 2>&1 -echo \"db_online\" >> \$LOG 2>&1 -dbmcli -U ${userkey} db_online >> \$LOG 2>&1 -rc=\$? -cat \$LOG -exit \$rc" > $TEMPFILE - - chmod 700 $TEMPFILE - chown $sidadm $TEMPFILE - - su - $sidadm -c $TEMPFILE - retcode=$? - rm -f $TEMPFILE - - return $retcode -} - -# -# db6udb_recover: try to recover DB/2 after a crash -# -db6udb_recover() { - db2sid="db2`echo $SID | tr '[:upper:]' '[:lower:]'`" - -echo '#!/bin/sh -LOG=$HOME/recover.log -date > $LOG -echo "Logfile written by heartbeat SAPDatabase resource agent" >> $LOG -$INSTHOME/sqllib/bin/db2_kill >> $LOG 2>&1 -$INSTHOME/sqllib/adm/db2start >> $LOG 2>&1 -$INSTHOME/sqllib/bin/db2 activate database $DB2DBDFT >> $LOG 2>&1 -rc=$? -cat $LOG -exit $rc' > $TEMPFILE - - chmod 700 $TEMPFILE - chown $db2sid $TEMPFILE - - su - $db2sid -c $TEMPFILE - retcode=$? - rm -f $TEMPFILE - - return $retcode -} # # methods: What methods/operations do we support? # sapdatabase_methods() { cat <<-! start stop status monitor - recover + recover validate-all methods meta-data usage ! } # # sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. # This specialties do not allow a totally generic SAP cluster resource agent. # Someone should write a resource agent for each additional process you need, if it # is required to monitor that process within the cluster manager. To enable # you to extent this resource agent without developing a new one, this user exit # was introduced. # sapuserexit() { NAME="$1" VALUE="$2" if [ -n "$VALUE" ] then - if [ -x "$VALUE" ] + if have_binary "$VALUE" then ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}" "$VALUE" >/dev/null 2>&1 ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?" else ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable" fi fi - return 0 + return $OCF_SUCCESS } # -# sapdatabase_start : Start the SAP database +# saphostctrl_installed # -sapdatabase_start() { - sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT" +saphostctrl_installed() { + OCF_RESKEY_DIR_EXECUTABLE_default="/usr/sap/hostctrl/exe" + : ${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}} + SAPHOSTCTRL="${OCF_RESKEY_DIR_EXECUTABLE}/saphostctrl" + SAPHOSTEXEC="${OCF_RESKEY_DIR_EXECUTABLE}/saphostexec" + SAPHOSTSRV="${OCF_RESKEY_DIR_EXECUTABLE}/sapstartsrv" + SAPHOSTOSCOL="${OCF_RESKEY_DIR_EXECUTABLE}/saposcol" - case $DBTYPE in - ADA) x_server_start - ;; - ORA) listener_start - ;; - esac - - output=`su - $sidadm -c $SAPSTARTDB` - rc=$? - - if [ $DBJ2EE_ONLY -eq 1 ] - then - sapdatabase_monitor 1 - rc=$? - fi - - if [ $rc -ne 0 -a $OCF_RESKEY_AUTOMATIC_RECOVER -eq 1 ] - then - ocf_log warn "SAP database $SID start failed: $output" - ocf_log warn "Try to recover database $SID" - - output='' - sapdatabase_recover - rc=$? - fi - - if [ $rc -eq 0 ] - then - ocf_log info "SAP database $SID started: $output" - rc=$OCF_SUCCESS - sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT" - else - ocf_log err "SAP database $SID start failed: $output" - rc=$OCF_ERR_GENERIC - fi - - return $rc -} - -# -# sapdatabase_stop: Stop the SAP database -# -sapdatabase_stop() { - - sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT" - - # use of the stopdb kernel script is not possible, because there are to may checks in that - # script. We want to stop the database regardless of anything. - #output=`su - $sidadm -c $SAPSTOPDB` - - case $DBTYPE in - ORA) output=`oracle_stop` - ;; - ADA) output=`maxdb_stop` - ;; - DB6) output=`db6udb_stop` - ;; - esac - - if [ $? -eq 0 ] - then - ocf_log info "SAP database $SID stopped: $output" - rc=$OCF_SUCCESS - else - ocf_log err "SAP database $SID stop failed: $output" - rc=$OCF_ERR_GENERIC - fi - - case $DBTYPE in - ORA) listener_stop - ;; - ADA) x_server_stop - ;; - esac - - sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT" - - return $rc -} - - -# -# sapdatabase_monitor: Can the given database instance do anything useful? -# -sapdatabase_monitor() { - strict=$1 - - sapdatabase_status - rc=$? - if [ $rc -ne $OCF_SUCCESS ]; then - return $rc - fi - - case $DBTYPE in - ADA) x_server_status - if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi - ;; - ORA) listener_status - if [ $? -ne $OCF_SUCCESS ]; then listener_start; fi - ;; - esac - - if [ $strict -eq 0 ] - then - return $rc - else - if [ $DBJ2EE_ONLY -eq 0 ] - then - output=`echo "$SAPDBCONNECT -d -w /dev/null" | su $sidadm 2>&1` - if [ $? -le 4 ] - then - rc=$OCF_SUCCESS - else - rc=$OCF_NOT_RUNNING - fi - else - MYCP="" - EXECMD="" - - # WebAS Java 6.40+7.00 - IAIK_JCE="$SECSTORE"/iaik_jce.jar - IAIK_JCE_EXPORT="$SECSTORE"/iaik_jce_export.jar - EXCEPTION="$BOOTSTRAP"/exception.jar - LOGGING="$BOOTSTRAP"/logging.jar - OPENSQLSTA="$BOOTSTRAP"/opensqlsta.jar - TC_SEC_SECSTOREFS="$BOOTSTRAP"/tc_sec_secstorefs.jar - JDDI="$BOOTSTRAP"/../server0/bin/ext/jdbdictionary/jddi.jar - ANTLR="$BOOTSTRAP"/../server0/bin/ext/antlr/antlr.jar - FRAME="$BOOTSTRAP"/../server0/bin/system/frame.jar - - # only start jdbcconnect when all jars available - if [ -f "$EXCEPTION" -a -f "$LOGGING" -a -f "$OPENSQLSTA" -a -f "$TC_SEC_SECSTOREFS" -a -f "$JDDI" -a -f "$ANTLR" -a -f "$FRAME" -a -f "$SAPDBCONNECT" ] - then - MYCP=".:$FRAME:$ANTLR:$JDDI:$IAIK_JCE_EXPORT:$IAIK_JCE:$EXCEPTION:$LOGGING:$OPENSQLSTA:$TC_SEC_SECSTOREFS:$DB_JARS:$SAPDBCONNECT" - EXECMD="com.sap.inst.jdbc.connect.JdbcCon -sec $SID:$SID" - else - # WebAS Java 7.10 - LAUNCHER=${BOOTSTRAP}/sap.com~tc~bl~offline_launcher~impl.jar - - if [ -f "$DB_JARS" -a -f "$SAPDBCONNECT" -a -f "$LAUNCHER" ] - then - MYCP="$LAUNCHER" - EXECMD="com.sap.engine.offline.OfflineToolStart com.sap.inst.jdbc.connect.JdbcCon ${SAPDBCONNECT}:${SECSTORE}:${DB_JARS}:${BOOTSTRAP} -sec $SID:$SID" - fi - fi - - if [ -n "$EXECMD" ] - then - output=`${JAVA_HOME}/bin/java -cp $MYCP $EXECMD 2> /dev/null` - if [ $? -le 0 ] - then - rc=$OCF_SUCCESS - else - rc=$OCF_NOT_RUNNING - fi - else - output="Cannot find all jar files needed for database monitoring." - rc=$OCF_ERR_GENERIC - fi - fi - fi - - if [ $rc -ne $OCF_SUCCESS ] - then - ocf_log err "The SAP database $SID ist not running: $output" - fi - return $rc -} - - -# -# sapdatabase_status: Are there any database processes on this host ? -# -sapdatabase_status() { - case $DBTYPE in - ADA) SEARCH="$SID/db/pgm/kernel" - SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'` - SNUM=2 - ;; - ORA) SEARCH="ora_[a-z][a-z][a-z][a-z]_" - SUSER="ora`echo $SID | tr '[:upper:]' '[:lower:]'`" - SNUM=4 - ;; - DB6) SEARCH="db2[a-z][a-z][a-z]" - SUSER="db2`echo $SID | tr '[:upper:]' '[:lower:]'`" - SNUM=2 - ;; - esac - - # Note: ps cuts off it's output at column $COLUMNS, so "ps -ef" can not be used here - # as the output might be to long. - cnt=`ps efo args --user $SUSER 2> /dev/null | grep -c "$SEARCH"` - if [ $cnt -ge $SNUM ] - then - rc=$OCF_SUCCESS - else - # ocf_log info "Database Instance $SID is not running on `hostname`" - rc=$OCF_NOT_RUNNING - fi - return $rc -} - - -# -# sapdatabase_recover: -# -sapdatabase_recover() { - - case $DBTYPE in - ORA) recoutput=`oracle_recover` - ;; - ADA) recoutput=`maxdb_recover` - ;; - DB6) recoutput=`db6udb_recover` - ;; - esac - - sapdatabase_monitor 1 - retcode=$? - - if [ $retcode -eq $OCF_SUCCESS ] - then - ocf_log info "Recover of SAP database $SID was successful: $recoutput" - else - ocf_log err "Recover of SAP database $SID failed: $recoutput" - fi - - return $retcode -} - - -# -# sapdatabase_validate: Check the symantic of the input parameters -# -sapdatabase_validate() { - rc=$OCF_SUCCESS - if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ] - then - ocf_log err "Parsing parameter SID: '$SID' is not a valid system ID!" - rc=$OCF_ERR_ARGS - fi - - case "$DBTYPE" in - ORA|ADA|DB6) ;; - *) ocf_log err "Parsing parameter DBTYPE: '$DBTYPE' is not a supported database type!" - rc=$OCF_ERR_ARGS ;; - esac - - return $rc + have_binary $SAPHOSTCTRL && have_binary $SAPHOSTEXEC } # # 'main' starts here... # if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi -# Set a tempfile and make sure to clean it up again -TEMPFILE="/tmp/SAPDatabase.$$.tmp" -trap trap_handler INT TERM - # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage) usage exit $OCF_SUCCESS;; methods) sapdatabase_methods exit $?;; *);; esac -US=`id -u -n` -US=`echo $US` -if - [ $US != root ] +if ! ocf_is_root then ocf_log err "$0 must be run as root" exit $OCF_ERR_PERM fi # mandatory parameter check if [ -z "$OCF_RESKEY_SID" ]; then ocf_log err "Please set OCF_RESKEY_SID to the SAP system id!" exit $OCF_ERR_ARGS fi SID=`echo "$OCF_RESKEY_SID"` if [ -z "$OCF_RESKEY_DBTYPE" ]; then - ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ORA,ADA,DB6)!" + ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ADA,DB6,ORA,SYB,HDB)!" exit $OCF_ERR_ARGS fi -DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr "[a-z]" "[A-Z]"` - -# optional OCF parameters, we try to guess which directories are correct -EXESTARTDB="startdb" -EXESTOPDB="stopdb" -EXEDBCONNECT="R3trans" -if [ -z "$OCF_RESKEY_DBJ2EE_ONLY" ]; then - DBJ2EE_ONLY=0 -else - case "$OCF_RESKEY_DBJ2EE_ONLY" in - 1|true|TRUE|yes|YES) DBJ2EE_ONLY=1 - EXESTARTDB="startj2eedb" - EXESTOPDB="stopj2eedb" - EXEDBCONNECT="jdbcconnect.jar" - ;; - 0|false|FALSE|no|NO) DBJ2EE_ONLY=0;; - *) ocf_log err "Parsing parameter DBJ2EE_ONLY: '$DBJ2EE_ONLY' is not a boolean value!" - exit $OCF_ERR_ARGS ;; - esac -fi +DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr '[:lower:]' '[:upper:]'` -if [ -z "$OCF_RESKEY_NETSERVICENAME" ]; then - case "$DBTYPE" in - ORA|ora) NETSERVICENAME="LISTENER";; - *) NETSERVICENAME="";; - esac -else - NETSERVICENAME="$OCF_RESKEY_NETSERVICENAME" -fi -if [ -z "$OCF_RESKEY_STRICT_MONITORING" ]; then - OCF_RESKEY_STRICT_MONITORING=0 +# source functions and initialize global variables +if saphostctrl_installed; then + . ${OCF_FUNCTIONS_DIR}/sapdb.sh else - case "$OCF_RESKEY_STRICT_MONITORING" in - 1|true|TRUE|yes|YES) OCF_RESKEY_STRICT_MONITORING=1;; - 0|false|FALSE|no|NO) OCF_RESKEY_STRICT_MONITORING=0;; - *) ocf_log err "Parsing parameter STRICT_MONITORING: '$OCF_RESKEY_STRICT_MONITORING' is not a boolean value!" - exit $OCF_ERR_ARGS ;; - esac + . ${OCF_FUNCTIONS_DIR}/sapdb-nosha.sh fi +sapdatabase_init -PATHLIST=" -$OCF_RESKEY_DIR_EXECUTABLE -/usr/sap/$SID/*/exe -/usr/sap/$SID/SYS/exe/run -/sapmnt/$SID/exe -" -DIR_EXECUTABLE="" -for EXEPATH in $PATHLIST -do - if [ -x $EXEPATH/$EXESTARTDB -a -x $EXEPATH/$EXESTOPDB -a -x $EXEPATH/$EXEDBCONNECT ] - then - DIR_EXECUTABLE=$EXEPATH - SAPSTARTDB=$EXEPATH/$EXESTARTDB - SAPSTOPDB=$EXEPATH/$EXESTOPDB - SAPDBCONNECT=$EXEPATH/$EXEDBCONNECT - break - fi -done -if [ -z "$DIR_EXECUTABLE" ] -then - ocf_log warn "Cannot find $EXESTARTDB,$EXESTOPDB and $EXEDBCONNECT executable, please set DIR_EXECUTABLE parameter!" - exit $OCF_NOT_RUNNING -fi -if [ $DBJ2EE_ONLY -eq 1 ] +# we always want to fall to the faster status method in case of a probe by the cluster +ACTION=$1 +if ocf_is_probe then - if [ -n "$OCF_RESKEY_DIR_BOOTSTRAP" ] - then - BOOTSTRAP="$OCF_RESKEY_DIR_BOOTSTRAP" - else - BOOTSTRAP=`ls -1d /usr/sap/$SID/*/j2ee/cluster/bootstrap | head -1` - fi - - if [ -n "$OCF_RESKEY_DIR_SECSTORE" ] - then - SECSTORE="$OCF_RESKEY_DIR_SECSTORE" - else - SECSTORE=/usr/sap/$SID/SYS/global/security/lib/tools - fi - - if [ -n "$OCF_RESKEY_JAVA_HOME" ] - then - JAVA_HOME="$OCF_RESKEY_JAVA_HOME" - PATH=$JAVA_HOME/bin:$PATH - else - if [ -n "$JAVA_HOME" ] - then - PATH=$JAVA_HOME/bin:$PATH - else - ocf_log err "Cannot find JAVA_HOME directory, please set JAVA_HOME parameter!" - exit $OCF_NOT_RUNNING - fi - fi - - if [ -n "$OCF_RESKEY_DB_JARS" ] - then - DB_JARS=$OCF_RESKEY_DB_JARS - else - if [ -f "$BOOTSTRAP"/bootstrap.properties ]; then - DB_JARS=`cat $BOOTSTRAP/bootstrap.properties | grep -i rdbms.driverLocation | sed -e 's/\\\:/:/g' | awk -F= '{print $2}'` - fi - fi -fi - -if [ -z "$OCF_RESKEY_AUTOMATIC_RECOVER" ] -then - OCF_RESKEY_AUTOMATIC_RECOVER=0 -else - case "$OCF_RESKEY_AUTOMATIC_RECOVER" in - 1|true|TRUE|yes|YES) OCF_RESKEY_AUTOMATIC_RECOVER=1;; - 0|false|FALSE|no|NO) OCF_RESKEY_AUTOMATIC_RECOVER=0;; - esac -fi - -# as root user we need the library path to the SAP kernel to be able to call executables -if [ `echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>"` -eq 0 ]; then - LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH - export LD_LIBRARY_PATH + ACTION=status fi -sidadm="`echo $SID | tr '[:upper:]' '[:lower:]'`adm" # What kind of method was invoked? -case "$1" in - - start) sapdatabase_start - exit $?;; - - stop) sapdatabase_stop - exit $?;; - - monitor) - sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING - exit $?;; - - status) - sapdatabase_status - exit $?;; - - recover) sapdatabase_recover - exit $?;; - - validate-all) sapdatabase_validate - exit $?;; - - *) sapdatabase_methods - exit $OCF_ERR_UNIMPLEMENTED;; +case "$ACTION" in + + start|stop|status|recover) sapdatabase_$ACTION + exit $?;; + monitor) sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING + exit $?;; + validate-all) sapdatabase_validate + exit $?;; + *) sapdatabase_methods + exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/Xen b/heartbeat/Xen index 956d247fc..17cb27b67 100755 --- a/heartbeat/Xen +++ b/heartbeat/Xen @@ -1,485 +1,499 @@ #!/bin/sh # # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # Resource Agent for the Xen Hypervisor. # Manages Xen virtual machine instances by # mapping cluster resource start and stop, # to Xen create and shutdown, respectively. # # usage: $0 {start|stop|status|monitor|meta-data} # # OCF parameters are as below: # OCF_RESKEY_xmfile # Absolute path to the Xen control file, # for this virtual machine. # OCF_RESKEY_allow_mem_management # Change memory usage on start/stop/migration # of virtual machine # OCF_RESKEY_reserved_Dom0_memory # minimum memory reserved for domain 0 # OCF_RESKEY_monitor_scripts # scripts to monitor services within the # virtual domain ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-! usage: $0 {start|stop|status|monitor|meta-data|validate-all} ! } : ${OCF_RESKEY_xmfile=/etc/xen/vm/MyDomU} +: ${OCF_RESKEY_shutdown_acpi=0} : ${OCF_RESKEY_allow_mem_management=0} : ${OCF_RESKEY_reserved_Dom0_memory=512} meta_data() { cat < 1.0 Resource Agent for the Xen Hypervisor. Manages Xen virtual machine instances by mapping cluster resource start and stop, to Xen create and shutdown, respectively. A note on names We will try to extract the name from the config file (the xmfile attribute). If you use a simple assignment statement, then you should be fine. Otherwise, if there's some python acrobacy involved such as dynamically assigning names depending on other variables, and we will try to detect this, then please set the name attribute. You should also do that if there is any chance of a pathological situation where a config file might be missing, for example if it resides on a shared storage. If all fails, we finally fall back to the instance id to preserve backward compatibility. Para-virtualized guests can also be migrated by enabling the meta_attribute allow-migrate. Manages Xen unprivileged domains (DomUs) Absolute path to the Xen control file, for this virtual machine. Xen control file Name of the virtual machine. Xen DomU name The Xen agent will first try an orderly shutdown using xm shutdown. Should this not succeed within this timeout, the agent will escalate to xm destroy, forcibly killing the node. If this is not set, it will default to two-third of the stop action timeout. Setting this value to 0 forces an immediate destroy. Shutdown escalation timeout + + +Handle shutdown by simulating an ACPI power button event. +Enable this to allow graceful shutdown for HVM domains +without installed PV drivers. + +Simulate power button event on shutdown + + This parameter enables dynamic adjustment of memory for start and stop actions used for Dom0 and the DomUs. The default is to not adjust memory dynamically. Use dynamic memory management In case of a live migration, the system will default to using the IP address associated with the hostname via DNS or /etc/hosts. This parameter allows you to specify a node attribute that will be queried instead for the target node, overriding the IP address. This allows you to use a dedicated network for live migration traffic to a specific node. Warning: make very sure the IP address does point to the right node. Or else the live migration will end up somewhere else, greatly confusing the cluster and causing havoc. Node attribute containing target IP address In case memory management is used, this parameter defines the minimum amount of memory to be reserved for the dom0. The default minimum memory is 512MB. Minimum Dom0 memory To additionally monitor services within the unprivileged domain, add this parameter with a list of scripts to monitor. list of space separated monitor scripts END } Xen_Status() { if have_binary xen-list; then xen-list $1 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi STATUS=`xm list --long $1 2>/dev/null | grep status 2>/dev/null` if [ "X${STATUS}" != "X" ]; then # we have Xen 3.0.4 or higher STATUS_NOSPACES=`echo "$STATUS" | awk '{ print $1,$2}'` if [ "$STATUS_NOSPACES" = "(status 2)" -o "$STATUS_NOSPACES" = "(status 1)" ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi else # we have Xen 3.0.3 or lower STATUS=`xm list --long $1 2>/dev/null | grep state 2>/dev/null` echo "${STATUS}" | grep -qs "[-r][-b][-p]---" if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi } Xen_Adjust_Memory() { if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then CNTNEW=$1 RUNNING=`Xen_List_running` RUNCNT=`Xen_Count_running` MAXMEM=`Xen_Total_Memory` if [ ${RUNCNT} -eq 0 -a ${CNTNEW} -eq 0 ]; then RUNCNT=1 fi #NEWMEM=`echo "(${MAXMEM}-${OCF_RESKEY_reserved_Dom0_memory})/(${RUNCNT}+${CNTNEW})"|bc` NEWMEM=$(( (${MAXMEM} - ${OCF_RESKEY_reserved_Dom0_memory}) / (${RUNCNT} + ${CNTNEW} ) )) # do not rely on ballooning add dom0_mem=512 instead to force memory for dom0 #xm mem-set Domain-0 ${OCF_RESKEY_reserved_Dom0_memory} for DOM in ${RUNNING}; do xm mem-set ${DOM} ${NEWMEM} done ocf_log info "Adjusted memory to: $NEWMEM, for the following $RUNCNT domains: $RUNNING" fi } Xen_List_all() { xm list | grep -v -e "Name" -e "Domain-0" | awk '{print $1}' } Xen_List_running() { ALL_DOMS=`Xen_List_all` for DOM in ${ALL_DOMS}; do if Xen_Status $DOM; then echo "${DOM} " fi done } Xen_Count_running() { Xen_List_running | wc -w } Xen_Monitor() { Xen_Status ${DOMAIN_NAME} if [ $? -eq ${OCF_NOT_RUNNING} ]; then return ${OCF_NOT_RUNNING} fi if [ "X${OCF_RESKEY_monitor_scripts}" = "X" ]; then return ${OCF_SUCCESS} fi for SCRIPT in ${OCF_RESKEY_monitor_scripts}; do $SCRIPT if [ $? -ne 0 ]; then return ${OCF_ERR_GENERIC} fi done return ${OCF_SUCCESS} } Xen_Total_Memory() { xm info | grep "^total_memory" | awk '{print $3}' } Xen_Start() { if Xen_Status ${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi if [ ! -f "${OCF_RESKEY_xmfile}" ]; then ocf_log err "Config file ${OCF_RESKEY_xmfile} for $DOMAIN_NAME does not exist." return $OCF_ERR_INSTALLED fi if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then Xen_Adjust_Memory 1 ocf_log info "New memory for virtual domains: ${NEWMEM}" sed -i -e "/^memory=/ s/^memory=.*/memory=${NEWMEM}/" ${OCF_RESKEY_xmfile} xm mem-set ${DOMAIN_NAME} ${NEWMEM} fi xm create ${OCF_RESKEY_xmfile} name=$DOMAIN_NAME rc=$? if [ $rc -ne 0 ]; then return $OCF_ERR_GENERIC else if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then xm mem-set ${DOMAIN_NAME} ${NEWMEM} fi fi while sleep 1; do Xen_Monitor && return $OCF_SUCCESS done } xen_domain_stop() { local dom=$1 local timeout if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then timeout=$OCF_RESKEY_shutdown_timeout elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=60 fi if [ "$timeout" -gt 0 ]; then ocf_log info "Xen domain $dom will be stopped (timeout: ${timeout}s)" - xm shutdown $dom + if ocf_is_true "${OCF_RESKEY_shutdown_acpi}"; then + xm trigger $dom power + else + xm shutdown $dom + fi while Xen_Status $dom && [ "$timeout" -gt 0 ]; do ocf_log debug "$dom still not stopped. Waiting..." timeout=$((timeout-1)) sleep 1 done fi if [ "$timeout" -eq 0 ]; then while Xen_Status $dom; do ocf_log warn "Xen domain $dom will be destroyed!" $xenkill $dom sleep 1 done # Note: This does not give up. stop isn't allowed to to fail. # If xm destroy fails, stop will eventually timeout. # This is the correct behaviour. fi ocf_log info "Xen domain $dom stopped." } Xen_Stop() { local vm if Xen_Status ${DOMAIN_NAME}; then vm=${DOMAIN_NAME} elif Xen_Status migrating-${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME is migrating" vm="migrating-${DOMAIN_NAME}" else ocf_log info "Xen domain $DOMAIN_NAME already stopped." fi if [ "$vm" ]; then xen_domain_stop $vm else # It is supposed to be gone, but there have been situations where xm # list / xen-list showed it as stopped but it was still instantiated. # Nuke it once more to make sure: $xenkill ${DOMAIN_NAME} fi Xen_Adjust_Memory 0 return $OCF_SUCCESS } Xen_Migrate_To() { target_node="$OCF_RESKEY_CRM_meta_migrate_target" target_attr="$OCF_RESKEY_CRM_node_ip_attribute" target_addr="$target_node" if Xen_Status ${DOMAIN_NAME}; then ocf_log info "$DOMAIN_NAME: Starting xm migrate to $target_node" if [ -n "$target_attr" ]; then nodevalue=`crm_attribute --type nodes --node-uname $target_node --attr-name $target_attr --get-value -q` if [ -n "${nodevalue}" -a "${nodevalue}" != "(null)" ]; then target_addr="$nodevalue" ocf_log info "$DOMAIN_NAME: $target_node is using address $target_addr" fi fi xm migrate --live $DOMAIN_NAME $target_addr rc=$? if [ $rc -ne 0 ]; then ocf_log err "$DOMAIN_NAME: xm migrate to $target_node failed: $rc" return $OCF_ERR_GENERIC else Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: xm migrate to $target_node succeeded." return $OCF_SUCCESS fi else ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!" return $OCF_ERR_GENERIC fi } Xen_Migrate_From() { if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for status to stabilize # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=10 # should be plenty fi while ! Xen_Status ${DOMAIN_NAME} && [ $timeout -gt 0 ]; do ocf_log debug "$DOMAIN_NAME: Not yet active locally, waiting (timeout: ${timeout}s)" timeout=$((timeout-1)) sleep 1 done if Xen_Status ${DOMAIN_NAME}; then Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: Active locally, migration successful" return $OCF_SUCCESS else ocf_log err "$DOMAIN_NAME: Not active locally, migration failed!" return $OCF_ERR_GENERIC fi } Xen_Validate_All() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # the name business: # # 1. use the name attribute, or # 2. find the name in the config file (if it exists) and use that # unless it contains funny characters such as '%' or space, or # 3. use the OCF_RESOURCE_INSTANCE if [ x"${OCF_RESKEY_name}" != x ]; then DOMAIN_NAME="${OCF_RESKEY_name}" else if [ -f "${OCF_RESKEY_xmfile}" ]; then DOMAIN_NAME=`awk '$1~/^name(=|$)/{print}' ${OCF_RESKEY_xmfile} | sed 's/.*=[[:space:]]*//' | tr -d "[\"']"` if echo "$DOMAIN_NAME" | grep -qs '[%[:space:]]'; then DOMAIN_NAME="" fi fi DOMAIN_NAME=${DOMAIN_NAME:-${OCF_RESOURCE_INSTANCE}} fi for binary in xm sed awk; do check_binary $binary done if have_binary xen-destroy ; then xenkill="xen-destroy" else xenkill="xm destroy" fi if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then ocf_is_decimal "$OCF_RESKEY_shutdown_timeout" || { ocf_log err "shutdown_timeout must be a number" exit $OCF_ERR_CONFIGURED } fi case $1 in start) Xen_Start ;; stop) Xen_Stop ;; migrate_to) Xen_Migrate_To ;; migrate_from) Xen_Migrate_From ;; monitor) Xen_Monitor ;; status) Xen_Status ${DOMAIN_NAME} ;; validate-all) Xen_Validate_All ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/Xinetd b/heartbeat/Xinetd index d429b0d95..957a37931 100755 --- a/heartbeat/Xinetd +++ b/heartbeat/Xinetd @@ -1,498 +1,226 @@ #!/bin/sh # # Startup/shutdown script for services managed by xinetd. # # Copyright (C) 2003 Charlie Brooks +# Copyright (C) 2011 Ulrich Windl # -# WARNING: Tested ONLY on Red Hat 7.3 and Fedora Core 2/4 at this time. +# WARNING: Tested ONLY on SLES11 SP1 at this time. # # Author: Charlie Brooks # Description: given parameters of a service name and start|stop|status, # will enable, disable or report on a specified xinetd service # Config: all services must have a descriptor file in /etc/xinetd.d # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # OCF parameters are as below: # OCF_RESKEY_service ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +service=$OCF_RESKEY_service +SVCDEF=/etc/xinetd.d/$service + ####################################################################### meta_data() { cat < 1.0 Resource script for Xinetd. It starts/stops services managed -by xinetd. +by xinetd by enabling or disabling them in the configuration file. + +The xinetd daemon itself must be running: we are not going to start or +stop it ourselves. -Note that the xinetd daemon itself must be running: we are not -going to start it or stop it ourselves. +All services should have a line saying either "disable=yes" or "disable=no". +The script just changes those settings before reloading xinetd. Important: in case the services managed by the cluster are the only ones enabled, you should specify the -stayalive option for xinetd or it will exit on Heartbeat stop. Alternatively, you may enable some internal service such as echo. -Manages an Xinetd service +Manages a service of Xinetd -The service name managed by xinetd. +The name of the service managed by xinetd. service name END } -# don't rely on the pid file, but lookup xinetd in the list of -# processes - +# force xinetd to reload the service descriptions hup_inetd () { - pid=`ps -e -o pid,command | grep '/[x]inetd' | awk '{print $1}'` + # don't rely on the pid file, but lookup xinetd in the list of + # processes + pid=`ps -e -o pid,command | $AWK '$2 ~ /\/[x]inetd/ { print $1 }'` if [ "$pid" ]; then - if kill -s HUP $pid; then - : - else - ocf_log err "Could not SigHUP xinetd superdaemon!" - ocf_log err "perhaps we are booting after a system crash" - exit $OCF_ERR_GENERIC - fi + if kill -s HUP $pid; then + ocf_log info "asked xinetd to reload by sending SIGHUP to process $pid!" + else + ocf_log err "could not send SIGHUP to process $pid!" + exit $OCF_ERR_GENERIC + fi else - ocf_log err "xinetd superdaemon process not found!" - ocf_log err "perhaps we are currently booting the system." - exit $OCF_ERR_GENERIC + ocf_log err "xinetd process not found!" + exit $OCF_ERR_GENERIC fi } -xup_start () { - if [ "running" = "`xup_status`" ]; then - ocf_log info "Service $service already started" - exit $OCF_SUCCESS - fi - - ocf_log "info" "$0: enabling in $RCFILE" - if $AWK '!/disable/' $RCFILE > $RCFILE.xup - then - if mv $RCFILE.xup $RCFILE - then - ocf_log "info" "$0: Starting" - hup_inetd - touch $PIDFILE - else - ocf_log "err" "Could not replace $RCFILE" - fi - else - ocf_log "err" "Could not rewrite $RCFILE!" - fi +# check "disable = X", printing X +check_service() +{ + ocf_log "info" "checking \"disable\" in $1" + typeset result=$(sed -nre 's/^[ ]*disable[ ]*=[ ]*([^ ]+)[# ]*/\1/p' $1) + echo "$result" } -xup_stop () { - if [ "stopped" = "`xup_status`" ]; then - ocf_log info "Service $service already stopped" - exit $OCF_SUCCESS - fi - - ocf_log "info" "$0: disabling in $RCFILE" - if $AWK '!/disable/;/{/{printf "\tdisable\t\t\t= yes\n"}' $RCFILE >$RCFILE.xup +# change "disable = X" to desired value +change_service() +{ + ocf_log "info" "setting \"disable = $1\" in $2" + if ! sed -i -re 's/^([ ]*disable[ ]*=[ ]*)([^ ]+)([# ]*)/\1'"$1"'\3/' $2 then - if mv $RCFILE.xup $RCFILE - then - ocf_log "info" "$0: Shutting down" - hup_inetd - rm -f $PIDFILE - else - ocf_log "err" "Could not replace $RCFILE" - fi - else - ocf_log "err" "Could not rewrite $RCFILE!" - fi -} - -xup_usage () { - echo "Usage: $0 {start|stop|restart|status|monitor|validate-all|meta-data}" - return 0 + ocf_log "err" "could not edit $2" + return 1 + fi + return 0 } xup_status () { - if [ -f $PIDFILE ]; then - echo running - return $OCF_SUCCESS - else - echo stopped - return $OCF_NOT_RUNNING - fi + typeset disabled="$(check_service $SVCDEF)" + if [ "${disabled:=no}" = no ]; then + echo running + return $OCF_SUCCESS + elif [ "$disabled" = yes ]; then + echo stopped + return $OCF_NOT_RUNNING + else + echo unknown + return $OCF_ERR_CONFIGURED + fi } -# -# Check if the arg is a valid integer -# -CheckInteger() { -# Examples of valid integer: "1080", "1", "0080", "0", "0000" -# Examples of invalid integer: "1080bad", "" - case "$1" in - "") #empty string - false;; - *[!0-9]*) #got invalid char - false;; - *) #no invalid char, and has at least one digit, so is a good integer - true;; - esac +xup_start () { + if [ "running" = "`xup_status`" ]; then + ocf_log info "service $service already started" + exit $OCF_SUCCESS + fi + ocf_log "info" "enabling in $SVCDEF" + if change_service "no" $SVCDEF; then + hup_inetd + fi } -# -# Check if the arg is a valid umask -# -CheckUmask() { -# Examples of valid umask: "100", "1", "000", "0" -# Examples of invalid umask: "108", "bad", "1111" "" - case "$1" in - [0-7]) - true;; - [0-7][0-7]) - true;; - [0-7][0-7][0-7]) - true;; - *) - false;; - esac +xup_stop () { + if [ "stopped" = "`xup_status`" ]; then + ocf_log info "service $service already stopped" + exit $OCF_SUCCESS + fi + ocf_log "info" "disabling in $SVCDEF" + if change_service "yes" $SVCDEF; then + hup_inetd + fi } -# Check (part of) the attributes based on xinetd.conf(5) and xinetd source code -# confparse.c -# parsers.c -# attr.h -check_attributes () { - # Empty these attributes before validating them - unset socket_type wait server user protocol port group instances type \ - flags disable nice umask - VAR="" - for SECTION in $*; do - case $SECTION in - *=*) # Check to see if we need to export the previous name=value - # pair. - if [ -n "$VAR" ]; then - export "$VAR" - fi - # Save the "new" name=value pair in VAR - VAR=$SECTION - ;; - *) # This section does not have an equal sign, therefore it is part of - # the *previous* name=value pair, thus we will append it to the - # previous name=value assignment. - VAR="$VAR $SECTION" - ;; - esac - # A final cleanup step. - # Do we need to export VAR ? - if [ -n "$VAR" ]; then - export "$VAR" - fi - done - if [ $? -ne 0 ]; then - return $OCF_ERR_GENERIC - fi - - case $disable in - "") disable=no # Default to no. - ;; - yes|no) ;; - *) ocf_log err "Invalid value for disable [$disable] in $RCFILE, yes|no please" - exit $OCF_ERR_CONFIGURED - ;; - esac - - case $socket_type in - "") socket_type=dgram - # Default value for socket_type is dgram. - ;; - stream|dgram|raw|seqpacket) - ;; - *) ocf_log err "Invalid socket type $socket_type in $RCFILE" - exit $OCF_ERR_CONFIGURED - ;; - esac - - case $wait in # Wait has no default value. - yes|no) ;; - *) ocf_log err "Invalid waid [$wait] in $RCFILE, yes|no please" - exit $OCF_ERR_CONFIGURED - ;; - esac - - case $type in - # Default value for type is RPC or INTERNAL, determined at compile time. - # It may be a list in $RCFILE, however we only capture the first one. - ""|RPC|INTERNAL|UNLISTED|SPECIAL|TCPMUX|TCPMUXPLUS) - ;; - *) ocf_log err "Invalid service type [$type] in $RCFILE" - exit $OCF_ERR_CONFIGURED - ;; - esac - - if [ ! -z "$type" -a "INTERNAL" != "$type" ]; then - # Type is explicitly set to EXTERNAL, hence server and user are necessary. - case $server in - "") ocf_log err "Please specify server in $RCFILE" - exit $OCF_ERR_CONFIGURED - ;; - /*) if [ -x $server ]; then - : OK - else - ocf_log err "Server $server is not executable" - exit $OCF_ERR_CONFIGURED - fi - ;; - *) ocf_log err "Server $server is not of obsolute path" - exit $OCF_ERR_CONFIGURED - ;; - esac - - case $user in - "") ocf_log err "Please specify user in $RCFILE" - exit $OCF_ERR_CONFIGURED - ;; - *) getent passwd $user >/dev/null 2>&1 - if [ $? -ne 0 ]; then - ocf_log err "User $user does not exist!" - exit $OCF_ERR_CONFIGURED - fi - ;; - esac - - # Protocol is necessary when type is MUX - if [ $type = "TCPMUX" -o $type ="TCPMUXPLUS" ]; then - case $protocol in - "") ocf_log err "Please specify protocol in $RCFILE" - exit $OCF_ERR_CONFIGURED - ;; - *) get protocols $protocol >/dev/null 2>&1 - if [ $? -ne 0 ]; then - ocf_log err "Invalid protocol [$protocol] in $RCFILE" - exit $OCF_ERR_CONFIGURED - fi - ;; - esac - fi - fi - - case $group in - "") ;; # OK to be empty, the group for $user is used - *) getent group $group >/dev/null 2>&1 - if [ $? -ne 0 ]; then - ocf_log err "Group $group does not exist!" - exit $OCF_ERR_CONFIGURED - fi - ;; - esac - - case $flags in - # Default value for flags is REUSE. - # It may be a list in $RCFILE, however we only capture the first one. - "") flags=RESUME - ;; - REUSE|INTERCEPT|NORETRY|IDONLY|NAMEINARGS|NODELAY|KEEPALIVE|NOLIBWRAP|SENSOR|IPv4|IPv6) ;; - *) ocf_log err "Invalid flags [$flags] in $RCFILE" - exit $OCF_ERR_CONFIGURED - ;; - esac - - case $instances in - ""|[Uu][Nn][Ll][Ii][Mm][Ii][Ti][Ee][Dd]) ;; - *) if CheckInteger $instances; then - : OK - else - ocf_log err "Invalid instances [$instances] in $RCFILE, non-negative integer expected" - exit $OCF_ERR_CONFIGURED - fi - ;; - esac - - case $nice in - "") ;; - *) local foo=`echo $nice | sed s/^-//` - if CheckInteger $foo; then - : OK - else - ocf_log err "Invalid nice [$nice] in $RCFILE, integer expected" - exit $OCF_ERR_CONFIGURED - fi - ;; - esac - - if [ ! -z $umask ]; then - if CheckUmask $umask; then - : OK - else - ocf_log err "Invalid umask [$umask] in $RCFILE" - exit $OCF_ERR_CONFIGURED - fi - fi +xup_usage () { + echo "Usage: $0 {start|stop|restart|status|monitor|validate-all|meta-data}" + return 0 } xup_validate_all () { - check_binary $AWK - -# Parse the $RCFILE for necessary attributes, assume $RCFILE does not -# contain "include" or "includedir" directives. - space="[ \t]*" - leading_space="^$space" - trailing_space="$space$" - comment="$space\#" - - # Strip comments, delete blank lines. - stripped=`sed -e "/^$comment/d" -e "/=$trailing_space/d" -e "/$leading_space$/d" $RCFILE` - - stripped=`echo $stripped` - # At this stage, stripped="service { attribute-list }". - case $stripped in - *#*) - ocf_log err "Descriptor file $RCFILE contains extra \"#\", which is forbidden" - exit $OCF_ERR_CONFIGURED - ;; - - service*) - case $stripped in - *[!\ \ +-]=*) - ocf_log err "Attribute needs a space before operator =" - exit $OCF_ERR_CONFIGURED - ;; - *[!\ \ ][+-]=*) - ocf_log err "Attribute needs a space before operator [+-]=" - exit $OCF_ERR_CONFIGURED - ;; - *) ;; - esac - - # Strip leading "service" keyword, remove spaces surrounding "=". - stripped=`echo $stripped | sed -e "s/^service//" -e "s/-=/ /g" \ - -e "s/+=/=/g" -e "s/$space=$space/=/g"` - # At this stage, stripped=" { attribute-list }", - # note that the leading space before is significant. - case $stripped in - " "*) - stripped=`echo $stripped | sed -e "s/$leading_space[^ \t]\+$space//"` - # At this stage, stripped="{ attribute-list }" - case $stripped in - {*}) - stripped=`echo $stripped | sed -e "s/^{//" -e "s/}$//"` - # At this stage, stripped= -# echo $stripped - check_attributes "$stripped" - ;; - - *) - ocf_log err "Attrbute list should be contained in {}" - exit $OCF_ERR_CONFIGURED - ;; - esac - ;; - - *) - ocf_log err "Service name should follow the service key word" - exit $OCF_ERR_CONFIGURED - ;; - esac - ;; - - *) - ocf_log err "Service key word is necessary in $RCFILE" - exit $OCF_ERR_CONFIGURED - ;; - esac + if [ ! -f "$SVCDEF" ]; then + ocf_log err "service $service missing $SVCDEF" + return $OCF_ERR_INSTALLED + fi + return $OCF_SUCCESS } -if - ( [ $# -ne 1 ] ) -then - xup_usage - exit $OCF_ERR_ARGS + +if [ $# -ne 1 ]; then + xup_usage + exit $OCF_ERR_ARGS fi # These operations do not require OCF instance parameters to be set case "$1" in - meta-data) + meta-data) meta_data exit $OCF_SUCCESS ;; - usage) + usage) xup_usage exit $OCF_SUCCESS ;; - *) ;; esac -if - [ -z "$OCF_RESKEY_service" ] -then - ocf_log err "Please set OCF_RESKEY_service to the service managed by Xinetd" - if [ "$1" = "start" ]; then - exit $OCF_ERR_ARGS - else - exit $OCF_NOT_RUNNING - fi +if [ -z "$OCF_RESKEY_service" ]; then + ocf_log err "please define \"service\" parameter" + if [ "$1" = "start" ]; then + exit $OCF_ERR_CONFIGURED + else + exit $OCF_NOT_RUNNING + fi fi -service=$OCF_RESKEY_service -RCFILE=/etc/xinetd.d/$service -PIDFILE=$HA_VARRUN/xup$service - # Make sure the OCF_RESKEY_service is a valid xinetd service name -if [ ! -f $RCFILE ]; then - ocf_log err "Service descriptor $RCFILE not found!" +if [ ! -f $SVCDEF ]; then + ocf_log err "service definition $SVCDEF not found!" if [ "$1" = "start" ]; then - exit $OCF_ERR_ARGS + exit $OCF_ERR_INSTALLED else exit $OCF_NOT_RUNNING fi fi # See how we were called. case "$1" in - start) + start) xup_start ;; - stop) + stop) xup_stop ;; - restart) + restart) $0 stop $0 start ;; - status) + status) xup_status ;; - monitor) + monitor) xup_status > /dev/null ;; - validate-all) + validate-all) xup_validate_all ;; - *) + *) xup_usage exit $OCF_ERR_UNIMPLEMENTED esac - exit $? diff --git a/heartbeat/apache b/heartbeat/apache index c9efc1961..4c59e4747 100755 --- a/heartbeat/apache +++ b/heartbeat/apache @@ -1,608 +1,610 @@ #!/bin/sh # # High-Availability Apache/IBMhttp control script # # apache (aka IBMhttpd) # # Description: starts/stops apache web servers. # # Author: Alan Robertson # Sun Jiang Dong # # Support: linux-ha@lists.linux-ha.org # # License: GNU General Public License (GPL) # # Copyright: (C) 2002-2005 International Business Machines # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 apache::/opt/IBMHTTPServer/conf/httpd.conf # node1 10.0.0.170 IBMhttpd # # Our parsing of the Apache config files is very rudimentary. # It'll work with lots of different configurations - but not every # possible configuration. # # Patches are being accepted ;-) # # OCF parameters: # OCF_RESKEY_configfile # OCF_RESKEY_httpd # OCF_RESKEY_port # OCF_RESKEY_statusurl # OCF_RESKEY_options # OCF_RESKEY_testregex # OCF_RESKEY_client # OCF_RESKEY_testurl # OCF_RESKEY_testregex10 # OCF_RESKEY_testconffile # OCF_RESKEY_testname # OCF_RESKEY_envfiles : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/apache-conf.sh . ${OCF_FUNCTIONS_DIR}/http-mon.sh HA_VARRUNDIR=${HA_VARRUN} ####################################################################### # # Configuration options - usually you don't need to change these # ####################################################################### # IBMHTTPD=/opt/IBMHTTPServer/bin/httpd HTTPDLIST="/sbin/httpd2 /usr/sbin/httpd2 /usr/sbin/apache2 /sbin/httpd /usr/sbin/httpd /usr/sbin/apache $IBMHTTPD" MPM=/usr/share/apache2/find_mpm if [ -x $MPM ] then HTTPDLIST="$HTTPDLIST `$MPM 2>/dev/null`" fi LOCALHOST="http://localhost" HTTPDOPTS="-DSTATUS" DEFAULT_IBMCONFIG=/opt/IBMHTTPServer/conf/httpd.conf DEFAULT_NORMCONFIG="/etc/apache2/httpd.conf" # # You can also set # HTTPD # PORT # STATUSURL # CONFIGFILE # in this section if what we're doing doesn't work for you... # # End of Configuration options ####################################################################### CMD=`basename $0` # The config-file-pathname is the pathname to the configuration # file for this web server. Various appropriate defaults are # assumed if no config file is specified. If this command is # invoked as *IBM*, then the default config file name is # $DEFAULT_IBMCONFIG, otherwise the default config file # will be $DEFAULT_NORMCONFIG. usage() { cat <<-! usage: $0 action action: start start the web server stop stop the web server status return the status of web server, run or down monitor return TRUE if the web server appears to be working. For this to be supported you must configure mod_status and give it a server-status URL. You have to have installed either curl or wget for this to work. meta-data show meta data message validate-all validate the instance parameters ! exit $1 } # # return TRUE if a process with given PID is running # ProcessRunning() { ApachePID=$1 # Use /proc if it looks like it's here... if [ -d /proc -a -d /proc/1 ] then [ -d /proc/$ApachePID ] else # This assumes we're running as root... kill -s 0 "$ApachePID" >/dev/null 2>&1 fi } silent_status() { if [ -f $PidFile ] then ProcessRunning `cat $PidFile` else : No pid file false fi } # May be useful to add other distros in future validate_default_config() { if [ -e /etc/SuSE-release ]; then validate_default_suse_config else return 0 fi } # When using the default /etc/apache2/httpd.conf on SUSE, the file # /etc/apache2/sysconfig.d/include.conf is required to be present, # but this is only generated if you run the apache init script # (with contents derived from /etc/sysconfig/apache2). So, here, # if we're using the default system config file and it requires # that include, we run "/etc/init.d/apache2 configtest" to ensure # the relevant config is generated and valid. We're also taking # this opportunity to enable mod_status if it's not present. validate_default_suse_config() { if [ "$CONFIGFILE" = "$DEFAULT_NORMCONFIG" ] && \ grep -Eq '^Include\s+/etc/apache2/sysconfig.d/include.conf' "$CONFIGFILE" then [ -x "/usr/sbin/a2enmod" ] && ocf_run -q /usr/sbin/a2enmod status ocf_run -q /etc/init.d/apache2 configtest return else return 0 fi } start_apache() { if silent_status then ocf_log info "$CMD already running (pid $ApachePID)" return $OCF_SUCCESS fi validate_default_config || return $OCF_ERR_CONFIGURED + # https://bugs.launchpad.net/ubuntu/+source/apache2/+bug/603211 + [ -d /var/run/apache2 ] || mkdir /var/run/apache2 ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE tries=0 while : # wait until the user set timeout do monitor_apache ec=$? if [ $ec -eq $OCF_NOT_RUNNING ] then tries=`expr $tries + 1` ocf_log info "waiting for apache $CONFIGFILE to come up" sleep 1 else break fi done if [ $ec -ne 0 ] && silent_status; then stop_apache fi return $ec } stop_apache() { if silent_status then if kill $ApachePID then tries=0 while ProcessRunning $ApachePID && [ $tries -lt 10 ] do sleep 1 kill $ApachePID >/dev/null ocf_log info "Killing apache PID $ApachePID" tries=`expr $tries + 1` done else ocf_log warn "Killing apache PID $ApachePID FAILED." fi if ProcessRunning $ApachePID then ocf_log info "$CMD still running ($ApachePID)." false else ocf_log info "$CMD stopped." fi else ocf_log info "$CMD is not running." fi for sig in SIGTERM SIGHUP SIGKILL ; do if pgrep -f $HTTPD.*$CONFIGFILE >/dev/null ; then pkill -$sig -f $HTTPD.*$CONFIGFILE >/dev/null ocf_log info "apache children were signalled ($sig)" sleep 1 else break fi done } status_apache() { silent_status rc=$? if [ $rc -eq 0 ] then ocf_log info "$CMD is running (pid $ApachePID)." return $OCF_SUCCESS else ocf_log info "$CMD is stopped." return $OCF_NOT_RUNNING fi } monitor_apache_extended() { if [ "$TESTCONFFILE" ]; then readtestconf < $TESTCONFFILE else test_url="$TESTURL" test_regex="$TESTREGEX10" fi whattorun=`gethttpclient` fixtesturl is_testconf_sane || return $OCF_ERR_CONFIGURED $whattorun "$test_url" | grep -Ei "$test_regex" > /dev/null } monitor_apache_basic() { if [ -z "$STATUSURL" ]; then ocf_log err "statusurl parameter empty" return $OCF_ERR_CONFIGURED elif [ -z "$ourhttpclient" ]; then ocf_log err "could not find a http client; make sure that either wget or curl is available" return $OCF_ERR_CONFIGURED fi ${ourhttpclient}_func "$STATUSURL" | grep -Ei "$TESTREGEX" > /dev/null } monitor_apache() { silent_status if [ $? -ne 0 ]; then ocf_log info "$CMD not running" return $OCF_NOT_RUNNING fi ourhttpclient=`findhttpclient` # we'll need one monitor_apache_basic rc=$? [ $rc -ne 0 ] && return $rc case "$OCF_CHECK_LEVEL" in ""|0) true;; 10) monitor_apache_extended;; *) ocf_log err "bad OCF_CHECK_LEVEL: $OCF_CHECK_LEVEL" return $OCF_ERR_CONFIGURED ;; esac } metadata_apache(){ cat < 1.0 This is the resource agent for the Apache web server. This resource agent operates both version 1.x and version 2.x Apache servers. The start operation ends with a loop in which monitor is repeatedly called to make sure that the server started and that it is operational. Hence, if the monitor operation does not succeed within the start operation timeout, the apache resource will end with an error status. The monitor operation by default loads the server status page which depends on the mod_status module and the corresponding configuration file (usually /etc/apache2/mod_status.conf). Make sure that the server status page works and that the access is allowed *only* from localhost (address 127.0.0.1). See the statusurl and testregex attributes for more details. See also http://httpd.apache.org/ Manages an Apache web server instance The full pathname of the Apache configuration file. This file is parsed to provide defaults for various other resource agent parameters. configuration file path The full pathname of the httpd binary (optional). httpd binary path A port number that we can probe for status information using the statusurl. This will default to the port number found in the configuration file, or 80, if none can be found in the configuration file. httpd port The URL to monitor (the apache server status page by default). If left unspecified, it will be inferred from the apache configuration file. If you set this, make sure that it succeeds *only* from the localhost (127.0.0.1). Otherwise, it may happen that the cluster complains about the resource being active on multiple nodes. url name Regular expression to match in the output of statusurl. Case insensitive. monitor regular expression Client to use to query to Apache. If not specified, the RA will try to find one on the system. Currently, wget and curl are supported. For example, you can set this parameter to "curl" if you prefer that to wget. http client URL to test. If it does not start with "http", then it's considered to be relative to the Listen address. test url Regular expression to match in the output of testurl. Case insensitive. extended monitor regular expression A file which contains test configuration. Could be useful if you have to check more than one web application or in case sensitive info should be passed as arguments (passwords). Furthermore, using a config file is the only way to specify certain parameters. Please see README.webapps for examples and file description. test configuration file Name of the test within the test configuration file. test name Extra options to apply when starting apache. See man httpd(8). command line options Files (one or more) which contain extra environment variables. If you want to prevent script from reading the default file, set this parameter to empty string. environment settings files We will try to detect if the URL (for monitor) is IPv6, but if that doesn't work set this to true to enforce IPv6. use ipv6 with http clients END exit $OCF_SUCCESS } validate_all_apache() { if CheckPort $PORT; then # We are sure to succeed here, since we forced $PORT to be valid in GetParams() : OK else ocf_log err "Port number $PORT is invalid!" exit $OCF_ERR_ARGS fi if [ -z $STATUSURL ]; then : OK to be empty else case $STATUSURL in - http://*/*) ;; + http://*) ;; *) ocf_log err "Invalid STATUSURL $STATUSURL" exit $OCF_ERR_ARGS ;; esac fi if [ ! -x $HTTPD ]; then ocf_log err "HTTPD $HTTPD not found or is not an executable!" exit $OCF_ERR_ARGS fi if [ ! -f $CONFIGFILE ]; then # We are sure to succeed here, since we have parsed $CONFIGFILE before getting here ocf_log err "Configuration file $CONFIGFILE not found!" exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } if [ $# -eq 1 ] then COMMAND=$1 HTTPD="$OCF_RESKEY_httpd" PORT="$OCF_RESKEY_port" STATUSURL="$OCF_RESKEY_statusurl" CONFIGFILE="$OCF_RESKEY_configfile" OPTIONS="$OCF_RESKEY_options" CLIENT=${OCF_RESKEY_client} - TESTREGEX=${OCF_RESKEY_testregex:-'[[:space:]]*'} + TESTREGEX=${OCF_RESKEY_testregex:-''} TESTURL="$OCF_RESKEY_testurl" TESTREGEX10=${OCF_RESKEY_testregex10} TESTCONFFILE="$OCF_RESKEY_testconffile" TESTNAME="$OCF_RESKEY_testname" : ${OCF_RESKEY_envfiles="/etc/apache2/envvars"} source_envfiles $OCF_RESKEY_envfiles else usage $OCF_ERR_ARGS fi LSB_STATUS_STOPPED=3 if [ "X$HTTPD" = X -o ! -f "$HTTPD" -o ! -x "$HTTPD" ] then case $0 in *IBM*) HTTPD=$IBMHTTPD DefaultConfig=$DEFAULT_IBMCONFIG;; *) HTTPD= for h in $HTTPDLIST do if [ -f $h -a -x $h ] then HTTPD=$h break fi done # It is possible that we still do not have a valid httpd at this stage if [ -z "$HTTPD" ] then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; meta-data) metadata_apache;; esac ocf_log err "No valid httpd found! Please revise your item" exit $OCF_ERR_INSTALLED fi # Let the user know that the $HTTPD used is not the one (s)he specified via $OCF_RESKEY_httpd if [ "X$OCF_RESKEY_httpd" != X ] then ocf_log info "Using $HTTPD as HTTPD" fi DefaultConfig=$DEFAULT_NORMCONFIG;; esac fi httpd_basename=`basename $HTTPD` case $httpd_basename in *-*) httpd_basename=`echo "$httpd_basename" | sed -e 's%\-.*%%'`;; esac case "$CONFIGFILE" in "") CONFIGFILE=$DefaultConfig;; *) ;; esac if [ ! -f "$CONFIGFILE" ] then case $COMMAND in stop) ocf_log warn "$CONFIGFILE not found - apache considered stopped" exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac fi if [ "X$COMMAND" = Xmeta-data ] || GetParams $CONFIGFILE then : OK else ocf_log err "Cannot parse config file [$CONFIGFILE]" exit $OCF_ERR_INSTALLED fi case $COMMAND in start) start_apache;; stop) stop_apache;; status) status_apache;; monitor) monitor_apache;; meta-data) metadata_apache;; validate-all) validate_all_apache;; *) usage $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/asterisk b/heartbeat/asterisk index 9b91c8b76..cc78ba1ee 100755 --- a/heartbeat/asterisk +++ b/heartbeat/asterisk @@ -1,485 +1,479 @@ #!/bin/sh # # # Asterisk # # Description: Manages an Asterisk PBX as an HA resource # # Authors: Martin Gerhard Loschwitz # Florian Haas # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # (c) 2011 hastexo Professional Services GmbH # # This resource agent is losely derived from the MySQL resource # agent, which itself is made available to the public under the # following copyright: # # (c) 2002-2005 International Business Machines, Inc. # 2005-2010 Linux-HA contributors # # See usage() function below for more details ... # # OCF instance parameters: # OCF_RESKEY_binary # OCF_RESKEY_canary_binary # OCF_RESKEY_config # OCF_RESKEY_user # OCF_RESKEY_group # OCF_RESKEY_additional_parameters # OCF_RESKEY_realtime # OCF_RESKEY_maxfiles ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Fill in some defaults if no values are specified HOSTOS=`uname` if [ "X${HOSTOS}" = "XOpenBSD" ]; then OCF_RESKEY_user_default="_asterisk" OCF_RESKEY_group_default="_asterisk" else OCF_RESKEY_user_default="asterisk" OCF_RESKEY_group_default="asterisk" fi OCF_RESKEY_binary_default="asterisk" OCF_RESKEY_canary_binary_default="astcanary" OCF_RESKEY_config_default="/etc/asterisk/asterisk.conf" OCF_RESKEY_additional_parameters_default="-g -vvv" OCF_RESKEY_realtime_default="false" OCF_RESKEY_maxfiles_default="8192" : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_canary_binary=${OCF_RESKEY_canary_binary_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} : ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} : ${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}} : ${OCF_RESKEY_realtime=${OCF_RESKEY_realtime_default}} : ${OCF_RESKEY_maxfiles=${OCF_RESKEY_maxfiles_default}} ####################################################################### usage() { cat < 1.0 Resource agent for the Asterisk PBX. May manage an Asterisk PBX telephony system or a clone set that forms an Asterisk distributed device setup. Manages an Asterisk PBX Location of the Asterisk PBX server binary Asterisk PBX server binary Location of the Asterisk PBX Canary server binary Asterisk PBX Canary server binary The Asterisk PBX configuration file Asterisk PBX config User running Asterisk PBX daemon Asterisk PBX user Group running Asterisk PBX daemon (for logfile and directory permissions) Asterisk PBX group Additional parameters which are passed to the Asterisk PBX on startup (e.g. -L <load> or -M <value>). Additional parameters to pass to the Asterisk PBX Determines whether the Asterisk PBX daemon will be run with realtime priority or not. Asterisk PBX realtime priority Determines how many files the Asterisk PBX is allowed to open at a time. Helps to fix the 'Too many open files' error message. Asterisk PBX allowed MAXFILES A SIP URI to check when monitoring. During monitor, the agent will attempt to do a SIP OPTIONS request against this URI. Requires the sipsak utility to be present and executable. If unset, the agent does no SIP URI monitoring. SIP URI to check when monitoring END } ####################################################################### # Convenience functions asterisk_rx() { # if $HOME is set, asterisk -rx writes a .asterisk_history there ( unset HOME ocf_run $OCF_RESKEY_binary -r -s $ASTRUNDIR/asterisk.ctl -x "$1" ) } ####################################################################### # Functions invoked by resource manager actions asterisk_validate() { local rc check_binary $OCF_RESKEY_binary check_binary pgrep if [ -n "$OCF_RESKEY_monitor_sipuri" ]; then check_binary sipsak fi + # A config file on shared storage that is not available + # during probes is OK. if [ ! -f $OCF_RESKEY_config ]; then - ocf_log err "Config $OCF_RESKEY_config doesn't exist" - return $OCF_ERR_INSTALLED + if ! ocf_is_probe; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist" + return $OCF_ERR_INSTALLED + fi + ocf_log_warn "Config $OCF_RESKEY_config not available during a probe" fi getent passwd $OCF_RESKEY_user >/dev/null 2>&1 rc=$? if [ $rc -ne 0 ]; then ocf_log err "User $OCF_RESKEY_user doesn't exist" return $OCF_ERR_INSTALLED fi getent group $OCF_RESKEY_group >/dev/null 2>&1 rc=$? if [ $rc -ne 0 ]; then ocf_log err "Group $OCF_RESKEY_group doesn't exist" return $OCF_ERR_INSTALLED fi true } asterisk_status() { local pid local rc if [ ! -f $ASTRUNDIR/asterisk.pid ]; then ocf_log info "Asterisk PBX is not running" return $OCF_NOT_RUNNING fi pid=`cat $ASTRUNDIR/asterisk.pid` ocf_run kill -s 0 $pid rc=$? if [ $rc -eq 0 ]; then if ocf_is_true "$OCF_RESKEY_realtime"; then astcanary_pid=`pgrep -d " " -f "astcanary $ASTRUNDIR/alt.asterisk.canary.tweet.tweet.tweet"` if [ ! "$astcanary_pid" ]; then ocf_log err "Asterisk PBX is running but astcanary is not although it should" return $OCF_ERR_GENERIC fi else return $OCF_SUCCESS fi else ocf_log info "Asterisk PBX not running: removing old PID file" rm -f $ASTRUNDIR/asterisk.pid return $OCF_NOT_RUNNING fi } asterisk_monitor() { local rc asterisk_status rc=$? # If status returned an error, return that immediately if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi # Check whether connecting to asterisk is possible asterisk_rx 'core show channels count' rc=$? if [ $rc -ne 0 ]; then ocf_log err "Failed to connect to the Asterisk PBX" return $OCF_ERR_GENERIC fi # Optionally check the monitor URI with sipsak # The return values: # 0 means that a 200 was received. # 1 means something else then 1xx or 2xx was received. # 2 will be returned on local errors like non resolvable names # or wrong options combination. # 3 will be returned on remote errors like socket errors # (e.g. icmp error), redirects without a contact header or # simply no answer (timeout). # This can also happen if sipsak is run too early after asterisk # start. if [ -n "$OCF_RESKEY_monitor_sipuri" ]; then ocf_run sipsak -s "$OCF_RESKEY_monitor_sipuri" rc=$? case "$rc" in 1|2) return $OCF_ERR_GENERIC;; 3) return $OCF_NOT_RUNNING;; esac fi ocf_log debug "Asterisk PBX monitor succeeded" return $OCF_SUCCESS } asterisk_start() { local asterisk_extra_params local dir local rc asterisk_status rc=$? if [ $rc -eq $OCF_SUCCESS ]; then ocf_log info "Asterisk PBX already running" return $OCF_SUCCESS fi # If Asterisk is not already running, make sure there is no # old astcanary instance when the new asterisk starts. To # achieve this, kill old astcanary instances belonging to # this $ASTRUNDIR. # Find out PIDs of running astcanaries astcanary_pid=`pgrep -d " " -f "astcanary $ASTRUNDIR/alt.asterisk.canary.tweet.tweet.tweet"` # If there are astcanaries running that belong to $ASTRUNDIR, # kill them. if [ "$astcanary_pid" ]; then for i in $astcanary_pid; do ocf_run kill -s KILL $astcanary_pid; done fi for dir in $ASTRUNDIR $ASTLOGDIR $ASTLOGDIR/cdr-csv $ASTLOGDIR/cdr-custom; do if [ ! -d "$dir" ]; then ocf_run install -d -o $OCF_RESKEY_user -g $OCF_RESKEY_group $dir \ || exit $OCF_ERR_GENERIC fi # Regardless of whether we just created the directory or it # already existed, check whether it is writable by the configured # user if ! su -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then ocf_log err "Directory $dir is not writable by $OCF_RESKEY_user" exit $OCF_ERR_PERM fi done # set MAXFILES ulimit -n $OCF_RESKEY_maxfiles # Determine whether Asterisk PBX is supposed to run in Realtime mode # or not and make asterisk daemonize automatically if ocf_is_true "$OCF_RESKEY_realtime"; then asterisk_extra_params="-F -p" else asterisk_extra_params="-F" fi ocf_run ${OCF_RESKEY_binary} -G $OCF_RESKEY_group -U $OCF_RESKEY_user \ -C $OCF_RESKEY_config \ $OCF_RESKEY_additional_parameters \ $asterisk_extra_params rc=$? if [ $rc -ne 0 ]; then ocf_log err "Asterisk PBX start command failed: $rc" exit $OCF_ERR_GENERIC fi # Spin waiting for the server to come up. # Let the CRM/LRM time us out if required while true; do asterisk_monitor rc=$? [ $rc -eq $OCF_SUCCESS ] && break if [ $rc -ne $OCF_NOT_RUNNING ]; then ocf_log err "Asterisk PBX start failed" exit $OCF_ERR_GENERIC fi sleep 2 done ocf_log info "Asterisk PBX started" return $OCF_SUCCESS } asterisk_stop() { local pid local astcanary_pid local rc asterisk_status rc=$? if [ $rc -eq $OCF_NOT_RUNNING ]; then ocf_log info "Asterisk PBX already stopped" return $OCF_SUCCESS fi - # do a "soft shutdown" via the asterisk command line first - asterisk_rx 'core stop now' - - asterisk_status - rc=$? - if [ $rc -eq $OCF_NOT_RUNNING ]; then - ocf_log info "Asterisk PBX stopped" - return $OCF_SUCCESS - fi - - # If "core stop now" didn't succeed, try SIGTERM pid=`cat $ASTRUNDIR/asterisk.pid` ocf_run kill -s TERM $pid rc=$? if [ $rc -ne 0 ]; then ocf_log err "Asterisk PBX couldn't be stopped" exit $OCF_ERR_GENERIC fi # stop waiting shutdown_timeout=15 if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) fi count=0 while [ $count -lt $shutdown_timeout ]; do asterisk_status rc=$? if [ $rc -eq $OCF_NOT_RUNNING ]; then break fi count=`expr $count + 1` sleep 1 ocf_log debug "Asterisk PBX still hasn't stopped yet. Waiting ..." done asterisk_status rc=$? if [ $rc -ne $OCF_NOT_RUNNING ]; then # SIGTERM didn't help either, try SIGKILL ocf_log info "Asterisk PBX failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL ..." ocf_run kill -s KILL $pid fi # After killing asterisk, stop astcanary if ocf_is_true "$OCF_RESKEY_realtime"; then astcanary_pid=`pgrep -d " " -f "astcanary $ASTRUNDIR/alt.asterisk.canary.tweet.tweet.tweet"` if [ "$astcanary_pid" ]; then for i in $astcanary_pid; do ocf_run kill -s KILL $astcanary_pid; done fi fi ocf_log info "Asterisk PBX stopped" return $OCF_SUCCESS } ####################################################################### case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) usage exit $OCF_SUCCESS;; esac # Anything except meta-data and help must pass validation asterisk_validate || exit $? # Now that validate has passed and we can be sure to be able to read # the config file, set convenience variables ASTRUNDIR=`grep astrundir $OCF_RESKEY_config | awk '/^astrundir/ {print $3}'` ASTLOGDIR=`grep astlogdir $OCF_RESKEY_config | awk '/^astlogdir/ {print $3}'` # What kind of method was invoked? case "$1" in start) asterisk_start;; stop) asterisk_stop;; status) asterisk_status;; monitor) asterisk_monitor;; validate-all) ;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/conntrackd b/heartbeat/conntrackd index 3d429902b..7502f5af1 100755 --- a/heartbeat/conntrackd +++ b/heartbeat/conntrackd @@ -1,327 +1,327 @@ #!/bin/bash # # # An OCF RA for conntrackd # http://conntrack-tools.netfilter.org/ # # Copyright (c) 2011 Dominik Klein # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### OCF_RESKEY_binary_default=conntrackd OCF_RESKEY_config_default=/etc/conntrackd/conntrackd.conf : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} meta_data() { cat < 1.1 Master/Slave OCF Resource Agent for conntrackd This resource agent manages conntrackd Name of the conntrackd executable. If conntrackd is installed and available in the default PATH, it is sufficient to configure the name of the binary For example "my-conntrackd-binary-version-0.9.14" If conntrackd is installed somewhere else, you may also give a full path For example "/packages/conntrackd-0.9.14/sbin/conntrackd" Name of the conntrackd executable Full path to the conntrackd.conf file. For example "/packages/conntrackd-0.9.14/etc/conntrackd/conntrackd.conf" Path to conntrackd.conf END } meta_expect() { local what=$1 whatvar=OCF_RESKEY_CRM_meta_${1//-/_} op=$2 expect=$3 local val=${!whatvar} if [[ -n $val ]]; then # [, not [[, or it won't work ;) [ $val $op $expect ] && return fi ocf_log err "meta parameter misconfigured, expected $what $op $expect, but found ${val:-unset}." exit $OCF_ERR_CONFIGURED } conntrackd_is_master() { # You can't query conntrackd whether it is master or slave. It can be both at the same time. # This RA creates a statefile during promote and enforces master-max=1 and clone-node-max=1 ha_pseudo_resource $statefile monitor } conntrackd_set_master_score() { ${HA_SBIN_DIR}/crm_master -Q -l reboot -v $1 } conntrackd_monitor() { rc=$OCF_NOT_RUNNING # It does not write a PID file, so check the socket exists after # extracting its path from the configuration file local conntrack_socket=$(awk '/^[ \t]*UNIX[ \t]*{/,/^[ \t]*}/ { if ($1 == "Path") { print $2 } }' $OCF_RESKEY_config) [ -S "$conntrack_socket" ] && rc=$OCF_SUCCESS if [ "$rc" -eq "$OCF_SUCCESS" ]; then # conntrackd is running # now see if it acceppts queries if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -s > /dev/null 2>&1; then rc=$OCF_ERR_GENERIC ocf_log err "conntrackd is running but not responding to queries" fi if conntrackd_is_master; then rc=$OCF_RUNNING_MASTER # Restore master setting on probes if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then conntrackd_set_master_score $master_score fi else # Restore master setting on probes if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then conntrackd_set_master_score $slave_score fi fi fi return $rc } conntrackd_start() { rc=$OCF_ERR_GENERIC # Keep trying to start the resource; # wait for the CRM to time us out if this fails while :; do conntrackd_monitor status=$? case "$status" in $OCF_SUCCESS) rc=$OCF_SUCCESS conntrackd_set_master_score $slave_score break ;; $OCF_NOT_RUNNING) ocf_log info "Starting conntrackd" $OCF_RESKEY_binary -C $OCF_RESKEY_config -d ;; $OCF_RUNNING_MASTER) ocf_log warn "conntrackd already in master mode, demoting." ha_pseudo_resource $statefile stop ;; $OCF_ERR_GENERIC) ocf_log err "conntrackd start failed" rc=$OCF_ERR_GENERIC break ;; esac done return $rc } conntrackd_stop() { rc=$OCF_ERR_GENERIC # Keep trying to bring down the resource; # wait for the CRM to time us out if this fails while :; do conntrackd_monitor status=$? case "$status" in - $OCF_SUCCESS) + $OCF_SUCCESS|$OCF_ERR_GENERIC) ocf_log info "Stopping conntrackd" $OCF_RESKEY_binary -C $OCF_RESKEY_config -k ;; $OCF_NOT_RUNNING) rc=$OCF_SUCCESS break ;; $OCF_RUNNING_MASTER) ocf_log warn "conntrackd still master" ;; esac done return $rc } conntrackd_validate_all() { check_binary "$OCF_RESKEY_binary" if ! [ -e "$OCF_RESKEY_config" ]; then ocf_log err "Config FILE $OCF_RESKEY_config does not exist" return $OCF_ERR_INSTALLED fi meta_expect master-node-max = 1 meta_expect master-max = 1 meta_expect clone-node-max = 1 meta_expect clone-max = 2 return $OCF_SUCCESS } conntrackd_promote() { rc=$OCF_SUCCESS if ! conntrackd_is_master; then # -c = Commit the external cache to the kernel # -f = Flush internal and external cache # -R = resync with the kernel table # -B = send a bulk update on the line for parm in c f R B; do if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm; then ocf_log err "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during promote." rc=$OCF_ERR_GENERIC break fi done ha_pseudo_resource $statefile start conntrackd_set_master_score $master_score fi return $rc } conntrackd_demote() { rc=$OCF_SUCCESS if conntrackd_is_master; then # -t = shorten kernel timers to remove zombies # -n = request a resync from the others for parm in t n; do if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm; then ocf_log err "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during demote." rc=$OCF_ERR_GENERIC break fi done ha_pseudo_resource $statefile stop conntrackd_set_master_score $slave_score fi return $rc } conntrackd_notify() { hostname=$(hostname) # OCF_RESKEY_CRM_meta_notify_master_uname is a whitespace separated list of master hostnames for master in $OCF_RESKEY_CRM_meta_notify_master_uname; do # if we are the master and an instance was just started on another node: # send a bulk update to allow failback if [ "$hostname" = "$master" -a "$OCF_RESKEY_CRM_meta_notify_type" = "post" -a "$OCF_RESKEY_CRM_meta_notify_operation" = "start" -a "$OCF_RESKEY_CRM_meta_notify_start_uname" != "$hostname" ]; then ocf_log info "Sending bulk update in post start to peers to allow failback" $OCF_RESKEY_binary -C $OCF_RESKEY_config -B fi done for tobepromoted in $OCF_RESKEY_CRM_meta_notify_promote_uname; do # if there is a promote action to be executed on another node: # send a bulk update to allow failback if [ "$hostname" != "$tobepromoted" -a "$OCF_RESKEY_CRM_meta_notify_type" = "pre" -a "$OCF_RESKEY_CRM_meta_notify_operation" = "promote" ]; then ocf_log info "Sending bulk update in pre promote to peers to allow failback" $OCF_RESKEY_binary -C $OCF_RESKEY_config -B fi done } conntrackd_usage() { cat < 1.0 Exportfs uses the exportfs command to add/remove nfs exports. It does NOT manage the nfs server daemon. It depends on Linux specific NFS implementation details, so is considered not portable to other platforms yet. Manages NFS exports The client specification allowing remote machines to mount the directory over NFS. Client ACL. The options to pass to exportfs for the exported directory. Export options. The directory which you wish to export using NFS. The directory to export. The fsid option to pass to exportfs. This can be a unique positive integer, a UUID, or the special string "root" which is functionally identical to numeric fsid of 0. 0 (root) identifies the export as the root of an NFSv4 pseudofilesystem -- avoid this setting unless you understand its special status. This value will override any fsid provided via the options parameter. Unique fsid within cluster. Relinquish NFS locks associated with this filesystem when the resource stops. Enabling this parameter is highly recommended unless the path exported by this ${__SCRIPT_NAME} resource is also exported by a different resource. Unlock filesystem on stop? When stopping (unexporting), wait out the NFSv4 lease time. Only after all leases have expired does the NFS kernel server relinquish all server-side handles on the exported filesystem. If this ${__SCRIPT_NAME} resource manages an export that resides on a mount point designed to fail over along with the NFS export itself, then enabling this parameter will ensure such failover is working properly. Note that when this parameter is set, your stop timeout MUST accommodate for the wait period. This parameter is safe to disable if none of your NFS clients are using NFS version 4 or later. Ride out the NFSv4 lease time on resource stop? Back up those entries from the NFS rmtab that apply to the exported directory, to the specified backup file. The filename is interpreted as relative to the exported directory. This backup is required if clients are connecting to the export via NFSv3 over TCP. Note that a configured monitor operation is required for this functionality. To disable rmtab backups, set this parameter to the special string "none". Location of the rmtab backup, relative to directory. END return $OCF_SUCCESS } backup_rmtab() { local rmtab_backup if [ ${OCF_RESKEY_rmtab_backup} != "none" ]; then rmtab_backup="${OCF_RESKEY_directory}/${OCF_RESKEY_rmtab_backup}" grep ":${OCF_RESKEY_directory}:" /var/lib/nfs/rmtab > ${rmtab_backup} fi } restore_rmtab() { local rmtab_backup if [ ${OCF_RESKEY_rmtab_backup} != "none" ]; then rmtab_backup="${OCF_RESKEY_directory}/${OCF_RESKEY_rmtab_backup}" if [ -r ${rmtab_backup} ]; then - cat ${rmtab_backup} >> /var/lib/nfs/rmtab + local tmpf=`mktemp` + sort -u ${rmtab_backup} /var/lib/nfs/rmtab > $tmpf && + install -o root -m 644 $tmpf /var/lib/nfs/rmtab + rm -f $tmpf ocf_log debug "Restored `wc -l ${rmtab_backup}` rmtab entries from ${rmtab_backup}." else ocf_log warn "rmtab backup ${rmtab_backup} not found or not readable." fi fi } exportfs_usage() { cat < ${unlockfile} ocf_log info "Unlocked NFS export ${OCF_RESKEY_directory}" else ocf_log warn "Unable to unlock NFS export ${OCF_RESKEY_directory}, ${unlockfile} not found or not writable" fi fi if ocf_is_true ${OCF_RESKEY_wait_for_leasetime_on_stop}; then local leasetimefile local sleeptime leasetimefile=/proc/fs/nfsd/nfsv4leasetime if [ -r ${leasetimefile} ]; then sleeptime=$((`cat ${leasetimefile}`+2)) ocf_log info "Sleeping ${sleeptime} seconds to accommodate for NFSv4 lease expiry" sleep ${sleeptime}s else ocf_log warn "Unable to read NFSv4 lease time from ${leasetimefile}, file not found or not readable" fi fi if [ $rc -eq 0 ]; then ocf_log info "Un-exported file system" return $OCF_SUCCESS fi ocf_log err "Failed to un-export file system" exit $OCF_ERR_GENERIC } exportfs_validate () { # Checks for required parameters if [ -z "$OCF_RESKEY_directory" ]; then ocf_log err "Missing required parameter \"directory\"" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_fsid" ]; then ocf_log err "Missing required parameter \"fsid\"" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_clientspec" ]; then ocf_log err "Missing required parameter \"clientspec\"" exit $OCF_ERR_CONFIGURED fi # Checks applicable only to non-probes if ! ocf_is_probe; then if [ ! -d $OCF_RESKEY_directory ]; then ocf_log err "$OCF_RESKEY_directory does not exist or is not a directory" exit $OCF_ERR_INSTALLED fi fi } if [ $# -ne 1 ]; then exportfs_usage exit $OCF_ERR_ARGS fi case $__OCF_ACTION in meta-data) exportfs_meta_data exit $OCF_SUCCESS ;; usage|help) exportfs_usage exit $OCF_SUCCESS ;; *) ;; esac exportfs_validate case $__OCF_ACTION in start) exportfs_start ;; stop) exportfs_stop ;; status|monitor) exportfs_monitor ;; validate-all) # nothing to do -- we're already validated ;; *) exportfs_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/iSCSILogicalUnit b/heartbeat/iSCSILogicalUnit index c64cd1b0c..25ee32e32 100755 --- a/heartbeat/iSCSILogicalUnit +++ b/heartbeat/iSCSILogicalUnit @@ -1,469 +1,506 @@ #!/bin/bash # # # iSCSILogicalUnit OCF RA. Exports and manages iSCSI Logical Units. # # (c) 2009-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults # Set a default implementation based on software installed if have_binary ietadm; then OCF_RESKEY_implementation_default="iet" elif have_binary tgtadm; then OCF_RESKEY_implementation_default="tgt" elif have_binary lio_node; then OCF_RESKEY_implementation_default="lio" fi : ${OCF_RESKEY_implementation=${OCF_RESKEY_implementation_default}} # Use a default SCSI ID and SCSI SN that is unique across the cluster, # and persistent in the event of resource migration. # SCSI IDs are limited to 24 bytes, but only 16 bytes are known to be # supported by all iSCSI implementations this RA cares about. Thus, # for a default, use the first 16 characters of # $OCF_RESOURCE_INSTANCE. OCF_RESKEY_scsi_id_default="${OCF_RESOURCE_INSTANCE:0:16}" : ${OCF_RESKEY_scsi_id=${OCF_RESKEY_scsi_id_default}} # To have a reasonably unique default SCSI SN, use the first 8 bytes # of an MD5 hash of of $OCF_RESOURCE_INSTANCE sn=`echo -n "${OCF_RESOURCE_INSTANCE}" | openssl md5 | sed -e 's/(stdin)= //'` OCF_RESKEY_scsi_sn_default=${sn:0:8} : ${OCF_RESKEY_scsi_sn=${OCF_RESKEY_scsi_sn_default}} ####################################################################### meta_data() { cat < 0.9 Manages iSCSI Logical Unit. An iSCSI Logical unit is a subdivision of an SCSI Target, exported via a daemon that speaks the iSCSI protocol. Manages iSCSI Logical Units (LUs) The iSCSI target daemon implementation. Must be one of "iet", "tgt", or "lio". If unspecified, an implementation is selected based on the availability of management utilities, with "iet" being tried first, then "tgt", then "lio". iSCSI target daemon implementation The iSCSI Qualified Name (IQN) that this Logical Unit belongs to. iSCSI target IQN The Logical Unit number (LUN) exposed to initiators. Logical Unit number (LUN) The path to the block device exposed. Some implementations allow this to be a regular file, too. Block device (or file) path The SCSI ID to be configured for this Logical Unit. The default is the resource name, truncated to 24 bytes. SCSI ID The SCSI serial number to be configured for this Logical Unit. The default is a hash of the resource name, truncated to 8 bytes. SCSI serial number The SCSI vendor ID to be configured for this Logical Unit. SCSI vendor ID The SCSI product ID to be configured for this Logical Unit. SCSI product ID Additional LU parameters. A space-separated list of "name=value" pairs which will be passed through to the iSCSI daemon's management interface. The supported parameters are implementation dependent. Neither the name nor the value may contain whitespace. List of iSCSI LU parameters + + +Allowed initiators. A space-separated list of initiators allowed to +connect to this lun. Initiators may be listed in any syntax +the target implementation allows. If this parameter is empty or +not set, access to this lun will not be allowed from any initiator, +if target is not in demo mode. + +This parameter is only necessary, when using LIO. + +List of iSCSI initiators allowed to connect +to this lun. + + + END } ####################################################################### iSCSILogicalUnit_usage() { cat < 0.9 Manages iSCSI targets. An iSCSI target is a collection of SCSI Logical Units (LUs) exported via a daemon that speaks the iSCSI protocol. iSCSI target export agent The iSCSI target daemon implementation. Must be one of "iet", "tgt", or "lio". If unspecified, an implementation is selected based on the availability of management utilities, with "iet" being tried first, then "tgt", then "lio". -Manages an iSCSI target export +Specifies the iSCSI target implementation +("iet", "tgt" or "lio"). The target iSCSI Qualified Name (IQN). Should follow the conventional "iqn.yyyy-mm.<reversed domain name>[:identifier]" syntax. iSCSI target IQN The iSCSI target ID. Required for tgt. iSCSI target ID iSCSI network portal addresses. Not supported by all implementations. If unset, the default is to create one portal that listens on ${OCF_RESKEY_portal_default}. iSCSI portal addresses Allowed initiators. A space-separated list of initiators allowed to connect to this target. Initiators may be listed in any syntax the target implementation allows. If this parameter is empty or not set, access to this target will be allowed from any initiator. List of iSCSI initiators allowed to connect to this target A username used for incoming initiator authentication. If unspecified, allowed initiators will be able to log in without authentication. This is a unique parameter, as it not allowed to re-use a single username across multiple target instances. Incoming account username A password used for incoming initiator authentication. Incoming account password Additional target parameters. A space-separated list of "name=value" pairs which will be passed through to the iSCSI daemon's management interface. The supported parameters are implementation dependent. Neither the name nor the value may contain whitespace. List of iSCSI target parameters - + END } ####################################################################### iSCSITarget_usage() { cat <> /etc/initiators.deny echo "${OCF_RESKEY_iqn} ${OCF_RESKEY_allowed_initiators// /,}" >> /etc/initiators.allow else echo "${OCF_RESKEY_iqn} ALL" >> /etc/initiators.allow fi # In iet, adding a new user and assigning it to a target # is one operation. if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run ietadm --op new --user \ --tid=${tid} \ --params=IncomingUser=${OCF_RESKEY_incoming_username},Password=${OCF_RESKEY_incoming_password} \ || exit $OCF_ERR_GENERIC fi ;; tgt) local tid tid="${OCF_RESKEY_tid}" # Create the target. ocf_run tgtadm --lld iscsi --op new --mode target \ --tid=${tid} \ --targetname ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC # Set parameters. for param in ${OCF_RESKEY_additional_parameters}; do name=${param%=*} value=${param#*=} ocf_run tgtadm --lld iscsi --op update --mode target \ --tid=${tid} \ --name=${name} --value=${value} || exit $OCF_ERR_GENERIC done # For tgt, we always have to add access per initiator; # access to targets is denied by default. If # "allowed_initiators" is unset, we must use the special # keyword ALL. for initiator in ${OCF_RESKEY_allowed_initiators=ALL}; do ocf_run tgtadm --lld iscsi --op bind --mode target \ --tid=${tid} \ --initiator-address=${initiator} || exit $OCF_ERR_GENERIC done # In tgt, we must first create a user account, then assign # it to a target using the "bind" operation. if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run tgtadm --lld iscsi --mode account --op new \ --user=${OCF_RESKEY_incoming_username} \ --password=${OCF_RESKEY_incoming_password} || exit $OCF_ERR_GENERIC ocf_run tgtadm --lld iscsi --mode account --op bind \ --tid=${tid} \ --user=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC fi ;; lio) # lio distinguishes between targets and target portal # groups (TPGs). We will always create one TPG, with the # number 1. In lio, creating a network portal # automatically creates the corresponding target if it # doesn't already exist. for portal in ${OCF_RESKEY_portals}; do ocf_run lio_node --addnp ${OCF_RESKEY_iqn} 1 \ ${portal} || exit $OCF_ERR_GENERIC done # in lio, we can set target parameters by manipulating # the appropriate configfs entries for param in ${OCF_RESKEY_additional_parameters}; do name=${param%=*} value=${param#*=} configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/param/${name}" if [ -e ${configfs_path} ]; then echo ${value} > ${configfs_path} || exit $OCF_ERR_GENERIC else ocf_log warn "Unsupported iSCSI target parameter ${name}: will be ignored." fi done # lio does per-initiator filtering by default. To disable # this, we need to switch the target to "permissive mode". if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then for initiator in ${OCF_RESKEY_allowed_initiators}; do ocf_run lio_node --addnodeacl ${OCF_RESKEY_iqn} 1 \ ${initiator} || exit $OCF_ERR_GENERIC done else ocf_run lio_node --permissive ${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC # permissive mode enables read-only access by default, # so we need to change that to RW to be in line with # the other implementations. echo 0 > "/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/attrib/demo_mode_write_protect" if [ `cat /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/attrib/demo_mode_write_protect` -ne 0 ]; then ocf_log err "Failed to disable write protection for target ${OCF_RESKEY_iqn}." exit $OCF_ERR_GENERIC fi fi # TODO: add CHAP authentication support when it gets added # back into LIO ocf_run lio_node --disableauth ${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC # Finally, we need to enable the target to allow # initiators to connect ocf_run lio_node --enabletpg=${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC ;; esac return $OCF_SUCCESS } iSCSITarget_stop() { iSCSITarget_monitor if [ $? = $OCF_SUCCESS ]; then local tid case $OCF_RESKEY_implementation in iet) # Figure out the target ID tid=`sed -ne "s/tid:\([[:digit:]]\+\) name:${OCF_RESKEY_iqn}/\1/p" < /proc/net/iet/volume` if [ -z "${tid}" ]; then ocf_log err "Failed to retrieve target ID for IQN ${OCF_RESKEY_iqn}" exit $OCF_ERR_GENERIC fi # Close existing connections. There is no other way to # do this in IET than to parse the contents of # /proc/net/iet/session. set -- $(sed -ne '/^tid:'${tid}' /,/^tid/ { /^[[:space:]]*sid:\([0-9]\+\)/ { s/^[[:space:]]*sid:\([0-9]*\).*/--sid=\1/; h; }; /^[[:space:]]*cid:\([0-9]\+\)/ { s/^[[:space:]]*cid:\([0-9]*\).*/--cid=\1/; G; p; }; }' < /proc/net/iet/session) while [[ -n $2 ]]; do # $2 $1 looks like "--sid=X --cid=Y" ocf_run ietadm --op delete \ --tid=${tid} $2 $1 shift 2 done # In iet, unassigning a user from a target and # deleting the user account is one operation. if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run ietadm --op delete --user \ --tid=${tid} \ --params=IncomingUser=${OCF_RESKEY_incoming_username} \ || exit $OCF_ERR_GENERIC fi # Loop on delete. Keep trying until we time out, if # necessary. while true; do if ietadm --op delete --tid=${tid}; then ocf_log debug "Removed target ${OCF_RESKEY_iqn}." break else ocf_log warn "Failed to remove target ${OCF_RESKEY_iqn}, retrying." sleep 1 fi done # Avoid stale /etc/initiators.{allow,deny} entries # for this target if [ -e /etc/initiators.deny ]; then ocf_run sed -e "/^${OCF_RESKEY_iqn}[[:space:]]/d" \ -i /etc/initiators.deny fi if [ -e /etc/initiators.allow ]; then ocf_run sed -e "/^${OCF_RESKEY_iqn}[[:space:]]/d" \ -i /etc/initiators.allow fi ;; tgt) tid="${OCF_RESKEY_tid}" # Close existing connections. There is no other way to # do this in tgt than to parse the output of "tgtadm --op # show". set -- $(tgtadm --lld iscsi --op show --mode target \ | sed -ne '/^Target '${tid}':/,/^Target/ { /^[[:space:]]*I_T nexus: \([0-9]\+\)/ { s/^.*: \([0-9]*\).*/--sid=\1/; h; }; /^[[:space:]]*Connection: \([0-9]\+\)/ { s/^.*: \([0-9]*\).*/--cid=\1/; G; p; }; /^[[:space:]]*LUN information:/ q; }') while [[ -n $2 ]]; do # $2 $1 looks like "--sid=X --cid=Y" ocf_run tgtadm --lld iscsi --op delete --mode connection \ --tid=${tid} $2 $1 shift 2 done # In tgt, we must first unbind the user account from # the target, then remove the account itself. if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run tgtadm --lld iscsi --mode account --op unbind \ --tid=${tid} \ --user=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC ocf_run tgtadm --lld iscsi --mode account --op delete \ --user=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC fi # Loop on delete. Keep trying until we time out, if # necessary. while true; do if tgtadm --lld iscsi --op delete --mode target --tid=${tid}; then ocf_log debug "Removed target ${OCF_RESKEY_iqn}." break else ocf_log warn "Failed to remove target ${OCF_RESKEY_iqn}, retrying." sleep 1 fi done # In tgt, we don't have to worry about our ACL # entries. They are automatically removed upon target # deletion. ;; lio) # In lio, removing a target automatically removes all # associated TPGs, network portals, and LUNs. ocf_run lio_node --deliqn ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC ;; esac fi return $OCF_SUCCESS } iSCSITarget_monitor() { case $OCF_RESKEY_implementation in iet) grep -Eq "tid:[0-9]+ name:${OCF_RESKEY_iqn}" /proc/net/iet/volume && return $OCF_SUCCESS ;; tgt) tgtadm --lld iscsi --op show --mode target \ | grep -Eq "Target [0-9]+: ${OCF_RESKEY_iqn}" && return $OCF_SUCCESS ;; lio) # if we have no configfs entry for the target, it's # definitely stopped [ -d /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn} ] || return $OCF_NOT_RUNNING # if the target is there, but its TPG is not enabled, then # we also consider it stopped [ `cat /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/enable` -eq 1 ] || return $OCF_NOT_RUNNING return $OCF_SUCCESS ;; esac return $OCF_NOT_RUNNING } iSCSITarget_validate() { # Do we have all required variables? local required_vars case $OCF_RESKEY_implementation in iet) - required_vars="implementation iqn" + required_vars="iqn" ;; tgt) - required_vars="implementation iqn tid" + required_vars="iqn tid" ;; esac for var in ${required_vars}; do param="OCF_RESKEY_${var}" if [ -z "${!param}" ]; then ocf_log error "Missing resource parameter \"$var\"!" exit $OCF_ERR_CONFIGURED fi done # Is the configured implementation supported? - case $OCF_RESKEY_implementation in - iet|tgt|lio) + case "$OCF_RESKEY_implementation" in + "iet"|"tgt"|"lio") + ;; + "") + # The user didn't specify an implementation, and we were + # unable to determine one from installed binaries (in + # other words: no binaries for any supported + # implementation could be found) + ocf_log error "Undefined iSCSI target implementation" + exit $OCF_ERR_INSTALLED ;; *) ocf_log error "Unsupported iSCSI target implementation \"$OCF_RESKEY_implementation\"!" exit $OCF_ERR_CONFIGURED + ;; esac # Do we have any configuration parameters that the current # implementation does not support? local unsupported_params local var local envar case $OCF_RESKEY_implementation in iet|tgt) # IET and tgt do not support binding a target portal to a # specific IP address. unsupported_params="portals" ;; lio) # TODO: Remove incoming_username and incoming_password # from this check when LIO 3.0 gets CHAP authentication unsupported_params="tid incoming_username incoming_password" ;; esac for var in ${unsupported_params}; do envar=OCF_RESKEY_${var} if [ -n "${!envar}" ]; then ocf_log warn "Configuration parameter \"${var}\"" \ "is not supported by the iSCSI implementation" \ "and will be ignored." fi done if ! ocf_is_probe; then # Do we have all required binaries? case $OCF_RESKEY_implementation in iet) check_binary ietadm ;; tgt) check_binary tgtadm ;; lio) check_binary tcm_node check_binary lio_node ;; esac # Is the required kernel functionality available? case $OCF_RESKEY_implementation in iet) [ -d /proc/net/iet ] if [ $? -ne 0 ]; then ocf_log err "/proc/net/iet does not exist or is not a directory -- check if required modules are loaded." exit $OCF_ERR_INSTALLED fi ;; tgt) # tgt is userland only ;; lio) # lio needs configfs to be mounted if ! grep -Eq "^.*/sys/kernel/config[[:space:]]+configfs" /proc/mounts; then ocf_log err "configfs not mounted at /sys/kernel/config -- check if required modules are loaded." exit $OCF_ERR_INSTALLED fi # check for configfs entries created by target_core_mod if [ ! -d /sys/kernel/config/target ]; then ocf_log err "/sys/kernel/config/target does not exist or is not a directory -- check if required modules are loaded." exit $OCF_ERR_INSTALLED fi ;; esac fi return $OCF_SUCCESS } case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) iSCSITarget_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test iSCSITarget_validate case $__OCF_ACTION in start) iSCSITarget_start;; stop) iSCSITarget_stop;; monitor|status) iSCSITarget_monitor;; reload) ocf_log err "Reloading..." iSCSITarget_start ;; validate-all) ;; *) iSCSITarget_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/iscsi b/heartbeat/iscsi index 3f110fe37..07e2f634e 100755 --- a/heartbeat/iscsi +++ b/heartbeat/iscsi @@ -1,429 +1,433 @@ #!/bin/sh # # iSCSI OCF resource agent # Description: manage iSCSI disks (add/remove) using open-iscsi # # Copyright Dejan Muhamedagic # (C) 2007 Novell Inc. All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # See usage() and meta_data() below for more details... # # OCF instance parameters: # OCF_RESKEY_portal: the iSCSI portal address or host name (required) # OCF_RESKEY_target: the iSCSI target (required) # OCF_RESKEY_iscsiadm: iscsiadm program path (optional) # OCF_RESKEY_discovery_type: discovery type (optional; default: sendtargets) # # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_udev_default="yes" OCF_RESKEY_iscsiadm_default="iscsiadm" OCF_RESKEY_discovery_type_default="sendtargets" : ${OCF_RESKEY_udev=${OCF_RESKEY_udev_default}} : ${OCF_RESKEY_iscsiadm=${OCF_RESKEY_iscsiadm_default}} : ${OCF_RESKEY_discovery_type=${OCF_RESKEY_discovery_type_default}} usage() { methods=`iscsi_methods` methods=`echo $methods | tr ' ' '|'` cat < 1.0 OCF Resource Agent for iSCSI. Add (start) or remove (stop) iSCSI targets. Manages a local iSCSI initiator and its connections to iSCSI targets The iSCSI portal address in the form: {ip_address|hostname}[":"port] Portal address The iSCSI target IQN. Target IQN Target discovery type. Check the open-iscsi documentation for supported discovery types. Target discovery type open-iscsi administration utility binary. iscsiadm binary If the next resource depends on the udev creating a device then we wait until it is finished. On a normally loaded host this should be done quickly, but you may be unlucky. If you are not using udev set this to "no", otherwise we will spin in a loop until a timeout occurs. udev EOF } iscsi_methods() { cat <= "2.0-872" changed discovery semantics # see http://www.mail-archive.com/open-iscsi@googlegroups.com/msg04883.html # there's a new discoverydb command which should be used instead discovery open_iscsi_discovery() { local output local severity=err local discovery_variant="discovery" local options="" local cmd local version=`$iscsiadm --version | awk '{print $3}'` ocf_version_cmp "$version" "2.0-871" if [ $? -eq 2 ]; then # newer than 2.0-871? discovery_variant="discoverydb" [ "$discovery_type" = "sendtargets" ] && options="-D" fi cmd="$iscsiadm -m $discovery_variant -p $OCF_RESKEY_portal -t $discovery_type $options" ocf_is_probe && severity=info output=`$cmd` if [ $? -ne 0 -o x = "x$output" ]; then [ x != "x$output" ] && { ocf_log $severity "$cmd FAILED" echo "$output" } return 3 fi portal=`echo "$output" | awk -v target="$OCF_RESKEY_target" ' $NF==target{ if( NF==3 ) portal=$2; # sles compat mode else portal=$1; sub(",.*","",portal); print portal; }'` case `echo "$portal" | wc -w` in 0) #target not found echo "$output" ocf_log $severity "target $OCF_RESKEY_target not found at portal $OCF_RESKEY_portal" return 1 ;; 1) #we're ok return 0 ;; *) # handle multihome hosts reporting multiple portals for p in $portal; do if [ "$OCF_RESKEY_portal" = "$p" ]; then portal="$OCF_RESKEY_portal" return 0 fi done echo "$output" ocf_log err "sorry, can't handle multihomed hosts unless you specify the portal exactly" return 2 ;; esac } open_iscsi_add() { $iscsiadm -m node -p $1 -T $2 -l } open_iscsi_remove() { $iscsiadm -m node -p $1 -T $2 -u } open_iscsi_status() { $iscsiadm -m session 2>/dev/null | grep -qs "$2$" } # # NB: this is udev specific! # wait_for_udev() { dev=/dev/disk/by-path/ip-$portal-iscsi-$OCF_RESKEY_target while :; do ls $dev* >/dev/null 2>&1 && break ocf_log warning "waiting for udev to create $dev" sleep 1 done } iscsi_status() { if $disk_status "$portal" $OCF_RESKEY_target; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi } iscsi_start() { if iscsi_status; then ocf_log info "iscsi $portal $OCF_RESKEY_target already running" return $OCF_SUCCESS else $add_disk $portal $OCF_RESKEY_target || return $OCF_ERR_GENERIC case "$udev" in [Yy]es) wait_for_udev || return $OCF_ERR_GENERIC ;; *) ;; esac if iscsi_status; then return $OCF_SUCCESS else return $OCF_ERR_GENERIC fi fi } iscsi_stop() { if iscsi_status; then $remove_disk $portal $OCF_RESKEY_target || return $OCF_ERR_GENERIC if iscsi_status; then return $OCF_ERR_GENERIC else return $OCF_SUCCESS fi else ocf_log info "iscsi $portal $OCF_RESKEY_target already stopped" return $OCF_SUCCESS fi } iscsi_monitor() { if $disk_status "$portal" $OCF_RESKEY_target; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi } # # 'main' starts here... # if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage) usage exit $OCF_SUCCESS;; methods) iscsi_methods exit $OCF_SUCCESS;; esac if [ x = "x$OCF_RESKEY_target" ]; then ocf_log err "target parameter not set" exit $OCF_ERR_CONFIGURED fi if [ x = "x$OCF_RESKEY_portal" ]; then ocf_log err "portal parameter not set" exit $OCF_ERR_CONFIGURED fi case `uname` in Linux) setup=open_iscsi_setup ;; *) ocf_log info "platform `uname` may not be supported" setup=open_iscsi_setup ;; esac LSB_STATUS_STOPPED=3 $setup -rc=$? -if [ $rc -ne 0 ]; then +setup_rc=$? +if [ $setup_rc -gt 1 ]; then ocf_log info "iscsi initiator utilities not installed or not setup" case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; - *) exit $rc;; + *) exit $OCF_ERR_INSTALLED;; esac fi if [ `id -u` != 0 ]; then ocf_log err "$0 must be run as root" exit $OCF_ERR_PERM fi discovery_type=${OCF_RESKEY_discovery_type} udev=${OCF_RESKEY_udev} $discovery # discover and setup the real portal string (address) case $? in 0) ;; 1) [ "$1" = stop ] && exit $OCF_SUCCESS [ "$1" = monitor ] && exit $OCF_NOT_RUNNING [ "$1" = status ] && exit $LSB_STATUS_STOPPED exit $OCF_ERR_GENERIC ;; 2) [ "$1" = stop ] && { iscsi_monitor || exit $OCF_SUCCESS } ocf_is_probe && { iscsi_monitor; exit } exit $OCF_ERR_GENERIC ;; 3) ocf_is_probe && exit $OCF_NOT_RUNNING + if ! is_iscsid_running; then + [ $setup_rc -eq 1 ] && + ocf_log warning "iscsid.startup probably not correctly set in /etc/iscsi/iscsid.conf" + [ "$1" = stop ] && exit $OCF_SUCCESS + exit $OCF_ERR_INSTALLED + fi exit $OCF_ERR_GENERIC ;; esac # which method was invoked? case "$1" in start) iscsi_start ;; stop) iscsi_stop ;; status) if iscsi_status then echo iscsi target $OCF_RESKEY_target running exit $OCF_SUCCESS else echo iscsi target $OCF_RESKEY_target stopped exit $OCF_NOT_RUNNING fi ;; monitor) iscsi_status ;; validate-all) # everything already validated # just exit successfully here. exit $OCF_SUCCESS;; *) iscsi_methods exit $OCF_ERR_UNIMPLEMENTED;; esac # # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0 diff --git a/heartbeat/lxc b/heartbeat/lxc index f53475763..43e505ab9 100755 --- a/heartbeat/lxc +++ b/heartbeat/lxc @@ -1,320 +1,323 @@ #!/bin/bash # Should now conform to guidelines: http://www.linux-ha.org/doc/dev-guides/ra-dev-guide.html # # LXC (Linux Containers) OCF RA. # Used to cluster enable the start, stop and monitoring of a LXC container. # # Copyright (c) 2011 AkurIT.com.au, Darren Thompson # All Rights Reserved. # # Without limiting the rights of the original copyright holders # This resource is licensed under GPL version 2 # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # OCF instance parameters # OCF_RESKEY_container # OCF_RESKEY_config # OCF_RESKEY_log # OCF_RESKEY_use_screen # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_log_default="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.log" OCF_RESKEY_use_screen_default="false" : ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} : ${OCF_RESKEY_use_screen=${OCF_RESKEY_use_screen_default}} # Set default TRANS_RES_STATE (temporary file to "flag" if resource was stated but not stopped) TRANS_RES_STATE="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.state" meta_data() { cat < 0.1 Allows LXC containers to be managed by the cluster. If the container is running "init" it will also perform an orderly shutdown. It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" I have absolutly no idea how this is done with 'upstart' or 'systemd', YMMV if your container is using one of them. Manages LXC containers The unique name for this 'Container Instance' e.g. 'test1'. Container Name Absolute path to the file holding the specific configuration for this container e.g. '/etc/lxc/test1/config'. The LXC config file. Absolute path to the container log file Container log file Provides the option of capturing the 'root console' from the container and showing it on a separate screen. To see the screen output run 'screen -r {container name}' The default value is set to 'false', change to 'true' to activate this option Use 'screen' for container 'root console' output END } LXC_usage() { cat <${CGROUP_MOUNT_POINT}/notify_on_release return 0 } LXC_start() { # put this here as it's so long it gets messy later!!! if ocf_is_true $OCF_RESKEY_use_screen; then STARTCMD="screen -dmS ${OCF_RESKEY_container} lxc-start -f ${OCF_RESKEY_config} -n ${OCF_RESKEY_container} -o ${OCF_RESKEY_log}" else STARTCMD="lxc-start -f ${OCF_RESKEY_config} -n ${OCF_RESKEY_container} -o ${OCF_RESKEY_log} -d" fi LXC_status if [ $? -eq $OCF_SUCCESS ]; then ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already running" ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC return $OCF_SUCCESS fi cgroup_mounted if [ $? -ne 0 ]; then ocf_log err "Unable to find cgroup mount" exit $OCF_ERR_GENERIC fi ocf_log info "Starting" ${OCF_RESKEY_container} ocf_run ${STARTCMD} || exit $OCF_ERR_GENERIC # Spin on status, wait for the cluster manager to time us out if # we fail while ! LXC_status; do ocf_log info "Container ${OCF_RESKEY_container} has not started, waiting" sleep 1 done ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } LXC_stop() { local shutdown_timeout local now LXC_status if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already stopped" ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS fi cgroup_mounted if [ $? -ne 0 ]; then ocf_log err "Unable to find cgroup mount" exit $OCF_ERR_GENERIC fi # If the container is running "init" and is able to perform and orderly shutdown, then it should be done. # It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. # On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" typeset -i PID=0 # This should work for traditional 'sysvinit' and 'upstart' lxc-ps -C init -opid |while read CN PID ;do [ $PID -gt 1 ] || continue [ "$CN" = "${OCF_RESKEY_container}" ] || continue ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"sysV init\" or \"upstart\"" kill -PWR $PID done # This should work for containers using 'systemd' instead of 'init' lxc-ps -C systemd -opid |while read CN PID ;do [[ $PID -gt 1 ]] || continue [[ "$CN" = "${OCF_RESKEY_container}" ]] || continue ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"systemd\"" kill -PWR $PID done # The "shutdown_timeout" we use here is the operation # timeout specified in the CIB, minus 5 seconds now=$(date +%s) shutdown_timeout=$(( $now + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) # Loop on status until we reach $shutdown_timeout while [ $now -lt $shutdown_timeout ]; do LXC_status status=$? case $status in "$OCF_NOT_RUNNING") ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS ;; "$OCF_SUCCESS") # Container is still running, keep waiting (until # shutdown_timeout expires) sleep 1 ;; *) # Something went wrong. Bail out and # resort to forced stop (destroy). break; esac now=$(date +%s) done # If the container is still running, it will be stopped now. regardless of state! ocf_run lxc-stop -n ${OCF_RESKEY_container} || exit $OCF_ERR_GENERIC ocf_log info "Container" ${OCF_RESKEY_container} "stopped" ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS } LXC_status() { - S=`lxc-info -n ${OCF_RESKEY_container}` + # run lxc-info with -s option for LXC-0.7.5 or later + local lxc_info_opt="-s" + ocf_version_cmp "`lxc-version | cut -d' ' -f 3`" 0.7.5 && lxc_info_opt="" + S=`lxc-info $lxc_info_opt -n ${OCF_RESKEY_container}` ocf_log debug "State of ${OCF_RESKEY_container}: $S" if [[ "${S##* }" = "RUNNING" ]] ; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } LXC_monitor() { LXC_status && return $OCF_SUCCESS if [ -f $TRANS_RES_STATE ]; then ocf_log err "${OCF_RESKEY_container} is not running, but state file ${TRANS_RES_STATE} exists." exit $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } LXC_validate() { # Quick check that all required attributes are set if [ -z "${OCF_RESKEY_container}" ]; then ocf_log err "LXC container name not set!" exit $OCF_ERR_CONFIGURED fi if [ -z "${OCF_RESKEY_config}" ]; then ocf_log err "LXC configuration filename name not set!" exit $OCF_ERR_CONFIGURED fi # Tests that apply only to non-probes if ! ocf_is_probe; then if ! [ -f "${OCF_RESKEY_config}" ]; then ocf_log err "LXC configuration file \"${OCF_RESKEY_config}\" missing or not found!" exit $OCF_ERR_INSTALLED fi if ocf_is_true $OCF_RESKEY_use_screen; then check_binary screen fi check_binary lxc-start check_binary lxc-stop check_binary lxc-ps check_binary lxc-info fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then LXC_usage exit $OCF_ERR_ARGS fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) LXC_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test LXC_validate case $__OCF_ACTION in start) LXC_start;; stop) LXC_stop;; status) LXC_status;; monitor) LXC_monitor;; validate-all) ;; *) LXC_usage ocf_log err "$0 was called with unsupported arguments: $*" exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/mysql b/heartbeat/mysql index 8f1ad462d..1ca9281ef 100755 --- a/heartbeat/mysql +++ b/heartbeat/mysql @@ -1,1084 +1,1084 @@ #!/bin/sh # # # MySQL # # Description: Manages a MySQL database as Linux-HA resource # # Authors: Alan Robertson: DB2 Script # Jakub Janczak: rewrite as MySQL # Andrew Beekhof: cleanup and import # Sebastian Reitenbach: add OpenBSD defaults, more cleanup # Narayan Newton: add Gentoo/Debian defaults # Marian Marinov, Florian Haas: add replication capability # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # (c) 2002-2005 International Business Machines, Inc. # 2005-2010 Linux-HA contributors # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 mysql # # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_binary # OCF_RESKEY_client_binary # OCF_RESKEY_config # OCF_RESKEY_datadir # OCF_RESKEY_user # OCF_RESKEY_group # OCF_RESKEY_test_table # OCF_RESKEY_test_user # OCF_RESKEY_test_passwd # OCF_RESKEY_enable_creation # OCF_RESKEY_additional_parameters # OCF_RESKEY_log # OCF_RESKEY_pid # OCF_RESKEY_socket ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Fill in some defaults if no values are specified HOSTOS=`uname` if [ "X${HOSTOS}" = "XOpenBSD" ];then OCF_RESKEY_binary_default="/usr/local/bin/mysqld_safe" OCF_RESKEY_config_default="/etc/my.cnf" OCF_RESKEY_datadir_default="/var/mysql" OCF_RESKEY_user_default="_mysql" OCF_RESKEY_group_default="_mysql" OCF_RESKEY_log_default="/var/log/mysqld.log" OCF_RESKEY_pid_default="/var/mysql/mysqld.pid" OCF_RESKEY_socket_default="/var/run/mysql/mysql.sock" else OCF_RESKEY_binary_default="/usr/bin/safe_mysqld" OCF_RESKEY_config_default="/etc/my.cnf" OCF_RESKEY_datadir_default="/var/lib/mysql" OCF_RESKEY_user_default="mysql" OCF_RESKEY_group_default="mysql" OCF_RESKEY_log_default="/var/log/mysqld.log" OCF_RESKEY_pid_default="/var/run/mysql/mysqld.pid" OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock" fi OCF_RESKEY_client_binary_default="mysql" OCF_RESKEY_test_user_default="root" OCF_RESKEY_test_table_default="mysql.user" OCF_RESKEY_test_passwd_default="" OCF_RESKEY_enable_creation_default=0 OCF_RESKEY_additional_parameters_default="" OCF_RESKEY_replication_port_default="3306" OCF_RESKEY_max_slave_lag_default="3600" OCF_RESKEY_evict_outdated_slaves_default="false" : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}` : ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_datadir=${OCF_RESKEY_datadir_default}} : ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} : ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} : ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_socket=${OCF_RESKEY_socket_default}} : ${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}} : ${OCF_RESKEY_test_table=${OCF_RESKEY_test_table_default}} : ${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}} : ${OCF_RESKEY_enable_creation=${OCF_RESKEY_enable_creation_default}} : ${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}} : ${OCF_RESKEY_replication_user=${OCF_RESKEY_replication_user_default}} : ${OCF_RESKEY_replication_passwd=${OCF_RESKEY_replication_passwd_default}} : ${OCF_RESKEY_replication_port=${OCF_RESKEY_replication_port_default}} : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}} : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}} ####################################################################### usage() { cat < 1.0 Resource script for MySQL. May manage a standalone MySQL database, a clone set with externally managed replication, or a complete master/slave replication setup. Manages a MySQL database instance Location of the MySQL server binary MySQL server binary Location of the MySQL client binary MySQL client binary Configuration file MySQL config Directory containing databases MySQL datadir User running MySQL daemon MySQL user Group running MySQL daemon (for logfile and directory permissions) MySQL group The logfile to be used for mysqld. MySQL log file The pidfile to be used for mysqld. MySQL pid file The socket to be used for mysqld. MySQL socket Table to be tested in monitor statement (in database.table notation) MySQL test table MySQL test user MySQL test user MySQL test user password MySQL test user password If the MySQL database does not exist, it will be created Create the database if it does not exist Additional parameters which are passed to the mysqld on startup. (e.g. --skip-external-locking or --skip-grant-tables) Additional parameters to pass to mysqld MySQL replication user. This user is used for starting and stopping MySQL replication, for setting and resetting the master host, and for setting and unsetting read-only mode. Because of that, this user must have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, and PROCESS privileges on all nodes within the cluster. MySQL replication user MySQL replication password. Used for replication client and slave. MySQL replication user password The port on which the Master MySQL instance is listening. MySQL replication port The maximum number of seconds a replication slave is allowed to lag behind its master. Do not set this to zero. What the cluster manager does in case a slave exceeds this maximum lag is determined by the evict_outdated_slaves parameter. Maximum time (seconds) a MySQL slave is allowed to lag behind a master If set to true, any slave which is more than max_slave_lag seconds behind the master has its MySQL instance shut down. If this parameter is set to false in a primitive or clone resource, it is simply ignored. If set to false in a master/slave resource, then exceeding the maximum slave lag will merely push down the master preference so the lagging slave is never promoted to the new master. Determines whether to shut down badly lagging slaves END } ####################################################################### # Convenience variables MYSQL=$OCF_RESKEY_client_binary MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10" MYSQL_OPTIONS_REPL="--user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd" CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot " HOSTNAME=`uname -n` CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME -l forever" INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'` ####################################################################### # Convenience functions set_read_only() { # Sets or unsets read-only mode. Accepts one boolean as its # optional argument. If invoked without any arguments, defaults to # enabling read only mode. Should only be set in master/slave # setups. # Returns $OCF_SUCCESS if the operation succeeds, or # $OCF_ERR_GENERIC if it fails. local ro_val if ocf_is_true $1; then ro_val="on" else ro_val="off" fi local mysql_options mysql_options="$MYSQL_OPTIONS_LOCAL" if [ -n $OCF_RESKEY_replication_user ]; then mysql_options="$mysql_options $MYSQL_OPTIONS_REPL" fi ocf_run $MYSQL $mysql_options \ -e "SET GLOBAL read_only=${ro_val}" } get_read_only() { # Check if read-only is set local mysql_options local read_only_state mysql_options="$MYSQL_OPTIONS_LOCAL" if [ -n $OCF_RESKEY_replication_user ]; then mysql_options="$mysql_options $MYSQL_OPTIONS_REPL" fi read_only_state=`$MYSQL $mysql_options \ -e "SHOW VARIABLES" | grep read_only | awk '{print $2}'` if [ "$read_only_state" = "ON" ]; then return 0 else return 1 fi } is_slave() { # Determine whether the machine is currently running as a MySQL # slave, as determined per SHOW SLAVE STATUS. Returns 1 if SHOW # SLAVE STATUS creates an empty result set, 0 otherwise. local rc local tmpfile local mysql_options # Check whether this machine should be slave if ! ocf_is_ms || ! get_read_only; then return 1; fi tmpfile=`mktemp ${HA_RSCTMP}/is_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX` mysql_options="$MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL" $MYSQL $mysql_options \ -e 'SHOW SLAVE STATUS\G' > $tmpfile # "SHOW SLAVE STATUS" returns an empty set if instance is not a # replication slave if [ -s $tmpfile ]; then rm -f $tmpfile return 0 fi rm -f $tmpfile return 1 } parse_slave_info() { # Extracts field $1 from result of "SHOW SLAVE STATUS\G" from file $2 sed -ne "s/^.* $1: \(.*\)$/\1/p" < $2 } get_slave_info() { # Warning: this sets $tmpfile and LEAVE this file! You must delete it after use! local mysql_options tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX` mysql_options="$MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL" $MYSQL $mysql_options \ -e 'SHOW SLAVE STATUS\G' > $tmpfile if [ -s $tmpfile ]; then master_host=`parse_slave_info Master_Host $tmpfile` master_user=`parse_slave_info Master_User $tmpfile` master_port=`parse_slave_info Master_Port $tmpfile` master_log_file=`parse_slave_info Master_Log_File $tmpfile` master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile` slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile` slave_io=`parse_slave_info Slave_IO_Running $tmpfile` last_errno=`parse_slave_info Last_Errno $tmpfile` secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile` ocf_log debug "MySQL instance running as a replication slave" else # Instance produced an empty "SHOW SLAVE STATUS" output -- # instance is not a slave ocf_log err "check_slave invoked on an instance that is not a replication slave." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } check_slave() { # Checks slave status local rc get_slave_info rc=$? if [ $rc -eq 0 ]; then if [ $last_errno -ne 0 ]; then # Whoa. Replication ran into an error. This slave has # diverged from its master. Make sure this resource # doesn't restart in place. ocf_log err "MySQL instance configured for replication, but replication has failed." ocf_log err "See $tmpfile for details" exit $OCF_ERR_INSTALLED fi if [ "$slave_io" != 'Yes' ]; then # Not necessarily a bad thing. The master may have # temporarily shut down, and the slave may just be # reconnecting. A warning can't hurt, though. ocf_log warn "MySQL Slave IO threads currently not running." fi if [ "$slave_sql" != 'Yes' ]; then # We don't have a replication SQL thread running. Not a # good thing. Try to recoved by restarting the resource in # place. ocf_log err "MySQL Slave SQL threads currently not running." ocf_log err "See $tmpfile for details" exit $OCF_ERR_GENERIC fi if ocf_is_true $OCF_RESKEY_evict_outdated_slaves; then # We're supposed to bail out if we lag too far # behind. Let's check our lag. if [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then ocf_log err "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)." ocf_log err "See $tmpfile for details" exit $OCF_ERR_INSTALLED fi elif ocf_is_ms; then # Even if we're not set to evict lagging slaves, we can # still use the seconds behind master value to set our # master preference. local master_pref master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind})) if [ $master_pref -lt 0 ]; then # Sanitize a below-zero preference to just zero master_pref=0 fi $CRM_MASTER -v $master_pref fi ocf_log debug "MySQL instance running as a replication slave" rm -f $tmpfile else # Instance produced an empty "SHOW SLAVE STATUS" output -- # instance is not a slave rm -f $tmpfile ocf_log err "check_slave invoked on an instance that is not a replication slave." exit $OCF_ERR_GENERIC fi } set_master() { local new_master_host master_log_file master_log_pos local master_params new_master_host=$1 # Keep replication position get_slave_info if [ "$master_log_file" -a "$new_master_host" = "$master_host" ]; then # master_params=", MASTER_LOG_FILE='$master_log_file', \ # MASTER_LOG_POS=$master_log_pos" ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos" rm -f $tmpfile return else master_log_file=`$CRM_ATTR -n $new_master_host-log-file-${INSTANCE_ATTR_NAME} -q -G` master_log_pos=`$CRM_ATTR -n $new_master_host-log-pos-${INSTANCE_ATTR_NAME} -q -G` if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then master_params=", MASTER_LOG_FILE='$master_log_file', \ MASTER_LOG_POS=$master_log_pos" ocf_log info "Restored master pos for $new_master_host : $master_log_file:$master_log_pos" fi fi # Informs the MySQL server of the master to replicate # from. Accepts one mandatory argument which must contain the host # name of the new master host. The master must either be unchanged # from the laste master the slave replicated from, or freshly # reset with RESET MASTER. ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \ -e "CHANGE MASTER TO MASTER_HOST='$new_master_host', \ MASTER_USER='$OCF_RESKEY_replication_user', \ MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params" rm -f $tmpfile } unset_master(){ # Instructs the MySQL server to stop replicating from a master # host. # If we're currently not configured to be replicating from any # host, then there's nothing to do. But we do log a warning as # no-one but the CRM should be touching the MySQL master/slave # configuration. if ! is_slave; then ocf_log warn "Attempted to unset the replication master on an instance that is not configured as a replication slave" return $OCF_SUCCESS fi local mysql_options mysql_options="$MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL" local tmpfile tmpfile=`mktemp ${HA_RSCTMP}/unset_master.${OCF_RESOURCE_INSTANCE}.XXXXXX` # First, stop the slave I/O thread and wait for relay log # processing to complete ocf_run $MYSQL $mysql_options \ -e "STOP SLAVE IO_THREAD" if [ $? -gt 0 ]; then ocf_log err "Error stopping slave IO thread" exit $OCF_ERR_GENERIC fi while true; do $MYSQL $mysql_options \ -e 'SHOW PROCESSLIST\G' > $tmpfile - if grep 'Has read all relay log' $tmpfile >/dev/null; then + if grep '[Hh]as read all relay log' $tmpfile >/dev/null; then ocf_log info "MySQL slave has finished processing relay log" break fi if ! grep -q 'system user' $tmpfile; then ocf_log info "Slave not runnig - not waiting to finish" break fi ocf_log info "Waiting for MySQL slave to finish processing relay log" sleep 1 done rm -f $tmpfile # Now, stop all slave activity and unset the master host ocf_run $MYSQL $mysql_options \ -e "STOP SLAVE" if [ $? -gt 0 ]; then ocf_log err "Error stopping rest slave threads" exit $OCF_ERR_GENERIC fi #Save current state get_slave_info $CRM_ATTR -n $master_host-log-file-${INSTANCE_ATTR_NAME} -v $master_log_file $CRM_ATTR -n $master_host-log-pos-${INSTANCE_ATTR_NAME} -v $master_log_pos rm -f $tmpfile ocf_run $MYSQL $mysql_options \ -e "CHANGE MASTER TO MASTER_HOST=''" if [ $? -gt 0 ]; then ocf_log err "Failed to set master" exit $OCF_ERR_GENERIC fi } # Start replication as slave. Master hostname as parameter start_slave() { local master_host master_host="$1" # Remove state attributes - it will be invalid after START SLAVE $CRM_ATTR -n $master_host-log-file-${INSTANCE_ATTR_NAME} -D $CRM_ATTR -n $master_host-log-pos-${INSTANCE_ATTR_NAME} -D ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \ -e "START SLAVE" } ####################################################################### # Functions invoked by resource manager actions mysql_validate() { check_binary $OCF_RESKEY_binary check_binary $OCF_RESKEY_client_binary if [ ! -f $OCF_RESKEY_config ]; then ocf_log err "Config $OCF_RESKEY_config doesn't exist"; return $OCF_ERR_INSTALLED; fi if [ ! -d $OCF_RESKEY_datadir ]; then ocf_log err "Datadir $OCF_RESKEY_datadir doesn't exist"; return $OCF_ERR_INSTALLED; fi getent passwd $OCF_RESKEY_user >/dev/null 2>&1 if [ ! $? -eq 0 ]; then ocf_log err "User $OCF_RESKEY_user doesn't exit"; return $OCF_ERR_INSTALLED; fi getent group $OCF_RESKEY_group >/dev/null 2>&1 if [ ! $? -eq 0 ]; then ocf_log err "Group $OCF_RESKEY_group doesn't exist"; return $OCF_ERR_INSTALLED; fi true } mysql_status() { # Set the log level of the error message (default:err) local loglevel loglevel=${1:-err} if [ ! -e $OCF_RESKEY_pid ]; then ocf_log $loglevel "MySQL is not running" return $OCF_NOT_RUNNING; fi pid=`cat $OCF_RESKEY_pid`; if [ -d /proc -a -d /proc/1 ]; then [ "u$pid" != "u" -a -d /proc/$pid ] else kill -s 0 $pid >/dev/null 2>&1 fi if [ $? -eq 0 ]; then return $OCF_SUCCESS; else ocf_log $loglevel "MySQL not running: removing old PID file" rm -f $OCF_RESKEY_pid return $OCF_NOT_RUNNING; fi } mysql_monitor() { local rc local status_loglevel="err" # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi mysql_status $status_loglevel rc=$? # If status returned an error, return that immediately if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then # Check if this instance is configured as a slave, and if so # check slave status if is_slave; then check_slave fi local mysql_options mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd" # Check for test table ocf_run -q $MYSQL $mysql_options \ -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table" rc=$? if [ $rc -ne 0 ]; then ocf_log err "Failed to select from $OCF_RESKEY_test_table"; return $OCF_ERR_GENERIC; fi fi if ocf_is_ms && ! get_read_only; then ocf_log debug "MySQL monitor succeeded (master)"; return $OCF_RUNNING_MASTER else ocf_log debug "MySQL monitor succeeded"; return $OCF_SUCCESS fi } mysql_start() { mysql_status info if [ $? = $OCF_SUCCESS ]; then ocf_log info "MySQL already running" return $OCF_SUCCESS fi touch $OCF_RESKEY_log chown $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_log chmod 0640 $OCF_RESKEY_log [ -x /sbin/restorecon ] && /sbin/restorecon $OCF_RESKEY_log if ocf_is_true "$OCF_RESKEY_enable_creation" && [ ! -d $OCF_RESKEY_datadir/mysql ] ; then ocf_log info "Initializing MySQL database: " $MYSQL_BINDIR/mysql_install_db --datadir=$OCF_RESKEY_datadir rc=$? if [ $rc -ne 0 ] ; then ocf_log err "Initialization failed: $rc"; exit $OCF_ERR_GENERIC fi chown -R $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_datadir fi pid_dir=`dirname $OCF_RESKEY_pid` if [ ! -d $pid_dir ] ; then ocf_log info "Creating PID dir: $pid_dir" mkdir -p $pid_dir chown $OCF_RESKEY_user:$OCF_RESKEY_group $pid_dir fi socket_dir=`dirname $OCF_RESKEY_socket` if [ ! -d $socket_dir ] ; then ocf_log info "Creating socket dir: $socket_dir" mkdir -p $socket_dir chown $OCF_RESKEY_user:$OCF_RESKEY_group $socket_dir fi # Regardless of whether we just created the directory or it # already existed, check whether it is writable by the configured # user for dir in $pid_dir $socket_dir; do if ! su -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then ocf_log err "Directory $dir is not writable by $OCF_RESKEY_user" exit $OCF_ERR_PERM; fi done # Uncomment to perform permission clensing # - not convinced this should be enabled by default # #chmod 0755 $OCF_RESKEY_datadir #chown -R $OCF_RESKEY_user $OCF_RESKEY_datadir #chgrp -R $OCF_RESKEY_group $OCF_RESKEY_datadir mysql_extra_params= if ocf_is_ms; then mysql_extra_params="--skip-slave-start" fi ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \ --pid-file=$OCF_RESKEY_pid \ --socket=$OCF_RESKEY_socket \ --datadir=$OCF_RESKEY_datadir \ --user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \ $mysql_extra_params >/dev/null 2>&1 & rc=$? if [ $rc != 0 ]; then ocf_log err "MySQL start command failed: $rc" return $rc fi # Spin waiting for the server to come up. # Let the CRM/LRM time us out if required. start_wait=1 while [ $start_wait = 1 ]; do mysql_status info rc=$? if [ $rc = $OCF_SUCCESS ]; then start_wait=0 elif [ $rc != $OCF_NOT_RUNNING ]; then ocf_log err "MySQL start failed: $rc" return $rc fi sleep 2 done if ocf_is_ms; then # We're configured as a stateful resource. We must start as # slave by default. At this point we don't know if the CRM has # already promoted a master. So, we simply start in read only # mode. set_read_only on # Now, let's see whether there is a master. We might be a new # node that is just joining the cluster, and the CRM may have # promoted a master before. master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "` - if [ "$master_host" -a "$master_host" != `uname -n` ]; then + if [ "$master_host" -a "$master_host" != ${HOSTNAME} ]; then ocf_log info "Changing MySQL configuration to replicate from $master_host." set_master $master_host ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \ -e 'START SLAVE' if [ $? -ne 0 ]; then ocf_log err "Failed to start slave" return $OCF_ERR_GENERIC fi else ocf_log info "No MySQL master present, clearing replication state" unset_master fi master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname` - if [ "$master_host" -a "$master_host" != `uname -n` ]; then + if [ "$master_host" -a "$master_host" != ${HOSTNAME} ]; then ocf_log info "Changing MySQL configuration to replicate from $master_host." set_master $master_host start_slave $master_host if [ $? -ne 0 ]; then ocf_log err "Failed to start slave" return $OCF_ERR_GENERIC fi else ocf_log info "No MySQL master present - clearing replication state" unset_master fi # We also need to set a master preference, otherwise Pacemaker # won't ever promote us in the absence of any explicit # preference set by the administrator. We choose a low # greater-than-zero preference. $CRM_MASTER -v 1 fi # Initial monitor action if [ -n "$OCF_RESKEY_test_table" -a -n "$OCF_RESKEY_test_user" -a -n "$OCF_RESKEY_test_passwd" ]; then OCF_CHECK_LEVEL=10 fi mysql_monitor rc=$? if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then ocf_log err "Failed initial monitor action" return $rc fi ocf_log info "MySQL started" return $OCF_SUCCESS } mysql_stop() { if ocf_is_ms; then # clear preference for becoming master $CRM_MASTER -D fi mysql_status info rc=$? if [ $rc = $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi pid=`cat $OCF_RESKEY_pid 2> /dev/null ` /bin/kill $pid > /dev/null rc=$? if [ $rc != 0 ]; then ocf_log err "MySQL couldn't be stopped" return $OCF_ERR_GENERIC fi # stop waiting shutdown_timeout=15 if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) fi count=0 while [ $count -lt $shutdown_timeout ]; do mysql_status info rc=$? if [ $rc = $OCF_NOT_RUNNING ]; then break fi count=`expr $count + 1` sleep 1 ocf_log info "MySQL still hasn't stopped yet. Waiting..." done mysql_status info if [ $? != $OCF_NOT_RUNNING ]; then ocf_log warn "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..." /bin/kill -KILL $pid > /dev/null fi ocf_log info "MySQL stopped"; rm -f /var/lock/subsys/mysqld rm -f $OCF_RESKEY_socket return $OCF_SUCCESS } mysql_promote() { if ( ! mysql_status ); then return $OCF_NOT_RUNNING fi ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \ -e "STOP SLAVE" set_read_only off || return $OCF_ERR_GENERIC # Existing master gets a higher-than-default master preference, so # the cluster manager does not shuffle the master role around # unnecessarily $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1)) return $OCF_SUCCESS } mysql_demote() { if ! mysql_status; then return $OCF_NOT_RUNNING fi set_read_only on if [ $? -ne 0 ]; then ocf_log err "Failed to set read-only"; return $OCF_ERR_GENERIC; fi # Return master preference to default, so the cluster manager gets # a chance to select a new master $CRM_MASTER -v 1 } mysql_notify() { # If not configured as a Stateful resource, we make no sense of # notifications. if ! ocf_is_ms; then ocf_log info "This agent makes no use of notifications unless running in master/slave mode." return $OCF_SUCCESS fi local type_op type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" ocf_log debug "Received $type_op notification." case "$type_op" in 'pre-promote') # A new master is about to being promoted. It's not in # read-write mode yet (that only occurs when it actually # executes the promote action), so we can now safely # connect to it and wait for it to start replicating. local master_host local master_status master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname` if ( ! mysql_status ); then return $OCF_NOT_RUNNING fi if [ -z "$master_host" ]; then ocf_log err "Unable to determine master host!" return $OCF_ERR_GENERIC fi - if [ $master_host = `uname -n` ]; then + if [ $master_host = ${HOSTNAME} ]; then ocf_log info "This will be new master" else ocf_log info "Changing MySQL configuration to replicate from $master_host" set_master $master_host if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC else return $OCF_SUCCESS fi fi ;; 'post-promote') # The master has completed its promotion. Now is a good # time to check whether our replication slave is working # correctly. master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname` - if [ "$master_host" = `uname -n` ]; then + if [ "$master_host" = ${HOSTNAME} ]; then ocf_log info "Ignoring post-promote notification for my own promotion." return $OCF_SUCCESS fi start_slave $master_host if [ $? -ne 0 ]; then ocf_log err "Failed to start slave" return $OCF_ERR_GENERIC fi ;; 'post-demote') demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname` - if [ $demote_host = `uname -n` ]; then + if [ $demote_host = ${HOSTNAME} ]; then ocf_log info "Ignoring post-demote notification for my own demotion." return $OCF_SUCCESS fi ocf_log info "post-demote notification for $demote_host." # The former master has just been gracefully demoted. unset_master ;; *) return $OCF_SUCCESS ;; esac } ####################################################################### case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) usage exit $OCF_SUCCESS;; esac mysql_validate rc=$? LSB_STATUS_STOPPED=3 if [ $rc -ne 0 ]; then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $rc;; esac fi # What kind of method was invoked? case "$1" in start) mysql_start;; stop) mysql_stop;; status) mysql_status;; monitor) mysql_monitor;; promote) mysql_promote;; demote) mysql_demote;; notify) mysql_notify;; validate-all) exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/named b/heartbeat/named index dbcaa54ff..bc19f4a67 100755 --- a/heartbeat/named +++ b/heartbeat/named @@ -1,454 +1,454 @@ #!/bin/sh # # Description: Manages a named (Bind) server as an OCF High-Availability # resource # # Authors: Serge Dubrouski (sergeyfd@gmail.com) # # Copyright: 2011 Serge Dubrouski # # License: GNU General Public License (GPL) # ############################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs #Defaults OCF_RESKEY_named_default="/usr/sbin/named" OCF_RESKEY_rndc_default="/usr/sbin/rndc" OCF_RESKEY_host_default="/usr/bin/host" OCF_RESKEY_named_user_default=named OCF_RESKEY_named_config_default="" OCF_RESKEY_named_pidfile_default="/var/run/named/named.pid" OCF_RESKEY_named_rootdir_default="" OCF_RESKEY_named_options_default="" OCF_RESKEY_named_keytab_file_default="" OCF_RESKEY_monitor_request_default="localhost" OCF_RESKEY_monitor_response_default="127.0.0.1" OCF_RESKEY_monitor_ip_default="127.0.0.1" : ${OCF_RESKEY_named=${OCF_RESKEY_named_default}} : ${OCF_RESKEY_rndc=${OCF_RESKEY_rndc_default}} : ${OCF_RESKEY_host=${OCF_RESKEY_host_default}} : ${OCF_RESKEY_named_user=${OCF_RESKEY_named_user_default}} : ${OCF_RESKEY_named_config=${OCF_RESKEY_named_config_default}} : ${OCF_RESKEY_named_pidfile=${OCF_RESKEY_named_pidfile_default}} : ${OCF_RESKEY_named_rootdir=${OCF_RESKEY_named_rootdir_default}} : ${OCF_RESKEY_named_options=${OCF_RESKEY_named_options_default}} : ${OCF_RESKEY_named_keytab_file=${OCF_RESKEY_named_keytab_file_default}} : ${OCF_RESKEY_monitor_request=${OCF_RESKEY_monitor_request_default}} : ${OCF_RESKEY_monitor_response=${OCF_RESKEY_monitor_response_default}} : ${OCF_RESKEY_monitor_ip=${OCF_RESKEY_monitor_ip_default}} usage() { cat < 1.0 Resource script for named (Bind) server. It manages named as an HA resource. Manages a named server Path to the named command. named Path to the rndc command. rndc Path to the host command. host User that should own named process. named_user Configuration file for named. named_config PIDFILE file for named. named_pidfile Directory that named should use for chroot if any. named_rootdir Options for named process if any. named_options named service keytab file (for GSS-TSIG). named_keytab_file Request that shall be sent to named for monitoring. Usually an A record in DNS. monitor_request Expected response from named server. monitor_response IP Address where named listens. monitor_ip EOF } # # methods: What methods/operations do we support? # named_methods() { cat </dev/null 2>&1 if [ ! $? -eq 0 ]; then ocf_log err "User $OCF_RESKEY_named_user doesn't exist"; return $OCF_ERR_INSTALLED; fi if [ -z "$OCF_RESKEY_monitor_request" -o \ -z "$OCF_RESKEY_monitor_response" -o \ -z "$OCF_RESKEY_monitor_ip" ]; then ocf_log err "None of monitor_request, monitor_response, and monitor_ip can be empty" return $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } # # named_getpid. Get pid of named process with a given parameters. # named_getpid () { local pattern="$OCF_RESKEY_named" if [ -n "$OCF_RESKEY_named_rootdir" ]; then pattern="$pattern.*-t $OCF_RESKEY_named_rootdir" fi if [ -n "$OCF_RESKEY_named_config" ]; then pattern="$pattern.*-c $OCF_RESKEY_named_config" fi pid=`ps -e -o pid,command,args | grep "$pattern" | grep -v grep | awk '{print $1}'` echo $pid } # # named_status. Simple check of the status of named process by pidfile. # named_status () { ocf_pidfile_status ${OCF_RESKEY_named_pidfile} >/dev/null 2>&1 } # # named_monitor. Send a request to named and check response. # named_monitor() { local output if ! named_status then ocf_log info "named is down" return $OCF_NOT_RUNNING fi output=`$OCF_RESKEY_host $OCF_RESKEY_monitor_request $OCF_RESKEY_monitor_ip` if [ $? -ne 0 ] || ! echo $output | grep -q '.* has .*address '"$OCF_RESKEY_monitor_response" then ocf_log err "named didn't answer properly for $OCF_RESKEY_monitor_request." ocf_log err "Expected: $OCF_RESKEY_monitor_response." ocf_log err "Got: $output" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # Reload # named_reload() { $OCF_RESKEY_rndc reload >/dev/null || return $OCF_ERR_GENERIC return $OCF_SUCCESS } # # Start # named_start() { local root_dir_opt local pid root_dir_opt="" named_status && return $OCF_SUCCESS # Remove pidfile if exists rm -f ${OCF_RESKEY_named_pidfile} if [ -n "${OCF_RESKEY_named_rootdir}" -a "x${OCF_RESKEY_named_rootdir}" != "x/" ] then root_dir_opt="-t ${OCF_RESKEY_named_rootdir}" [ -s /etc/localtime ] && cp -fp /etc/localtime ${OCF_RESKEY_named_rootdir}/etc/localtime fi if [ -n "$OCF_RESKEY_named_config" ]; then OCF_RESKEY_named_options="-c $OCF_RESKEY_named_config $OCF_RESKEY_named_options" fi if ! ${OCF_RESKEY_named} -u ${OCF_RESKEY_named_user} $root_dir_opt ${OCF_RESKEY_named_options} then ocf_log err "named failed to start." return $OCF_ERR_GENERIC fi pid=`named_getpid` if [ -n "$pid" ]; then if [ ! -e ${OCF_RESKEY_named_pidfile} ]; then echo $pid > ${OCF_RESKEY_named_pidfile} fi else ocf_log err "named failed to start. Probably error in configuration." return $OCF_ERR_GENERIC fi while : do named_monitor && break sleep 1 ocf_log debug "named hasn't started yet." done ocf_log info "named has started." return $OCF_SUCCESS } # # Stop # named_stop () { local timeout local timewait named_status || return $OCF_SUCCESS $OCF_RESKEY_rndc stop >/dev/null & if [ $? -ne 0 ]; then ocf_log info "rndc stop failed. Killing named." kill `cat ${OCF_RESKEY_named_pidfile}` fi if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timewait=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timewait=20 fi sleep 1; timeout=0 # Sleep here for 1 sec to let rndc finish. while named_status ; do if [ $timeout -ge $timewait ]; then break else sleep 1 timeout=`expr $timeout + 1` ocf_log debug "named appears to hung, waiting ..." fi done #If still up if named_status 2>&1; then ocf_log err "named is still up! Killing" kill -9 `cat ${OCF_RESKEY_named_pidfile}` fi rm -f ${OCF_RESKEY_named_pidfile} return $OCF_SUCCESS } # Main part if [ $# -ne 1 ]; then usage exit $OCF_ERR_GENERIC fi case "$1" in methods) named_methods exit $?;; meta-data) named_meta_data exit $OCF_SUCCESS;; esac named_validate_all rc=$? [ "$1" = "validate-all" ] && exit $rc if [ $rc -ne 0 ] then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $OCF_NOT_RUNNING;; *) exit $rc;; esac fi -if [ "$EUID" != "0" ]; then +if [ `id -u` -ne 0 ]; then ocf_log err "$0 must be run as root" exit $OCF_ERR_GENERIC fi case "$1" in status) if named_status then ocf_log info "named is up" exit $OCF_SUCCESS else ocf_log info "named is down" exit $OCF_NOT_RUNNING fi;; monitor) named_monitor exit $?;; start) named_start exit $?;; stop) named_stop exit $?;; reload) named_reload exit $?;; *) exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver index 10200386b..296d6c878 100755 --- a/heartbeat/nfsserver +++ b/heartbeat/nfsserver @@ -1,262 +1,269 @@ #!/bin/sh # nfsserver # # Description: Manages nfs server as OCF resource # by hxinwei@gmail.com # License: GNU General Public License v2 (GPLv2) and later if [ -n "$OCF_DEBUG_LIBRARY" ]; then . $OCF_DEBUG_LIBRARY else : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs fi DEFAULT_INIT_SCRIPT="/etc/init.d/nfsserver" DEFAULT_NOTIFY_CMD="/sbin/sm-notify" nfsserver_meta_data() { cat < 1.0 Nfsserver helps to manage the Linux nfs server as a failover-able resource in Linux-HA. It depends on Linux specific NFS implementation details, so is considered not portable to other platforms yet. Manages an NFS server The default init script shipped with the Linux distro. The nfsserver resource agent offloads the start/stop/monitor work to the init script because the procedure to start/stop/monitor nfsserver varies on different Linux distro. Init script for nfsserver The tool to send out NSM reboot notification. Failover of nfsserver can be considered as rebooting to different machines. The nfsserver resource agent use this command to notify all clients about the happening of failover. The tool to send out notification. The nfsserver resource agent will save nfs related information in this specific directory. And this directory must be able to fail-over before nfsserver itself. Directory to store nfs server related information. -The floating IP address used to access the nfs service +Comma separated list of floating IP addresses used to access the nfs service -IP address. +IP addresses. END return $OCF_SUCCESS } nfsserver_usage() { cat < $fn 2>&1 rc=$? ocf_log debug `cat $fn` rm -f $fn #Adapte LSB status code to OCF return code if [ $rc -eq 0 ]; then return $OCF_SUCCESS elif [ $rc -eq 3 ]; then return $OCF_NOT_RUNNING else return $OCF_ERR_GENERIC fi } prepare_directory () { [ -d "$fp" ] || mkdir -p $fp [ -d "$fp/rpc_pipefs" ] || mkdir -p $fp/rpc_pipefs [ -d "$fp/sm" ] || mkdir -p $fp/sm [ -d "$fp/sm.ha" ] || mkdir -p $fp/sm.ha [ -d "$fp/sm.bak" ] || mkdir -p $fp/sm.bak [ -d "$fp/v4recovery" ] || mkdir -p $fp/v4recovery } is_bound () { mount | grep -q "$1 on $2 type none (.*bind)" return $? } bind_tree () { if is_bound $fp /var/lib/nfs; then ocf_log debug "$fp is already bound to /var/lib/nfs" return 0 fi mount --bind $fp /var/lib/nfs } unbind_tree () { if `mount | grep -q "rpc_pipefs on /var/lib/nfs/rpc_pipefs"`; then umount /var/lib/nfs/rpc_pipefs fi if is_bound $fp /var/lib/nfs; then umount /var/lib/nfs fi } nfsserver_start () { prepare_directory bind_tree - rm -f /var/lib/nfs/sm.ha/* > /dev/null 2>&1 - cp -f /var/lib/nfs/sm/* /var/lib/nfs/sm.ha > /dev/null 2>&1 + rm -rf /var/lib/nfs/sm.ha/* > /dev/null 2>&1 + cp -rf /var/lib/nfs/sm /var/lib/nfs/sm.bak /var/lib/nfs/state /var/lib/nfs/sm.ha > /dev/null 2>&1 ocf_log info "Starting NFS server ..." fn=`mktemp` ${OCF_RESKEY_nfs_init_script} start > $fn 2>&1 rc=$? ocf_log debug `cat $fn` rm -f $fn if [ $rc -ne 0 ]; then ocf_log err "Failed to start NFS server" return $rc fi #Notify the nfs server has been moved or rebooted #The init script do that already, but with the hostname, which may be ignored by client #we have to do it again with the nfs_ip - local opts="-v" + local opts="-f -v" echo $OCF_RESKEY_nfs_notify_cmd | grep -qws rpc.statd && opts="" - ${OCF_RESKEY_nfs_notify_cmd} $opts $OCF_RESKEY_nfs_ip -P /var/lib/nfs/sm.ha + rm -rf /var/lib/nfs/sm.ha.save > /dev/null 2>&1 + cp -rf /var/lib/nfs/sm.ha /var/lib/nfs/sm.ha.save > /dev/null 2>&1 + for ip in `echo ${OCF_RESKEY_nfs_ip} | sed 's/,/ /g'`; do + ${OCF_RESKEY_nfs_notify_cmd} $opts $ip -P /var/lib/nfs/sm.ha + rm -rf /var/lib/nfs/sm.ha > /dev/null 2>&1 + cp -rf /var/lib/nfs/sm.ha.save /var/lib/nfs/sm.ha > /dev/null 2>&1 + done + ocf_log info "NFS server started" return $OCF_SUCCESS } nfsserver_stop () { ocf_log info "Stopping NFS server ..." fn=`mktemp` ${OCF_RESKEY_nfs_init_script} stop > $fn 2>&1 rc=$? ocf_log debug `cat $fn` rm -f $fn if [ $rc -eq 0 ]; then unbind_tree ocf_log info "NFS server stopped" return $OCF_SUCCESS fi ocf_log err "Failed to stop NFS server" return $rc } nfsserver_validate () { check_binary ${OCF_RESKEY_nfs_init_script} check_binary ${OCF_RESKEY_nfs_notify_cmd} - if [ -z ${OCF_RESKEY_nfs_ip} ]; then + if [ x = x"${OCF_RESKEY_nfs_ip}" ]; then ocf_log err "nfs_ip not set" exit $OCF_ERR_CONFIGURED fi if [ x = "x$OCF_RESKEY_nfs_shared_infodir" ]; then ocf_log err "nfs_shared_infodir not set" exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then ocf_log err "THIS RA DO NOT SUPPORT CLONE MODE!" exit $OCF_ERR_CONFIGURED fi nfsserver_validate case $__OCF_ACTION in start) nfsserver_start ;; stop) nfsserver_stop ;; monitor) nfsserver_monitor ;; validate-all) nfsserver_validate ;; *) nfsserver_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in index 811c536a1..e1cd33fdf 100644 --- a/heartbeat/ocf-shellfuncs.in +++ b/heartbeat/ocf-shellfuncs.in @@ -1,591 +1,616 @@ # # # Common helper functions for the OCF Resource Agents supplied by # heartbeat. # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Build version: $Format:%H$ # TODO: Some of this should probably split out into a generic OCF # library for shell scripts, but for the time being, we'll just use it # ourselves... # # TODO wish-list: # - Generic function for evaluating version numbers # - Generic function(s) to extract stuff from our own meta-data # - Logging function which automatically adds resource identifier etc # prefixes # TODO: Move more common functionality for OCF RAs here. # # This was common throughout all legacy Heartbeat agents unset LC_ALL; export LC_ALL unset LANGUAGE; export LANGUAGE __SCRIPT_NAME=`basename $0` if [ -z "$OCF_ROOT" ]; then : ${OCF_ROOT=@OCF_ROOT_DIR@} fi if [ "$OCF_FUNCTIONS_DIR" = ${OCF_ROOT}/resource.d/heartbeat ]; then # old unset OCF_FUNCTIONS_DIR fi : ${OCF_FUNCTIONS_DIR:=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-binaries . ${OCF_FUNCTIONS_DIR}/ocf-returncodes . ${OCF_FUNCTIONS_DIR}/ocf-directories # Define OCF_RESKEY_CRM_meta_interval in case it isn't already set, # to make sure that ocf_is_probe() always works : ${OCF_RESKEY_CRM_meta_interval=0} ocf_is_root() { if [ X`id -u` = X0 ]; then true else false fi } ocf_maybe_random() { local rnd="$RANDOM" # Something sane-ish in case a shell doesn't support $RANDOM [ -n "$rnd" ] || rnd=$$ echo $rnd } # Portability comments: # o The following rely on Bourne "sh" pattern-matching, which is usually # that for filename generation (note: not regexp). # o The "*) true ;;" clause is probably unnecessary, but is included # here for completeness. # o The negation in the pattern uses "!". This seems to be common # across many OSes (whereas the alternative "^" fails on some). # o If an OS is encountered where this negation fails, then a possible # alternative would be to replace the function contents by (e.g.): # [ -z "`echo $1 | tr -d '[0-9]'`" ] # ocf_is_decimal() { case "$1" in ""|*[!0-9]*) # empty, or at least one non-decimal false ;; *) true ;; esac } ocf_is_true() { case "$1" in yes|true|1|YES|TRUE|ja|on|ON) true ;; *) false ;; esac } ocf_is_hex() { case "$1" in ""|*[!0-9a-fA-F]*) # empty, or at least one non-hex false ;; *) true ;; esac } ocf_is_octal() { case "$1" in ""|*[!0-7]*) # empty, or at least one non-octal false ;; *) true ;; esac } __ocf_set_defaults() { __OCF_ACTION="$1" # Return to sanity for the agents... unset LANG LC_ALL=C export LC_ALL # TODO: Review whether we really should source this. Or rewrite # to match some emerging helper function syntax...? This imports # things which no OCF RA should be using... # Strip the OCF_RESKEY_ prefix from this particular parameter if [ -z "$OCF_RESKEY_OCF_CHECK_LEVEL" ]; then : ${OCF_CHECK_LEVEL:=0} else : ${OCF_CHECK_LEVEL:=$OCF_RESKEY_OCF_CHECK_LEVEL} fi if [ ! -d "$OCF_ROOT" ]; then ha_log "ERROR: OCF_ROOT points to non-directory $OCF_ROOT." exit $OCF_ERR_GENERIC fi if [ -z "$OCF_RESOURCE_TYPE" ]; then : ${OCF_RESOURCE_TYPE:=$__SCRIPT_NAME} fi if [ -z "$OCF_RA_VERSION_MAJOR" ]; then : We are being invoked as an init script. : Fill in some things with reasonable values. : ${OCF_RESOURCE_INSTANCE:="default"} return 0 fi if [ "x$__OCF_ACTION" = "xmeta-data" ]; then OCF_RESOURCE_INSTANCE="undef" fi if [ -z "$OCF_RESOURCE_INSTANCE" ]; then ha_log "ERROR: Need to tell us our resource instance name." exit $OCF_ERR_ARGS fi } hadate() { date "+${HA_DATEFMT}" } set_logtag() { if [ -z "$HA_LOGTAG" ]; then if [ -n "$OCF_RESOURCE_INSTANCE" ]; then HA_LOGTAG="$__SCRIPT_NAME($OCF_RESOURCE_INSTANCE)[$$]" else HA_LOGTAG="$__SCRIPT_NAME[$$]" fi fi } ha_log() { local loglevel [ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY="" # if we're connected to a tty, then output to stderr if tty >/dev/null; then if [ "x$HA_debug" = "x0" -a "x$loglevel" = xdebug ] ; then return 0 fi if [ "$HA_LOGTAG" ]; then echo "$HA_LOGTAG: $*" else echo "$*" fi >&2 return 0 fi set_logtag if [ "x${HA_LOGD}" = "xyes" ] ; then ha_logger -t "${HA_LOGTAG}" "$@" if [ "$?" -eq "0" ] ; then return 0 fi fi if [ -n "$HA_LOGFACILITY" ] then : logging through syslog # loglevel is unknown, use 'notice' for now loglevel=notice case "${*}" in *ERROR*) loglevel=err;; *WARN*) loglevel=warning;; *INFO*|info) loglevel=info;; esac logger -t "$HA_LOGTAG" -p ${HA_LOGFACILITY}.${loglevel} "${*}" fi if [ -n "$HA_LOGFILE" ] then : appending to $HA_LOGFILE echo "$HA_LOGTAG: "`hadate`"${*}" >> $HA_LOGFILE fi if [ -z "$HA_LOGFACILITY" -a -z "$HA_LOGFILE" ] then : appending to stderr echo `hadate`"${*}" >&2 fi if [ -n "$HA_DEBUGLOG" ] then : appending to $HA_DEBUGLOG echo "$HA_LOGTAG: "`hadate`"${*}" >> $HA_DEBUGLOG fi } ha_debug() { if [ "x${HA_debug}" = "x0" ] ; then return 0 fi if tty >/dev/null; then if [ "$HA_LOGTAG" ]; then echo "$HA_LOGTAG: $*" else echo "$*" fi >&2 return 0 fi set_logtag if [ "x${HA_LOGD}" = "xyes" ] ; then ha_logger -t "${HA_LOGTAG}" -D "ha-debug" "$@" if [ "$?" -eq "0" ] ; then return 0 fi fi [ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY="" if [ -n "$HA_LOGFACILITY" ] then : logging through syslog logger -t "$HA_LOGTAG" -p "${HA_LOGFACILITY}.debug" "${*}" fi if [ -n "$HA_DEBUGLOG" ] then : appending to $HA_DEBUGLOG echo "$HA_LOGTAG: "`hadate`"${*}" >> $HA_DEBUGLOG fi if [ -z "$HA_LOGFACILITY" -a -z "$HA_DEBUGLOG" ] then : appending to stderr echo "$HA_LOGTAG: `hadate`${*}: ${HA_LOGFACILITY}" >&2 fi } ha_parameter() { local VALUE VALUE=`sed -e 's%[ ][ ]*% %' -e 's%^ %%' -e 's%#.*%%' $HA_CF | grep -i "^$1 " | sed 's%[^ ]* %%'` if [ "X$VALUE" = X ] then case $1 in keepalive) VALUE=2;; deadtime) ka=`ha_parameter keepalive` VALUE=`expr $ka '*' 2 '+' 1`;; esac fi echo $VALUE } ocf_log() { # TODO: Revisit and implement internally. if [ $# -lt 2 ] then ocf_log err "Not enough arguments [$#] to ocf_log." fi __OCF_PRIO="$1" shift __OCF_MSG="$*" case "${__OCF_PRIO}" in crit) __OCF_PRIO="CRIT";; err) __OCF_PRIO="ERROR";; warn) __OCF_PRIO="WARNING";; info) __OCF_PRIO="INFO";; debug)__OCF_PRIO="DEBUG";; *) __OCF_PRIO=`echo ${__OCF_PRIO}| tr '[a-z]' '[A-Z]'`;; esac if [ "${__OCF_PRIO}" = "DEBUG" ]; then ha_debug "${__OCF_PRIO}: $__OCF_MSG" else ha_log "${__OCF_PRIO}: $__OCF_MSG" fi } # # ocf_deprecated: Log a deprecation warning # Usage: ocf_deprecated [param-name] # Arguments: param-name optional, name of a boolean resource # parameter that can be used to suppress # the warning (default # "ignore_deprecation") ocf_deprecated() { local param param=${1:-ignore_deprecation} # don't use ${!param} here, it's a bashism if ! ocf_is_true $(eval echo \$OCF_RESKEY_$param); then ocf_log warn "This resource agent is deprecated" \ "and may be removed in a future release." \ "See the man page for details." \ "To suppress this warning, set the \"${param}\"" \ "resource parameter to true." fi } # # Ocf_run: Run a script, and log its output. # Usage: ocf_run [-q] [-info|-warn|-err] # -q: don't log the output of the command if it succeeds # -info|-warn|-err: log the output of the command at given # severity if it fails (defaults to err) # ocf_run() { local rc local output local verbose=1 local loglevel=err local var for var in 1 2 do case "$1" in "-q") verbose="" shift 1;; "-info"|"-warn"|"-err") loglevel=`echo $1 | sed -e s/-//g` shift 1;; *) ;; esac done output=`"$@" 2>&1` rc=$? output=`echo $output` if [ $rc -eq 0 ]; then if [ "$verbose" -a ! -z "$output" ]; then ocf_log info "$output" fi return $OCF_SUCCESS else if [ ! -z "$output" ]; then ocf_log $loglevel "$output" else ocf_log $loglevel "command failed: $*" fi return $rc fi } ocf_pidfile_status() { local pid pidfile=$1 if [ ! -e $pidfile ]; then # Not exists return 2 fi pid=`cat $pidfile` kill -0 $pid 2>&1 > /dev/null if [ $? = 0 ]; then return 0 fi # Stale return 1 } ocf_take_lock() { local lockfile=$1 local rnd=$(ocf_maybe_random) sleep 0.$rnd while ocf_pidfile_status $lockfile do ocf_log info "Sleeping until $lockfile is released..." sleep 0.$rnd done echo $$ > $lockfile } ocf_release_lock_on_exit() { local lockfile=$1 trap "rm -f $lockfile" EXIT } # returns true if the CRM is currently running a probe. A probe is # defined as a monitor operation with a monitoring interval of zero. ocf_is_probe() { [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" = 0 ] } # returns true if the resource is configured as a clone. This is # defined as a resource where the clone-max meta attribute is present, # and set to greater than zero. ocf_is_clone() { [ ! -z "${OCF_RESKEY_CRM_meta_clone_max}" ] && [ "${OCF_RESKEY_CRM_meta_clone_max}" -gt 0 ] } # returns true if the resource is configured as a multistate # (master/slave) resource. This is defined as a resource where the # master-max meta attribute is present, and set to greater than zero. ocf_is_ms() { [ ! -z "${OCF_RESKEY_CRM_meta_master_max}" ] && [ "${OCF_RESKEY_CRM_meta_master_max}" -gt 0 ] } # version check functions # allow . and - to delimit version numbers # max version number is 999 # letters and such are effectively ignored # ocf_is_ver() { echo $1 | grep '^[0-9][0-9.-]*[0-9]$' >/dev/null 2>&1 } ocf_ver2num() { echo $1 | awk -F'[.-]' ' {for(i=1; i<=NF; i++) s=s*1000+$i; print s} ' } +ocf_ver_level(){ + echo $1 | awk -F'[.-]' '{print NF}' +} +ocf_ver_complete_level(){ + local ver="$1" + local level="$2" + local i=0 + while [ $i -lt $level ]; do + ver=${ver}.0 + i=`expr $i + 1` + done + echo $ver +} # usage: ocf_version_cmp VER1 VER2 # version strings can contain digits, dots, and dashes # must start and end with a digit # returns: # 0: VER1 smaller (older) than VER2 # 1: versions equal # 2: VER1 greater (newer) than VER2 # 3: bad format ocf_version_cmp() { ocf_is_ver "$1" || return 3 ocf_is_ver "$2" || return 3 - local v1=`ocf_ver2num $1` - local v2=`ocf_ver2num $2` + local v1=$1 + local v2=$2 + local v1_level=`ocf_ver_level $v1` + local v2_level=`ocf_ver_level $v2` + local level_diff + if [ $v1_level -lt $v2_level ]; then + level_diff=`expr $v2_level - $v1_level` + v1=`ocf_ver_complete_level $v1 $level_diff` + elif [ $v1_level -gt $v2_level ]; then + level_diff=`expr $v1_level - $v2_level` + v2=`ocf_ver_complete_level $v2 $level_diff` + fi + v1=`ocf_ver2num $v1` + v2=`ocf_ver2num $v2` if [ $v1 -eq $v2 ]; then return 1 elif [ $v1 -lt $v2 ]; then return 0 else return 2 # -1 would look funny in shell ;-) fi } # usage: dirname DIR dirname() { local a local b [ $# = 1 ] || return 1 a="$1" while [ 1 ]; do b="${a%/}" [ "$a" = "$b" ] && break a="$b" done b=${a%/*} [ -z "$b" -o "$a" = "$b" ] && b="." echo "$b" return 0 } # # pseudo_resource status tracking function... # # This allows pseudo resources to give correct status information. As we add # resource monitoring, and better resource tracking in general, this will # become essential. # # These scripts work because ${HA_RSCTMP} is cleaned out every time # heartbeat is started. # # We create "resource-string" tracking files under ${HA_RSCTMP} in a # very simple way: # # Existence of "${HA_RSCTMP}/resource-string" means that we consider # the resource named by "resource-string" to be running. # # Note that "resource-string" needs to be unique. Using the resource type # plus the resource instance arguments to make up the resource string # is probably sufficient... # # usage: ha_pseudo_resource resource-string op [tracking_file] # where op is {start|stop|monitor|status|restart|reload|print} # print is a special op which just prints the tracking file location # user can override our choice of the tracking file location by # specifying it as the third arg # Note that all operations are silent... # ha_pseudo_resource() { local ha_resource_tracking_file="${3:-${HA_RSCTMP}/$1}" case $2 in start|restart|reload) touch "$ha_resource_tracking_file";; stop) rm -f "$ha_resource_tracking_file";; status|monitor) if [ -f "$ha_resource_tracking_file" ] then return 0 else case $2 in status) return 3;; *) return 7;; esac fi;; print) echo "$ha_resource_tracking_file";; *) return 3;; esac } # usage: rmtempdir TMPDIR rmtempdir() { [ $# = 1 ] || return 1 if [ -e "$1" ]; then rmdir "$1" || return 1 fi return 0 } # usage: maketempfile [-d] maketempfile() { if [ $# = 1 -a "$1" = "-d" ]; then mktemp -d return -0 elif [ $# != 0 ]; then return 1 fi mktemp return 0 } # usage: rmtempfile TMPFILE rmtempfile () { [ $# = 1 ] || return 1 if [ -e "$1" ]; then rm "$1" || return 1 fi return 0 } __ocf_set_defaults "$@" diff --git a/heartbeat/oracle b/heartbeat/oracle index 2bc07994b..c08c3f3fb 100755 --- a/heartbeat/oracle +++ b/heartbeat/oracle @@ -1,765 +1,779 @@ #!/bin/sh # # # oracle # # Description: Manages an Oracle Database as a High-Availability # resource # # # Author: Dejan Muhamedagic # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2006 International Business Machines, Inc. # # This code inspired by the DB2 resource script # written by Alan Robertson # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 oracle::RK1::/oracle/10.2::orark1 # # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_sid # OCF_RESKEY_home (optional; else read it from /etc/oratab) # OCF_RESKEY_user (optional; figure it out by checking file ownership) # OCF_RESKEY_ipcrm (optional; defaults to "instance") # OCF_RESKEY_clear_backupmode (optional; default to "false") # OCF_RESKEY_shutdown_method (optional; default to "checkpoint/abort") # # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { methods=`oracle_methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 {$methods} $0 manages an Oracle Database instance as an HA resource. The 'start' operation starts the database. The 'stop' operation stops the database. The 'status' operation reports whether the database is running The 'monitor' operation reports whether the database seems to be working The 'dumpinstipc' operation prints IPC resources used by the instance The 'cleanup' operation tries to clean up after Oracle was brutally stopped The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports ! } meta_data() { cat < 1.0 Resource script for oracle. Manages an Oracle Database instance as an HA resource. Manages an Oracle Database instance The Oracle SID (aka ORACLE_SID). sid The Oracle home directory (aka ORACLE_HOME). If not specified, then the SID along with its home should be listed in /etc/oratab. home The Oracle owner (aka ORACLE_OWNER). If not specified, then it is set to the owner of file \$ORACLE_HOME/dbs/*\${ORACLE_SID}.ora. If this does not work for you, just set it explicitely. user Sometimes IPC objects (shared memory segments and semaphores) belonging to an Oracle instance might be left behind which prevents the instance from starting. It is not easy to figure out which shared segments belong to which instance, in particular when more instances are running as same user. What we use here is the "oradebug" feature and its "ipc" trace utility. It is not optimal to parse the debugging information, but I am not aware of any other way to find out about the IPC information. In case the format or wording of the trace report changes, parsing might fail. There are some precautions, however, to prevent stepping on other peoples toes. There is also a dumpinstipc option which will make us print the IPC objects which belong to the instance. Use it to see if we parse the trace file correctly. Three settings are possible: - none: don't mess with IPC and hope for the best (beware: you'll probably be out of luck, sooner or later) - instance: try to figure out the IPC stuff which belongs to the instance and remove only those (default; should be safe) - orauser: remove all IPC belonging to the user which runs the instance (don't use this if you run more than one instance as same user or if other apps running as this user use IPC) The default setting "instance" should be safe to use, but in that case we cannot guarantee that the instance will start. In case IPC objects were already left around, because, for instance, someone mercilessly killing Oracle processes, there is no way any more to find out which IPC objects should be removed. In that case, human intervention is necessary, and probably _all_ instances running as same user will have to be stopped. The third setting, "orauser", guarantees IPC objects removal, but it does that based only on IPC objects ownership, so you should use that only if every instance runs as separate user. Please report any problems. Suggestions/fixes welcome. ipcrm The clear of the backup mode of ORACLE. clear_backupmode How to stop Oracle is a matter of taste it seems. The default method ("checkpoint/abort") is: alter system checkpoint; shutdown abort; This should be the fastest safe way bring the instance down. If you find "shutdown abort" distasteful, set this attribute to "immediate" in which case we will shutdown immediate; If you still think that there's even better way to shutdown an Oracle instance we are willing to listen. shutdown_method END } # # methods: What methods/operations do we support? # oracle_methods() { cat <<-! start stop status monitor dumpinstipc showdbstat cleanup validate-all methods meta-data usage ! } # Gather up information about our oracle instance ora_info() { ORACLE_SID=$1 ORACLE_HOME=$2 ORACLE_OWNER=$3 # get ORACLE_HOME from /etc/oratab if not set [ x = "x$ORACLE_HOME" ] && ORACLE_HOME=`awk -F: "/^$ORACLE_SID:/"'{print $2}' /etc/oratab` # there a better way to find out ORACLE_OWNER? [ x = "x$ORACLE_OWNER" ] && ORACLE_OWNER=`ls -ld $ORACLE_HOME/. 2>/dev/null | awk 'NR==1{print $3}'` sqlplus=$ORACLE_HOME/bin/sqlplus lsnrctl=$ORACLE_HOME/bin/lsnrctl tnsping=$ORACLE_HOME/bin/tnsping } testoraenv() { # Let's make sure a few important things are set... if [ x = "x$ORACLE_HOME" ]; then ocf_log info "ORACLE_HOME not set" return $OCF_ERR_CONFIGURED fi if [ x = "x$ORACLE_OWNER" ]; then ocf_log info "ORACLE_OWNER not set" return $OCF_ERR_CONFIGURED fi # and some important things are there if [ ! -x "$sqlplus" ]; then ocf_log info "$sqlplus does not exist" return $OCF_ERR_INSTALLED fi if [ ! -x "$lsnrctl" ]; then ocf_log err "$lsnrctl does not exist" return $OCF_ERR_INSTALLED fi if [ ! -x "$tnsping" ]; then ocf_log err "$tnsping does not exist" return $OCF_ERR_INSTALLED fi return 0 } setoraenv() { LD_LIBRARY_PATH=$ORACLE_HOME/lib LIBPATH=$ORACLE_HOME/lib TNS_ADMIN=$ORACLE_HOME/network/admin PATH=$ORACLE_HOME/bin:$ORACLE_HOME/dbs:$PATH export ORACLE_SID ORACLE_HOME ORACLE_OWNER TNS_ADMIN export LD_LIBRARY_PATH LIBPATH } dumporaenv() { cat<" } # # IPC stuff: not overly complex, but quite involved :-/ # +IPC_LOCKFILE=${HA_RSCTMP}/oracle_ipc.lock # Part 1: Oracle dumpinstipc() { local dumpdest=`dbasql_one getdumpdest` if [ "x$dumpdest" = x -o ! -d "$dumpdest" ]; then ocf_log warn "$dumpdest is not a directory" return 1 fi - local fcount=`ls -rt $dumpdest | wc -l` + ocf_take_lock $IPC_LOCKFILE + local fcount=`ls -rt $dumpdest | grep '\.trc$' | wc -l` output=`dbasql getipc` - local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1` - local fcount2=`ls -rt $dumpdest | wc -l` + local lastf=`ls -rt $dumpdest | grep '\.trc$' | tail -1` + local fcount2=`ls -rt $dumpdest | grep '\.trc$' | wc -l` + rm -f $IPC_LOCKFILE if [ $((fcount+1)) -eq $fcount2 ]; then echo $dumpdest/$lastf else ocf_log warn "'dbasql getipc' failed: $output" return 1 fi } parseipc() { local inf=$1 - test -f "$1" || return 1 + if [ ! -f "$1" ]; then + ocf_log warn "$1: no such ipc trace file" + return 1 + fi awk ' $3 == "Shmid" {n=1;next} n { if( $3~/^[0-9]+$/ ) print $3; n=0 } ' $inf | sort -u | sed 's/^/m:/' awk ' /Semaphore List/ {insems=1;next} insems { for( i=1; i<=NF; i++ ) if( $i~/^[0-9]+$/ ) print $i; } /system semaphore information/ {exit} ' $inf | sort -u | sed 's/^/s:/' } # Part 2: OS (ipcs,ipcrm) filteroraipc() { # this portable? grep -w $ORACLE_OWNER | awk '{print $2}' } ipcdesc() { local what=$1 case $what in m) echo "shared memory segment";; s) echo "semaphore";; q) echo "message queue";; esac } rmipc() { local what=$1 id=$2 ipcs -$what | filteroraipc | grep -w $id >/dev/null 2>&1 || return ocf_log info "Removing `ipcdesc $what` $id." ipcrm -$what $id } ipcrm_orauser() { local what id for what in m s q; do for id in `ipcs -$what | filteroraipc`; do rmipc $what $id done done } ipcrm_instance() { local ipcobj for ipcobj; do rmipc `echo $ipcobj | sed 's/:/ /'` done } # # oracle_status: is the Oracle instance running? # # quick check to see if the instance is up is_oracle_up() { ps -ef | grep -wiqs "[^ ]*[_]pmon_${ORACLE_SID}" } # instance in OPEN state? instance_live() { local status=`dbasql_one dbstat` if [ "$status" = OPEN ]; then return 0 else ocf_log info "$ORACLE_SID instance state is not OPEN (dbstat output: $status)" return 1 fi } ora_cleanup() { #rm -fr /tmp/.oracle #??? - rm -f `ls $ORACLE_HOME/dbs/lk* | grep -i $ORACLE_SID` + rm -f `ls $ORACLE_HOME/dbs/lk* | grep -i "$ORACLE_SID\$"` #return case $IPCRM in none) ;; instance) ipcrm_instance $* ;; orauser) ipcrm_orauser $* ;; *) ocf_log warn "bad usage: ipcrm set to $IPCRM" ;; esac } # # oracle_start: Start the Oracle instance # # NOTE: We handle instance in the MOUNTED and STARTED states # efficiently # We *do not* handle instance in the restricted or read-only # mode, i.e. it appears as running, but its availability is # "not for general use" # oracle_start() { local status output if is_oracle_up; then status="`dbasql_one dbstat`" case "$status" in "OPEN") : nothing to be done, we can leave right now ocf_log info "Oracle instance $ORACLE_SID already running" return $OCF_SUCCESS ;; "STARTED") output=`dbasql dbmount` ;; "MOUNTED") : we proceed if mounted ;; *) # status unknown output=`dbasql dbstop dbstart_mount` ;; esac else output="`dbasql dbstart_mount`" # try to cleanup in case of # ORA-01081: cannot start already-running ORACLE - shut it down first if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then ocf_log info "ORA-01081 error found, trying to cleanup oracle (dbstart_mount output: $output)" ora_cleanup output=`dbasql dbstart_mount` fi fi # oracle instance should be mounted. status="`dbasql_one dbstat`" case "$status" in "MOUNTED") ;; *) : error!! ocf_log err "oracle $ORACLE_SID can not be mounted (status: $status)" return $OCF_ERR_GENERIC ;; esac # It is examined whether mode is "online backup mode", # and if it is true, makes clear the mode. # Afterwards, DB is opened. if is_clear_backupmode_set && is_instance_in_backup_mode; then clear_backup_mode fi output=`dbasql dbopen` if ! is_oracle_up; then ocf_log err "oracle process not running: $output" return $OCF_ERR_GENERIC elif ! instance_live; then ocf_log err "oracle instance $ORACLE_SID not started: $output" return $OCF_ERR_GENERIC else : cool, we are up and running ocf_log info "Oracle instance $ORACLE_SID started: $output" return $OCF_SUCCESS fi } # # oracle_stop: Stop the Oracle instance # oracle_stop() { local status output ipc="" if is_oracle_up; then [ "$IPCRM" = "instance" ] && ipc=$(parseipc `dumpinstipc`) output=`dbasql dbstop` else ocf_log info "Oracle instance $ORACLE_SID already stopped" return $OCF_SUCCESS fi ora_kill # kill any processes left if is_oracle_up; then ocf_log err "Oracle instance $ORACLE_SID not stopped: $output" return $OCF_ERR_GENERIC else ocf_log info "Oracle instance $ORACLE_SID stopped: $output" sleep 1 # give em a chance to cleanup ocf_log info "Cleaning up for $ORACLE_SID" ora_cleanup "$ipc" return $OCF_SUCCESS fi } # kill the database processes (if any left) # give them 30 secs to exit cleanly (6 times 5) killprocs() { local sig=$1 shift 1 # Record stderr kill -s $sig $* >/dev/null } ora_kill() { oraprocs=`eval $procs | awk '{print $1}'` if [ -z "$oraprocs" ]; then ocf_log debug "All oracle processes are already stopped." return fi killprocs TERM $oraprocs for i in 1 2 3 4 5; do if [ -z "`eval $procs | awk '{print $1}'`" ]; then ocf_log debug "All oracle processes are killed." return fi sleep 5 done killprocs KILL `eval $procs | awk '{print $1}'` } # # oracle_monitor: Can the Oracle instance do anything useful? # oracle_monitor() { if ! is_oracle_up; then ocf_log info "oracle process not running" return $OCF_NOT_RUNNING fi if ! instance_live; then ocf_log info "oracle instance $ORACLE_SID is down" return $OCF_NOT_RUNNING fi #ocf_log info "Oracle instance $ORACLE_SID is alive" return $OCF_SUCCESS } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage) usage exit $OCF_SUCCESS;; methods) oracle_methods exit $?;; *);; esac clear_backupmode=${OCF_RESKEY_clear_backupmode:-"false"} shutdown_method=${OCF_RESKEY_shutdown_method:-"checkpoint/abort"} case "${shutdown_method}" in "immediate") ;; "checkpoint/abort") ;; *) ocf_log err "unsupported shutdown_method, please read meta-data" esac if [ x = "x$OCF_RESKEY_sid" ] then ocf_log err "Please set OCF_RESKEY_sid to the Oracle SID !" exit $OCF_ERR_ARGS fi ora_info "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user" LSB_STATUS_STOPPED=3 testoraenv rc=$? if [ $rc -ne 0 ]; then ocf_log info "Oracle environment for SID $ORACLE_SID does not exist" case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) ocf_log err "Oracle environment for SID $ORACLE_SID broken" exit $rc ;; esac fi setoraenv # important: set the environment for the SID envtmpf=`mktemp` dumporaenv > $envtmpf chmod 644 $envtmpf -trap "rm -f $envtmpf" EXIT -procs="ps -e -o pid,args | grep -i \"[o]ra[a-zA-Z0-9_]*$ORACLE_SID\"" +TMPFILES="$envtmpf" +rmtmpfiles() { + rm -f $TMPFILES + # our lock file? + if [ "$$" = "`cat $IPC_LOCKFILE 2>/dev/null`" ]; then + rm -f $IPC_LOCKFILE + fi +} +trap "rmtmpfiles" EXIT +procs="ps -e -o pid,args | grep -i \"[o]ra[a-zA-Z0-9_]*$ORACLE_SID\$\"" US=`id -u -n` if [ $US != root -a $US != $ORACLE_OWNER ] then ocf_log err "$0 must be run as root or $ORACLE_OWNER" exit $OCF_ERR_PERM fi if [ x = "x$OCF_RESKEY_ipcrm" ] then IPCRM="instance" else IPCRM="$OCF_RESKEY_ipcrm" fi # What kind of method was invoked? case "$1" in start) oracle_start exit $?;; stop) oracle_stop exit $?;; status) if is_oracle_up then echo Oracle instance $ORACLE_SID is running exit $OCF_SUCCESS else echo Oracle instance $ORACLE_SID is stopped exit $OCF_NOT_RUNNING fi ;; dumpinstipc) is_oracle_up && parseipc `dumpinstipc` exit $?;; showdbstat) showdbstat exit $?;; cleanup) if [ "$IPCRM" = "instance" ]; then ora_cleanup $(parseipc `dumpinstipc`) else ora_cleanup fi exit $?;; monitor) oracle_monitor exit $?;; validate-all) # OCF_RESKEY_sid was already checked by testoraenv(), # just exit successfully here. exit $OCF_SUCCESS;; *) oracle_methods exit $OCF_ERR_UNIMPLEMENTED;; esac # # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0 diff --git a/heartbeat/oralsnr b/heartbeat/oralsnr index 1e4fe8d60..bc85abe01 100755 --- a/heartbeat/oralsnr +++ b/heartbeat/oralsnr @@ -1,429 +1,429 @@ #!/bin/sh # # # oralsnr # # Description: Manages an Oracle Listener as a High-Availability # resource # # # Author: Dejan Muhamedagic # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2006 International Business Machines, Inc. # # This code inspired by the DB2 resource script # written by Alan Robertson # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 oralsnr::sid::home::user::listener # # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_sid (mandatory; for the monitor op) # OCF_RESKEY_home (optional; else read it from /etc/oratab) # OCF_RESKEY_user (optional; user to run the listener) # OCF_RESKEY_listener (optional; defaults to LISTENER) # # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### SH=/bin/sh usage() { methods=`oralsnr_methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 ($methods) $0 manages an Oracle Database instance as an HA resource. The 'start' operation starts the database. The 'stop' operation stops the database. The 'status' operation reports whether the database is running The 'monitor' operation reports whether the database seems to be working The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports ! } meta_data() { cat < 1.0 Resource script for Oracle Listener. It manages an Oracle Listener instance as an HA resource. Manages an Oracle TNS listener The Oracle SID (aka ORACLE_SID). Necessary for the monitor op, i.e. to do tnsping SID. sid The Oracle home directory (aka ORACLE_HOME). If not specified, then the SID should be listed in /etc/oratab. home Run the listener as this user. user Listener instance to be started (as defined in listener.ora). Defaults to LISTENER. listener END } # # methods: What methods/operations do we support? # oralsnr_methods() { cat <<-! start stop status monitor validate-all methods meta-data usage ! } # Gather up information about our oralsnr instance ora_info() { ORACLE_SID=$1 ORACLE_HOME=$2 ORACLE_OWNER=$3 # get ORACLE_HOME from /etc/oratab if not set [ x = "x$ORACLE_HOME" ] && ORACLE_HOME=`awk -F: "/^$ORACLE_SID:/"'{print $2}' /etc/oratab` # there a better way to find out ORACLE_OWNER? [ x = "x$ORACLE_OWNER" ] && ORACLE_OWNER=`ls -ld $ORACLE_HOME/. 2>/dev/null | awk 'NR==1{print $3}'` sqlplus=$ORACLE_HOME/bin/sqlplus lsnrctl=$ORACLE_HOME/bin/lsnrctl tnsping=$ORACLE_HOME/bin/tnsping } testoraenv() { # Let's make sure a few important things are set... if [ x = "x$ORACLE_HOME" ]; then ocf_log info "ORACLE_HOME not set" return $OCF_ERR_CONFIGURED fi if [ x = "x$ORACLE_OWNER" ]; then ocf_log info "ORACLE_OWNER not set" return $OCF_ERR_CONFIGURED fi # and some important things are there if [ ! -x "$sqlplus" ]; then ocf_log info "$sqlplus does not exist" return $OCF_ERR_INSTALLED fi if [ ! -x "$lsnrctl" ]; then ocf_log err "$lsnrctl does not exist" return $OCF_ERR_INSTALLED fi if [ ! -x "$tnsping" ]; then ocf_log err "$tnsping does not exist" return $OCF_ERR_INSTALLED fi return 0 } setoraenv() { LD_LIBRARY_PATH=$ORACLE_HOME/lib LIBPATH=$ORACLE_HOME/lib TNS_ADMIN=$ORACLE_HOME/network/admin PATH=$ORACLE_HOME/bin:$ORACLE_HOME/dbs:$PATH export ORACLE_SID ORACLE_HOME ORACLE_OWNER TNS_ADMIN export LD_LIBRARY_PATH LIBPATH } dumporaenv() { cat</dev/null } # # is_oralsnr_up: is listener process running? # oralsnr_status: is the listener running? # is_oralsnr_up() { [ x != "x`eval $procs`" ] } oralsnr_status() { output=`$lsnrctl status $listener` echo "$output" | tail -1 | grep -qs 'completed successfully' RET=$? if [ $RET -ne 0 ]; then ocf_log info "$listener status failed: $output" fi return $RET } # and does it work? tnsping() { output=`$tnsping $ORACLE_SID` echo "$output" | tail -1 | grep -qs '^OK' RET=$? if [ $RET -ne 0 ]; then ocf_log info "$tnsping $ORACLE_SID failed: $output" fi return $RET } # # oralsnr_monitor: Can we connect to the listener? # oralsnr_monitor() { if oralsnr_status && tnsping then : good #ocf_log info "Listener $listener running" return $OCF_SUCCESS else ocf_log info "Listener $listener not running" return $OCF_NOT_RUNNING fi } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage) usage exit $OCF_SUCCESS;; methods) oralsnr_methods exit $?;; *);; esac if [ x = "x$OCF_RESKEY_sid" ] then ocf_log err "Please set OCF_RESKEY_sid to the Oracle SID !" exit $OCF_ERR_ARGS fi ora_info "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user" LSB_STATUS_STOPPED=3 testoraenv rc=$? if [ $rc -ne 0 ]; then ocf_log info "Oracle environment for SID $ORACLE_SID does not exist" case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) ocf_log err "Oracle environment for SID $ORACLE_SID broken" exit $rc ;; esac fi setoraenv # important: set the environment for the SID envtmpf=`mktemp` dumporaenv > $envtmpf chmod 644 $envtmpf trap "rm -f $envtmpf" EXIT # # default listener is "LISTENER" # listener=${OCF_RESKEY_listener:-"LISTENER"} # how to get listener processes -procs="ps -e -o pid,args | grep '[t]nslsnr' | grep -w $listener" +procs="ps -e -o pid,user,args | grep '[t]nslsnr' | grep -w $listener | grep -w $ORACLE_OWNER" US=`id -u -n` if [ $US != root -a $US != $ORACLE_OWNER ] then ocf_log err "$0 must be run as root or $ORACLE_OWNER" exit $OCF_ERR_PERM fi # What kind of method was invoked? case "$1" in start) oralsnr_start exit $?;; stop) oralsnr_stop exit $?;; status) if oralsnr_status then echo Listener $listener is running exit $OCF_SUCCESS else echo Listener $listener is stopped exit $OCF_NOT_RUNNING fi ;; monitor) oralsnr_monitor exit $?;; validate-all) # OCF_RESKEY_sid was already checked by ora_info(), # just exit successfully here. exit $OCF_SUCCESS;; *) oralsnr_methods exit $OCF_ERR_UNIMPLEMENTED;; esac # # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0 diff --git a/heartbeat/pgsql b/heartbeat/pgsql index a55cf6619..adecd46c4 100755 --- a/heartbeat/pgsql +++ b/heartbeat/pgsql @@ -1,688 +1,1733 @@ #!/bin/sh # # Description: Manages a PostgreSQL Server as an OCF High-Availability # resource # # Authors: Serge Dubrouski (sergeyfd@gmail.com) -- original RA # Florian Haas (florian@linbit.com) -- makeover +# Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication # -# Copyright: 2006-2010 Serge Dubrouski +# Copyright: 2006-2012 Serge Dubrouski # and other Linux-HA contributors # License: GNU General Public License (GPL) # ############################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # # Get PostgreSQL Configuration parameter # get_pgsql_param() { local config local param_name param_name=$1 #Check that config file exists if [ -n "$OCF_RESKEY_config" ]; then config=$OCF_RESKEY_config else config=$OCF_RESKEY_pgdata/postgresql.conf fi check_config "$config" [ $? -eq 0 ] || return perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) { \$dir=\$1; \$dir =~ s/\s*\#.*//; \$dir =~ s/^'(\S*)'/\$1/; print \$dir;}" perl -ne "$perl_code" < $config } # Defaults OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl OCF_RESKEY_psql_default=/usr/bin/psql OCF_RESKEY_pgdata_default=/var/lib/pgsql/data OCF_RESKEY_pgdba_default=postgres OCF_RESKEY_pghost_default="" OCF_RESKEY_pgport_default=5432 OCF_RESKEY_config_default="" OCF_RESKEY_start_opt_default="" OCF_RESKEY_pgdb_default=template1 OCF_RESKEY_logfile_default=/dev/null OCF_RESKEY_stop_escalate_default=30 OCF_RESKEY_monitor_user_default="" OCF_RESKEY_monitor_password_default="" OCF_RESKEY_monitor_sql_default="select now();" +# Defaults for replication +OCF_RESKEY_rep_mode_default=none +OCF_RESKEY_node_list_default="" +OCF_RESKEY_restore_command_default="" +OCF_RESKEY_master_ip_default="" +OCF_RESKEY_repuser_default="postgres" +OCF_RESKEY_primary_conninfo_opt_default="" +OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp" +OCF_RESKEY_xlog_check_count_default="3" +OCF_RESKEY_crm_attr_timeout_default="5" +OCF_RESKEY_stop_escalate_in_slave_default=30 : ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}} : ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}} : ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}} : ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}} : ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}} : ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}} : ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}} : ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}} : ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}} : ${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}} : ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}} : ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}} +# for replication +: ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}} +: ${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}} +: ${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}} +: ${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}} +: ${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}} +: ${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}} +: ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}} +: ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}} +: ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}} +: ${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}} usage() { cat < 1.0 Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource. Manages a PostgreSQL database instance Path to pg_ctl command. pgctl Start options (-o start_opt in pg_ctl). "-i -p 5432" for example. start_opt Additional pg_ctl options (-w, -W etc..). ctl_opt Path to psql command. psql Path to PostgreSQL data directory. pgdata User that owns PostgreSQL. pgdba Hostname/IP address where PostgreSQL is listening pghost Port where PostgreSQL is listening pgport PostgreSQL user that pgsql RA will user for monitor operations. If it's not set pgdba user will be used. monitor_user Password for monitor user. monitor_password SQL script that will be used for monitor operations. monitor_sql - Path to the PostgreSQL configuration file for the instance Configuration file Database that will be used for monitoring. pgdb Path to PostgreSQL server log output file. logfile Unix socket directory for PostgeSQL socketdir Number of shutdown retries (using -m fast) before resorting to -m immediate stop escalation - + + + + + +Replication mode(none(default)/async/sync). +"async" and "sync" require PostgreSQL 9.1 or later. +If you use async or sync, it requires node_list, master_ip, restore_command +parameters, and needs setting postgresql.conf, pg_hba.conf up for +replication. +Please delete "include /../../rep_mode.conf" line in postgresql.conf +when you switch from sync to async. + +rep_mode + + + + + +All node names. Please separate each node name with a space. +This is required for replication. + +node list + + + + + +restore_command for recovery.conf. +This is required for replication. + +restore_command + + + + + +Master's floating IP address to be connected from hot standby. +This parameter is used for "primary_conninfo" in recovery.conf. +This is required for replication. + +master ip + + + + + +User used to connect to the master server. +This parameter is used for "primary_conninfo" in recovery.conf. +This is required for replication. + +repuser + + + + + +primary_conninfo options of recovery.conf except host, port, user and application_name. +This is optional for replication. + +primary_conninfo_opt + + + + + +Path to temporary directory. +This is optional for replication. + +tmpdir + + + + + +Number of checking xlog on monitor before promote. +This is optional for replication. + +xlog check count + + + + + +The timeout of crm_attribute forever update command. +Default value is 5 seconds. +This is optional for replication. + +The timeout of crm_attribute forever update command. + + + + + +Number of shutdown retries (using -m fast) before resorting to -m immediate +in Slave state. +This is optional for replication. + +stop escalation_in_slave + + + + + EOF } # -# Run the given command in the Resource owner environment... +# Run the given command in the Resource owner environment... # runasowner() { local quietrun="" local loglevel="-err" local var for var in 1 2 do case "$1" in "-q") quietrun="-q" shift 1;; "warn"|"err") loglevel="-$1" shift 1;; *) ;; esac done ocf_run $quietrun $loglevel su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*" } # # Shell escape # escape_string() { echo "$*" | sed -e "s/'/'\\\\''/g" } # # methods: What methods/operations do we support? # pgsql_methods() { - cat </dev/null 2>&1" return $? fi # No PID file false } # -# pgsql_monitor +# pgsql_real_monitor # -pgsql_monitor() { +pgsql_real_monitor() { local loglevel - local psql_options local rc + local output # Set the log level of the error message loglevel=${1:-err} if ! pgsql_status then - ocf_log info "PostgreSQL is down" - return $OCF_NOT_RUNNING + ocf_log info "PostgreSQL is down" + return $OCF_NOT_RUNNING fi - if [ -n "$OCF_RESKEY_monitor_user" ]; then - PGUSER=$OCF_RESKEY_monitor_user; export PGUSER - PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD - psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb" + if is_replication; then + #Check replication state + output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ + $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ + -Atc \"${CHECK_MS_SQL}\""` + rc=$? + if [ $rc -ne 0 ]; then + report_psql_error $rc $loglevel + return $OCF_ERR_GENERIC + fi + + case "$output" in + f) ocf_log debug "PostgreSQL is running as a primary." + if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then + return $OCF_RUNNING_MASTER + fi + ;; + + t) ocf_log debug "PostgreSQL is running as a hot standby." + return $OCF_SUCCESS;; + + *) ocf_log err "$CHECK_MS_SQL output is $output" + return $OCF_ERR_GENERIC;; + esac + fi + + OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"` + runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \ + -c '$OCF_RESKEY_monitor_sql'" + rc=$? + if [ $rc -ne 0 ]; then + report_psql_error $rc $loglevel + return $OCF_ERR_GENERIC + fi + + if is_replication; then + return $OCF_RUNNING_MASTER + fi + return $OCF_SUCCESS +} + +pgsql_replication_monitor() { + local rc + local rsc + local instance + local my_status + local data_status + local is_master="" + + rc=$1 + if [ $rc -ne $OCF_SUCCESS -a $rc -ne "$OCF_RUNNING_MASTER" ]; then + return $rc + fi + # If I am Master + if [ $rc -eq $OCF_RUNNING_MASTER ]; then + change_data_status "$NODENAME" "LATEST" + change_pgsql_status "$NODENAME" "PRI" + control_slave_status || return $OCF_ERR_GENERIC + return $rc + fi + + # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor, + # so I will get master node name using crm_mon -n + if output=`crm_mon -n1 | grep " Master"`; then + rsc=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1` + instance=0 + while : + do + if [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; then + break + fi + if echo "$output" | grep "${rsc}:${instance}"; then + is_master="yes" + break + fi + instance=`expr $instance + 1` + done + fi + + if [ ! -n "$is_master" ]; then + # If I am Slave and Master is not exist + ocf_log info "Master does not exist." + change_pgsql_status "$NODENAME" "HS:alone" + have_master_right + if [ $? -eq 0 ]; then + rm -f ${XLOG_NOTE_FILE}.* + fi else - psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb" + output=`$CRM_ATTR_FOREVER -N "$NODENAME" \ + -n "$PGSQL_DATA_STATUS_ATTR" -G -q` + if [ "$output" = "DISCONNECT" ]; then + change_pgsql_status "$NODENAME" "HS:alone" + fi fi + return $rc +} - if [ -n "$OCF_RESKEY_pghost" ]; then - psql_options="$psql_options -h $OCF_RESKEY_pghost" +#pgsql_monitor: pgsql_real_monitor() wrapper for replication +pgsql_monitor() { + local rc + + pgsql_real_monitor + rc=$? + if ! is_replication; then + return $rc + else + pgsql_replication_monitor $rc + return $? + fi +} + +# pgsql_post_demote +pgsql_post_demote() { + DEMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname | sed "s/ /\n/g" | head -1` + ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE" + if [ "$DEMOTE_NODE" != "$NODENAME" ]; then + if ! echo $OCF_RESKEY_CRM_meta_notify_master_uname | grep $NODENAME; then + show_master_baseline + change_pgsql_status "$NODENAME" "HS:alone" + fi + fi + return $OCF_SUCCESS +} + +pgsql_pre_promote() { + local master_baseline + local my_master_baseline + local cmp_location + local number_of_nodes + + # If my data is newer than new master's one, I fail my resource. + PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ + sed "s/ /\n/g" | head -1` + number_of_nodes=`echo $OCF_RESKEY_node_list | wc -w` + if [ $number_of_nodes -ge 3 -a \ + "$OCF_RESKEY_rep_mode" = "sync" -a \ + "$PROMOTE_NODE" != "$NODENAME" ]; then + master_baseline=`$CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n \ + "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` + if [ $? -eq 0 ]; then + my_master_baseline=`$CRM_ATTR_REBOOT -N "$NODENAME" -n \ + "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` + # get older location + cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\ + sort | head -1` + if [ "$cmp_location" != "$my_master_baseline" ]; then + ocf_log err "My data is newer than new master's one. New master's location : $master_baseline" + $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY + return $OCF_ERR_GENERIC + fi + fi + fi + return $OCF_SUCCESS +} + +pgsql_notify() { + local type="${OCF_RESKEY_CRM_meta_notify_type}" + local op="${OCF_RESKEY_CRM_meta_notify_operation}" + local rc + + if ! is_replication; then + return $OCF_SUCCESS + fi + + ocf_log debug "notify: ${type} for ${op}" + case $type in + pre) + case $op in + promote) + pgsql_pre_promote + return $? + ;; + esac + ;; + post) + case $op in + promote) + delete_xlog_location + PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ + sed "s/ /\n/g" | head -1` + if [ "$PROMOTE_NODE" != "$NODENAME" ]; then + delete_master_baseline + fi + return $OCF_SUCCESS + ;; + demote) + pgsql_post_demote + return $? + ;; + start|stop) + if [ "$NODENAME " = "$OCF_RESKEY_CRM_meta_notify_master_uname" ]; then + control_slave_status + fi + return $OCF_SUCCESS + ;; + esac + ;; + esac + return $OCF_SUCCESS +} + +control_slave_status() { + local rc + local data_status + local target + local all_data_status + local tmp_data_status + local node_name + local number_of_nodes + + all_data_status=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ + $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ + -Atc \"${CHECK_REPLICATION_STATE_SQL}\""` + rc=$? + if [ $rc -eq 0 ]; then + if [ -n "$all_data_status" ]; then + all_data_status=`echo $all_data_status | sed "s/\n/ /g"` + fi + else + report_psql_error $rc warn + return 1 + fi + + number_of_nodes=`echo $OCF_RESKEY_node_list | wc -w` + for target in $OCF_RESKEY_node_list; do + if [ "$target" = "$NODENAME" ]; then + continue + fi + + data_status="DISCONNECT" + if [ -n "$all_data_status" ]; then + for tmp_data_status in $all_data_status; do + node_name=`echo $tmp_data_status | cut -d "|" -f 1` + state=`echo $tmp_data_status | cut -d "|" -f 2` + sync_state=`echo $tmp_data_status | cut -d "|" -f 3` + ocf_log debug "node=$node_name, state=$state, sync_state=$sync_state" + if [ "$node_name" = "$target" ];then + data_status="$state|$sync_state" + break + fi + done + fi + + case "$data_status" in + "STREAMING|SYNC") + change_data_status "$target" "$data_status" + change_master_score "$target" "$CAN_PROMOTE" + change_pgsql_status "$target" "HS:sync" + ;; + "STREAMING|ASYNC") + change_data_status "$target" "$data_status" + if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then + change_master_score "$target" "$CAN_NOT_PROMOTE" + if ! is_sync_mode "$target"; then + set_sync_mode "$target" + fi + else + if [ $number_of_nodes -le 2 ]; then + change_master_score "$target" "$CAN_PROMOTE" + else + # I can't determine which slave's data is newest in async mode. + change_master_score "$target" "$CAN_NOT_PROMOTE" + fi + fi + change_pgsql_status "$target" "HS:async" + ;; + "STREAMING|POTENTIAL") + change_data_status "$target" "$data_status" + change_master_score "$target" "$CAN_NOT_PROMOTE" + change_pgsql_status "$target" "HS:potential" + ;; + "DISCONNECT") + change_data_status "$target" "$data_status" + change_master_score "$target" "$CAN_NOT_PROMOTE" + if [ "$OCF_RESKEY_rep_mode" = "sync" ] && \ + is_sync_mode "$target"; then + set_async_mode "$target" + fi + ;; + *) + change_data_status "$target" "$data_status" + change_master_score "$target" "$CAN_NOT_PROMOTE" + if [ "$OCF_RESKEY_rep_mode" = "sync" ] && \ + is_sync_mode "$target"; then + set_async_mode "$target" + fi + change_pgsql_status "$target" "HS:connected" + ;; + esac + done + return 0 +} + +have_master_right() { + local old + local new + local output + local data_status + local node + local mylocation + local count + local newestXlog + local oldfile + local newfile + + ocf_log debug "Checking if I have a master right." + + data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \ + "$PGSQL_DATA_STATUS_ATTR" -G -q` + if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then + if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ + "$data_status" != "LATEST" ]; then + ocf_log warn "My data is out-of-date. status=$data_status" + return 1 + fi else - if [ -n "$OCF_RESKEY_socketdir" ]; then - psql_options="$psql_options -h $OCF_RESKEY_socketdir" + if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ + "$data_status" != "STREAMING|ASYNC" -a \ + "$data_status" != "LATEST" ]; then + ocf_log warn "My data is out-of-date. status=$data_status" + return 1 + fi + fi + ocf_log info "My data status=$data_status." + + show_xlog_location + if [ $? -ne 0 ]; then + ocf_log err "Failed to show my xlog location." + exit $OCF_ERR_GENERIC + fi + + old=0 + for count in `seq $OCF_RESKEY_xlog_check_count`; do + if [ -f ${XLOG_NOTE_FILE}.$count ]; then + old=$count + continue fi + break + done + new=`expr $old + 1` + + # get xlog locations of all nodes + for node in ${OCF_RESKEY_node_list}; do + output=`$CRM_ATTR_REBOOT -N "$node" -n \ + "$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null` + if [ $? -ne 0 ]; then + ocf_log warn "Can't get $node xlog location." + continue + else + ocf_log info "$node xlog location : $output" + echo "$node $output" >> ${XLOG_NOTE_FILE}.${new} + if [ "$node" = "$NODENAME" ]; then + mylocation=$output + fi + fi + done + + oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null` + newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null` + if [ "$oldfile" != "$newfile" ]; then + # reset counter + rm -f ${XLOG_NOTE_FILE}.* + printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 + return 1 fi - OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"` - runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options -c '$OCF_RESKEY_monitor_sql'" + if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then + newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \ + head -1 | cut -d " " -f 2` + if [ "$newestXlog" = "$mylocation" ]; then + ocf_log info "I have a master right." + $CRM_MASTER -v $PROMOTE_ME + return 0 + fi + change_data_status "$NODENAME" "DISCONNECT" + ocf_log info "I don't have correct master data." + # reset counter + rm -f ${XLOG_NOTE_FILE}.* + printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 + fi + + return 1 +} +is_replication() { + if [ "$OCF_RESKEY_rep_mode" != "none" ]; then + return 0 + fi + return 1 +} + +get_my_location() { + local rc + local output + local replay_loc + local receive_loc + local output1 + local output2 + local log1 + local log2 + local newer_location + + output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ + $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ + -Atc \"${CHECK_XLOG_LOC_SQL}\""` rc=$? - if [ $rc -ne 0 ]; then - ocf_log $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running" - if [ $rc -eq 1 ]; then - ocf_log err "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command." - elif [ $rc -eq 2 ]; then - ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command." - elif [ $rc -eq 3 ]; then - ocf_log err "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command." + if [ $rc -ne 0 ]; then + report_psql_error $rc warn + ocf_log err "Can't get my xlog location." + return 1 + fi + replay_loc=`echo $output | cut -d "|" -f 1` + receive_loc=`echo $output | cut -d "|" -f 2` + + output1=`echo "$replay_loc" | cut -d "/" -f 1` + output2=`echo "$replay_loc" | cut -d "/" -f 2` + log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` + log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` + replay_loc="${log1}${log2}" + + output1=`echo "$receive_loc" | cut -d "/" -f 1` + output2=`echo "$receive_loc" | cut -d "/" -f 2` + log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` + log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` + receive_loc="${log1}${log2}" + + newer_location=`printf "$replay_loc\n$receive_loc" | sort -r | head -1` + echo "$newer_location" + return 0 +} + +show_xlog_location() { + local location + + location=`get_my_location` || return 1 + $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location" +} + +delete_xlog_location() { + $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -D +} + +show_master_baseline() { + local rc + local location + + runasowner -q err "$OCF_RESKEY_psql $psql_options \ + -U $OCF_RESKEY_pgdba -c 'CHECKPOINT'" + rc=$? + if [ $rc -ne 0 ]; then + report_psql_error $rc warn + fi + location=`get_my_location` + ocf_log info "My master baseline : $location." + $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -v "$location" +} + +delete_master_baseline() { + $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -D +} + +set_async_mode_all() { + [ "$OCF_RESKEY_rep_mode" = "sync" ] || return 0 + ocf_log info "Set all nodes into async mode." + runasowner -q err "echo "" > \"$REP_MODE_CONF\"" + if [ $? -ne 0 ]; then + ocf_log err "Can't set all nodes into async mode." + return 1 + fi + return 0 +} + +set_async_mode() { + local sync_node_in_conf + + sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` + if [ -n "$sync_node_in_conf" ]; then + ocf_log info "Setup $1 into async mode." + sync_node_in_conf=`echo $sync_node_in_conf | sed "s/$1//g" |\ + sed "s/^,//g" | sed "s/,,/,/g" | sed "s/,$//g"` + if [ -n $sync_node_in_conf ]; then + echo "synchronous_standby_names = '$sync_node_in_conf'" > "$REP_MODE_CONF" + else + echo "" > "$REP_MODE_CONF" fi - return $OCF_ERR_GENERIC + else + ocf_log info "$1 is already in async mode." + return 0 fi - return $OCF_SUCCESS + ocf_log info "All synced nodes : \"$sync_node_in_conf\"" + reload_conf +} + +set_sync_mode() { + local sync_node_in_conf + + sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` + if [ -n "$sync_node_in_conf" ]; then + ocf_log info "Setup $1 into sync mode." + echo "synchronous_standby_names = '$sync_node_in_conf,$1'" > "$REP_MODE_CONF" + else + ocf_log info "Setup $1 into sync mode." + echo "synchronous_standby_names = '$1'" > "$REP_MODE_CONF" + fi + + sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` + ocf_log info "All synced nodes : \"$sync_node_in_conf\"" + reload_conf +} + +is_sync_mode() { + cat $REP_MODE_CONF | grep -q -e "[,' ]$1[,' ]" +} + +reload_conf() { + # Invoke pg_ctl + runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload" + if [ $? -eq 0 ]; then + ocf_log info "Reload configuration file." + else + ocf_log err "Can't reload configuration file." + return 1 + fi + + return 0 +} + +make_recovery_conf() { + runasowner "touch $RECOVERY_CONF" + if [ $? -ne 0 ]; then + ocf_log err "Can't create recovery.conf." + return 1 + fi + +cat > $RECOVERY_CONF </dev/null` + if [ "$output" != "$2" ]; then + # If slave's disk is broken, RA cannot read PID file + # and misjudges the PostgreSQL as down while it is running. + # It causes overwriting of pgsql-status by Master because replication is still connected. + if [ "$output" = "STOP" -o "$output" = "UNKNOWN" ]; then + if [ "$1" != "$NODENAME" ]; then + ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited." + return 0 + fi + fi + ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2." + $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2" + if [ $? -ne 0 ]; then + ocf_log err "Can't change $PGSQL_STATUS_ATTR." + return 1 + fi + fi + return 0 +} + +# change pgsql-data-status. +# arg1:node, arg2: value +change_data_status() { + local output + + if ! node_exist $1; then + return 0 + fi + + while : + do + output=`$CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null` + if [ "$output" != "$2" ]; then + ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2." + exec_func_with_timeout "$CRM_ATTR_FOREVER" "-N $1 -n \ + $PGSQL_DATA_STATUS_ATTR -v \"$2\"" \ + $OCF_RESKEY_crm_attr_timeout + if [ $? -ne 0 ]; then + ocf_log err "Can't change $PGSQL_DATA_STATUS_ATTR." + return 1 + fi + else + break + fi + done + return 0 +} + +# change master-score +# arg1:node, arg2: score +change_master_score() { + local rsc + local instance + local current_score + + if ! is_node_online $1; then + return 0 + fi + + rsc=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1` + instance=0 + while : + do + if [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; then + break + fi + if [ "${rsc}:${instance}" = "$OCF_RESOURCE_INSTANCE" ]; then + instance=`expr $instance + 1` + continue + fi + + current_score=`$CRM_ATTR_REBOOT -N "$1" -n "master-${rsc}:${instance}" -G -q 2>/dev/null` + if [ -n "$current_score" -a "$current_score" != "$2" ]; then + ocf_log info "Changing ${rsc}:${instance} master score on $1 : $current_score->$2." + $CRM_ATTR_REBOOT -N "$target" -n "master-${rsc}:${instance}" -v "$2" + if [ $? -ne 0 ]; then + ocf_log err "Can't change master score." + return 1 + fi + fi + instance=`expr $instance + 1` + done + return 0 +} + +report_psql_error() +{ + local rc + local loglevel + + rc=$1 + loglevel=${2:-err} + + ocf_log $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running" + if [ $rc -eq 1 ]; then + ocf_log err "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command." + elif [ $rc -eq 2 ]; then + ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command." + elif [ $rc -eq 3 ]; then + ocf_log err "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command." + fi +} + +# +# timeout management function +# arg1 : command +# arg2 : command's args +# arg3 : timeout(s) +# +exec_func_with_timeout() { + local func_pid + local count + local rc + + $1 `eval echo $2` & + func_pid=$! + count=0 + while kill -s 0 $func_pid >/dev/null 2>&1; do + sleep 1 + count=`expr $count + 1` + if [ $count -ge $3 ]; then + ocf_log debug "Execute $1 time out." + kill -s 9 $func_pid >/dev/null 2>&1 + return 0 + fi + done + wait $func_pid +} + +is_node_online() { + crm_mon -1 -n | grep -e "^Node $1 " -e "^Node $1:" | grep -q -v "OFFLINE" +} + +node_exist() { + crm_mon -1 -n | grep -q "^Node $1" } check_binary2() { if ! have_binary "$1"; then ocf_log err "Setup problem: couldn't find command: $1" return 1 fi return 0 } check_config() { local rc=0 if [ ! -f "$1" ]; then if ocf_is_probe; then - ocf_log info "Configuration file $1 not readable during probe." + ocf_log info "Configuration file is $1 not readable during probe." rc=1 else ocf_log err "Configuration file $1 doesn't exist" rc=2 fi fi return $rc } # Validate most critical parameters pgsql_validate_all() { + local version + if ! check_binary2 "$OCF_RESKEY_pgctl" || ! check_binary2 "$OCF_RESKEY_psql"; then return $OCF_ERR_INSTALLED fi if [ -n "$OCF_RESKEY_config" -a ! -f "$OCF_RESKEY_config" ]; then check_config "$OCF_RESKEY_config" [ $? -eq 2 ] && return $OCF_ERR_INSTALLED fi getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1 if [ ! $? -eq 0 ]; then ocf_log err "User $OCF_RESKEY_pgdba doesn't exist"; return $OCF_ERR_INSTALLED; fi if ocf_is_probe; then ocf_log info "Don't check $OCF_RESKEY_pgdata during probe" else if ! runasowner "test -w $OCF_RESKEY_pgdata"; then ocf_log err "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba" return $OCF_ERR_PERM; fi fi if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ] then ocf_log err "monitor password can't be empty" return $OCF_ERR_CONFIGURED fi if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ] then ocf_log err "monitor_user has to be set if monitor_password is set" return $OCF_ERR_CONFIGURED fi + if is_replication; then + version=`cat $OCF_RESKEY_pgdata/PG_VERSION` + if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then + ocf_log err "Replication mode needs PostgreSQL 9.1 or higher." + return $OCF_ERR_INSTALLED + fi + if ! ocf_is_ms; then + ocf_log err "Replication requires Master/Slave configuration." + return $OCF_ERR_CONFIGURED + fi + if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! "$OCF_RESKEY_rep_mode" = "async" ]; then + ocf_log err "Invalid rep_mode : $OCF_RESKEY_rep_mode" + return $OCF_ERR_CONFIGURED + fi + if [ ! -n "$OCF_RESKEY_master_ip" ]; then + ocf_log err "master_ip can't be empty." + return $OCF_ERR_CONFIGURED + fi + if [ ! -n "$OCF_RESKEY_node_list" ]; then + ocf_log err "node_list can't be empty." + return $OCF_ERR_CONFIGURED + fi + if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then + if ! grep -q "include '$REP_MODE_CONF' # added by pgsql RA" $OCF_RESKEY_pgdata/postgresql.conf; then + echo "include '$REP_MODE_CONF' # added by pgsql RA" >> $OCF_RESKEY_pgdata/postgresql.conf + fi + fi + if ! mkdir -p $OCF_RESKEY_tmpdir || ! chown $OCF_RESKEY_pgdba $OCF_RESKEY_tmpdir || ! chmod 700 $OCF_RESKEY_tmpdir; then + ocf_log err "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba" + return $OCF_ERR_PERM + fi + fi + return $OCF_SUCCESS } # # Check if we need to create a log file # check_log_file() { if [ ! -f "$1" ] then touch $1 > /dev/null 2>&1 chown $OCF_RESKEY_pgdba:`getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4` $1 fi #Check if $OCF_RESKEY_pgdba can write to the log file if ! runasowner "test -w $1" then return 1 fi return 0 } # # Check socket directory # check_socket_dir() { if [ ! -d "$OCF_RESKEY_socketdir" ]; then if ! mkdir "$OCF_RESKEY_socketdir"; then - ocf_log err "Cannot create directory $OCF_RESKEY_socketdir" + ocf_log err "Can't create directory $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chown $OCF_RESKEY_pgdba:`getent passwd \ $OCF_RESKEY_pgdba | cut -d ":" -f 4` "$OCF_RESKEY_socketdir" then - ocf_log err "Cannot change ownership for $OCF_RESKEY_socketdir" + ocf_log err "Can't change ownership for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chmod 2775 "$OCF_RESKEY_socketdir"; then - ocf_log err "Cannot change permissions for $OCF_RESKEY_socketdir" + ocf_log err "Can't change permissions for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi else if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then - ocf_log err "$OCF_RESKEY_pgdba cannot create files in $OCF_RESKEY_socketdir" + ocf_log err "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi rm $OCF_RESKEY_socketdir/test.$$ fi } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_GENERIC fi PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label +if is_replication; then + RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf + REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf + PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock + XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note + + CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot" + CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot" + CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever" + CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount" + + CAN_NOT_PROMOTE="-INFINITY" + CAN_PROMOTE="100" + PROMOTE_ME="1000" + + CHECK_MS_SQL="select pg_is_in_recovery()" + CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()" + CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication" + + PGSQL_STATUS_ATTR="pgsql-status" + PGSQL_DATA_STATUS_ATTR="pgsql-data-status" + PGSQL_XLOG_LOC_NAME="pgsql-xlog-loc" + PGSQL_MASTER_BASELINE="pgsql-master-baseline" + + NODENAME=`uname -n` + OPERATION=$1 +fi + case "$1" in methods) pgsql_methods exit $?;; meta-data) meta_data exit $OCF_SUCCESS;; esac # $OCF_RESKEY_pgdata has to be initialized at this momemnt : ${OCF_RESKEY_socketdir=`get_pgsql_param unix_socket_directory`} pgsql_validate_all rc=$? [ "$1" = "validate-all" ] && exit $rc if [ $rc -ne 0 ] then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $OCF_NOT_RUNNING;; *) exit $rc;; esac fi US=`id -u -n` if [ $US != root -a $US != $OCF_RESKEY_pgdba ] then ocf_log err "$0 must be run as root or $OCF_RESKEY_pgdba" exit $OCF_ERR_GENERIC fi +# make psql command options +if [ -n "$OCF_RESKEY_monitor_user" ]; then + PGUSER=$OCF_RESKEY_monitor_user; export PGUSER + PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD + psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb" +else + psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb" +fi + +if [ -n "$OCF_RESKEY_pghost" ]; then + psql_options="$psql_options -h $OCF_RESKEY_pghost" +else + if [ -n "$OCF_RESKEY_socketdir" ]; then + psql_options="$psql_options -h $OCF_RESKEY_socketdir" + fi +fi + # What kind of method was invoked? case "$1" in status) if pgsql_status then ocf_log info "PostgreSQL is up" exit $OCF_SUCCESS else ocf_log info "PostgreSQL is down" exit $OCF_NOT_RUNNING fi;; monitor) pgsql_monitor exit $?;; start) pgsql_start exit $?;; + promote) pgsql_promote + exit $?;; + + demote) pgsql_demote + exit $?;; + + notify) pgsql_notify + exit $?;; + stop) pgsql_stop exit $?;; *) exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/pound b/heartbeat/pound new file mode 100755 index 000000000..06f721b18 --- /dev/null +++ b/heartbeat/pound @@ -0,0 +1,312 @@ +#!/bin/sh +# +# +# Pound +# +# Description: Manage pound instances as a HA resource +# +# Author: Taro Matsuzawa +# +# License: GNU General Public License (GPL) +# +# See usage() for more details +# +# OCF instance parameters: +# OCF_RESKEY_pid +# OCF_RESKEY_binary +# OCF_RESKEY_ctl_binary +# OCF_RESKEY_socket_path +# OCF_RESKEY_config +# OCF_RESKEY_name +# +####################################################################### +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### +# Set default paramenter values + +# Set these two first, as other defaults depend on it +OCF_RESKEY_name_default=${OCF_RESOURCE_INSTANCE} +: ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} + +OCF_RESKEY_binary_default=pound +OCF_RESKEY_ctl_binary_default=poundctl +OCF_RESKEY_pid_default=/var/run/pound_${OCF_RESKEY_name}.pid +OCF_RESKEY_socket_path_default=/var/lib/pound/pound.cfg + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_ctl_binary=${OCF_RESKEY_client_ctl_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_socket_path=${OCF_RESKEY_socket_path_default}} + +meta_data() { + cat < + + +1.0 + + +The Pound Resource Agent can manage pound instances. + +Manage a Pound instance + + + + + +The Pound configuration file that pound should manage, for example +"/etc/pound.cfg". + +Pound configuration file + + + + + +Override the name of the instance that should be given to pound +(defaults to the resource identifier). + +Instance name + + + + + +Write the process's PID to the specified file. +The default will include the specified name, ie.: +"/var/run/pound_production.pid". Unlike what this help message shows. +It is most likely not necessary to change this parameter. + +Pidfile + + + + + +This is used to start pound server. +Normally use pound. + + + + + + + +This is used to watch pound status via unix socket. +Normally use poundctl. + + + + + + + +Write the process's unix socket. +This parameter is same 'Control' parameter in configuration file, ie.: +Control "/var/lib/pound/pound.cfg" + + + + + + + + + + + + + + + + +END +} + +####################################################################### + + +pound_usage() { + cat < - - -1.93 - - -Resource script for SAP databases. It manages a SAP database of any type as an HA resource. - -Manages any SAP database (based on Oracle, MaxDB, or DB2) - - - - The unique SAP system identifier. e.g. P01 - SAP system ID - - - - The full qualified path where to find sapstartsrv and sapcontrol. - path of sapstartsrv and sapcontrol - - - - The name of the database vendor you use. Set either: ORA,DB6,ADA - database vendor - - - - The Oracle TNS listener name. - listener name - - - - If you do not have a ABAP stack installed in the SAP database, set this to TRUE - only JAVA stack installed - - - - This is only needed if the DBJ2EE_ONLY parameter is set to true. Enter the path to the Java SDK which is used by the SAP WebAS Java - Path to Java SDK - - - - This controls how the resource agent monitors the database. If set to true, it will use SAP tools to test the connect to the database. Do not use with Oracle, because it will result in unwanted failovers in case of an archiver stuck - Activates application level monitoring - - - - The SAPDatabase resource agent tries to recover a failed start attempt automaticaly one time. This is done by running a forced abort of the RDBMS and/or executing recovery commands. - Enable or disable automatic startup recovery - - - - The full qualified path where to find the J2EE instance bootstrap directory. e.g. /usr/sap/P01/J00/j2ee/cluster/bootstrap - path to j2ee bootstrap directory - - - - The full qualified path where to find the J2EE security store directory. e.g. /usr/sap/P01/SYS/global/security/lib/tools - path to j2ee secure store directory - - - - The full qualified filename of the jdbc driver for the database connection test. It will be automaticaly read from the bootstrap.properties file in Java engine 6.40 and 7.00. For Java engine 7.10 and higher the parameter is mandatory. - file name of the jdbc driver - - - - The full qualified path where to find a script or program which should be executed before this resource gets started. - path to a pre-start script - - - - The full qualified path where to find a script or program which should be executed after this resource got started. - path to a post-start script - - - - The full qualified path where to find a script or program which should be executed before this resource gets stopped. - path to a pre-start script - - - - The full qualified path where to find a script or program which should be executed after this resource got stopped. - path to a post-start script - - - - - - - - - - - - - - -END -} trap_handler() { rm -f $TEMPFILE exit $OCF_ERR_GENERIC } # # listener_start: Start the given listener # listener_start() { local orasid="ora`echo $SID | tr '[:upper:]' '[:lower:]'`" local lrc=$OCF_SUCCESS local output output=`echo "lsnrctl start $NETSERVICENAME" | su - $orasid 2>&1` if [ $? -eq 0 ] then ocf_log info "Oracle Listener $NETSERVICENAME started: $output" lrc=$OCF_SUCCESS else ocf_log err "Oracle Listener $NETSERVICENAME start failed: $output" lrc=$OCF_ERR_GENERIC fi return $lrc } # # listener_stop: Stop the given listener # listener_stop() { local orasid="ora`echo $SID | tr '[:upper:]' '[:lower:]'`" local lrc=$OCF_SUCCESS if listener_status then : listener is running, trying to stop it later... else return $OCF_SUCCESS fi local output output=`echo "lsnrctl stop $NETSERVICENAME" | su - $orasid 2>&1` if [ $? -eq 0 ] then ocf_log info "Oracle Listener $NETSERVICENAME stopped: $output" else ocf_log err "Oracle Listener $NETSERVICENAME stop failed: $output" lrc=$OCF_ERR_GENERIC fi return $lrc } # # listener_status: is the given listener running? # listener_status() { local lrc=$OCF_SUCCESS local orasid="ora`echo $SID | tr '[:upper:]' '[:lower:]'`" # Note: ps cuts off it's output at column $COLUMNS, so "ps -ef" can not be used here # as the output might be to long. local cnt=`ps efo args --user $orasid | grep $NETSERVICENAME | grep -c tnslsnr` if [ $cnt -eq 1 ] then lrc=$OCF_SUCCESS else ocf_log info "listener process not running for $NETSERVICENAME for $SID" lrc=$OCF_ERR_GENERIC fi return $lrc } # # x_server_start: Start the given x_server # x_server_start() { local rc=$OCF_SUCCESS local output output=`echo "x_server start" | su - $sidadm 2>&1` if [ $? -eq 0 ] then ocf_log info "MaxDB x_server start: $output" lrc=$OCF_SUCCESS else ocf_log err "MaxDB x_server start failed: $output" lrc=$OCF_ERR_GENERIC fi return $lrc } # # x_server_stop: Stop the x_server # x_server_stop() { local lrc=$OCF_SUCCESS local output output=`echo "x_server stop" | su - $sidadm 2>&1` if [ $? -eq 0 ] then ocf_log info "MaxDB x_server stop: $output" else ocf_log err "MaxDB x_server stop failed: $output" lrc=$OCF_ERR_GENERIC fi return $lrc } # # x_server_status: is the x_server running? # x_server_status() { local lrc=$OCF_SUCCESS local sdbuser=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'` # Note: ps cuts off it's output at column $COLUMNS, so "ps -ef" can not be used here # as the output might be to long. local cnt=`ps efo args --user $sdbuser | grep -c vserver` if [ $cnt -ge 1 ] then lrc=$OCF_SUCCESS else ocf_log info "x_server process not running" lrc=$OCF_ERR_GENERIC fi return $lrc } # # oracle_stop: Stop the Oracle database without any condition # oracle_stop() { echo '#!/bin/sh LOG=$HOME/stopdb.log date > $LOG if [ -x "${ORACLE_HOME}/bin/sqlplus" ] then SRVMGRDBA_EXE="${ORACLE_HOME}/bin/sqlplus" else echo "Can not find executable sqlplus" >> $LOG exit 1 fi $SRVMGRDBA_EXE /NOLOG >> $LOG << ! connect / as sysdba shutdown immediate exit ! rc=$? cat $LOG exit $rc' > $TEMPFILE chmod 700 $TEMPFILE chown $sidadm $TEMPFILE su - $sidadm -c $TEMPFILE retcode=$? rm -f $TEMPFILE if [ $retcode -eq 0 ]; then sapdatabase_status if [ $? -ne $OCF_NOT_RUNNING ]; then retcode=1 fi fi return $retcode } # # maxdb_stop: Stop the MaxDB database without any condition # maxdb_stop() { # x_Server must be running to stop database x_server_status if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi if [ $DBJ2EE_ONLY -eq 1 ]; then userkey=c_J2EE else userkey=c fi echo "#!/bin/sh LOG=\$HOME/stopdb.log date > \$LOG echo \"Stop database with xuserkey >$userkey<\" >> \$LOG dbmcli -U ${userkey} db_offline >> \$LOG 2>&1 exit \$?" > $TEMPFILE chmod 700 $TEMPFILE chown $sidadm $TEMPFILE su - $sidadm -c $TEMPFILE retcode=$? rm -f $TEMPFILE if [ $retcode -eq 0 ]; then sapdatabase_status if [ $? -ne $OCF_NOT_RUNNING ]; then retcode=1 fi fi return $retcode } # # db6udb_stop: Stop the DB2/UDB database without any condition # db6udb_stop() { echo '#!/bin/sh LOG=$HOME/stopdb.log date > $LOG echo "Shut down the database" >> $LOG $INSTHOME/sqllib/bin/db2 deactivate database $DB2DBDFT |tee -a $LOG 2>&1 $INSTHOME/sqllib/adm/db2stop force |tee -a $LOG 2>&1 exit $?' > $TEMPFILE chmod 700 $TEMPFILE chown $sidadm $TEMPFILE su - $sidadm -c $TEMPFILE retcode=$? rm -f $TEMPFILE if [ $retcode -eq 0 ]; then sapdatabase_status if [ $? -ne $OCF_NOT_RUNNING ]; then retcode=1 fi fi return $retcode } # # oracle_recover: try to clean up oracle after a crash # oracle_recover() { echo '#!/bin/sh LOG=$HOME/recover.log date > $LOG echo "Logfile written by heartbeat SAPDatabase resource agent" >> $LOG if [ -x "${ORACLE_HOME}/bin/sqlplus" ] then SRVMGRDBA_EXE="${ORACLE_HOME}/bin/sqlplus" else echo "Can not find executable sqlplus" >> $LOG exit 1 fi $SRVMGRDBA_EXE /NOLOG >> $LOG << ! connect / as sysdba shutdown abort startup mount alter database end backup; alter database open; exit ! rc=$? cat $LOG exit $rc' > $TEMPFILE chmod 700 $TEMPFILE chown $sidadm $TEMPFILE su - $sidadm -c $TEMPFILE retcode=$? rm -f $TEMPFILE return $retcode } # # maxdb_recover: try to clean up MaxDB after a crash # maxdb_recover() { # x_Server must be running to stop database x_server_status if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi if [ $DBJ2EE_ONLY -eq 1 ]; then userkey=c_J2EE else userkey=c fi echo "#!/bin/sh LOG=\$HOME/recover.log date > \$LOG echo \"Logfile written by heartbeat SAPDatabase resource agent\" >> \$LOG echo \"Cleanup database with xuserkey >$userkey<\" >> \$LOG echo \"db_stop\" >> \$LOG 2>&1 dbmcli -U ${userkey} db_stop >> \$LOG 2>&1 echo \"db_clear\" >> \$LOG 2>&1 dbmcli -U ${userkey} db_clear >> \$LOG 2>&1 echo \"db_online\" >> \$LOG 2>&1 dbmcli -U ${userkey} db_online >> \$LOG 2>&1 rc=\$? cat \$LOG exit \$rc" > $TEMPFILE chmod 700 $TEMPFILE chown $sidadm $TEMPFILE su - $sidadm -c $TEMPFILE retcode=$? rm -f $TEMPFILE return $retcode } # # db6udb_recover: try to recover DB/2 after a crash # db6udb_recover() { db2sid="db2`echo $SID | tr '[:upper:]' '[:lower:]'`" echo '#!/bin/sh LOG=$HOME/recover.log date > $LOG echo "Logfile written by heartbeat SAPDatabase resource agent" >> $LOG $INSTHOME/sqllib/bin/db2_kill >> $LOG 2>&1 $INSTHOME/sqllib/adm/db2start >> $LOG 2>&1 $INSTHOME/sqllib/bin/db2 activate database $DB2DBDFT >> $LOG 2>&1 rc=$? cat $LOG exit $rc' > $TEMPFILE chmod 700 $TEMPFILE chown $db2sid $TEMPFILE su - $db2sid -c $TEMPFILE retcode=$? rm -f $TEMPFILE return $retcode } -# -# methods: What methods/operations do we support? -# -sapdatabase_methods() { - cat <<-! - start - stop - status - monitor - recover - validate-all - methods - meta-data - usage - ! -} - - -# -# sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. -# This specialties do not allow a totally generic SAP cluster resource agent. -# Someone should write a resource agent for each additional process you need, if it -# is required to monitor that process within the cluster manager. To enable -# you to extent this resource agent without developing a new one, this user exit -# was introduced. -# -sapuserexit() { - NAME="$1" - VALUE="$2" - - if [ -n "$VALUE" ] - then - if [ -x "$VALUE" ] - then - ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}" - "$VALUE" >/dev/null 2>&1 - ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?" - else - ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable" - fi - fi - return 0 -} - # # sapdatabase_start : Start the SAP database # sapdatabase_start() { sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT" case $DBTYPE in ADA) x_server_start ;; ORA) listener_start ;; esac output=`su - $sidadm -c $SAPSTARTDB` rc=$? if [ $DBJ2EE_ONLY -eq 1 ] then sapdatabase_monitor 1 rc=$? fi if [ $rc -ne 0 -a $OCF_RESKEY_AUTOMATIC_RECOVER -eq 1 ] then ocf_log warn "SAP database $SID start failed: $output" ocf_log warn "Try to recover database $SID" output='' sapdatabase_recover rc=$? fi if [ $rc -eq 0 ] then ocf_log info "SAP database $SID started: $output" rc=$OCF_SUCCESS sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT" else ocf_log err "SAP database $SID start failed: $output" rc=$OCF_ERR_GENERIC fi return $rc } # # sapdatabase_stop: Stop the SAP database # sapdatabase_stop() { sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT" # use of the stopdb kernel script is not possible, because there are to may checks in that # script. We want to stop the database regardless of anything. #output=`su - $sidadm -c $SAPSTOPDB` case $DBTYPE in ORA) output=`oracle_stop` ;; ADA) output=`maxdb_stop` ;; DB6) output=`db6udb_stop` ;; esac if [ $? -eq 0 ] then ocf_log info "SAP database $SID stopped: $output" rc=$OCF_SUCCESS else ocf_log err "SAP database $SID stop failed: $output" rc=$OCF_ERR_GENERIC fi case $DBTYPE in ORA) listener_stop ;; ADA) x_server_stop ;; esac sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT" return $rc } # # sapdatabase_monitor: Can the given database instance do anything useful? # sapdatabase_monitor() { strict=$1 sapdatabase_status rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi case $DBTYPE in ADA) x_server_status if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi ;; ORA) listener_status if [ $? -ne $OCF_SUCCESS ]; then listener_start; fi ;; esac if [ $strict -eq 0 ] then return $rc else if [ $DBJ2EE_ONLY -eq 0 ] then output=`echo "$SAPDBCONNECT -d -w /dev/null" | su $sidadm 2>&1` if [ $? -le 4 ] then rc=$OCF_SUCCESS else rc=$OCF_NOT_RUNNING fi else MYCP="" EXECMD="" # WebAS Java 6.40+7.00 IAIK_JCE="$SECSTORE"/iaik_jce.jar IAIK_JCE_EXPORT="$SECSTORE"/iaik_jce_export.jar EXCEPTION="$BOOTSTRAP"/exception.jar LOGGING="$BOOTSTRAP"/logging.jar OPENSQLSTA="$BOOTSTRAP"/opensqlsta.jar TC_SEC_SECSTOREFS="$BOOTSTRAP"/tc_sec_secstorefs.jar JDDI="$BOOTSTRAP"/../server0/bin/ext/jdbdictionary/jddi.jar ANTLR="$BOOTSTRAP"/../server0/bin/ext/antlr/antlr.jar FRAME="$BOOTSTRAP"/../server0/bin/system/frame.jar # only start jdbcconnect when all jars available if [ -f "$EXCEPTION" -a -f "$LOGGING" -a -f "$OPENSQLSTA" -a -f "$TC_SEC_SECSTOREFS" -a -f "$JDDI" -a -f "$ANTLR" -a -f "$FRAME" -a -f "$SAPDBCONNECT" ] then MYCP=".:$FRAME:$ANTLR:$JDDI:$IAIK_JCE_EXPORT:$IAIK_JCE:$EXCEPTION:$LOGGING:$OPENSQLSTA:$TC_SEC_SECSTOREFS:$DB_JARS:$SAPDBCONNECT" EXECMD="com.sap.inst.jdbc.connect.JdbcCon -sec $SID:$SID" else # WebAS Java 7.10 LAUNCHER=${BOOTSTRAP}/sap.com~tc~bl~offline_launcher~impl.jar if [ -f "$DB_JARS" -a -f "$SAPDBCONNECT" -a -f "$LAUNCHER" ] then MYCP="$LAUNCHER" EXECMD="com.sap.engine.offline.OfflineToolStart com.sap.inst.jdbc.connect.JdbcCon ${SAPDBCONNECT}:${SECSTORE}:${DB_JARS}:${BOOTSTRAP} -sec $SID:$SID" fi fi if [ -n "$EXECMD" ] then output=`${JAVA_HOME}/bin/java -cp $MYCP $EXECMD 2> /dev/null` if [ $? -le 0 ] then rc=$OCF_SUCCESS else rc=$OCF_NOT_RUNNING fi else output="Cannot find all jar files needed for database monitoring." rc=$OCF_ERR_GENERIC fi fi fi if [ $rc -ne $OCF_SUCCESS ] then ocf_log err "The SAP database $SID ist not running: $output" fi return $rc } # # sapdatabase_status: Are there any database processes on this host ? # sapdatabase_status() { case $DBTYPE in ADA) SEARCH="$SID/db/pgm/kernel" SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'` SNUM=2 ;; ORA) SEARCH="ora_[a-z][a-z][a-z][a-z]_" SUSER="ora`echo $SID | tr '[:upper:]' '[:lower:]'`" SNUM=4 ;; DB6) SEARCH="db2[a-z][a-z][a-z]" SUSER="db2`echo $SID | tr '[:upper:]' '[:lower:]'`" SNUM=2 ;; esac # Note: ps cuts off it's output at column $COLUMNS, so "ps -ef" can not be used here # as the output might be to long. cnt=`ps efo args --user $SUSER 2> /dev/null | grep -c "$SEARCH"` if [ $cnt -ge $SNUM ] then rc=$OCF_SUCCESS else # ocf_log info "Database Instance $SID is not running on `hostname`" rc=$OCF_NOT_RUNNING fi return $rc } # # sapdatabase_recover: # sapdatabase_recover() { case $DBTYPE in ORA) recoutput=`oracle_recover` ;; ADA) recoutput=`maxdb_recover` ;; DB6) recoutput=`db6udb_recover` ;; esac sapdatabase_monitor 1 retcode=$? if [ $retcode -eq $OCF_SUCCESS ] then ocf_log info "Recover of SAP database $SID was successful: $recoutput" else ocf_log err "Recover of SAP database $SID failed: $recoutput" fi return $retcode } # # sapdatabase_validate: Check the symantic of the input parameters # sapdatabase_validate() { rc=$OCF_SUCCESS if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ] then ocf_log err "Parsing parameter SID: '$SID' is not a valid system ID!" rc=$OCF_ERR_ARGS fi case "$DBTYPE" in ORA|ADA|DB6) ;; *) ocf_log err "Parsing parameter DBTYPE: '$DBTYPE' is not a supported database type!" rc=$OCF_ERR_ARGS ;; esac return $rc } # -# 'main' starts here... +# sapdatabase_init: initialize global variables at the beginning # +sapdatabase_init() { -if - ( [ $# -ne 1 ] ) -then - usage - exit $OCF_ERR_ARGS -fi - -# Set a tempfile and make sure to clean it up again -TEMPFILE="/tmp/SAPDatabase.$$.tmp" -trap trap_handler INT TERM - -# These operations don't require OCF instance parameters to be set -case "$1" in - meta-data) meta_data - exit $OCF_SUCCESS;; - - usage) usage - exit $OCF_SUCCESS;; - - methods) sapdatabase_methods - exit $?;; - - *);; -esac - -US=`id -u -n` -US=`echo $US` -if - [ $US != root ] -then - ocf_log err "$0 must be run as root" - exit $OCF_ERR_PERM -fi - -# mandatory parameter check -if [ -z "$OCF_RESKEY_SID" ]; then - ocf_log err "Please set OCF_RESKEY_SID to the SAP system id!" - exit $OCF_ERR_ARGS -fi -SID=`echo "$OCF_RESKEY_SID"` - -if [ -z "$OCF_RESKEY_DBTYPE" ]; then - ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ORA,ADA,DB6)!" - exit $OCF_ERR_ARGS -fi -DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr "[a-z]" "[A-Z]"` +ocf_log warn "Usage of SAPDatabase resource agent without SAPHostAgent is deprecated. Please read documentation of SAPDatabase resource agent and follow SAP note 1031096 for the installation of SAPHostAgent." # optional OCF parameters, we try to guess which directories are correct EXESTARTDB="startdb" EXESTOPDB="stopdb" EXEDBCONNECT="R3trans" if [ -z "$OCF_RESKEY_DBJ2EE_ONLY" ]; then DBJ2EE_ONLY=0 else case "$OCF_RESKEY_DBJ2EE_ONLY" in 1|true|TRUE|yes|YES) DBJ2EE_ONLY=1 EXESTARTDB="startj2eedb" EXESTOPDB="stopj2eedb" EXEDBCONNECT="jdbcconnect.jar" ;; 0|false|FALSE|no|NO) DBJ2EE_ONLY=0;; *) ocf_log err "Parsing parameter DBJ2EE_ONLY: '$DBJ2EE_ONLY' is not a boolean value!" exit $OCF_ERR_ARGS ;; esac fi if [ -z "$OCF_RESKEY_NETSERVICENAME" ]; then case "$DBTYPE" in ORA|ora) NETSERVICENAME="LISTENER";; *) NETSERVICENAME="";; esac else NETSERVICENAME="$OCF_RESKEY_NETSERVICENAME" fi if [ -z "$OCF_RESKEY_STRICT_MONITORING" ]; then OCF_RESKEY_STRICT_MONITORING=0 else case "$OCF_RESKEY_STRICT_MONITORING" in 1|true|TRUE|yes|YES) OCF_RESKEY_STRICT_MONITORING=1;; 0|false|FALSE|no|NO) OCF_RESKEY_STRICT_MONITORING=0;; *) ocf_log err "Parsing parameter STRICT_MONITORING: '$OCF_RESKEY_STRICT_MONITORING' is not a boolean value!" exit $OCF_ERR_ARGS ;; esac fi PATHLIST=" $OCF_RESKEY_DIR_EXECUTABLE /usr/sap/$SID/*/exe /usr/sap/$SID/SYS/exe/run /sapmnt/$SID/exe " DIR_EXECUTABLE="" for EXEPATH in $PATHLIST do if [ -x $EXEPATH/$EXESTARTDB -a -x $EXEPATH/$EXESTOPDB -a -x $EXEPATH/$EXEDBCONNECT ] then DIR_EXECUTABLE=$EXEPATH SAPSTARTDB=$EXEPATH/$EXESTARTDB SAPSTOPDB=$EXEPATH/$EXESTOPDB SAPDBCONNECT=$EXEPATH/$EXEDBCONNECT break fi done if [ -z "$DIR_EXECUTABLE" ] then ocf_log warn "Cannot find $EXESTARTDB,$EXESTOPDB and $EXEDBCONNECT executable, please set DIR_EXECUTABLE parameter!" exit $OCF_NOT_RUNNING fi if [ $DBJ2EE_ONLY -eq 1 ] then if [ -n "$OCF_RESKEY_DIR_BOOTSTRAP" ] then BOOTSTRAP="$OCF_RESKEY_DIR_BOOTSTRAP" else BOOTSTRAP=`ls -1d /usr/sap/$SID/*/j2ee/cluster/bootstrap | head -1` fi if [ -n "$OCF_RESKEY_DIR_SECSTORE" ] then SECSTORE="$OCF_RESKEY_DIR_SECSTORE" else SECSTORE=/usr/sap/$SID/SYS/global/security/lib/tools fi if [ -n "$OCF_RESKEY_JAVA_HOME" ] then JAVA_HOME="$OCF_RESKEY_JAVA_HOME" PATH=$JAVA_HOME/bin:$PATH else if [ -n "$JAVA_HOME" ] then PATH=$JAVA_HOME/bin:$PATH else ocf_log err "Cannot find JAVA_HOME directory, please set JAVA_HOME parameter!" exit $OCF_NOT_RUNNING fi fi if [ -n "$OCF_RESKEY_DB_JARS" ] then DB_JARS=$OCF_RESKEY_DB_JARS else if [ -f "$BOOTSTRAP"/bootstrap.properties ]; then DB_JARS=`cat $BOOTSTRAP/bootstrap.properties | grep -i rdbms.driverLocation | sed -e 's/\\\:/:/g' | awk -F= '{print $2}'` fi fi fi if [ -z "$OCF_RESKEY_AUTOMATIC_RECOVER" ] then OCF_RESKEY_AUTOMATIC_RECOVER=0 else case "$OCF_RESKEY_AUTOMATIC_RECOVER" in 1|true|TRUE|yes|YES) OCF_RESKEY_AUTOMATIC_RECOVER=1;; 0|false|FALSE|no|NO) OCF_RESKEY_AUTOMATIC_RECOVER=0;; esac fi # as root user we need the library path to the SAP kernel to be able to call executables if [ `echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>"` -eq 0 ]; then LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH export LD_LIBRARY_PATH fi sidadm="`echo $SID | tr '[:upper:]' '[:lower:]'`adm" +} -# What kind of method was invoked? -case "$1" in - - start) sapdatabase_start - exit $?;; - - stop) sapdatabase_stop - exit $?;; - - monitor) - sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING - exit $?;; - - status) - sapdatabase_status - exit $?;; - - recover) sapdatabase_recover - exit $?;; - - validate-all) sapdatabase_validate - exit $?;; - - *) sapdatabase_methods - exit $OCF_ERR_UNIMPLEMENTED;; -esac +# Set a tempfile and make sure to clean it up again +TEMPFILE="/tmp/SAPDatabase.$$.tmp" +trap trap_handler INT TERM \ No newline at end of file diff --git a/heartbeat/sapdb.sh b/heartbeat/sapdb.sh new file mode 100644 index 000000000..9a6ab307e --- /dev/null +++ b/heartbeat/sapdb.sh @@ -0,0 +1,336 @@ +# +# sapdb.sh - for systems having SAPHostAgent installed +# (sourced by SAPDatabase) +# +# Description: This code is separated from the SAPDatabase agent to +# introduce new functions for systems which having +# SAPHostAgent installed. +# Someday it might be merged back into SAPDatabase agein. +# +# Author: Alexander Krauth, September 2010 +# Support: linux@sap.com +# License: GNU General Public License (GPL) +# Copyright: (c) 2010, 2012 Alexander Krauth +# + + +# +# background_check_saphostexec : Run a request to saphostexec in a separat task, to be able to react on a hanging process +# +background_check_saphostexec() { + timeout=600 + count=0 + + $SAPHOSTCTRL -function ListDatabases >/dev/null 2>&1 & + pid=$! + + while kill -0 $pid > /dev/null 2>&1 + do + sleep 0.1 + count=$(( $count + 1 )) + if [ $count -ge $timeout ]; then + kill -9 $pid >/dev/null 2>&1 + ocf_log warn "saphostexec did not respond to the method 'ListDatabases' within 60 seconds" + return $OCF_ERR_GENERIC # Timeout + fi + done + + # child already has finished, now evaluate it's returncode + wait $pid +} + +# +# cleanup_saphostexec : make sure to cleanup the SAPHostAgent in case of any +# misbehavior +# +cleanup_saphostexec() { + pkill -9 -f "$SAPHOSTEXEC" + pkill -9 -f "$SAPHOSTSRV" + oscolpid=`pgrep -f "$SAPHOSTOSCOL"` # we check saposcol pid, because it + # might not run under control of + # saphostexec + + # cleanup saposcol shared memory, otherwise it will not start again + if [ -n "$oscolpid" ];then + kill -9 $oscolpid + oscolipc=`ipcs -m | grep "4dbe " | awk '{print $2}'` + if [ -n "$oscolipc" ]; then + ipcrm -m $oscolipc + fi + fi +} + +# +# check_saphostexec : Before using saphostctrl we make sure that the +# saphostexec is running on the current node. +# +check_saphostexec() { + chkrc=$OCF_SUCCESS + running=`pgrep -f "$SAPHOSTEXEC" | wc -l` + + if [ $running -gt 0 ]; then + if background_check_saphostexec; then + return $OCF_SUCCESS + else + ocf_log warn "saphostexec did not respond to the method 'ListDatabases' correctly (rc=$?), it will be killed now" + running=0 + fi + fi + + if [ $running -eq 0 ]; then + ocf_log warn "saphostexec is not running on node `hostname`, it will be started now" + cleanup_saphostexec + output=`$SAPHOSTEXEC -restart 2>&1` + + # now make sure the daemon has been started and is able to respond + srvrc=1 + while [ $srvrc -ne 0 -a `pgrep -f "$SAPHOSTEXEC" | wc -l` -gt 0 ] + do + sleep 1 + background_check_saphostexec + srvrc=$? + done + + if [ $srvrc -eq 0 ] + then + ocf_log info "saphostexec on node `hostname` was restarted !" + chkrc=$OCF_SUCCESS + else + ocf_log error "saphostexec on node `hostname` could not be started! - $output" + chkrc=$OCF_ERR_GENERIC + fi + fi + + return $chkrc +} + + +# +# sapdatabase_start : Start the SAP database +# +sapdatabase_start() { + + check_saphostexec + rc=$? + + if [ $rc -eq $OCF_SUCCESS ] + then + sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT" + + DBINST="" + if [ -n "$OCF_RESKEY_DBINSTANCE" ] + then + DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE " + fi + FORCE="" + if ocf_is_true $OCF_RESKEY_AUTOMATIC_RECOVER + then + FORCE="-force" + fi + output=`$SAPHOSTCTRL -function StartDatabase -dbname $SID -dbtype $DBTYPE $DBINST $FORCE -service` + + sapdatabase_monitor 1 + rc=$? + + if [ $rc -eq 0 ] + then + ocf_log info "SAP database $SID started: $output" + rc=$OCF_SUCCESS + + sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT" + else + ocf_log err "SAP database $SID start failed: $output" + rc=$OCF_ERR_GENERIC + fi + fi + + return $rc +} + +# +# sapdatabase_stop: Stop the SAP database +# +sapdatabase_stop() { + + check_saphostexec + rc=$? + + if [ $rc -eq $OCF_SUCCESS ] + then + sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT" + + DBINST="" + if [ -n "$OCF_RESKEY_DBINSTANCE" ] + then + DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE " + fi + output=`$SAPHOSTCTRL -function StopDatabase -dbname $SID -dbtype $DBTYPE $DBINST -force -service` + + if [ $? -eq 0 ] + then + ocf_log info "SAP database $SID stopped: $output" + rc=$OCF_SUCCESS + else + ocf_log err "SAP database $SID stop failed: $output" + rc=$OCF_ERR_GENERIC + fi + fi + + sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT" + + return $rc +} + + +# +# sapdatabase_monitor: Can the given database instance do anything useful? +# +sapdatabase_monitor() { + strict=$1 + rc=$OCF_SUCCESS + + if ! ocf_is_true $strict + then + sapdatabase_status + rc=$? + else + check_saphostexec + rc=$? + + if [ $rc -eq $OCF_SUCCESS ] + then + count=0 + + DBINST="" + if [ -n "$OCF_RESKEY_DBINSTANCE" ] + then + DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE " + fi + output=`$SAPHOSTCTRL -function GetDatabaseStatus -dbname $SID -dbtype $DBTYPE $DBINST` + + # we have to parse the output, because the returncode doesn't tell anything about the instance status + for SERVICE in `echo "$output" | grep -i 'Component[ ]*Name *[:=] [A-Za-z][A-Za-z0-9_]* (' | sed 's/^.*Component[ ]*Name *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i'` + do + COLOR=`echo "$output" | grep -i "Component[ ]*Name *[:=] *$SERVICE (" | sed 's/^.*Status *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i'` + STATE=0 + + case $COLOR in + Running) STATE=$OCF_SUCCESS;; + *) STATE=$OCF_NOT_RUNNING;; + esac + + SEARCH=`echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'` + if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ] + then + if [ $STATE -eq $OCF_NOT_RUNNING ] + then + ocf_log err "SAP database service $SERVICE is not running with status $COLOR !" + rc=$STATE + fi + count=1 + fi + done + + if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ] + then + ocf_log err "The resource does not run any services which this RA could monitor!" + rc=$OCF_ERR_ARGS + fi + + if [ $rc -ne $OCF_SUCCESS ] + then + ocf_log err "The SAP database $SID ist not running: $output" + fi + fi + fi + return $rc +} + + +# +# sapdatabase_status: Are there any database processes on this host ? +# +sapdatabase_status() { + case $DBTYPE in + ADA) SEARCH="$SID/db/pgm/kernel" + SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'` + SNUM=2 + ;; + ORA) SEARCH="ora_[a-z][a-z][a-z][a-z]_" + SUSER="ora`echo $SID | tr '[:upper:]' '[:lower:]'`" + SNUM=4 + ;; + DB6) SEARCH="db2[a-z][a-z][a-z]" + SUSER="db2`echo $SID | tr '[:upper:]' '[:lower:]'`" + SNUM=2 + ;; + SYB) SEARCH="dataserver" + SUSER="syb`echo $SID | tr '[:upper:]' '[:lower:]'`" + SNUM=1 + ;; + HDB) SEARCH="hdb[a-z]*server" + SUSER="`echo $SID | tr '[:upper:]' '[:lower:]'`adm" + SNUM=1 + ;; + esac + + cnt=`ps -u $SUSER -o args 2> /dev/null | grep -c $SEARCH` + [ $cnt -ge $SNUM ] && return $OCF_SUCCESS + return $OCF_NOT_RUNNING +} + + +# +# sapdatabase_recover: +# +sapdatabase_recover() { + OCF_RESKEY_AUTOMATIC_RECOVER=1 + sapdatabase_stop + sapdatabase_start +} + + +# +# sapdatabase_validate: Check the symantic of the input parameters +# +sapdatabase_validate() { + rc=$OCF_SUCCESS + if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ] + then + ocf_log err "Parsing parameter SID: '$SID' is not a valid system ID!" + rc=$OCF_ERR_ARGS + fi + + case "$DBTYPE" in + ORA|ADA|DB6|SYB|HDB) ;; + *) ocf_log err "Parsing parameter DBTYPE: '$DBTYPE' is not a supported database type!" + rc=$OCF_ERR_ARGS ;; + esac + + return $rc +} + +# +# sapdatabase_init: initialize global variables at the beginning +# +sapdatabase_init() { +OCF_RESKEY_AUTOMATIC_RECOVER_default=0 +: ${OCF_RESKEY_AUTOMATIC_RECOVER=${OCF_RESKEY_AUTOMATIC_RECOVER_default}} + +if [ -z "$OCF_RESKEY_MONITOR_SERVICES" ] +then + case $DBTYPE in + ORA) export OCF_RESKEY_MONITOR_SERVICES="Instance|Database|Listener" + ;; + ADA) export OCF_RESKEY_MONITOR_SERVICES="Database" + ;; + DB6) db2sid="db2`echo $SID | tr '[:upper:]' '[:lower:]'`" + export OCF_RESKEY_MONITOR_SERVICES="${SID}|${db2sid}" + ;; + SYB) export OCF_RESKEY_MONITOR_SERVICES="Server|Database" + ;; + HDB) export OCF_RESKEY_MONITOR_SERVICES="hdbindexserver" + ;; + esac +fi +} diff --git a/heartbeat/slapd b/heartbeat/slapd index 5ac200a6d..1429f3c70 100755 --- a/heartbeat/slapd +++ b/heartbeat/slapd @@ -1,572 +1,579 @@ #!/bin/bash # # Stand-alone LDAP Daemon (slapd) # # Description: Manages Stand-alone LDAP Daemon (slapd) as an OCF resource in # an high-availability setup. # # Authors: Jeroen Koekkoek # nozawat@gmail.com # John Keith Hohm # # License: GNU General Public License (GPL) # Copyright: (C) 2011 Pagelink B.V. # # The OCF code was inspired by the Postfix resource script written by # Raoul Bhatia . # # The code for managing the slapd instance is based on the the slapd init # script found in Debian GNU/Linux 6.0. # # OCF parameters: # OCF_RESKEY_slapd # OCF_RESKEY_ldapsearch # OCF_RESKEY_config # OCF_RESKEY_pidfile # OCF_RESKEY_user # OCF_RESKEY_group # OCF_RESKEY_services # OCF_RESKEY_watch_suffix # OCF_RESKEY_ignore_suffix # OCF_RESKEY_bind_dn # OCF_RESKEY_password # OCF_RESKEY_parameters # OCF_RESKEY_stop_escalate # ################################################################################ # Initialization: -: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat} -. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs : ${OCF_RESKEY_slapd="/usr/sbin/slapd"} : ${OCF_RESKEY_ldapsearch="ldapsearch"} : ${OCF_RESKEY_config=""} : ${OCF_RESKEY_pidfile=""} : ${OCF_RESKEY_user=""} : ${OCF_RESKEY_group=""} : ${OCF_RESKEY_services="ldap:///"} : ${OCF_RESKEY_watch_suffix=""} : ${OCF_RESKEY_ignore_suffix=""} : ${OCF_RESKEY_bind_dn=""} : ${OCF_RESKEY_password=""} : ${OCF_RESKEY_parameters=""} : ${OCF_RESKEY_stop_escalate=15} USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}" ORIG_IFS=$IFS NEWLINE=' ' ################################################################################ usage() { echo $USAGE >&2 } meta_data() { cat < 0.1 Resource script for Stand-alone LDAP Daemon (slapd). It manages a slapd instance as an OCF resource. Manages a Stand-alone LDAP Daemon (slapd) instance Full path to the slapd binary. For example, "/usr/sbin/slapd". Full path to slapd binary Full path to the ldapsearch binary. For example, "/usr/bin/ldapsearch". Full path to ldapsearch binary Full path to a slapd configuration directory or a slapd configuration file. For example, "/etc/ldap/slapd.d" or "/etc/ldap/slapd.conf". Full path to configuration directory or file File to read the PID from; read from olcPidFile/pidfile in config if not set. File to read PID from User name or id slapd will run with. The group id is also changed to this user's gid, unless the group parameter is used to override. User name or id slapd will run with Group name or id slapd will run with. Group name or id slapd will run with LDAP (and other scheme) URLs slapd will serve. For example, "ldap://127.0.0.1:389 ldaps:/// ldapi:///" LDAP (and other scheme) URLs to serve Suffix (database backend) that will be monitored for availability. Multiple suffixes can be specified by providing a space seperated list. By providing one or more suffixes here, the ignore_suffix parameter is discarded. All suffixes will be monitored if left blank. Suffix that will be monitored for availability. Suffix (database backend) that will not be monitored for availability. Multiple suffixes can be specified by providing a space seperated list. No suffix will be excluded if left blank. Suffix that will not be monitored for availability. Distinguished Name used to bind to the LDAP directory for testing. Leave blank to bind to the LDAP directory anonymously. Distinguished Name used to bind to the LDAP directory for testing. Password used to bind to the LDAP directory for testing. Password used to bind to the LDAP directory for testing. slapd may be called with additional parameters. Specify any of them here. Any additional parameters to slapd. Number of seconds to wait for shutdown (using SIGTERM) before resorting to SIGKILL Seconds before stop escalation to KILL END } terminate() { local pid=$1 local signal=$2 local recheck=${3-0} - local result + local rc local waited=0 - kill -$signal $pid >/dev/null 2>&1; result=$? + kill -$signal $pid >/dev/null 2>&1; rc=$? - while [ \( $result -eq 0 \) -a \( $recheck -eq 0 -o $waited -lt $recheck \) ]; do - kill -0 $pid >/dev/null 2>&1; result=$? + while [ \( $rc -eq 0 \) -a \( $recheck -eq 0 -o $waited -lt $recheck \) ]; do + kill -0 $pid >/dev/null 2>&1; rc=$? let "waited += 1" - if [ $result -eq 0 ]; then + if [ $rc -eq 0 ]; then sleep 1 fi done - if [ $result -ne 0 ]; then + if [ $rc -ne 0 ]; then return 0 fi return 1 } watch_suffix() { - local result + local rc if [ -n "$OCF_RESKEY_watch_suffix" ]; then if echo "'$OCF_RESKEY_watch_suffix'" | grep "'$1'" >/dev/null 2>&1; then - result=0 + rc=0 else - result=1 + rc=1 fi else if echo "'$OCF_RESKEY_ignore_suffix'" | grep "'$1'" >/dev/null 2>&1; then - result=1 + rc=1 else - result=0 + rc=0 fi fi - return $result + return $rc } slapd_pid() { local pid if [ -f "$pid_file" ]; then pid=`head -n 1 "$pid_file" 2>/dev/null` if [ "X$pid" != "X" ]; then echo "$pid" return $OCF_SUCCESS fi ocf_log err "slapd pid file '$pid_file' empty." return $OCF_ERR_GENERIC fi ocf_log info "slapd pid file '$pid_file' does not exist." return $OCF_NOT_RUNNING } slapd_status() { local pid=$1 local state=$? if [ $state -eq $OCF_SUCCESS ]; then if ! kill -0 $pid >/dev/null 2>&1; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi return $state } slapd_start() { local options local reason - local result + local rc local state slapd_status `slapd_pid`; state=$? if [ $state -eq $OCF_SUCCESS ]; then ocf_log info "slapd already running." return $state elif [ $state -eq $OCF_ERR_GENERIC ]; then return $state fi options="-u $user -g $group" if [ -d "$config" ]; then options="$options -F $config" elif [ -f "$config" ]; then options="$options -f $config" else ocf_log err "slapd configuration '$config' does not exist." return $OCF_ERR_INSTALLED fi if [ -n "$parameters" ]; then options="$options $parameters" fi if [ -n "$services" ]; then - $slapd -h "$services" $options 2>&1; result=$? + $slapd -h "$services" $options 2>&1; rc=$? else - $slapd $options 2>&1; result=$? + $slapd $options 2>&1; rc=$? fi - if [ $result -ne 0 ]; then + if [ $rc -ne 0 ]; then ocf_log err "slapd returned error." return $OCF_ERR_GENERIC fi while true; do slapd_monitor start if [ $? = "$OCF_SUCCESS" ]; then break fi sleep 1 done ocf_log info "slapd started." return $OCF_SUCCESS } slapd_stop() { local pid - local result + local rc local state pid=`slapd_pid`; slapd_status $pid; state=$? if [ $state -eq $OCF_NOT_RUNNING ]; then ocf_log info "slapd already stopped." return $OCF_SUCCESS elif [ $state -eq $OCF_ERR_GENERIC ]; then return $state fi - terminate $pid TERM $OCF_RESKEY_stop_escalate; result=$? - if [ $result -ne 0 ]; then + terminate $pid TERM $OCF_RESKEY_stop_escalate; rc=$? + if [ $rc -ne 0 ]; then ocf_log err "slapd failed to stop. Escalating to KILL." - terminate $pid KILL; result=$? + terminate $pid KILL; rc=$? fi if [ -f "$pid_file" ]; then rm -f "$pid_file" >/dev/null 2>&1 fi ocf_log info "slapd stopped." return $OCF_SUCCESS } slapd_monitor() { local options - local result + local rc local state local suffix local suffixes local err_option="-err" slapd_status `slapd_pid`; state=$? if [ $state -eq $OCF_NOT_RUNNING ]; then if [ -z "$1" ];then if ! ocf_is_probe; then ocf_log err "slapd process not found." fi fi return $state elif [ $state -ne $OCF_SUCCESS ]; then ocf_log err "slapd returned error." return $state fi if [ -d "$config" ]; then for suffix in `find "$config"/'cn=config' -type f -name olcDatabase* -exec \ sed -ne 's/^[[:space:]]*olcSuffix:[[:space:]]\+\(.\+\)/\1/p' {} \;` do suffix=${suffix#\"*} suffix=${suffix%\"*} if watch_suffix $suffix; then suffixes="$suffixes $suffix" fi done elif [ -f "$config" ]; then for suffix in `sed -ne 's/^[[:space:]]*suffix[[:space:]]\+\(.\+\)/\1/p' "$config"` do suffix=${suffix#\"*} suffix=${suffix%\"*} if watch_suffix $suffix; then suffixes="$suffixes $suffix" fi done else ocf_log err "slapd configuration '$config' does not exist." return $OCF_ERR_INSTALLED fi options="-LLL -s base -x" if [ -n "$bind_dn" ]; then options="$options -D '$bind_dn' -w '$password'" fi [ -z "$1" ] && err_option="" for suffix in $suffixes; do - ocf_run -q $err_option "$ldapsearch" -H "$services" -b "$suffix" $options >/dev/null 2>&1; result=$? + ocf_run -q $err_option "$ldapsearch" -H "$services" -b "$suffix" $options >/dev/null 2>&1; rc=$? - case "$result" in + case "$rc" in "0") ocf_log debug "slapd database with suffix '$suffix' reachable" ;; "49") ocf_log err "slapd database with suffix '$suffix' unreachable. Invalid credentials." return $OCF_ERR_CONFIGURED ;; *) - if [ -z "$1" ] || [ -n "$1" -a $result -ne 1 ]; then - ocf_log err "slapd database with suffix '$suffix' unreachable. exit code ($result)" + if [ -z "$1" ] || [ -n "$1" -a $rc -ne 1 ]; then + ocf_log err "slapd database with suffix '$suffix' unreachable. exit code ($rc)" state=$OCF_ERR_GENERIC fi ;; esac done return $state } slapd_validate_all() { check_binary "$slapd" check_binary "$ldapsearch" if [ -z "$pid_file" ]; then if [ -d "$config" ]; then pid_file=`sed -ne \ 's/^olcPidFile:[[:space:]]\+\(.\+\)[[:space:]]*/\1/p' \ "$config"/'cn=config.ldif' 2>/dev/null` elif [ -f "$config" ]; then pid_file=`sed -ne \ 's/^pidfile[[:space:]]\+\(.\+\)/\1/p' \ "$config" 2>/dev/null` else ocf_log err "slapd configuration '$config' does not exist." return $OCF_ERR_INSTALLED fi fi if [ -z "$user" ]; then user=`id -nu 2>/dev/null` elif ! id "$user" >/dev/null 2>&1; then ocf_log err "slapd user '$user' does not exist" return $OCF_ERR_INSTALLED fi if [ -z "$group" ]; then group=`id -ng 2>/dev/null` elif ! grep "^$group:" /etc/group >/dev/null 2>&1; then ocf_log err "slapd group '$group' does not exist" return $OCF_ERR_INSTALLED fi + pid_dir=`dirname "$pid_file"` + if [ ! -d "$pid_dir" ]; then + mkdir -p "$pid_dir" + chown -R "$user" "$pid_dir" + chgrp -R "$group" "$pid_dir" + fi + return $OCF_SUCCESS } # # Main # slapd=$OCF_RESKEY_slapd ldapsearch=$OCF_RESKEY_ldapsearch config=$OCF_RESKEY_config user=$OCF_RESKEY_user group=$OCF_RESKEY_group services=$OCF_RESKEY_services bind_dn=$OCF_RESKEY_bind_dn password=$OCF_RESKEY_password parameters=$OCF_RESKEY_parameters pid_file=$OCF_RESKEY_pidfile if [ -z "$config" ]; then if [ -e "/etc/ldap/slapd.d" ]; then config="/etc/ldap/slapd.d" else config="/etc/ldap/slapd.conf" fi fi if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) usage exit $OCF_SUCCESS ;; esac slapd_validate_all rc=$? [ $rc -eq $OCF_SUCCESS ] || exit $rc case $1 in status) slapd_status `slapd_pid`; state=$? if [ $state -eq $OCF_SUCCESS ]; then ocf_log debug "slapd is running." elif [ $state -eq $OCF_NOT_RUNNING ]; then ocf_log debug "slapd is stopped." fi exit $state ;; start) slapd_start exit $? ;; stop) slapd_stop exit $? ;; monitor) slapd_monitor; state=$? exit $state ;; validate-all) exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/tomcat b/heartbeat/tomcat index fdc67be4c..c52537585 100755 --- a/heartbeat/tomcat +++ b/heartbeat/tomcat @@ -1,458 +1,496 @@ #!/bin/sh # # Description: Manages a Tomcat Server as an OCF High-Availability # resource under Heartbeat/LinuxHA control # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # # Copyright (c) 2007 NIPPON TELEGRAPH AND TELEPHONE CORPORATION # ####################################################################### # OCF parameters: # OCF_RESKEY_tomcat_name - The name of the resource. Default is tomcat # OCF_RESKEY_script_log - A destination of the log of this script. Default /var/log/OCF_RESKEY_tomcat_name.log # OCF_RESKEY_tomcat_stop_timeout - Time-out at the time of the stop. Default is 5. DEPRECATED # OCF_RESKEY_tomcat_suspend_trialcount - The re-try number of times awaiting a stop. Default is 10. DEPRECATED -# OCF_RESKEY_tomcat_user - A user name to start a resource. Default is root +# OCF_RESKEY_tomcat_user - A user name to start a resource. # OCF_RESKEY_statusurl - URL for state confirmation. Default is http://127.0.0.1:8080 # OCF_RESKEY_java_home - Home directory of Java. Default is none # OCF_RESKEY_java_opts - Options to pass to Java JVM for start and stop. Default is none # OCF_RESKEY_catalina_home - Home directory of Tomcat. Default is none # OCF_RESKEY_catalina_base - Base directory of Tomcat. Default is OCF_RESKEY_catalina_home # OCF_RESKEY_catalina_pid - A PID file name of Tomcat. Default is OCF_RESKEY_catalina_home/logs/catalina.pid # OCF_RESKEY_tomcat_start_opts - Start options of Tomcat. Default is none. # OCF_RESKEY_catalina_opts - CATALINA_OPTS environment variable. Default is none. # OCF_RESKEY_catalina_rotate_log - Control catalina.out logrotation flag. Default is NO. # OCF_RESKEY_catalina_rotatetime - catalina.out logrotation time span(seconds). Default is 86400. ############################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ############################################################################ # Usage usage() { cat <<-! usage: $0 action action: start start Tomcat stop stop Tomcat status return the status of Tomcat, up or down monitor return TRUE if Tomcat appears to be working. You have to have installed $WGETNAME for this to work. meta-data show meta data message validate-all validate the instance parameters ! } ############################################################################ # Check tomcat service availability isrunning_tomcat() { if ! have_binary $WGET; then ocf_log err "Monitoring not supported by $OCF_RESOURCE_INSTANCE" ocf_log info "Please make sure that wget is available" return $OCF_ERR_CONFIGURED fi $WGET -O /dev/null $RESOURCE_STATUSURL >/dev/null 2>&1 } ############################################################################ # isalive_tomcat() { - pgrep -f "${SEARCH_STR}" > /dev/null + if [ -f $CATALINA_PID ]; then + PID=`head -n 1 $CATALINA_PID` + # Retry message for restraint + kill -s 0 $PID >/dev/null 2>&1 + return $? + fi + # No PID file + false } + ############################################################################ # Check tomcat process and service availability monitor_tomcat() { isalive_tomcat || return $OCF_NOT_RUNNING isrunning_tomcat || return $OCF_NOT_RUNNING return $OCF_SUCCESS } ############################################################################ # Execute catalina.out log rotation rotate_catalina_out() { # Look for rotatelogs/rotatelogs2 if [ -x /usr/sbin/rotatelogs ]; then ROTATELOGS=/usr/sbin/rotatelogs elif [ -x /usr/sbin/rotatelogs2 ]; then ROTATELOGS=/usr/sbin/rotatelogs2 else ocf_log warn "rotatelogs command not found." return 1 fi # Clean up and set permissions on required files rm -rf "$CATALINA_HOME"/temp/* "$CATALINA_HOME/logs/catalina.out" mkfifo -m700 "$CATALINA_HOME/logs/catalina.out" chown --dereference "$RESOURCE_TOMCAT_USER" "$CATALINA_HOME/logs/catalina.out" || true # -s is required because tomcat5.5's login shell is /bin/false su - -s /bin/sh $RESOURCE_TOMCAT_USER \ -c "$ROTATELOGS -l \"$CATALINA_HOME/logs/catalina_%F.log\" $CATALINA_ROTATETIME" \ < "$CATALINA_HOME/logs/catalina.out" > /dev/null 2>&1 & } ############################################################################ # Start Tomcat start_tomcat() { cd "$CATALINA_HOME/bin" monitor_tomcat if [ $? = $OCF_SUCCESS ]; then return $OCF_SUCCESS fi # Remove $CATALINA_PID if it exists rm -f $CATALINA_PID #ocf_log debug "catalina.out rotation FLG = ${CATALINA_ROTATE_LOG}" if [ ${CATALINA_ROTATE_LOG} = "YES" ]; then rotate_catalina_out if [ $? = 0 ]; then ocf_log debug "Rotate catalina.out succeeded." else ocf_log warn "Rotate catalina.out failed. Starting tomcat without catalina.out rotation." fi fi echo "`date "+%Y/%m/%d %T"`: start ===========================" >> "$TOMCAT_CONSOLE" ocf_log debug "CATALINA_OPTS value = ${CATALINA_OPTS}" if [ "$RESOURCE_TOMCAT_USER" = RUNASIS ]; then "$CATALINA_HOME/bin/catalina.sh" start $TOMCAT_START_OPTS \ >> "$TOMCAT_CONSOLE" 2>&1 & else cat<<-END_TOMCAT_START | su - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1 & export JAVA_HOME=${JAVA_HOME} export JAVA_OPTS="${JAVA_OPTS}" export CATALINA_HOME=${CATALINA_HOME} export CATALINA_BASE=${CATALINA_BASE} export CATALINA_PID=${CATALINA_PID} export CATALINA_OPTS="${CATALINA_OPTS}" $CATALINA_HOME/bin/catalina.sh start ${TOMCAT_START_OPTS} END_TOMCAT_START fi while true; do monitor_tomcat if [ $? = $OCF_SUCCESS ]; then break fi ocf_log debug "start_tomcat[$TOMCAT_NAME]: retry monitor_tomcat" sleep 3 done return $OCF_SUCCESS } ############################################################################ # Stop Tomcat stop_tomcat() { STOP_TIMEOUT=$((OCF_RESKEY_CRM_meta_timeout/1000-1)) cd "$CATALINA_HOME/bin" echo "`date "+%Y/%m/%d %T"`: stop ###########################" >> "$TOMCAT_CONSOLE" if [ "$RESOURCE_TOMCAT_USER" = RUNASIS ]; then "$CATALINA_HOME/bin/catalina.sh" stop $STOP_TIMEOUT -force \ >> "$TOMCAT_CONSOLE" 2>&1 else cat<<-END_TOMCAT_STOP | su - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1 export JAVA_HOME=${JAVA_HOME} export JAVA_OPTS="${JAVA_OPTS}" export CATALINA_HOME=${CATALINA_HOME} export CATALINA_BASE=${CATALINA_BASE} export CATALINA_PID=${CATALINA_PID} $CATALINA_HOME/bin/catalina.sh stop $STOP_TIMEOUT -force END_TOMCAT_STOP fi lapse_sec=0 while isalive_tomcat; do sleep 1 lapse_sec=`expr $lapse_sec + 1` ocf_log debug "stop_tomcat[$TOMCAT_NAME]: stop failed, killing with SIGKILL ($lapse_sec)" - pkill -KILL -f "${SEARCH_STR}" + if [ -f $CATALINA_PID ]; then + PID=`head -n 1 $CATALINA_PID` + kill -KILL $PID + fi done if [ ${CATALINA_ROTATE_LOG} = "YES" ]; then rm -f "$CATALINA_PID" "${CATALINA_HOME}/logs/catalina.out" else rm -f "$CATALINA_PID" fi return $OCF_SUCCESS } -status_tomcat() -{ - return $OCF_SUCCESS -} - - metadata_tomcat() { cat < 1.0 Resource script for Tomcat. It manages a Tomcat instance as a cluster resource. Manages a Tomcat servlet environment instance to Tomcat process on start. Used to ensure process is still running and must be unique. ]]> The name of the resource Log file, used during start and stop operations. Log file Time-out for stop operation. DEPRECATED Time-out for the stop operation. DEPRECATED Maximum number of times to retry stop operation before suspending and killing Tomcat. DEPRECATED. Does not retry. Max retry count for stop operation. DEPRECATED The user who starts Tomcat. The user who starts Tomcat URL for state confirmation. URL for state confirmation Home directory of Java. Home directory of Java Java JVM options used on start and stop. Java options parsed to JVM, used on start and stop. Home directory of Tomcat. Home directory of Tomcat Instance directory of Tomcat Instance directory of Tomcat, defaults to catalina_home A PID file name for Tomcat. A PID file name for Tomcat Tomcat start options. Tomcat start options Catalina options, for the start operation only. Catalina options Rotate catalina.out flag. Rotate catalina.out flag catalina.out rotation interval (seconds). catalina.out rotation interval (seconds) END return $OCF_SUCCESS } validate_all_tomcat() { ocf_log info "validate_all_tomcat[$TOMCAT_NAME]" return $OCF_SUCCESS } # ### tomcat RA environment variables # +COMMAND=$1 TOMCAT_NAME="${OCF_RESKEY_tomcat_name-tomcat}" TOMCAT_CONSOLE="${OCF_RESKEY_script_log-/var/log/$TOMCAT_NAME.log}" RESOURCE_TOMCAT_USER="${OCF_RESKEY_tomcat_user-RUNASIS}" RESOURCE_STATUSURL="${OCF_RESKEY_statusurl-http://127.0.0.1:8080}" JAVA_HOME="${OCF_RESKEY_java_home}" JAVA_OPTS="${OCF_RESKEY_java_opts}" CATALINA_HOME="${OCF_RESKEY_catalina_home}" CATALINA_BASE="${OCF_RESKEY_catalina_base-${OCF_RESKEY_catalina_home}}" CATALINA_PID="${OCF_RESKEY_catalina_pid-$CATALINA_HOME/logs/catalina.pid}" TOMCAT_START_OPTS="${OCF_RESKEY_tomcat_start_opts}" CATALINA_OPTS="-Dname=$TOMCAT_NAME ${OCF_RESKEY_catalina_opts}" CATALINA_ROTATE_LOG="${OCF_RESKEY_catalina_rotate_log-NO}" CATALINA_ROTATETIME="${OCF_RESKEY_catalina_rotatetime-86400}" -SEARCH_STR="\\""${CATALINA_OPTS}" + +LSB_STATUS_STOPPED=3 +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi +case "$COMMAND" in + meta-data) metadata_tomcat; exit $OCF_SUCCESS;; + help|usage) usage; exit $OCF_SUCCESS;; +esac + +if [ ! -d "$JAVA_HOME" -o ! -d "$CATALINA_HOME" -o ! -d "$CATALINA_BASE" ]; then + case $COMMAND in + stop) exit $OCF_SUCCESS;; + monitor) exit $OCF_NOT_RUNNING;; + status) exit $LSB_STATUS_STOPPED;; + esac + ocf_log err "JAVA_HOME or CATALINA_HOME or CATALINA_BASE does not exist." + exit $OCF_ERR_INSTALLED +fi export JAVA_HOME JAVA_OPTS CATALINA_HOME CATALINA_BASE CATALINA_PID CATALINA_OPTS JAVA=${JAVA_HOME}/bin/java +if [ ! -x "$JAVA" ]; then + case $COMMAND in + stop) exit $OCF_SUCCESS;; + monitor) exit $OCF_NOT_RUNNING;; + status) exit $LSB_STATUS_STOPPED;; + esac + ocf_log err "java command does not exist." + exit $OCF_ERR_INSTALLED +fi + # # ------------------ # the main script # ------------------ # - -COMMAND=$1 - case "$COMMAND" in start) ocf_log debug "[$TOMCAT_NAME] Enter tomcat start" start_tomcat func_status=$? ocf_log debug "[$TOMCAT_NAME] Leave tomcat start $func_status" exit $func_status ;; stop) ocf_log debug "[$TOMCAT_NAME] Enter tomcat stop" stop_tomcat func_status=$? ocf_log debug "[$TOMCAT_NAME] Leave tomcat stop $func_status" exit $func_status ;; status) - status_tomcat + if monitor_tomcat; then + echo tomcat instance $TOMCAT_NAME is running + exit $OCF_SUCCESS + else + echo tomcat instance $TOMCAT_NAME is stopped + exit $OCF_NOT_RUNNING + fi exit $? ;; monitor) #ocf_log debug "[$TOMCAT_NAME] Enter tomcat monitor" monitor_tomcat func_status=$? #ocf_log debug "[$TOMCAT_NAME] Leave tomcat monitor $func_status" exit $func_status ;; meta-data) metadata_tomcat exit $? ;; validate-all) validate_all_tomcat exit $? ;; usage|help) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/ldirectord/ldirectord.in b/ldirectord/ldirectord.in index 646d68727..12df5d072 100644 --- a/ldirectord/ldirectord.in +++ b/ldirectord/ldirectord.in @@ -1,5216 +1,5216 @@ #!/usr/bin/perl -w ###################################################################### # ldirectord http://www.vergenet.net/linux/ldirectord/ # Linux Director Daemon - run "perldoc ldirectord" for details # # 1999-2006 (C) Jacob Rief , # Horms and others # # License: GNU General Public License (GPL) # # Note: * The original author of this software was Jacob Rief circa 1999 # * It was maintained by Jacob Rief and Horms # from November 1999 to July 2003. # * From July 2003 Horms is the maintainer # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # 02111-1307 USA # ###################################################################### # A Brief history of versions: # # From oldest to newest # 1.1-1.144: ldirecord maintained in CVS HEAD branch # 1.145-1.186: ldirectord.in maintained in CVS HEAD BRANCH # 1.186-ha-VERSION: ldirectord.in maintained in mercurial =head1 NAME ldirectord - Linux Director Daemon Daemon to monitor remote services and control Linux Virtual Server =head1 SYNOPSIS B [B<-d|--debug>] [--] [I] B | B | B | B | B | B | B B [B<-h|-?|--help|-v|--version>] =head1 DESCRIPTION B is a daemon to monitor and administer real servers in a cluster of load balanced virtual servers. B typically is started from heartbeat but can also be run from the command line. On startup B reads the file B<@sysconfdir@/ha.d/conf/>I. After parsing the file, entries for virtual servers are created on the LVS. Now at regular intervals the specified real servers are monitored and if they are considered alive, added to a list for each virtual server. If a real server fails, it is removed from that list. Only one instance of B can be started for each configuration, but more instances of B may be started for different configurations. This helps to group clusters of services. Normally one would put an entry inside B<@sysconfdir@/ha.d/haresources> I to start ldirectord from heartbeat. =head1 OPTIONS I: This is the name for the configuration as specified in the file B<@sysconfdir@/ha.d/conf/>I B<-d|--debug> Don't start as daemon and log verbosely. B<-h|--help> Print user manual and exit. B<-v|--version> Print version and exit. B the daemon for the specified configuration. B the daemon for the specified configuration. This is the same as sending a TERM signal to the running daemon. B the daemon for the specified configuration. The same as stopping and starting. B the configuration file. This is only useful for modifications inside a virtual server entry. It will have no effect on adding or removing a virtual server block. This is the same as sending a HUP signal to the running daemon. B of the running daemon for the specified configuration. =head1 SYNTAX =head2 Description of how to write configuration files BI<(ip_address|hostname:portnumber|servicename)|firewall-mark> Defines a virtual service by IP-address (or hostname) and port (or servicename) or firewall-mark. A firewall-mark is an integer greater than zero. The configuration of marking packets is controlled using the C<-m> option to B(8). All real services and flags for a virtual service must follow this line immediately and be indented. BI Timeout in seconds for connect, external, external-perl and ping checks. If the timeout is exceeded then the real server is declared dead. If defined in a virtual server section then the global value is overridden. If undefined then the value of negotiatetimeout is used. negotiatetimeout is also a global value that may be overridden by a per-virtual setting. If both checktimeout and negotiatetimeout are unset, the default is used. Default: 5 seconds BI Timeout in seconds for negotiate checks. If defined in a virtual server section then the global value is overridden. If undefined then the value of connecttimeout is used. connecttimeout is also a global value that may be overridden by a per-virtual setting. If both negotiatetimeout and connecttimeout are unset, the default is used. Default: 30 seconds BI Defines the number of second between server checks. When fork=no this option defines the amount of time ldirectord sleeps between running all of the realserver checks in all virtual service pools. When fork=yes this option defines the amount of time each forked child sleeps per virtual service pool after running all realserver checks for that pool. If set in the virtual server section then the global value is overridden, but ONLY if using forking mode (BI). Default: 10 seconds BI This option is deprecated and slated for removal in a future version. Please see the 'failurecount' option. The number of times a check will be attempted before it is considered to have failed. Only works with ping checks. Note that the checktimeout/negotiatetimeout is additive, so if a connect check is used, checkcount is 3 and checktimeout is 2 seconds, then a total of 6 seconds worth of timeout will occur before the check fails. If defined in a virtual server section then the global value is overridden. Default: 1 BI The number of consecutive times a failure will have to be reported by a check before the realserver is considered to have failed. A value of 1 will have the realserver considered failed on the first failure. A successful check will reset the failure counter to 0. If defined in a virtual server section then the global value is overridden. Default: 1 BB | B Defines if should continuously check the configuration file for modification. If this is set to 'yes' and the configuration file changed on disk and its modification time (mtime) is newer than the previous version, the configuration is automatically reloaded. Default: no BIB<"> If this directive is defined, B automatically calls the executable I after the configuration file has changed on disk. This is useful to update the configuration file through B on the other heartbeated host. The first argument to the callback is the name of the configuration. This directive might also be used to restart B automatically after the configuration file changed on disk. However, if B is set to yes, the configuration is reloaded anyway. BI [B | B | B] the server onto which a webservice is redirected if all real servers are down. Typically this would be 127.0.0.1 with an emergency page. If defined in a virtual server section then the global value is overridden. BIB<"> If this directive is defined, the supplied script is executed whenever all real servers for a virtual service are down or when the first real server comes up again. In the first case, it is called with "start" as its first argument, in the latter with "stop". If defined in a virtual server section then the global value is overridden. BIB<">|syslog_facility An alternative logfile might be specified with this directive. If the logfile does not have a leading '/', it is assumed to be a syslog(3) facility name. Default: log directly to the file I. BI[, I]...B<"> A valid email address for sending alerts about the changed connection status to any real server defined in the virtual service. This option requires perl module MailTools to be installed. Automatically tries to send email using any of the built-in methods. See perldoc Mail::Mailer for more info on methods. Multiple addresses may be supplied, comma delimited. If defined in a virtual server section then the global value is overridden. BI A valid email address to use as the from address of the email alerts. You can use a plain email address or any RFC-compliant string for the From header in the body of an email message (such as: "ldirectord Alerts" ) Do not quote this string unless you want the quotes passed in as part of the From header. Default: unset, take system generated default (probably root@hostname) B I Delay in seconds between repeating email alerts while any given real server in the virtual service remains inaccessible. A setting of zero seconds will inhibit the repeating alerts. The email timing accuracy of this setting is dependent on the number of seconds defined in the checkinterval configuration option. If defined in a virtual server section then the global value is overridden. Default: 0 BB | B | B | B | B | B,... Comma delimited list of server states in which email alerts should be sent. B is a short-hand for "B,B,B,B". If B is specified, no other option may be specified, otherwise options are ored with each other. If defined in a virtual server section then the global value is overridden. Default: all BIB<"> A valid SMTP server address to use for sending email via SMTP. If defined in a virtual server section then the global value is overridden. BIB<"> Use this directive to start an instance of ldirectord for the named I. BB | B If I, then ldirectord does not go into background mode. All log-messages are redirected to stdout instead of a logfile. This is useful to run B supervised from daemontools. See http://untroubled.org/rpms/daemontools/ or http://cr.yp.to/daemontools.html for details. Default: I BB | B If I, then ldirectord will spawn a child process for every virtual server, and run checks against the real servers from them. This will increase response times to changes in real server status in configurations with many virtual servers. This may also use less memory then running many separate instances of ldirectord. Child processes will be automatically restarted if they die. Default: I BB | B If I, then when real or failback servers are determined to be down, they are not actually removed from the kernel's LVS table. Rather, their weight is set to zero which means that no new connections will be accepted. This has the side effect, that if the real server has persistent connections, new connections from any existing clients will continue to be routed to the real server, until the persistent timeout can expire. See L for more information on persistent connections. This side-effect can be avoided by running the following: echo 1 > /proc/sys/net/ipv4/vs/expire_quiescent_template If the proc file isn't present this probably means that the kernel doesn't have LVS support, LVS support isn't loaded, or the kernel is too old to have the proc file. Running ipvsadm as root should load LVS into the kernel if it is possible. If I, then the real or failback servers will be removed from the kernel's LVS table. The default is I. If defined in a virtual server section then the global value is overridden. Default: I BB | B If I, then when ldirectord exits it will remove all of the virtual server pools that it is managing from the kernel's LVS table. If I, then the virtual server pools it is managing and any real or failback servers listed in them at the time ldirectord exits will be left as-is. If you want to be able to stop ldirectord without having traffic to your realservers interrupted you will want to set this to I. If defined in a virtual server section then the global value is overridden. Default: I BI If this option is set ldirectord will look for a special file in the specified directory and, if found, force the status of the real server identified by the file to down, skipping the normal health check. This would be useful if you wish to force servers down for maintenance without having to modify the actual ldirectord configuration file. For example, given a realserver with IP 172.16.1.2, service on port 4444, and a resolvable reverse DNS entry pointing to "realserver2.example.com" ldirectord will check for the existence of the following files: =over =item 172.16.1.2:4444 =item 172.16.1.2 =item realserver2.example.com:4444 =item realserver2.example.com =item realserver2:4444 =item realserver2 =back If any one of those files is found then ldirectord will immediately force the status of the server to down as if the check had failed. Note: Since it checks for the IP/hostname without the port this means you can decide to place an entire realserver into maintenance across a large number of virtual service pools with a single file (if you were going to reboot the server, for instance) or include the port number and put just a particular service into maintenance. This option is not valid in a virtual server section. Default: disabled =head2 Section virtual The following commands must follow a B entry and must be indented with a minimum of 4 spaces or one tab. B Iip_address|hostname][:portnumber|servicename>] B | B | B [I] [B<">IB<", ">IB<">] Defines a real service by IP-address (or hostname) and port (or servicename). If the port is omitted then a 0 will be used, this is intended primarily for fwmark services where the port for real servers is ignored. Optionally a range of IPv4 addresses (or two hostnames) may be given, in which case each IPv4 address in the range will be treated as a real server using the given port. The second argument defines the forwarding method, must be B, B or B. The third argument is optional and defines the weight for that real server. If omitted then a weight of 1 will be used. The last two arguments are also optional. They define a request-receive pair to be used to check if a server is alive. They override the request-receive pair in the virtual server section. These two strings must be quoted. If the request string starts with I the IP-address and port of the real server is overridden, otherwise the IP-address and port of the real server is used. =head2 For TCP and UDP (non fwmark) virtual services, unless the forwarding method is B and the IP address of a real server is non-local (not present on a interface on the host running ldirectord) then the port of the real server will be set to that of its virtual service. That is, port-mapping is only available to if the real server is another machine and the forwarding method is B. This is due to the way that the underlying LVS code in the kernel functions. =head2 More than one of these entries may be inside a virtual section. The checktimeout, negotiatetimeout, checkcount, fallback, emailalert, emailalertfreq and quiescent options listed above may also appear inside a virtual section, in which case the global setting is overridden. BB | B | B | B | B | B | B | BI Type of check to perform. Negotiate sends a request and matches a receive string. Connect only attempts to make a TCP/IP connection, thus the request and receive strings may be omitted. If checktype is a number then negotiate and connect is combined so that after each N connect attempts one negotiate attempt is performed. This is useful to check often if a service answers and in much longer intervals a negotiating check is done. Ping means that ICMP ping will be used to test the availability of real servers. Ping is also used as the connect check for UDP services. Off means no checking will take place and no real or fallback servers will be activated. On means no checking will take place and real servers will always be activated. Default is I. BB | B | B | B | B | B | B | B | B | B | B | B | B | B | B | B | B | B | B | B The type of service to monitor when using checktype=negotiate. None denotes a service that will not be monitored. simpletcp sends the B string to the server and tests it against the B regexp. The other types of checks connect to the server using the specified protocol. Please see the B and B sections for protocol specific information. Default: =over 4 =item * Virtual server port is 21: ftp =item * Virtual server port is 25: smtp =item * Virtual server port is 53: dns =item * Virtual server port is 80: http =item * Virtual server port is 110: pop =item * Virtual server port is 119: nntp =item * Virtual server port is 143: imap =item * Virtual server port is 389: ldap =item * Virtual server port is 443: https =item * Virtual server port is 587: submission =item * Virtual server port is 993: imaps =item * Virtual server port is 995: pops =item * Virtual server port is 1521: oracle =item * Virtual server port is 1812: radius =item * Virtual server port is 3128: http_proxy =item * Virtual server port is 3306: mysql =item * Virtual server port is 5432: pgsql =item * Virtual server port is 5060: sip =item * Otherwise: none =back BIB<"> This setting is used if checktype is external or external-perl and is the command to be run to check the status of a real server. It should exit with status 0 if everything is ok, or non-zero otherwise. Four parameters are passed to the script: =over 4 =item * virtual server ip/firewall mark =item * virtual server port =item * real server ip =item * real server port =back If the checktype is external-perl then the command is assumed to be a Perl script and it is evaluated into an anonymous subroutine which is called at check time, avoiding a fork-exec. The argument signature and exit code conventions are identical to checktype external. That is, an external-perl checktype should also work as an external checktype. Default: /bin/true BI Number of port to monitor. Sometimes check port differs from service port. Default: port specified for each real server BIB<"> This object will be requested each checkinterval seconds on each real server. The string must be inside quotes. Note that this string may be overridden by an optional per real-server based request-string. For an HTTP/HTTPS check, this should be a relative URI, while it has to be absolute for the 'http_proxy' check type. In the latter case, this URI will be requested through the proxy backend that is being checked. For a DNS check this should the name of an A record, or the address of a PTR record to look up. For a MySQL, Oracle or PostgeSQL check, this should be an SQL SELECT query. The data returned is not checked, only that the answer is one or more rows. This is a required setting. For a simpletcp check, this string is sent verbatim except any occurrences of \n are replaced with a new line character. BIB<"> If the requested result contains this I, the real server is declared alive. The regexp must be inside quotes. Keep in mind that regexps are not plain strings and that you need to escape the special characters if they should as literals. Note that this regexp may be overridden by an optional per real-server based receive regexp. For a DNS check this should be any one the A record's addresses or any one of the PTR record's names. For a MySQL check, the receive setting is not used. B | B Sets the HTTP method which should be used to fetch the URI specified in the request-string. GET is the method used by default if the parameter is not set. If HEAD is used, the receive-string should be unset. Default: GET BIB<"> Used when using a negotiate check with HTTP or HTTPS. Sets the host header used in the HTTP request. In the case of HTTPS this generally needs to match the common name of the SSL certificate. If not set then the host header will be derived from the request url for the real server if present. As a last resort the IP address of the real server will be used. BIB<"> For FTP, IMAP, LDAP, MySQL, Oracle, POP and PostgreSQL, the username used to log in. For Radius the passwd is used for the attribute User-Name. For SIP, the username is used as both the to and from address for an OPTIONS query. Default: =over 4 =item * FTP: Anonymous =item * MySQL Oracle, and PostgreSQL: Must be specified in the configuration =item * SIP: ldirectord\@, hostname is derived as per the passwd option below. =item * Otherwise: empty string, which denotes that case authentication will not be attempted. =back BIB<"> Password to use to login to FTP, IMAP, LDAP, MySQL, Oracle, POP, PostgreSQL and SIP servers. For Radius the passwd is used for the attribute User-Password. Default: =over 4 =item * FTP: ldirectord\@, where hostname is the environment variable HOSTNAME evaluated at run time, or sourced from uname if unset. =item * Otherwise: empty string. In the case of LDAP, MySQL, Oracle, and PostgreSQL this means that authentication will not be performed. =back BIB<"> Database to use for MySQL, Oracle and PostgreSQL servers, this is the database that the query (set by B above) will be performed against. This is a required setting. BIB<"> Secret to use for Radius servers, this is the secret used to perform an Access-Request with the username (set by B above) and passwd (set by B above). Default: empty string B I Scheduler to be used by LVS for loadbalancing. For an information on the available sehedulers please see the ipvsadm(8) man page. Default: "wrr" B I Number of seconds for persistent client connections. B I | I Netmask to be used for granularity of persistent client connections. IPv4 netmask should be specified in dotted quad notation. IPv6 netmask should be specified as a prefix length between 1 and 128. B | B | B Protocol to be used. If the virtual is specified as an IP address and port then it must be one of tcp or udp. If a firewall mark then the protocol must be fwm. Default: =over 4 =item * Virtual is an IP address and port, and the port is not 53: tcp =item * Virtual is an IP address and port, and the port is 53: udp =item * Virtual is a firewall mark: fwm =back BIB<"> File to continuously log the real service checks to for this virtual service. This is useful for monitoring when and why real services were down or for statistics. The log format is: [timestamp|pid|real_service_id|status|message] Default: no separate logging of service checks. =head1 IPv6 Directives for IPv6 are virtual6, real6, fallback6. IPv6 addresses specified for virtual6, real6, fallback6 and a file of maintenance directory should be enclosed by brackets ([2001:db8::abcd]:80). Following checktype and service are supported. BB | B | B | B | B | B | BI BB | B | B | B | B =head1 FILES B<@sysconfdir@/ha.d/ldirectord.cf> B BIB<.pid> B =head1 SEE ALSO L, L Ldirectord Web Page: http://www.vergenet.net/linux/ldirectord/ =head1 AUTHORS Horms Jacob Rief =cut use strict; # Set defaults for configuration variables in the "set_defaults" function use vars qw( $VERSION_STR $AUTOCHECK $CHECKINTERVAL $LDIRECTORD $LDIRLOG $NEGOTIATETIMEOUT $DEFAULT_NEGOTIATETIMEOUT $RUNPID $CHECKTIMEOUT $DEFAULT_CHECKTIMEOUT $CHECKCOUNT $FAILURECOUNT $QUIESCENT $FORKING $EMAILALERT $EMAILALERTFREQ $EMAILALERTSTATUS $EMAILALERTFROM $SMTP $CLEANSTOP $MAINTDIR $CALLBACK $CFGNAME $CMD $CONFIG $DEBUG $FALLBACK $FALLBACK6 $FALLBACKCOMMAND $SUPERVISED $IPVSADM $checksum $DAEMON_STATUS $DAEMON_STATUS_STARTING $DAEMON_STATUS_RUNNING $DAEMON_STATUS_STOPPING $DAEMON_STATUS_RELOADING $DAEMON_STATUS_ALL $DAEMON_TERM $DAEMON_HUP $DAEMON_CHLD $opt_d $opt_h $stattime %LD_INSTANCE @OLDVIRTUAL @REAL @VIRTUAL $HOSTNAME %EMAILSTATUS %FORK_CHILDREN $SERVICE_UP $SERVICE_DOWN %check_external_perl__funcs $CRLF ); $VERSION_STR = "Linux Director v1.186-ha"; $DAEMON_STATUS_STARTING = 0x1; $DAEMON_STATUS_RUNNING = 0x2; $DAEMON_STATUS_STOPPING = 0x4; $DAEMON_STATUS_RELOADING = 0x8; $DAEMON_STATUS_ALL = $DAEMON_STATUS_STARTING | $DAEMON_STATUS_RUNNING | $DAEMON_STATUS_STOPPING | $DAEMON_STATUS_RELOADING; $SERVICE_UP = 0; $SERVICE_DOWN =1; # default values $DAEMON_TERM = undef; $DAEMON_HUP = undef; $LDIRECTORD = ld_find_cmd("ldirectord", 1); if (! defined $LDIRECTORD) { $LDIRECTORD = "@sbindir@/ldirectord"; } $RUNPID = "/var/run/ldirectord"; $CRLF = "\x0d\x0a"; # Set global configuration default values: set_defaults(); use Getopt::Long; use Pod::Usage; #use English; #use Time::HiRes qw( gettimeofday tv_interval ); use Socket; use Socket6; use Sys::Hostname; use POSIX qw(setsid :sys_wait_h); use Sys::Syslog qw(:DEFAULT setlogsock); BEGIN { # wrap exit() to preserve replacability *CORE::GLOBAL::exit = sub { CORE::exit(@_ ? shift : 0); }; } # command line options my @OLD_ARGV = @ARGV; my $opt_d = ''; my $opt_h = ''; my $opt_v = ''; Getopt::Long::Configure ("bundling", "no_auto_abbrev", "require_order"); GetOptions("debug|d" => \$opt_d, "help|h|?" => \$opt_h, "version|v" => \$opt_v) or usage(); # main code $DEBUG = $opt_d ? 3 : 0; if ($opt_h) { exec_wrapper("/usr/bin/perldoc -U $LDIRECTORD"); &ld_exit(127, "Exec failed"); } if ($opt_v) { print("$VERSION_STR\n" . "1999-2006 Jacob Rief, Horms and others\n" . "\n". "\n" . "ldirectord comes with ABSOLUTELY NO WARRANTY.\n" . "This is free software, and you are welcome to redistribute it\n". "under certain conditions. " . "See the GNU General Public Licence for details.\n"); &ld_exit(0, ""); } if ($DEBUG>0 and -f "./ipvsadm") { $IPVSADM="./ipvsadm"; } else { if (-x "/sbin/ipvsadm") { $IPVSADM="/sbin/ipvsadm"; } elsif (-x "/usr/sbin/ipvsadm") { $IPVSADM="/usr/sbin/ipvsadm"; } else { die "Can not find ipvsadm"; } } # There is a memory leak in perl's socket code when # the default IO layer is used. So use "perlio" unless # something else has been explicitly set. # http://archive.develooper.com/perl5-porters@perl.org/msg85468.html unless(defined($ENV{'PERLIO'})) { $ENV{'PERLIO'} = "perlio"; exec_wrapper($0, @OLD_ARGV); } $DAEMON_STATUS = $DAEMON_STATUS_STARTING; ld_init(); ld_setup(); ld_start(); ld_cmd_children("start", %LD_INSTANCE); $DAEMON_STATUS = $DAEMON_STATUS_RUNNING; ld_main(); &ld_rm_file("$RUNPID.$CFGNAME.pid"); &ld_exit(0, "Reached end of \"main\""); # functions sub ld_init { # install signal handlers (this covers TERM) #require Net::LDAP; $SIG{'INT'} = \&ld_handler_term; $SIG{'QUIT'} = \&ld_handler_term; $SIG{'ILL'} = \&ld_handler_term; $SIG{'ABRT'} = \&ld_handler_term; $SIG{'FPE'} = \&ld_handler_term; $SIG{'SEGV'} = \&ld_handler_term; $SIG{'TERM'} = \&ld_handler_term; $SIG{'BUS'} = \&ld_handler_term; $SIG{'SYS'} = \&ld_handler_term; $SIG{'XCPU'} = \&ld_handler_term; $SIG{'XFSZ'} = \&ld_handler_term; $SIG{'IOT'} = \&ld_handler_term; # This used to call a signal handler, that logged a message # However, this typically goes to syslog and if syslog # is playing up a loop will occur. $SIG{'PIPE'} = "IGNORE"; # HUP is actually used $SIG{'HUP'} = \&ld_handler_hup; # Reap Children $SIG{'CHLD'} = \&ld_handler_chld; if (defined $ENV{HOSTNAME}) { $HOSTNAME = "$ENV{HOSTNAME}"; } else { use POSIX "uname"; my ($s, $n, $r, $v, $m) = uname; $HOSTNAME = $n; } # search for the correct configuration file if ( !defined $ARGV[0] ) { usage(); } if ( defined $ARGV[0] && defined $ARGV[1] ) { $CONFIG = $ARGV[0]; if ($CONFIG =~ /([^\/]+)$/) { $CFGNAME = $1; } $CMD = $ARGV[1]; } elsif ( defined $ARGV[0] ) { $CONFIG = "ldirectord.cf"; $CFGNAME = "ldirectord"; $CMD = $ARGV[0]; } if ( $CMD ne "start" and $CMD ne "stop" and $CMD ne "status" and $CMD ne "restart" and $CMD ne "try-restart" and $CMD ne "reload" and $CMD ne "force-reload") { usage(); } if ( -f "@sysconfdir@/ha.d/$CONFIG" ) { $CONFIG = "@sysconfdir@/ha.d/$CONFIG"; } elsif ( -f "@sysconfdir@/ha.d/conf/$CONFIG" ) { $CONFIG = "@sysconfdir@/ha.d/conf/$CONFIG"; } elsif ( ! -f "$CONFIG" ) { init_error("Config file $CONFIG not found"); } read_config(); undef @OLDVIRTUAL; { my $log_str = "Invoking ldirectord invoked as: $0 "; for my $i (@ARGV) { $log_str .= $i . " "; } ld_log($log_str); } my $oldpid; my $filepid; if (open(FILE, "<$RUNPID.$CFGNAME.pid")) { $_ = ; chomp; $filepid = $_; close(FILE); # Check to make sure this isn't a stale pid file if (open(FILE, "; if (/ldirectord/) { $oldpid = $filepid; } close(FILE); } } if (defined $oldpid) { if ($CMD eq "start") { ld_exit(0, "Exiting from ldirectord $CMD"); } elsif ($CMD eq "stop") { kill 15, $oldpid; ld_exit(0, "Exiting from ldirectord $CMD"); } elsif ($CMD eq "restart" or $CMD eq "try-restart") { kill 15, $oldpid; while (-f "$RUNPID.$CFGNAME.pid") { # wait until old pid file is removed sleep 1; } # N.B Fall through } elsif ($CMD eq "reload" or $CMD eq "force-reload") { kill 1, $oldpid; ld_exit(0, "Exiting from ldirectord $CMD"); } else { # status print STDERR "ldirectord for $CONFIG is running with pid: $oldpid\n"; ld_cmd_children("status", %LD_INSTANCE); ld_log("ldirectord for $CONFIG is running with pid: $oldpid"); ld_log("Exiting from ldirectord $CMD"); ld_exit(0, "Exiting from ldirectord $CMD"); } } else { if ($CMD eq "start" or $CMD eq "restart") { ; } elsif ($CMD eq "stop" or $CMD eq "try-restart") { ld_exit(0, "Exiting from ldirectord $CMD"); } elsif ($CMD eq "status") { my $status; if (defined $filepid) { print STDERR "ldirectord stale pid file " . "$RUNPID.$CFGNAME.pid for $CONFIG\n"; ld_log("ldirectord stale pid file " . "$RUNPID.$CFGNAME.pid for $CONFIG"); $status = 1; } else { $status = 3; } print "ldirectord is stopped for $CONFIG\n"; ld_exit($status, "Exiting from ldirectord $CMD"); } else { ld_log("ldirectord is stopped for $CONFIG"); ld_exit(1, "Exiting from ldirectord $CMD"); } } # Run as daemon if ($SUPERVISED eq "yes" || $opt_d) { &ld_log("Starting $VERSION_STR with pid: $$"); } else { &ld_log("Starting $VERSION_STR as daemon"); open(FILE, ">$RUNPID.$CFGNAME.pid") || init_error("Can not open $RUNPID.$CFGNAME.pid"); &ld_daemon(); print FILE "$$\n"; close(FILE); } } sub usage { pod2usage(-input => $LDIRECTORD, -exitval => -1); } sub init_error { my $msg = shift; chomp($msg); &ld_log("$msg"); unless ($opt_d) { print STDERR "$msg\n"; } ld_exit(1, "Initialisation Error"); } # ld_handler_term # If we get a signal then log it and quit sub ld_handler_term { my ($signal) = (@_); if (defined $DAEMON_TERM) { $SIG{'__DIE__'} = "IGNORE"; $SIG{"$signal"} = "IGNORE"; die("Exit Handler Repeatedly Called\n"); } $DAEMON_TERM = $signal; $DAEMON_STATUS = $DAEMON_STATUS_STOPPING; } sub ld_process_term { $DAEMON_STATUS = $DAEMON_STATUS_STOPPING; ld_cmd_children("stop", %LD_INSTANCE); ld_stop(); &ld_log("Linux Director Daemon terminated on signal: $DAEMON_TERM"); &ld_rm_file("$RUNPID.$CFGNAME.pid"); &ld_exit(0, "Linux Director Daemon terminated on signal: $DAEMON_TERM"); } sub ld_handler_hup { $DAEMON_HUP=1; } sub ld_process_hup { &ld_log("Reloading Linux Director Daemon config on signal"); $DAEMON_HUP = undef; &reread_config(); } sub ld_handler_chld { $DAEMON_CHLD=1; # NOTE: calling waitpid here would mess up $? } sub ld_process_chld { my $i = 0; undef $DAEMON_CHLD; while (waitpid(-1, WNOHANG) > 0) { print "child: $i\n"; $i++; } } sub check_signal { if (defined $DAEMON_TERM) { ld_process_term(); } if (defined $DAEMON_HUP) { ld_process_hup(); } if (defined $DAEMON_CHLD) { ld_process_chld(); } } sub reread_config { @OLDVIRTUAL = @VIRTUAL; @VIRTUAL = (); my %OLD_INSTANCE = %LD_INSTANCE; my %RELOAD; my %STOP; my %START; my $child; $DAEMON_STATUS = $DAEMON_STATUS_RELOADING; eval { &read_config(); foreach $child (keys %LD_INSTANCE) { if (defined $OLD_INSTANCE{$child}) { $RELOAD{$child} = 1; } else { $START{$child} = 1; } } foreach $child (keys %OLD_INSTANCE) { if (not defined $LD_INSTANCE{$child}) { $STOP{$child} = 1; } } &ld_cmd_children("stop", %STOP); &ld_cmd_children("reload_or_start", %RELOAD); &ld_cmd_children("start", %START); foreach my $vid (keys %FORK_CHILDREN) { &ld_log("Killing child $vid (PID=$FORK_CHILDREN{$vid})"); kill 15, $FORK_CHILDREN{$vid}; } &ld_setup(); &ld_start(); }; if ($@) { @VIRTUAL = @OLDVIRTUAL; %LD_INSTANCE = %OLD_INSTANCE; } $DAEMON_STATUS = $DAEMON_STATUS_RUNNING; undef @OLDVIRTUAL; } sub parse_emailalertstatus { my ($line, $arg) = (@_); my @s = split/\s*,\s*/, $arg; my $none = 0; my $status = 0; for my $i (@s) { if ($i eq "none") { $none++; } } for my $i (@s) { if ($i eq "none") { next; } elsif ($i eq "all") { $status = $DAEMON_STATUS_ALL; } elsif ($i eq "starting") { $status |= $DAEMON_STATUS_STARTING; } elsif ($i eq "stopping") { $status |= $DAEMON_STATUS_STOPPING; } elsif ($i eq "running") { $status |= $DAEMON_STATUS_RUNNING; } elsif ($i eq "reloading") { $status |= $DAEMON_STATUS_RELOADING; } else { &config_error($line, "invalid email alert status at: \"$i\"") } if ($none > 0) { &config_error($line, "invalid email alert status: " . "\"$i\" specified with \"none\""); } } return $status; } sub set_defaults { $AUTOCHECK = "no"; $CALLBACK = undef; $CHECKCOUNT = 1; $CHECKINTERVAL = 10; $CHECKTIMEOUT = -1; $CLEANSTOP = "yes"; $DEFAULT_CHECKTIMEOUT = 5; $DEFAULT_NEGOTIATETIMEOUT = 30; $EMAILALERT = ""; $EMAILALERTFREQ = 0; $EMAILALERTFROM = undef; $EMAILALERTSTATUS = $DAEMON_STATUS_ALL; $FAILURECOUNT = 1; $FALLBACK = undef; $FALLBACK6 = undef; $FALLBACKCOMMAND = undef; $FORKING = "no"; $LDIRLOG = "/var/log/ldirectord.log"; $MAINTDIR = undef; $NEGOTIATETIMEOUT = -1; $QUIESCENT = "no"; $SUPERVISED = "no"; $SMTP = undef; } sub read_emailalert { my ($line, $addr) = (@_); # Strip of enclosing quotes $addr =~ s/^\"([^"]*)\"$/$1/; $addr =~ /(.+)/ or &config_error($line, "no email address specified"); return $addr; } sub read_config { undef @VIRTUAL; undef @REAL; undef $CALLBACK; undef %LD_INSTANCE; undef $checksum; # Reset/set global config variables to defaults before parsing the config file. set_defaults(); $stattime = 0; my %virtual_seen; open(CFGFILE, "<$CONFIG") or &config_error(0, "can not open file $CONFIG"); my $line = 0; my $linedata; while() { $line++; $linedata = $_; outer_loop: if ($linedata =~ /^virtual(6)?\s*=\s*(.*)/) { my $af = defined($1) ? AF_INET6 : AF_INET; my $vattr = $2; my $ip_port = undef; my $fwm = undef; my $virtual_id; my $virtual_line = $line; my $virtual_port; my $fallback_line; my @rsrv_todo; if ($vattr =~ /^(\d+\.\d+\.\d+\.\d+):([0-9A-Za-z-_]+)/ && $af == AF_INET) { $ip_port = "$1:$2"; $virtual_port = $2; } elsif ($vattr =~ /^([0-9A-Za-z._+-]+):([0-9A-Za-z-_]+)/) { $ip_port = "$1:$2"; $virtual_port = $2; } elsif ($vattr =~ /^(\d+)/){ $fwm = $1; } elsif ($vattr =~ /^\[([0-9A-Fa-f:]+)\]:([0-9A-Za-z-_]+)/ && $af == AF_INET) { &config_error($line, "cannot specify an IPv6 address here. please use \"virtual6\" instead."); } elsif ($vattr =~ /^\[([0-9A-Fa-f:]+)\]:([0-9A-Za-z-_]+)/ && $af == AF_INET6) { my $v6addr = $1; my $v6port = $2; if (!inet_pton(AF_INET6,$v6addr)) { &config_error($line,"invalid ipv6 address for virtual server"); } $ip_port = "[$v6addr]:$v6port"; $virtual_port = $v6port; } else { &config_error($line, "invalid address for virtual server"); } my (%vsrv, @rsrv); if ($ip_port) { $vsrv{checktype} = "negotiate"; $vsrv{protocol} = "tcp"; if ($ip_port =~ /:(53|domain)$/) { $vsrv{protocol} = "udp"; } $vsrv{port} = $virtual_port; } else { $vsrv{fwm} = $fwm; $vsrv{checktype} = "negotiate"; $vsrv{protocol} = "fwm"; $vsrv{service} = "none"; $vsrv{port} = "0"; } $vsrv{addressfamily} = $af; $vsrv{real} = \@rsrv; $vsrv{scheduler} = "wrr"; $vsrv{checkcommand} = "/bin/true"; $vsrv{request} = "/"; $vsrv{receive} = ""; $vsrv{login} = ""; $vsrv{passwd} = ""; $vsrv{database} = ""; $vsrv{checktimeout} = -1; $vsrv{checkcount} = -1; $vsrv{negotiatetimeout} = -1; $vsrv{failurecount} = -1; $vsrv{num_connects} = 0; $vsrv{httpmethod} = "GET"; $vsrv{secret} = ""; push(@VIRTUAL, \%vsrv); while() { $line++; $linedata=$_; if(m/^\s*#/) { next; } s/#.*//; s/\t/ /g; unless (/^ {4,}(.+)/) { last; } my $rcmd = $1; if ($rcmd =~ /^(real(6)?)\s*=\s*(.*)/) { if ($af == AF_INET && defined($2) || $af == AF_INET6 && ! defined($2)) { &config_error($line, join("", ("cannot specify \"$1\" here. please use \"real", ($af == AF_INET) ? "" : "6", "\" instead"))); } push @rsrv_todo, [$3, $line]; } elsif ($rcmd =~ /^request\s*=\s*\"(.*)\"/) { $1 =~ /(.+)/ or &config_error($line, "no request string specified"); $vsrv{request} = $1; unless($vsrv{request}=~/^\//){ $vsrv{request} = "/" . $vsrv{request}; } } elsif ($rcmd =~ /^receive\s*=\s*\"(.*)\"/) { $1 =~ /(.+)/ or &config_error($line, "invalid receive string"); $vsrv{receive} = $1; } elsif ($rcmd =~ /^checktype\s*=\s*(.*)/){ if ($1 =~ /(\d+)/ && $1>=0) { $vsrv{num_connects} = $1; $vsrv{checktype} = "combined"; } elsif ( $1 =~ /([\w-]+)/ && ($1 eq "connect" || $1 eq "negotiate" || $1 eq "ping" || $1 eq "off" || $1 eq "on" || $1 eq "external" || $1 eq "external-perl") ) { $vsrv{checktype} = $1; } else { &config_error($line, "checktype must be \"connect\", \"negotiate\", \"on\", \"off\", \"ping\", \"external\", \"external-perl\" or a positive number"); } } elsif ($rcmd =~ /^checkcommand\s*=\s*\"(.*)\"/ or $rcmd =~ /^checkcommand\s*=\s*(.*)/){ $1 =~ /(.+)/ or &config_error($line, "invalid check command"); $vsrv{checkcommand} = $1; } elsif ($rcmd =~ /^checktimeout\s*=\s*(.*)/){ $1 =~ /(\d+)/ && $1 or &config_error($line, "invalid check timeout"); $vsrv{checktimeout} = $1; } elsif ($rcmd =~ /^connecttimeout\s*=\s*(.*)/){ &config_error($line, "connecttimeout directive " . "deprecated in favour of " . "negotiatetimeout"); } elsif ($rcmd =~ /^negotiatetimeout\s*=\s*(.*)/){ $1 =~ /(\d+)/ && $1 or &config_error($line, "invalid negotiate timeout"); $vsrv{negotiatetimeout} = $1; } elsif ($rcmd =~ /^checkcount\s*=\s*(.*)/){ $1 =~ /(\d+)/ && $1 or &config_error($line, "invalid check count"); $vsrv{checkcount} = $1; &config_warn($line, "checkcount option is deprecated and slated for removal. please see 'failurecount'"); } elsif ($rcmd =~ /^failurecount\s*=\s*(.*)/){ $1 =~ /(\d+)/ && $1 or &config_error($line, "invalid failure count"); $vsrv{failurecount} = $1; } elsif ($rcmd =~ /^checkinterval\s*=\s*(.*)/){ $1 =~ /(\d+)/ && $1 or &config_error($line, "invalid checkinterval"); $vsrv{checkinterval} = $1 } elsif ($rcmd =~ /^checkport\s*=\s*(.*)/){ $1 =~ /(\d+)/ or &config_error($line, "invalid port"); ( $1 > 0 && $1 < 65536 ) or &config_error($line, "checkport must be in range 1..65536"); $vsrv{checkport} = $1; } elsif ($rcmd =~ /^login\s*=\s*\"(.*)\"/) { $1 =~ /(.+)/ or &config_error($line, "invalid login string"); $vsrv{login} = $1; } elsif ($rcmd =~ /^passwd\s*=\s*\"(.*)\"/) { $1 =~ /(.+)/ or &config_error($line, "invalid password"); $vsrv{passwd} = $1; } elsif ($rcmd =~ /^database\s*=\s*\"(.*)\"/) { $1 =~ /(.+)/ or &config_error($line, "invalid database"); $vsrv{database} = $1; } elsif ($rcmd =~ /^secret\s*=\s*\"(.*)\"/) { $1 =~ /(.+)/ or &config_error($line, "invalid secret"); $vsrv{secret} = $1; } elsif ($rcmd =~ /^load\s*=\s*\"(.*)\"/) { $1 =~ /(\w+)/ or &config_error($line, "invalid string for load testing"); $vsrv{load} = $1; } elsif ($rcmd =~ /^scheduler\s*=\s*(.*)/) { # Intentionally ommit checking the # scheduler against a list of know # schedulers. This is because from # time to time new schedulers are # added. But ldirectord is # maintained distributed # independently of this. Thus # ldirectord needs to be manually # updated/upgraded. So just accept # any scheduler that matches # [a-z]+. I.e. is syntactically # correct (all schedulers so far # match that pattern). Ipvsadm will # report an error is a scheduler # isn't available / doesn't exist. $1 =~ /([a-z]+)/ or &config_error($line, "invalid scheduler, should be only lowercase letters (a-z)"); $vsrv{scheduler} = $1; } elsif ($rcmd =~ /^persistent\s*=\s*(.*)/) { $1 =~ /(\d+)/ or &config_error($line, "invalid persistent timeout"); $vsrv{persistent} = $1; } elsif ($rcmd =~ /^netmask\s*=\s*(.*)/) { my $val = $1; if ($vsrv{addressfamily} == AF_INET6) { if ($val !~ /^\d+$/ or ($val < 1 || $val > 128)) { &config_error($line, "invalid netmask: a prefix length between 1 and 128 is required"); } } else { if ($val !~ /^\d+\.\d+\.\d+\.\d+$/) { &config_error($line, "invalid netmask: dotted quad notation is required"); } } $vsrv{netmask} = $val; } elsif ($rcmd =~ /^protocol\s*=\s*(.*)/) { if ( $1 =~ /(\w+)/ ) { if ( $vsrv{protocol} eq "fwm" ) { if ($1 eq "fwm" ) { ; #Do nothing, it is already set } else { &config_error($line, "protocol must be fwm if the virtual service is a fwmark (a number)"); } } else { # tcp or udp if ($1 eq "tcp" || $1 eq "udp") { $vsrv{protocol} = $1; } else { &config_error($line, "protocol must be tcp or udp if the virtual service is an address and port"); } } } else { &config_error($line, "invalid protocol"); } } elsif ($rcmd =~ /^service\s*=\s*(.*)/) { $1 =~ /(\w+)/ && ($1 eq "dns" || $1 eq "ftp" || $1 eq "http" || $1 eq "https" || $1 eq "http_proxy" || $1 eq "imap" || $1 eq "imaps" || $1 eq "ldap" || $1 eq "nntp" || $1 eq "mysql" || $1 eq "none" || $1 eq "oracle"|| $1 eq "pop" || $1 eq "pops" || $1 eq "radius"|| $1 eq "pgsql" || $1 eq "sip" || $1 eq "smtp" || $1 eq "submission" || $1 eq "simpletcp") or &config_error($line, "service must " . "be dns, ftp, " . "http, https, " . "http_proxy, " . "imap, imaps, " . "ldap, nntp, " . "mysql, none, " . "oracle, " . "pop, pops, " . "radius, " . "pgsql, " . "simpletcp, " . "sip, smtp " . "or submission"); $vsrv{service} = $1; if($vsrv{service} eq "ftp" and $vsrv{login} eq "") { $vsrv{login} = "anonymous"; } elsif($vsrv{service} eq "sip" and $vsrv{login} eq "") { $vsrv{login} = "ldirectord\@$HOSTNAME"; } if($vsrv{service} eq "ftp" and $vsrv{passwd} eq "") { $vsrv{passwd} = "ldirectord\@$HOSTNAME"; } } elsif ($rcmd =~ /^httpmethod\s*=\s*(.*)/) { $1 =~ /(\w+)/ && (uc($1) eq "GET" || uc($1) eq "HEAD") or &config_error($line, "httpmethod must be GET or HEAD"); $vsrv{httpmethod} = uc($1); } elsif ($rcmd =~ /^virtualhost\s*=\s*(.*)/) { $1 =~ /\"?([^\"]*)\"?/ or &config_error($line, "invalid virtualhost"); $vsrv{virtualhost} = $1; } elsif ($rcmd =~ /^(fallback(6)?)\s*=\s*(.*)/) { # Allow specification of a virtual-specific fallback host if ($af == AF_INET && defined($2) || $af == AF_INET6 && ! defined($2)) { &config_error($line, join("", ("cannot specify \"$1\" here. please use \"fallback", ($af == AF_INET) ? "" : "6", "\" instead"))); } $fallback_line=$line; $vsrv{fallback} = parse_fallback($line, $3, \%vsrv); } elsif ($rcmd =~ /^fallbackcommand\s*=\s*\"(.*)\"/ or $rcmd =~ /^fallbackcommand\s*=\s*(.*)/) { $1 =~ /(.+)/ or &config_error($line, "invalid fallback command"); $vsrv{fallbackcommand} = $1; } elsif ($rcmd =~ /^quiescent\s*=\s*(.*)/) { ($1 eq "yes" || $1 eq "no") or &config_error($line, "quiescent must be 'yes' or 'no'"); $vsrv{quiescent} = $1; } elsif ($rcmd =~ /^emailalert\s*=\s*(.*)/) { $vsrv{emailalert} = read_emailalert($line, $1); } elsif ($rcmd =~ /^emailalertfreq\s*=\s*(\d*)/) { $1 =~ /(\d+)/ or &config_error($line, "invalid email alert frequency"); $vsrv{emailalertfreq} = $1; } elsif ($rcmd =~ /^emailalertstatus\s*=\s*(.*)/) { $vsrv{emailalertstatus} = &parse_emailalertstatus($line, $1); } elsif ($rcmd =~ /^monitorfile\s*=\s*\"(.*)\"/ or $rcmd =~ /^monitorfile\s*=\s*(.*)/) { my $monitorfile = $1; unless (open(MONITORFILE, ">>$monitorfile") and close(MONITORFILE)) { &config_error($line, "unable to open monitorfile $monitorfile: $!"); } $vsrv{monitorfile} = $monitorfile; } elsif ($rcmd =~ /^cleanstop\s*=\s*(.*)/) { ($1 eq "yes" || $1 eq "no") or &config_error($line, "cleanstop must be 'yes' or 'no'"); $vsrv{cleanstop} = $1; } elsif ($rcmd =~ /^smtp\s*=\s*(.*)/) { $1 =~ /(^([0-9A-Za-z._+-]+))/ or &config_error($line, "invalid SMTP server address"); $vsrv{smtp} = $1; } else { &config_error($line, "Unknown command \"$linedata\""); } undef $linedata; } # As the protocol needs to be known to call # getservbyname() all resolution must be # delayed until the protocol is finalised. # That is after the entire configuration # for a virtual service has been parsed. &_ld_read_config_fallback_resolve($fallback_line, $vsrv{protocol}, $vsrv{fallback}, $af); &_ld_read_config_virtual_resolve($virtual_line, \%vsrv, $ip_port, $af); &_ld_read_config_real_resolve(\%vsrv, \@rsrv_todo, $af); # Check for duplicate now we have all the # information to generate the id $virtual_id = get_virtual_id_str(\%vsrv); if (defined $virtual_seen{$virtual_id}) { &config_error($line, "duplicate virtual server"); } $virtual_seen{$virtual_id} = 1; unless(defined($linedata)) { last; } #Arggh a goto :( goto outer_loop; } next if ($linedata =~ /^\s*$/ || $linedata =~ /^\s*#/); if ($linedata =~ /^checktimeout\s*=\s*(.*)/) { ($1 =~ /(\d+)/ && $1 && $1>0) or &config_error($line, "invalid check timeout value"); $CHECKTIMEOUT = $1; } elsif ($linedata =~ /^connecttimeout\s*=\s*(.*)/) { &config_error($line, "connecttimeout directive " . "deprecated in favour of " . "negotiatetimeout"); } elsif ($linedata =~ /^negotiatetimeout\s*=\s*(.*)/) { ($1 =~ /(\d+)/ && $1 && $1>0) or &config_error($line, "invalid negotiate timeout value"); $NEGOTIATETIMEOUT = $1; } elsif ($linedata =~ /^checkinterval\s*=\s*(.*)/) { $1 =~ /(\d+)/ && $1 or &config_error($line, "invalid check interval value"); $CHECKINTERVAL = $1; } elsif ($linedata =~ /^checkcount\s*=\s*(.*)/) { $1 =~ /(\d+)/ && $1 or &config_error($line, "invalid check count value"); $CHECKCOUNT = $1; &config_warn($line, "checkcount option is deprecated and slated for removal. please see 'failurecount'"); } elsif ($linedata =~ /^failurecount\s*=\s*(.*)/) { $1 =~ /(\d+)/ && $1 or &config_error($line, "invalid failure count value"); $FAILURECOUNT = $1; } elsif ($linedata =~ /^fallback(6)?\s*=\s*(.*)/) { my $af = defined($1) ? AF_INET6 : AF_INET; my $tcp = parse_fallback($line, $2, undef); my $udp = parse_fallback($line, $2, undef); &_ld_read_config_fallback_resolve($line, "tcp", $tcp, $af); &_ld_read_config_fallback_resolve($line, "udp", $udp, $af); if ($af == AF_INET) { $FALLBACK = { "tcp" => $tcp, "udp" => $udp }; } else { $FALLBACK6 = { "tcp" => $tcp, "udp" => $udp }; } } elsif ($linedata =~ /^fallbackcommand\s*=\s*(.*)/) { $1 =~ /(.+)/ or &config_error($line, "invalid fallback command"); $FALLBACKCOMMAND = $1; } elsif ($linedata =~ /^autoreload\s*=\s*(.*)/) { ($1 eq "yes" || $1 eq "no") or &config_error($line, "autoreload must be 'yes' or 'no'"); $AUTOCHECK = $1; } elsif ($linedata =~ /^callback\s*=\s*\"(.*)\"/) { $CALLBACK = $1; } elsif ($linedata =~ /^logfile\s*=\s*\"(.*)\"/) { my $tmpLDIRLOG = $LDIRLOG; $LDIRLOG = $1; if (&ld_openlog()) { $LDIRLOG = $tmpLDIRLOG; &config_error($line, "unable to open logfile: $1"); } } elsif ($linedata =~ /^execute\s*=\s*(.*)/) { $LD_INSTANCE{$1} = 1; } elsif ($linedata =~ /^fork\s*=\s*(.*)/) { ($1 eq "yes" || $1 eq "no") or &config_error($line, "fork must be 'yes' or 'no'"); $FORKING = $1; } elsif ($linedata =~ /^supervised/) { if (($linedata =~ /^supervised\s*=\s*(.*)/) and ($1 eq "yes" || $1 eq "no")) { $SUPERVISED = $1; } elsif ($linedata =~ /^supervised\s*$/) { $SUPERVISED = "yes"; &config_warn($line, "please update your config not to " . "use a bare supervised directive"); } else { &config_error($line, "supervised must be 'yes' or 'no'"); } } elsif ($linedata =~ /^quiescent\s*=\s*(.*)/) { ($1 eq "yes" || $1 eq "no") or &config_error($line, "quiescent must be 'yes' or 'no'"); $QUIESCENT = $1; } elsif ($linedata =~ /^emailalert\s*=\s*(.*)/) { $EMAILALERT = read_emailalert($line, $1); } elsif ($linedata =~ /^emailalertfreq\s*=\s*(\d*)/) { $1 =~ /(\d+)/ or &config_error($line, "invalid email alert frequency"); $EMAILALERTFREQ = $1; } elsif ($linedata =~ /^emailalertstatus\s*=\s*(.*)/) { $EMAILALERTSTATUS = &parse_emailalertstatus($line, $1); } elsif ($linedata =~ /^emailalertfrom\s*=\s*(.*)/) { $1 =~ /(.+)/ or &config_error($line, "no email from address specified"); $EMAILALERTFROM = $1; } elsif ($linedata =~ /^cleanstop\s*=\s*(.*)/) { ($1 eq "yes" || $1 eq "no") or &config_error($line, "cleanstop must be 'yes' or 'no'"); $CLEANSTOP = $1; } elsif ($linedata =~ /^smtp\s*=\s*(.*)/) { $1 =~ /(^([0-9A-Za-z._+-]+))/ or &config_error($line, "invalid SMTP server address"); $SMTP = $1; } elsif ($linedata =~ /^maintenancedir\s*=\s*(.*)/) { $1 =~ /(.+)/ or &config_error($line, "maintenance directory not specified"); $MAINTDIR = $1; -d $MAINTDIR or &config_warn($line, "maintenance directory does not exist"); } else { if ($linedata =~ /^timeout\s*=\s*(.*)/) { &config_error($line, "timeout directive " . "deprecated in favour of " . "checktimeout and " . "negotiatetimeout"); } &config_error($line, "Unknown command $linedata "); } } close(CFGFILE); # Check for sensible use of checkinterval, warn if it is used in a virtual # service when fork=no if ($FORKING eq 'no') { foreach my $v (@VIRTUAL) { if (defined($$v{checkinterval})) { config_warn(-1, "checkinterval in virtual service ". get_virtual_id_str($v)." ignored when fork=no"); } } } return(0); } # _ld_read_config_virtual_resolve # Note: Should not need to be called directly, but won't do any damage if # you do. # Resolve the server (ip address) and port for a virtual service # pre: line: Line of configuration file fallback server was read from # Used for debugging messages # vsrv: Virtual Service to resolve server and port of # ip_port: server and port in the form # ip_address|hostname:port|service # af: Address family: AF_INET or AF_INET6 # post: Take ip_port, resolve it as per ld_gethostservbyname # and set $vsrv->{server} and $vsrv->{port} accordingly. # If $vsrv->{service} is not set, then set according to the value of # $vsrv->{port} # return: none # Debugging message will be reported and programme will exit # on error. sub _ld_read_config_virtual_resolve { my($line, $vsrv, $ip_port, $af)=(@_); if($ip_port){ $ip_port=&ld_gethostservbyname($ip_port, $vsrv->{protocol}, $af); if ($ip_port =~ /(\[[0-9A-Fa-f:]+\]):(\d+)/) { $vsrv->{server} = $1; $vsrv->{port} = $2; } elsif($ip_port){ ($vsrv->{server}, $vsrv->{port}) = split /:/, $ip_port; } else { &config_error($line, "invalid address for virtual service"); } if(!defined($vsrv->{service})){ $vsrv->{service} = ld_port_to_service($vsrv->{port}); } } } # ld_service_to_port # Resolve an ldirectord service name from its port number # pre: port: port number of the service # return: port name # "none" if the service is unknown sub ld_port_to_service { my ($port) = (@_); if ($port eq 21) { return "ftp"; } if ($port eq 25) { return "smtp"; } if ($port eq 53) { return "dns"; } if ($port eq 80) { return "http"; } if ($port eq 110) { return "pop"; } if ($port eq 119) { return "nntp"; } if ($port eq 143) { return "imap"; } if ($port eq 389) { return "ldap"; } if ($port eq 443) { return "https"; } if ($port eq 587) { return "submission"; } if ($port eq 995) { return "pops"; } if ($port eq 993) { return "imaps"; } if ($port eq 1521) { return "oracle"; } if ($port eq 1812) { return "radius"; } if ($port eq 3128) { return "http_proxy"; } if ($port eq 3306) { return "mysql"; } if ($port eq 5060) { return "sip"; } if ($port eq 5432) { return "pgsql"; } return "none"; } # ld_service_to_port # Resolve the port number from an ldirectord service name # pre: service: name of the service # return: port number # undef if the service is unknown sub ld_service_to_port { my ($service) = (@_); if ($service eq "ftp") { return 21; } if ($service eq "smtp") { return 25; } if ($service eq "dns") { return 53; } if ($service eq "http") { return 80; } if ($service eq "pop") { return 110; } if ($service eq "nntp") { return 119; } if ($service eq "imap") { return 143; } if ($service eq "ldap") { return 389; } if ($service eq "https") { return 443; } if ($service eq "submission") { return 587; } if ($service eq "imaps") { return 993; } if ($service eq "pops") { return 995; } if ($service eq "oracle") { return 1521; } if ($service eq "radius") { return 1812; } if ($service eq "http_proxy") { return 3128; } if ($service eq "mysql") { return 3306; } if ($service eq "sip") { return 5060; } if ($service eq "pgsql") { return 5432; } return undef; } # ld_checkport # Resolve the port to connect to for service checks # Note: Should only be used inside service checks, # as its not the same as the port of the real server # pre: v: virtual service # r: real server # return: port number # undef if the service is unknown sub ld_checkport { my ($v, $r) = (@_); if (defined $v->{checkport}) { return $v->{checkport}; } if ($r->{port} > 0) { return $r->{port}; } return ld_service_to_port($v->{service}); } # _ld_read_config_fallback_resolve # Note: Should not need to be called directly, but won't do any damage if # you do. # Resolve the fallback server for a virtual service # pre: line: Line of configuration file fallback server was read from # Used for debugging messages # vsrv: Virtual Service to resolve fallback server of # af: Address family: AF_INET or AF_INET6 # post: Take $vsrv->{fallback}, resolve it as per ld_gethostservbyname # and set $vsrv->{fallback} to the result # return: none # Debugging message will be reported and programme will exit # on error. sub _ld_read_config_fallback_resolve { my($line, $protocol, $fallback, $af)=(@_); my ($ipversion, $ipaddress); unless($fallback) { return; } if ($af == AF_INET) { $ipversion = "IPv4"; } elsif ($af == AF_INET6) { $ipversion = "IPv6"; } else { $ipversion = "IP??($af)"; } unless ($ipaddress = &ld_gethostbyname($fallback->{server}, $af)) { &config_error($line, "invalid $ipversion address or could not resolve for fallback server: " . $fallback->{server}); } $fallback->{server} = $ipaddress; unless($fallback->{"port"}) { return; } $fallback->{port} = &ld_getservbyname($fallback->{port}, $protocol) or &config_error($line, "invalid port for fallback server"); } # _ld_read_config_real_resolve # Note: Should not need to be called directly, but won't do any damage if # you do. # Run through the list of real servers read in the configuration file for a # virtual server and parse these entries # pre: vsrv: Virtual Service to parse real servers for # rsrv_todo: List of real servers read from config but not parsed. # List is a list of list reference. The first element in # each list reference is the line read from the # configuration after "real=". The second element is the # line number, used for error reporting # af: Address family: AF_INET or AF_INET6 # post: Run through rsrv_todo and parse real servers # return: none # Debugging message will be reported and programme will exit # on error. sub _ld_read_config_real_resolve { my ($vsrv, $rsrv_todo, $af)=(@_); my $i; my $str; my $line; my $ip1; my $ip2; my $port; my $resolved_ip1; my $resolved_ip2; my $resolved_port; my $flags; for $i (@$rsrv_todo) { ($str, $line)=@$i; $str =~ /(\d+\.\d+\.\d+\.\d+|[A-Za-z0-9.-]+|\[[0-9A-fa-f:]+\])(->(\d+\.\d+\.\d+\.\d+|[A-Za-z0-9.-]+|\[[0-9A-fa-f:]+\]))?(:(\d+|[A-Za-z0-9-_]+))?\s+(.*)/ or &config_error($line, "invalid address for real server" . " (wrong format)"); $ip1=$1; $ip2=$3; if(defined($5)){ $port=$5; } else { $port="0"; } $flags=$6; $resolved_ip1=&ld_gethostbyname($ip1, $af); unless( defined($resolved_ip1) ) { &config_error($line, "invalid address ($ip1) for real server" . " (could not resolve host)"); } if( defined($port) ){ $resolved_port=&ld_getservbyname($port); unless( defined($resolved_port) ){ &config_error($line, "invalid port ($port) for real server" . " (could not resolve port)"); } } if ( defined ($ip2) ) { $resolved_ip2=&ld_gethostbyname($ip2, $af); unless( defined ($resolved_ip2) ) { &config_error($line, "invalid address ($ip2) for " . "real server" . " (could not resolve end host)"); } &add_real_server_range($line, $vsrv, $resolved_ip1, $resolved_ip2, $resolved_port, $flags, $af); } else { &add_real_server($line, $vsrv, $resolved_ip1, $resolved_port, $flags); } } } # add_real_server_range # Add a real server for each IP address in a range # pre: line: line number real server was read from # Used for debugging information # vsrv: virtual server to add real server to # first: First IP address in range # last: First IP address in range # port: Port of real servers # flags: Flags for real servers. Should be of the form # gate|masq|ipip [] [">I", ""] # af: Address family: AF_INET or AF_INET6 # post: real servers are added to virtual server # return: none # Debugging message will be reported and programme will exit # on error. sub add_real_server_range { my ($line, $vsrv, $first, $last, $port, $flags, $af) = (@_); my (@tmp, $first_i, $last_i, $i, $rsrv); if ($af == AF_INET) { if ( ($first_i=&ip_to_int($first)) <0 ) { &config_error($line, "Invalid IP address: $first"); } if ( ($last_i=&ip_to_int($last)) <0 ) { &config_error($line, "Invalid IP address: $last"); } if ($first_i>$last_i) { &config_error($line, "Invalid Range: $first-$last: First value must be " . "greater than or equal to the second value"); } # A for loop didn't seem to want to work $i=$first_i; while ( $i le $last_i ) { &add_real_server($line, $vsrv, &int_to_ip($i), $port, $flags); $i++; } } elsif ($af == AF_INET6) { # not supported yet &config_error($line, "Address range for IPv6 is not supported yet"); } else { die "address family must be AF_INET or AF_INET6\n"; } } # add_real_server # Add a real server to a virtual # pre: line: line number real server was read from # Used for debugging information # vsrv: virtual server to add real server to # ip: IP address of real server # port: Port of real server # flags: Flags for real server. Should be of the form # gate|masq|ipip [] [">I", ""] # post: real server is added to virtual server # return: none # Debugging message will be reported and programme will exit # on error. sub add_real_server { my ($line, $vsrv, $ip, $port, $flags) = (@_); my $ref; my $realsrv=0; my $new_rsrv; my $rsrv; $new_rsrv = {"server"=>$ip, "port"=>$port}; $flags =~ /(\w+)(.*)/ && ($1 eq "gate" || $1 eq "masq" || $1 eq "ipip") or &config_error($line, "forward method must be gate, masq or ipip"); $new_rsrv->{"forward"} =$1; $flags = $2; $rsrv=$vsrv->{"real"}; if(defined($flags) and $flags =~ /\s+(\d+)(.*)/) { $new_rsrv->{"weight"} = $1; $flags = $2; } else { $new_rsrv->{"weight"} = 1; } if(defined($flags) and $flags =~ /\s+\"(.*)\"[, ]\s*\"(.*)\"(.*)/) { $new_rsrv->{"request"} = $1; unless ($new_rsrv->{request}=~/^\//) { $new_rsrv->{request} = "/" . $new_rsrv->{request}; } $new_rsrv->{"receive"} = $2; $flags = $3; } if (defined($flags) and $flags =~/\S/) { &config_error($line, "Invalid real server line, around " . "\"$flags\""); } push(@$rsrv, $new_rsrv); my $real = get_real_id_str($new_rsrv, $vsrv); my $virtual = get_virtual_id_str($vsrv); for my $r (@REAL){ if($r->{"real"} eq $real){ my $ref=$r->{"virtual"}; push(@$ref, $virtual); $realsrv=1; last; } } if($realsrv==0){ push(@REAL, { "real"=>$real, "virtual"=>[ $virtual ] }); } } # parse_fallback # Parse a fallback server # pre: line: line number real server was read from # fallback: line read from configuration file # Should be of the form # ip_address|hostname[:port|:service_name] [gate|masq|ipip] # post: fallback is parsed # return: Reference to hash of the form # { server => blah, forward => blah } # Debugging message will be reported and programme will exit # on error. sub parse_fallback { my ($line, $fallback, $vsrv) = (@_); my $parse_line; my $server; my $port; my $fwd; $parse_line = $fallback; if ($parse_line =~ /(\S+)(\s+(\S+))?\s*$/) { # get "ip:port" and a forwarding method $fwd = $3; $parse_line = $1; } if ($parse_line =~ /(:(\d+|[A-Za-z0-9-_]+))?$/) { # get host and port $port=$2; $parse_line =~ s/(:(\d+|[A-Za-z0-9-_]+))?$//; $server = $parse_line; } unless(defined($server)) { &config_error($line, "invalid fallback server: $fallback"); } if (not defined($port) and defined($vsrv)) { $port = $vsrv->{"port"}; } if($fwd) { ($fwd eq "gate" || $fwd eq "masq" || $fwd eq "ipip") or &config_error($line, "forward method must be gate, masq or ipip"); } else { $fwd="gate" } return({"server"=>$server, "port"=>$port, "forward"=>$fwd, "weight"=>1}); } sub __config_log { my ($line, $prefix, $msg) = @_; chomp($msg); $msg .= "\n"; my $msg_prefix = "$prefix [$$]"; if ($line > 0) { $msg_prefix .= " reading file $CONFIG at line $line"; } $msg = "$msg_prefix: $msg"; if ($opt_d or $DAEMON_STATUS == $DAEMON_STATUS_STARTING) { print STDERR $msg; } else { &ld_log("$msg"); } } sub config_warn { my ($line, $msg) = @_; __config_log($line, "Warning", $msg); } sub config_error { my ($line, $msg) = @_; __config_log($line, "Error", $msg); if ($DAEMON_STATUS == $DAEMON_STATUS_STARTING) { &ld_rm_file("$RUNPID.$CFGNAME.pid"); &ld_exit(2, "config_error: Configuration Error"); } else { die; } } sub ld_setup { for my $v (@VIRTUAL) { if ($$v{protocol} eq "tcp") { $$v{proto} = "-t"; } elsif ($$v{protocol} eq "udp") { $$v{proto} = "-u"; } elsif ($$v{protocol} eq "fwm") { $$v{proto} = "-f"; } $$v{flags} = "$$v{proto} " . &get_virtual_option($v) . " "; $$v{flags} .= "-s $$v{scheduler} " if defined ($$v{scheduler}); if (defined $$v{persistent}) { $$v{flags} .= "-p $$v{persistent} "; $$v{flags} .= "-M $$v{netmask} " if defined ($$v{netmask}); } my $real = $$v{real}; for my $r (@$real) { $$r{forw} = get_forward_flag($$r{forward}); my $port=ld_checkport($v, $r); my $schema = $$v{service}; if ($$v{service} eq 'http_proxy') { $schema = 'http'; } if (defined $$r{request} && defined $$r{receive}) { my $uri = $$r{request}; $uri =~ s/^\///g; if ($$r{request} =~ /$schema:\/\//) { $$r{url} = "$uri"; } else { $$r{url} = "$schema:\/\/$$r{server}:$port\/$uri"; } } else { my $uri = $$v{request}; $uri =~ s/^\///g; if ($$v{service} eq 'http_proxy') { $$r{url} = "$uri"; } else { $$r{url} = "$schema:\/\/$$r{server}:$port\/$uri"; } $$r{request} = $$v{request} unless defined $$r{request}; $$r{receive} = $$v{receive}; } if ($$v{checktype} eq "combined") { $$r{num_connects} = 999999; } else { $$r{num_connects} = -1; } } # checktimeout and negotiate timeout are # mutual defaults for each other, so calculate # checktimeout in a temporary variable so as not # to affect the calculation of negotiatetimeout. my $checktimeout = $$v{checktimeout}; if ($checktimeout < 0) { $checktimeout = $$v{negotiatetimeout}; } if ($checktimeout < 0) { $checktimeout = $CHECKTIMEOUT; } if ($checktimeout < 0) { $checktimeout = $NEGOTIATETIMEOUT; } if ($checktimeout < 0) { $checktimeout = $DEFAULT_CHECKTIMEOUT; } if ($$v{negotiatetimeout} < 0) { $$v{negotiatetimeout} = $$v{checktimeout}; } if ($$v{negotiatetimeout} < 0) { $$v{negotiatetimeout} = $NEGOTIATETIMEOUT; } if ($$v{negotiatetimeout} < 0) { $$v{negotiatetimeout} = $CHECKTIMEOUT; } if ($$v{negotiatetimeout} < 0) { $$v{negotiatetimeout} = $DEFAULT_NEGOTIATETIMEOUT; } $$v{checktimeout} = $checktimeout; if ($$v{checkcount} < 0) { $$v{checkcount} = $CHECKCOUNT; } if ($$v{failurecount} < 0) { $$v{failurecount} = $FAILURECOUNT; } } } # ld_read_ipvsadm # # Net::FTP seems to set the input record separator ($\) to null # putting IO into slurp (whole file at a time, rather than line at a time) # mode. Net::FTP does this using local $\, which should mean # that the change doesn' effect code here, but it does. It also # seems to be impossible to turn it off, by say setting $\ back to '\n' # Perhaps there is more to this than meets the eye. Perhaps it's a perl bug. # In any case, this should fix the problem. # # This should not affect pid or config file parsing as they are called # before Net::FTP and as this appears to be a bit of a work around, # I'd rather use it in as few places as possible # # Observed with perl v5.8.8 (Debian's perl 5.8.8-6) # -- Horms, 17th July 2005 sub ld_readline { my ($fd, $buf) = (@_); my $line; # Uncomment the following line to turn off this work around # return readline($fd); $line = shift @$buf; if (defined $line) { return $line . "\n"; } push @$buf, split /\n/, readline($fd); $line = shift @$buf; if (defined $line) { return $line . "\n"; } return undef; } # ld_read_ipvsadm # Parses the output of "ipvsadm -L -n" and puts into a structure of # the following from: # # { # (vip_address:vport|fwmark) protocol => { # "scheduler" => scheduler, # "persistent" => timeout, # May be omitted # "netmask" => netmask, # May be omitted # "real" => { # rip_address:rport => { # "forward" => forwarding_mechanism, # "weight" => weight # }, # ... # } # }, # ... # } # # where: # vip_address: IP address of virtual service # vport: Port of virtual service # fwmark: Firewall Mark of virtual service # scheduler: Scheduler for virtual service # timeout: Timeout for persistency. Omitted if service is not persistent. # nemask: Netmask for persistency. Omitted if service is not persistent. # # rip_address: IP address of real server # rport: Port of real server # forwarding_mechanism: Forwarding mechanism for real server. # One of: gate, ipip, masq. # weight: Weight of real server # # pre: none # post: ipvsadm -L -n is parsed # result: reference to sructure detailed above. sub ld_read_ipvsadm { my %oldsrv; my $real_service; my $fwd; my $buf = []; my $fh; my $line; # read status of current ipvsadm -L -n unless(open($fh, "$IPVSADM -L -n 2>&1|")){ &ld_exit(1, "Could not run $IPVSADM -L -n: $!"); } # Skip the first three lines $line = ld_readline($fh, $buf); $line = ld_readline($fh, $buf); $line = ld_readline($fh, $buf); while (1) { $line = ld_readline($fh, $buf); if (not defined $line) { last; } if ($line =~ /^(\w+)\s+(\d+\.\d+\.\d+\.\d+\:\d+|\[[0-9A-Fa-f:]+\]:\d+|\d+)( IPv6)?\s+(\w+)\s+persistent\s+(\d+)\s+mask\s+(.*)/) { $real_service = &gen_real_service_str($2, $1, $3); $oldsrv{"$real_service"} = {"real"=>{}, "scheduler"=>$4, "persistent"=>$5, "netmask"=>$6}; } elsif ($line =~ /^(\w+)\s+(\d+\.\d+\.\d+\.\d+\:\d+|\[[0-9A-Fa-f:]+\]:\d+|\d+)( IPv6)?\s+(\w+)\s+persistent\s+(\d+)/) { $real_service = &gen_real_service_str($2, $1, $3); $oldsrv{"$real_service"} = {"real"=>{}, "scheduler"=>$4, "persistent"=>$5}; } elsif ($line =~ /^(\w+)\s+(\d+\.\d+\.\d+\.\d+\:\d+|\[[0-9A-Fa-f:]+\]:\d+|\d+)( IPv6)?\s+(\w+)/) { $real_service = &gen_real_service_str($2, $1, $3); $oldsrv{"$real_service"} = {"real"=>{}, "scheduler"=>$4}; } elsif ($line =~ /^ ->\s+(\d+\.\d+\.\d+\.\d+\:\d+|\[[0-9A-Fa-f:]+\]:\d+)\s+(\w+)\s+(\d+)/) { if (not defined( $real_service)) { &ld_debug(2, "Real server read from ipvsadm " . "doesn't seem to be inside a " . "virtual service: \"$line\"\n"); next; } if ($2 eq "Route") { $fwd = "gate"; } elsif ($2 eq "Tunnel") { $fwd = "ipip"; } elsif ($2 eq "Masq") { $fwd = "masq"; } $oldsrv{"$real_service"}->{"real"}->{"$1"} = {"forward"=>$fwd, "weight"=>$3}; } else { &ld_debug(2, "Unknown line read from ipvsadm: " . "\"$line\"\n"); next; } } close($fh); return(\%oldsrv); } sub gen_real_service_str { my ($service_address, $protocol, $v6flag) = @_; return "$service_address ".lc($protocol).(defined($v6flag) ? "6" : ""); } sub get_real_service_str { my ($v) = (@_); if ($v->{"protocol"} eq "fwm") { - return &get_virtual($v) . " " . $v->{protocol} . ($v->{addressfamily} == AF_INET6 ? "6" : ""); + return &get_virtual($v) . " " . $v->{protocol} . (($v->{addressfamily} == AF_INET6) ? "6" : ""); } else { return &get_virtual($v) . " " . $v->{protocol}; } } sub ld_start { my $oldsrv; my $real_service; my $nv; my $nr; my $server_down = {}; # read status of current ipvsadm -L -n $oldsrv=&ld_read_ipvsadm(); # make sure virtual servers are up to date foreach $nv (@VIRTUAL) { my $real_service = &get_real_service_str($nv); if (exists($oldsrv->{"$real_service"})) { # service exists, modify it &system_wrapper("$IPVSADM -E $$nv{flags}"); &ld_log("Changed virtual server: " . &get_virtual($nv)); } else { # no such service, create a new one &system_wrapper("$IPVSADM -A $$nv{flags}"); &ld_log("Added virtual server: " . &get_virtual($nv)); } } # make sure real servers are up to date foreach $nv (@VIRTUAL) { my $nreal = $nv->{real}; my $ov = $oldsrv->{&get_real_service_str($nv)}; my $or = $ov->{real}; my $fallback = fallback_find($nv); if (defined($fallback)) { delete($or->{"$fallback->{server}:$fallback->{port}"}); } for $nr (@$nreal) { my $real_str = "$nr->{server}:$nr->{port}"; if (! defined($or->{$real_str}) or $or->{$real_str}->{weight} == 0) { $server_down->{$real_str} = [$nv, $nr]; #service_set($nv, $nr, "down", {force => 1}); } else { if (defined $server_down->{$real_str}) { delete($server_down->{$real_str}); } service_set($nv, $nr, "up", {force => 1}); } delete($or->{$real_str}); } # remove remaining entries for real servers for my $k (keys %$or) { purge_untracked_service($nv, $k, "start"); delete($$or{$k}); } delete($oldsrv->{&get_real_service_str($nv)}); &fallback_on($nv); } for my $k (keys (%$server_down)) { my $v = $server_down->{$k}; service_set(@$v[0], @$v[1], "down", {force => 1}); delete($server_down->{$k}); #sleep 5; } # remove remaining entries for virtual servers foreach $nv (@OLDVIRTUAL) { if (! defined($oldsrv->{&get_real_service_str($nv)})) { next; } purge_virtual($nv, "start"); } } sub ld_cmd_children { my ($cmd, %children) = (@_); # instantiate other ldirectord, if specified my $child; foreach $child (keys %children) { if ($cmd eq "reload_or_start") { if (&system_wrapper("$LDIRECTORD $child reload")) { &system_wrapper("$LDIRECTORD $child start"); } } else { &system_wrapper("$LDIRECTORD $child $cmd"); } } } sub ld_stop { # Kill children if ($FORKING eq 'yes') { foreach my $virtual_id (keys (%FORK_CHILDREN)) { my $pid = $FORK_CHILDREN{$virtual_id}; ld_log("Killing child $virtual_id PID=$pid"); kill 15, $pid; } } foreach my $v (@VIRTUAL) { next if ( (! defined($$v{cleanstop}) and $CLEANSTOP eq 'no') or (defined($$v{cleanstop}) and $$v{cleanstop} eq 'no') ); my $real = $$v{real}; foreach my $r (@$real) { if (defined $$r{virtual_status}) { purge_service($v, $r, "stop"); } } purge_virtual($v, "stop"); } } sub ld_main { # Main failover checking code while (1) { if ($FORKING eq 'yes') { foreach my $v (@VIRTUAL) { my $virtual_id = get_virtual_id_str($v); if (!exists($FORK_CHILDREN{$virtual_id})) { &ld_log("Child not running for $virtual_id, spawning"); my $pid = fork; if (!defined($pid)) { &ld_log("fork failed"); } elsif ($pid == 0) { run_child($v); } else { $FORK_CHILDREN{get_virtual_id_str($v)} = $pid; &ld_log("Spawned child $virtual_id PID=$pid"); } } elsif (waitpid($FORK_CHILDREN{get_virtual_id_str($v)}, WNOHANG)) { delete $FORK_CHILDREN{get_virtual_id_str($v)}; } } check_signal(); if (!check_cfgfile()) { sleep 1; } check_signal(); } else { my @real_checked; foreach my $v (@VIRTUAL) { my $real = $$v{real}; my $virtual_id = get_virtual_id_str($v); REAL: foreach my $r (@$real) { my $real_id = get_real_id_str($r, $v); check_signal(); foreach my $tmp_id (@real_checked) { if($real_id eq $tmp_id) { &ld_debug(3, "Already checked: real server=$real_id (virtual=$virtual_id)"); next REAL; } } _check_real($v, $r); push(@real_checked, $real_id); } } check_signal(); if (!check_cfgfile()) { sleep $CHECKINTERVAL; } check_signal(); ld_emailalert_resend(); check_signal(); } } } sub run_child { my $v = shift; # Just exit on signals $SIG{'INT'} = "DEFAULT"; $SIG{'QUIT'} = "DEFAULT"; $SIG{'ILL'} = "DEFAULT"; $SIG{'ABRT'} = "DEFAULT"; $SIG{'FPE'} = "DEFAULT"; $SIG{'SEGV'} = "DEFAULT"; $SIG{'TERM'} = "DEFAULT"; $SIG{'BUS'} = "DEFAULT"; $SIG{'SYS'} = "DEFAULT"; $SIG{'XCPU'} = "DEFAULT"; $SIG{'XFSZ'} = "DEFAULT"; $SIG{'IOT'} = "DEFAULT"; $SIG{'PIPE'} = "IGNORE"; $SIG{'HUP'} = sub { exit 1 }; my $real = $$v{real}; my $virtual_id = get_virtual_id_str($v); my $checkinterval = $$v{checkinterval} || $CHECKINTERVAL; $0 = "ldirectord $virtual_id"; while (1) { foreach my $r (@$real) { $0 = "ldirectord $virtual_id checking $$r{server}"; _check_real($v, $r); } $0 = "ldirectord $virtual_id"; sleep $checkinterval; ld_emailalert_resend(); } } sub _check_real { my $v = shift; my $r = shift; my $real_id = get_real_id_str($r, $v); my $virtual_id = get_virtual_id_str($v); if (_check_real_for_maintenance($r)) { service_set($v, $r, "down", {do_log => 1, force => 1}, "Server in maintenance"); return; } elsif ($$v{checktype} eq "negotiate" || $$r{num_connects}>=$$v{num_connects}) { &ld_debug(2, "Checking negotiate: real server=$real_id (virtual=$virtual_id)"); if (grep $$v{service} eq $_, ("http", "https", "http_proxy")) { $$r{num_connects} = 0 if (check_http($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "pop") { $$r{num_connects} = 0 if (check_pop($v, $r, 0) == $SERVICE_UP); } elsif ($$v{service} eq "pops") { $$r{num_connects} = 0 if (check_pop($v, $r, 1) == $SERVICE_UP); } elsif ($$v{service} eq "imap") { $$r{num_connects} = 0 if (check_imap($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "imaps") { $$r{num_connects} = 0 if (check_imaps($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "smtp" or $$v{service} eq "submission") { $$r{num_connects} = 0 if (check_smtp($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "ftp") { $$r{num_connects} = 0 if (check_ftp($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "ldap") { $$r{num_connects} = 0 if (check_ldap($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "nntp") { $$r{num_connects} = 0 if (check_nntp($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "dns") { $$r{num_connects} = 0 if (check_dns($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "sip") { $$r{num_connects} = 0 if (check_sip($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "radius") { $$r{num_connects} = 0 if (check_radius($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "mysql") { $$r{num_connects} = 0 if (check_mysql($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "pgsql") { $$r{num_connects} = 0 if (check_pgsql($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "oracle") { $$r{num_connects} = 0 if (check_oracle($v, $r) == $SERVICE_UP); } elsif ($$v{service} eq "simpletcp") { $$r{num_connects} = 0 if (check_simpletcp($v, $r) == $SERVICE_UP); } else { $$r{num_connects} = 0 if (check_none($v, $r) == $SERVICE_UP); } } elsif ($$v{checktype} eq "connect") { if ($$v{protocol} ne "udp") { &ld_debug(2, "Checking connect: real server=$real_id (virtual=$virtual_id)"); check_connect($v, $r); } else { &ld_debug(2, "Checking connect (ping): real server=$real_id (virtual=$virtual_id)"); check_ping($v, $r); } } elsif ($$v{checktype} eq "ping") { &ld_debug(2, "Checking ping: real server=$real_id (virtual=$virtual_id)"); check_ping($v, $r); } elsif ($$v{checktype} eq "external") { &ld_debug(2, "Checking external: real server=$real_id (virtual=$virtual_id)"); check_external($v, $r); } elsif ($$v{checktype} eq "external-perl") { &ld_debug(2, "Checking external-perl: real server=$real_id (virtual=$virtual_id)"); check_external_perl($v, $r); } elsif ($$v{checktype} eq "off") { &ld_debug(2, "Checking off: No real or fallback servers to be added\n"); } elsif ($$v{checktype} eq "on") { &ld_debug(2, "Checking on: Real servers are added without any checks\n"); &service_set($v, $r, "up"); } elsif ($$v{checktype} eq "combined") { &ld_debug(2, "Checking combined-connect: real server=$real_id (virtual=$virtual_id)"); if (check_connect($v, $r) == $SERVICE_UP) { $$r{num_connects}++; } else { $$r{num_connects} = 999999; } } } sub _check_real_for_maintenance { my $r = shift; return undef if(!$MAINTDIR); my $servername = ld_gethostbyaddr($$r{server}); # Extract just the first component of the full name so we can match short or FQDN names $servername =~ /^([a-z][a-z0-9\-]+)\./; my $servershortname = $1; if (-e "$MAINTDIR/$$r{server}:$$r{port}") { &ld_debug(2, "Server maintenance: Found file $$r{server}:$$r{port}"); return 1; } elsif (-e "$MAINTDIR/$$r{server}") { &ld_debug(2, "Server maintenance: Found file $$r{server}"); return 1; } elsif ($servername && -e "$MAINTDIR/$servername:$$r{port}") { &ld_debug(2, "Server maintenance: Found file $servername:$$r{port}"); return 1; } elsif ($servername && -e "$MAINTDIR/$servername") { &ld_debug(2, "Server maintenance: Found file $servername"); return 1; } elsif ($servershortname && -e "$MAINTDIR/$servershortname:$$r{port}") { &ld_debug(2, "Server maintenance: Found file $servershortname:$$r{port}"); return 1; } elsif ($servershortname && -e "$MAINTDIR/$servershortname") { &ld_debug(2, "Server maintenance: Found file $servershortname"); return 1; } return undef; } sub check_http { use LWP::UserAgent; use LWP::Debug; if($DEBUG > 2) { LWP::Debug::level('+'); } my ($v, $r) = @_; $$r{url} =~ /(http|https):\/\/([^:\/]+)(:([^\/]+))?(\/.*)/; my $host = $2; #my $port = $3; my $uri = $4; my $virtualhost = (defined $$v{virtualhost} ? $$v{virtualhost} : $host); &ld_debug(2, "check_http: url=\"$$r{url}\" " . "virtualhost=\"$virtualhost\""); my $ua = new LWP::UserAgent(); my $h = undef; if ($$v{service} eq "http_proxy") { my $port = ld_checkport($v, $r); $ua->proxy("http", "http://$$r{server}:$port/"); } else { $h = new HTTP::Headers("Host" => $virtualhost); } my $req = new HTTP::Request("$$v{httpmethod}", "$$r{url}", $h); my $res; # LWP does not seem to honour timeouts set using $ua->timeout() # for HTTPS. So use an alarm instead. This also has the advantage # of being cumulative timeout, rather than a per send/receive # timeout. eval { # LWP makes unguarded calls to eval # which throw a fatal exception if they fail # Needless to say, this is completely stupid. # Resetting of $SIG{'__DIE__'} is also # needed now that alarm() is used. local $SIG{'__DIE__'} = "DEFAULT"; local $SIG{'ALRM'} = sub { die "Timeout Alarm" }; &ld_debug(4, "Timeout is $$v{negotiatetimeout}"); &ld_debug(2, "Starting Check"); alarm $$v{negotiatetimeout}; &ld_debug(2, "Starting HTTP/HTTPS"); $res = $ua->request($req); &ld_debug(2, "Finished HTTP/HTTPS"); alarm 0; # Cancel the alarm }; if (not defined $res) { &ld_debug(2, "check_http: timeout"); goto down; } if ($$v{service} eq "https") { &ld_debug(2, "SSL-Cipher: " . $res->header('Client-SSL-Cipher')); &ld_debug(2, "SSL-Cert-Subject: " . $res->header('Client-SSL-Cert-Subject')); &ld_debug(2, "SSL-Cert-Issuer: " . $res->header('Client-SSL-Cert-Issuer')); } my $recstr = $$r{receive}; if ($res->is_success && (!($recstr =~ /.+/) || $res->content =~ /$recstr/)) { service_set($v, $r, "up", {do_log => 1}, $res->status_line); &ld_debug(2, "check_http: $$r{url} is up\n"); return $SERVICE_UP; } my $log_message = $res->is_success ? $res->content : $res->status_line; service_set($v, $r, "down", {do_log => 1}, $log_message); &ld_debug(3, "Headers " . $res->headers->as_string); down: &ld_debug(2, "check_http: $$r{url} is down\n"); return $SERVICE_DOWN; } sub check_smtp { require Net::SMTP; my ($v, $r) = @_; my $port = ld_checkport($v, $r); &ld_debug(2, "Checking $$v{service}: server=$$r{server} port=$port"); my $smtp = new Net::SMTP($$r{server}, Port => $port, Timeout => $$v{negotiatetimeout}); if ($smtp) { $smtp->quit; service_set($v, $r, "up", {do_log => 1}); return $SERVICE_UP; } else { service_set($v, $r, "down", {do_log => 1}); return $SERVICE_DOWN; } } sub check_pop { require Mail::POP3Client; my ($v, $r, $ssl) = @_; my $port = ld_checkport($v, $r); &ld_debug(2, "Checking pop server=$$r{server} port=$port ssl=$ssl"); my $pop = new Mail::POP3Client(USER => $$v{login}, PASSWORD => $$v{passwd}, HOST => $$r{server}, USESSL => $ssl, PORT => $port, DEBUG => 0, TIMEOUT => $$v{negotiatetimeout}); if (!$pop) { service_set($v, $r, "down", {do_log => 1}); return $SERVICE_DOWN; } if($$v{login} ne "") { my $authres = $pop->login(); $pop->close(); if (!$authres) { service_set($v, $r, "down", {do_log => 1}); return $SERVICE_DOWN; } } $pop->close(); service_set($v, $r, "up", {do_log => 1}); return $SERVICE_UP; } sub check_imap { require Net::IMAP::Simple; my ($v, $r) = @_; my $port = ld_checkport($v, $r); &ld_debug(2, "Checking imap server=$$r{server} port=$port"); my $imap = Net::IMAP::Simple->new($$r{server}, port => $port, timeout => $$v{negotiatetimeout}); if (!$imap) { service_set($v, $r, "down", {do_log => 1}); return $SERVICE_DOWN; } if($$v{login} ne "") { my $authres = $imap->login($$v{login},$$v{passwd}); $imap->quit; if (!$authres) { service_set($v, $r, "down", {do_log => 1}); return $SERVICE_DOWN; } } $imap->quit(); service_set($v, $r, "up", {do_log => 1}); return $SERVICE_UP; } sub check_imaps { require Net::IMAP::Simple::SSL; my ($v, $r) = @_; my $port = ld_checkport($v, $r); &ld_debug(2, "Checking imaps server=$$r{server} port=$port"); my $imaps = Net::IMAP::Simple::SSL->new($$r{server}, port => $port, timeout => $$v{negotiatetimeout}); if (!$imaps) { service_set($v, $r, "down", {do_log => 1}); return $SERVICE_DOWN; } if($$v{login} ne "") { my $authres = $imaps->login($$v{login},$$v{passwd}); $imaps->quit; if (!$authres) { service_set($v, $r, "down", {do_log => 1}); return $SERVICE_DOWN; } } $imaps->quit(); service_set($v, $r, "up", {do_log => 1}); return $SERVICE_UP; } sub check_ldap { my ($v, $r) = @_; require Net::LDAP; my $port = ld_checkport($v, $r); &ld_debug(2, "Checking ldap server=$$r{server} port=$port"); my $recstr = $$r{receive}; my $ldap = Net::LDAP->new("$$r{server}", port => $port, timeout => $$v{negotiatetimeout}); if(!$ldap) { service_set($v, $r, "down", {do_log => 1}, "Connection failed"); &ld_debug(4, "Connection failed"); return $SERVICE_DOWN; } my $mesg; if ($$v{login} && $$v{passwd}) { $mesg = $ldap->bind($$v{login}, password=>$$v{passwd}) ; } else { $mesg = $ldap->bind ; } if ($mesg->is_error) { service_set($v, $r, "down", {do_log => 1}, "Bind failed"); &ld_debug(4, "Bind failed"); return $SERVICE_DOWN; } &ld_debug(4, "Base : " . substr($$r{request},1)); my $result = $ldap->search ( base => substr($$r{request},1) . "", scope => "base", filter => "(objectClass=*)" ); if($result->count != 1) { service_set($v, $r, "down", {do_log => 1}, "No answer received"); &ld_debug(2, "Count failed : " . $result->count); return $SERVICE_DOWN; } my $href = $result->as_struct; my @arrayOfDNs = keys %$href ; if (!($recstr =~ /.+/) || $arrayOfDNs[0] =~ /$recstr/) { service_set($v, $r, "up", {do_log => 1}, "Success"); return $SERVICE_UP; } else { service_set($v, $r, "down", {do_log => 1}, "Response mismatch"); &ld_debug(4,"Message differs : " . ", " . $$r{receive} . ", " . $arrayOfDNs[0] . "."); return $SERVICE_DOWN; } } sub check_nntp { use IO::Socket; use IO::Socket::INET6; use IO::Select; my ($v, $r) = @_; my $sock; my $s; my $buf; my $port = ld_checkport($v, $r); my $status = 1; &ld_debug(2, "Checking nntp server=$$r{server} port=$port"); unless ($sock = IO::Socket::INET6->new(PeerAddr => $$r{server}, PeerPort => $port, Proto => 'tcp', TimeOut => $$v{negotiatetimeout})) { service_set($v, $r, "down", {do_log => 1}); return $SERVICE_DOWN; } $s = IO::Select->new(); $s->add($sock); if (scalar($s->can_read($$v{negotiatetimeout})) == 0) { service_set($v, $r, "down", {do_log => 1}); } else { sysread($sock, $buf, 64); if ($buf =~ /^2/) { service_set($v, $r, "up", {do_log => 1}); $status = 0; } else { service_set($v, $r, "down", {do_log => 1}); } } $s->remove($sock); $sock->close; return $status; } sub check_radius { require Authen::Radius; my ($v, $r) = @_; &ld_debug(2, "Checking radius"); my $port = ld_checkport($v, $r); my $radius; my $result = ""; eval { local $SIG{'__DIE__'} = "DEFAULT"; local $SIG{'ALRM'} = sub { die "Timeout Alarm" }; &ld_debug(4, "Timeout is $$v{checktimeout}"); &ld_debug(2, "Starting Check"); alarm $$v{checktimeout}; &ld_debug(2, "Starting Radius"); $radius = new Authen::Radius(Host => "$$r{server}:$port", Secret=>$$v{secret}, TimeOut=>$$v{negotiatetimeout}, Errmode=>'die'); $result = $radius->check_pwd($$v{login}, $$v{passwd}); &ld_debug(2, "Finished Radius"); alarm 0; # Cancel the alarm }; if ($result eq "") { &service_set($v, $r, "down", {do_log => 1}); &ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@"); &ld_debug(3, "Radius Error: ".$radius->get_error); return $SERVICE_DOWN; } else { &service_set($v, $r, "up", {do_log => 1}); &ld_debug(3, "Activated service $$r{server}:$$r{port}"); return $SERVICE_UP; } } sub check_mysql { return check_sql(@_, "mysql", "database"); } sub check_pgsql { return check_sql(@_, "Pg", "dbname"); } sub check_sql_log_errstr { my ($prefix, $errstr) = (@_); for $_ (split /\n/, $errstr) { &ld_debug(4, "$prefix $_\n"); } } sub check_oracle { return check_sql(@_, "Oracle", "sid"); } sub check_sql { require DBI; my ($v, $r, $dbd, $dbname) = @_; my $port = ld_checkport($v, $r); my ($dbh, $sth, $query, $rows, $result); $result = $SERVICE_DOWN; $query = $$r{request}; $query =~ s#^/##; unless ($$v{login} && $query) { &ld_log("Error: Must specify a login and request string " . "for MySQL, Oracle and PostgreSQL checks. " . "Not adding $$r{server}.\n"); goto err_down; } $result=2; # Set result flag. Only ok if ends up at zero. &ld_debug(2, "Checking $$v{server} server=$$r{server} port=$port\n"); $dbh = DBI->connect("dbi:$dbd:$dbname=$$v{database};" . "host=$$r{server};port=$port", $$v{login}, $$v{passwd}); unless ($dbh) { &ld_debug(2, "Failed to bind to $$r{server} with DBI->errstr\n"); check_sql_log_errstr("Failed to bind to $$r{server} with", DBI->errstr); goto err_down; } $result--; $sth = $dbh->prepare($query); unless ($sth) { &ld_debug(2, "Error preparing statement: $dbh->errstr\n"); check_sql_log_errstr("Error preparing statement:", $dbh->errstr); goto err_disconect; } # Test to see if any errors are returned $sth->execute; if ($dbh->err) { &ld_debug(2, "Error executing statement: $dbh->errstr : $dbh->err\n"); check_sql_log_errstr("Error executing statement:", $dbh->errstr, $dbh->err); goto err_finish; } # On error "execute" will return undef. # # Assuming you're using 'SELECT' you will get the number of rows # returned from the db when running execute: the 'rows' method is # only used when doing something that is NOT a select. I cannot # imagine that you would ever want to insert or update from a # regular polling on this system, so we will assume you are using # SELECT here. # # Ideally you will do something like this: 'select * from # director_slave where enabled=1' This way you can have, say, a # MEMORY table in MySQL where you insert a value into a row # (enabled) that says whether or not you want to actually use this # in the pool from ldirector / ipvs, and disable them without # actually turning off your sql server. $sth->execute; if ($dbd eq "Oracle") { $sth->fetchrow_hashref() } unless ($rows = $sth->rows) { check_sql_log_errstr("Error executing statement:", $dbh->errstr, $dbh->err); goto err_finish; } # Actually look to see if there was data returned in this statement, # else disable node if($rows > 0) { goto out; } else { goto err_finish; } out: $result = $SERVICE_UP; err_finish: $sth->finish(); err_disconnect: $dbh->disconnect(); err_down: service_set($v, $r, $result == $SERVICE_UP ? "up" : "down", {do_log => 1}); return $result; } sub check_connect { my ($v, $r) = @_; my $port = ld_checkport($v, $r); eval { local $SIG{'__DIE__'} = "DEFAULT"; local $SIG{'ALRM'} = sub { die "Timeout Alarm" }; &ld_debug(4, "Timeout is $$v{checktimeout}"); alarm $$v{checktimeout}; my $sock = &ld_open_socket($$r{server}, $port, $$v{protocol}); if ($sock) { close($sock); } else { alarm 0; # Cancel the alarm die("Socket Connect Failed"); } &ld_debug(3, "Connected to $$r{server} (port $port)"); alarm 0; # Cancel the alarm }; if ($@) { &service_set($v, $r, "down", {do_log => 1}); &ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@"); return $SERVICE_DOWN; } else { &service_set($v, $r, "up", {do_log => 1}); &ld_debug(3, "Activated service $$r{server}:$$r{port}"); return $SERVICE_UP; } } sub check_external { my ($v, $r) = @_; my $v_server; if (defined $$v{server}) { $v_server = $$v{server}; } else { $v_server = $$v{fwm}; } my $result = system_timeout($$v{checktimeout}, $$v{checkcommand}, $v_server, $$v{port}, $$r{server}, $$r{port}); if ($result) { &service_set($v, $r, "down", {do_log => 1}); &ld_debug(3, "Deactivated service $$r{server}:$$r{port}: " . "$@ after calling $$v{checkcommand} with result " . "$result"); return 0; } else { &service_set($v, $r, "up", {do_log => 1}); &ld_debug(3, "Activated service $$r{server}:$$r{port}"); return 1; } } sub check_external_perl { my ($v, $r) = @_; my $result; my $v_server; eval { local $SIG{'__DIE__'} = "DEFAULT"; local $SIG{'ALRM'} = sub { die "Timeout Alarm" }; &ld_debug(4, "Timeout is $$v{checktimeout}"); alarm $$v{checktimeout}; if (defined $$v{server}) { $v_server = $$v{server}; } else { $v_server = $$v{fwm}; } my $cmdfunc = $check_external_perl__funcs{$$v{checkcommand}}; if (!defined($cmdfunc)) { open(CMDFILE, "<$$v{checkcommand}") || die "cannot open external-perl checkcommand file: $$v{checkcommand}"; $cmdfunc = eval("sub { \@ARGV=\@_; " . join("", ) . " }"); close(CMDFILE); $check_external_perl__funcs{$$v{checkcommand}} = $cmdfunc; } no warnings 'redefine'; local *CORE::GLOBAL::exit = sub { $result = shift; goto external_exit; }; $cmdfunc->($v_server, $$v{port}, $$r{server}, $$r{port}); external_exit: alarm 0; }; if ($@ or $result != 0) { &service_set($v, $r, "down"); &ld_debug(3, "Deactivated service $$r{server}:$$r{port}: " . "$@ after calling (external-perl) $$v{checkcommand} with result " . "$result"); return 0; } else { &service_set($v, $r, "up"); &ld_debug(3, "Activated service $$r{server}:$$r{port}"); return 1; } } sub check_sip { my ($v, $r) = @_; my $sip_d_port = ld_checkport($v, $r); &ld_debug(2, "Checking sip server=$$r{server} port=$sip_d_port"); eval { use Socket; local $SIG{'__DIE__'} = "DEFAULT"; local $SIG{'ALRM'} = sub { die "Timeout Alarm" }; &ld_debug(4, "Timeout is $$v{checktimeout}"); alarm $$v{negotiatetimeout}; my $sock = &ld_open_socket($$r{server}, $sip_d_port, $$v{protocol}); unless ($sock) { alarm 0; die("Socket Connect Failed"); } my ($sip_s_addr_str, $sip_s_port) = &ld_get_addrport($sock); &ld_debug(3, "Connected from $sip_s_addr_str:$sip_s_port to " . $$r{server} . ":$sip_d_port"); select $sock; $|=1; select STDOUT; my $request = "OPTIONS sip:" . $$v{login} . " SIP/2.0\r\n" . "Via: SIP/2.0/UDP $sip_s_addr_str:$sip_s_port;" . "branch=z9hG4bKhjhs8ass877\r\n" . "Max-Forwards: 70\r\n" . "To: \r\n" . "From: ;tag=1928301774\r\n" . "Call-ID: " . (join "", map { unpack "H*", chr(rand(256)) } 1..8) . "\r\n" . "CSeq: 63104 OPTIONS\r\n" . "Contact: \r\n" . "Accept: application/sdp\r\n" . "Content-Length: 0\r\n\r\n"; print "Request:\n$request"; print $sock $request; my $ok; my $reply; while (<$sock>) { chomp; $/="\r"; chomp; $/="\n"; last if ($_ eq ""); if (!defined $ok) { # Check status $ok = $_; if ($ok !~ m/^SIP\/2.0 200 OK/) { alarm 0; # Cancel the alarm close($sock); die "$ok\n"; } next; } $reply .= "$_\n"; # Add more checks here as desired } alarm 0; # Cancel the alarm close($sock); if (!defined $ok) { die "No OK\n"; } print "Reply:\n$ok\n$reply\n"; }; if ($@) { &service_set($v, $r, "down", {do_log => 1}); &ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@"); return $SERVICE_DOWN; } else { &service_set($v, $r, "up", {do_log => 1}); &ld_debug(3, "Activated service $$r{server}:$$r{port}"); return $SERVICE_UP; } } sub check_simpletcp { my ($v, $r) = @_; my $d_port = ld_checkport($v, $r); &ld_debug(2, "Checking simpletcp server=$$r{server} port=$d_port"); eval { use Socket; local $SIG{'__DIE__'} = "DEFAULT"; local $SIG{'ALRM'} = sub { die "Timeout Alarm" }; &ld_debug(4, "Timeout is $$v{checktimeout}"); alarm $$v{negotiatetimeout}; my $sock = &ld_open_socket($$r{server}, $d_port, $$v{protocol}); unless ($sock) { alarm 0; die("Socket Connect Failed"); } my ($s_addr_str, $s_port) = &ld_get_addrport($sock); &ld_debug(3, "Connected from $s_addr_str:$s_port to " . $$r{server} . ":$d_port"); select $sock; $|=1; select STDOUT; my $request = substr($$r{request}, 1); $request =~ s/\\n/\n/g ; &ld_debug(2, "Checking simpletcp server=$$r{server} port=$d_port request:\n$request"); print $sock $request; shutdown($sock, SHUT_WR); my $ok; my $reply; while (<$sock>) { &ld_debug(2, "Checking simpletcp server=$$r{server} port=$d_port receive=" . $$r{receive} ." got: $_\n"); if ( $_ =~ /$$r{receive}/ ) { $ok = 1; last; } } alarm 0; # Cancel the alarm close($sock); if (!defined $ok) { die "No OK\n"; } }; if ($@) { &service_set($v, $r, "down", {do_log => 1}); &ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@"); return $SERVICE_DOWN; } else { &service_set($v, $r, "up", {do_log => 1}); &ld_debug(3, "Activated service $$r{server}:$$r{port}"); return $SERVICE_UP; } } sub check_ftp { require Net::FTP; my ($v, $r) = @_; my $ftp; my $memory; my $debug = ($DEBUG > 2) ? 1 : 0; my $port = ld_checkport($v, $r); &ld_debug(2, "Checking ftp server=$$r{server} port=$port"); &ld_debug(4, "Timeout is $$v{negotiatetimeout}"); open(TMP,'+>', undef); # In some cases Net::FTP dies if there is a timeout eval { unless ($ftp = Net::FTP->new("$$r{server}:$port", Timeout=>$$v{negotiatetimeout}, Debug=>$debug)) { die "Could not connect\n"; } $ftp->login($$v{login}, $$v{passwd}); $ftp->cwd("/"); $ftp->binary(); $ftp->pasv(); $ftp->get("$$r{request}", *TMP); $ftp->quit(); }; if ($@) { &ld_debug(2, "Warning: $@"); } seek TMP, 0, 0; local $/; $memory = ; close TMP; if ($memory =~ /$$r{receive}/) { service_set($v, $r, "up", {do_log => 1}); return $SERVICE_UP; } service_set($v, $r, "down", {do_log => 1}); return $SERVICE_DOWN; } sub check_dns { my $res; my $query; my $rr; my $request; my $server; my ($v,$r) = @_; { # Net::DNS makes unguarded calls to eval # which throw a fatal exception if they fail # Needless to say, this is completely stupid. local $SIG{'__DIE__'} = "DEFAULT"; # When fork=yes we need to ignore the child death local $SIG{'CHLD'} = "IGNORE"; require Net::DNS; } $res = new Net::DNS::Resolver; if($DEBUG > 2) { $res->debug(1); } $$r{"request"} =~ m/^\/?(.*)/; $request=$1; $server = &ld_strip_brackets($$r{server}); &ld_debug(2, "Checking dns: request=\"$request\" receive=\"" . $$r{"receive"} . "\"\n"); eval { local $SIG{'__DIE__'} = "DEFAULT"; local $SIG{'ALRM'} = sub { die "timeout\n"; }; alarm($$v{negotiatetimeout}); $res->nameservers($server); if ($$v{"protocol"} eq "tcp") { $res->usevc(1); } $query = $res->search($request); alarm(0); }; if (@$ eq "timeout\n" or ! $query) { service_set($v, $r, "down", {do_log => 1}, "Connection timed out"); return $SERVICE_DOWN; } foreach $rr ($query->answer) { if (($rr->type eq "A" and $rr->address eq $$r{"receive"}) or ($rr->type eq "PTR" and $rr->ptrdname eq $$r{"receive"})) { service_set($v, $r, "up", {do_log => 1}, "Success"); return $SERVICE_UP; } } service_set($v, $r, "down", {do_log => 1}, "Response mismatch"); return $SERVICE_DOWN; } sub check_ping { use Net::Ping; my ($v,$r) = (@_); &ld_debug(2, "Checking ping: " . "host=\"" . $$r{server} . "\" checktimeout=\"" . $$v{"checktimeout"} . "\" checkcount=\"" . $$v{"checkcount"} . "\"\n"); my $p = Net::Ping->new("icmp","1","64"); for (my $attempt = 0; $attempt < $$v{"checkcount"}; $attempt++) { if ($p->ping($$r{server}, $$v{"checktimeout"})) { &ld_debug(2, "pong from $$r{server}\n"); service_set($v, $r, "up", {do_log => 1}); return $SERVICE_UP; } &ld_debug(2, "ping to $$r{server} timed out " . "(attempt " . ($attempt + 1) . "/" . $$v{"checkcount"} . ")\n"); } service_set($v, $r, "down"); return $SERVICE_DOWN; } # check_none # Dummy function to check service if service type is none. # Just activates the real server sub check_none { my ($v, $r) = @_; &ld_debug(2, "Checking none"); service_set($v, $r, "up", {do_log => 1}); return $SERVICE_UP; } # service_set # Used to bring up and down real servers. # This is the function you should call if you want to bring a real # server up or down. # This function is safe to call regardless of the current state of a # real server. # Do _not_ call _service_up or _service_down directly. # pre: v: virtual that the real service belongs to # Only used to determine the protocol of the service # r: real server to take down # state: up or down # up to bring the real service up # down to bring the real service up # flags: hash with the following (optional) keys: # force => 1 - force setting of the specified state # do_log => 1 - log the state to the monitorfile # (when called as the result of a check) # post: The real server is brought up or down for each virtual service # it belongs to. # return: none sub service_set { my ($v, $r, $state, $flags, $log_msg) = @_; my ($real, $virtual, $virt, $now); if ($$flags{'do_log'}) { $now = localtime(); if (!defined($log_msg)) { $log_msg = "-"; } # URI-escape special log characters ('|' and newlines) $log_msg =~ s/([%|\r\n])/sprintf("%%%.2x", ord($1))/eg; } # Find the real server in @REAL foreach $real (@REAL) { if($real->{"real"} eq get_real_id_str($r, $v)) { $virtual = $real->{"virtual"}; last; } } return unless (defined($virtual)); # Check each virtual service for the real server and make # changes as necessary foreach $v (@VIRTUAL){ # Use found rather than relying on tmp_id being # set when we leave the foreach loop. There # seems to some weirdness in Perl (5.6.0 on Redhat 7.2) my $found = 0; my $tmp_id; my $virtual_id = get_virtual_id_str($v); my $real_id = get_real_id_str($r, $v); my $log_str = "real server=$real_id" . " (virtual=$virtual_id)"; foreach $tmp_id (@$virtual) { if($virtual_id eq $tmp_id) { $found = 1; last; } } if ($found == 1) { if ($state=~/up/i) { _service_up($v, $r, $$flags{"force"}); &ld_debug(2, "Enabled $log_str"); } elsif ($state=~/down/i) { _service_down($v, $r, $$flags{"force"}); &ld_debug(2, "Disabled $log_str"); } if ($$v{"monitorfile"} and $$flags{"do_log"}) { my $real_log_msg = $real_id; $real_log_msg =~ tr/:/ /s; $real_log_msg =~ s/\\//g; unless( open(CHECKLOG, ">>$$v{monitorfile}") and print CHECKLOG "[$now] [$$] $real_log_msg [$state] $log_msg\n" and close(CHECKLOG) ) { die("Error writing to monitorfile '$$v{monitorfile}': $!"); } } } } } # _remove_service # Remove a real server by either making it quiescent or deleting it # Should be called by _service_down or fallback_off # I.e. If you want to change the state of a real server call service_set. # If you call this function directly then ldirectord will lose track # of the state of real servers. # If the real server exists (which it should) make it quiescent or # delete it, depending on the global and per virtual service quiescent flag. # If it # doesn't exist, just leave it as it will be added by the # _service_up code as appropriate. # pre: v: reference to virtual service to with the real server belongs # rservice: service to restore. Of the form server:port for a tcp or # udp service. Of the form fwmark for a fwm service. # rforw: Forwarding mechanism of service. Sould be one of "-g" "-i" or # "-m" # tag: Tag to use for logging. Should be either "real" or "fallback" # post: real service is taken up from the respective virtual service # if it is inactive # return: none sub _remove_service { my ($v, $rservice, $rforw, $tag) = (@_); my $oldsrv; my $ov; my $or; my $ipvsadm_args; my $log_args; my $virtual_str; my $old_rservice; my $is_quiescent; $virtual_str = &get_virtual($v); $oldsrv=&ld_read_ipvsadm(); $ov=$oldsrv->{&get_real_service_str($v)}; if(!defined($ov)){ return; } if ($tag ne "fallback" and ((defined $$v{quiescent} and $$v{quiescent} eq "yes") or (!defined($$v{quiescent}) and $QUIESCENT eq "yes"))){ $is_quiescent = "quiescent"; } $or=$ov->{"real"}->{$rservice}; # If a virtual service is a IP/port service (not fwmark) # and a real-servers uses a forwarding mechanism other than masq # then the port will always be that of the virtual service. # This includes real-servers that LVS has set to use # the local forwarding mechanism because their IP address # is local. Thus, if $rservice does not exist test # for the same ip address with the virtual servers port. # N.B: This could cause strange things to happen if # there is a clash between two real servers on different ports # that LVS has mapped to being the same thing. if(!defined($or)) { $old_rservice = $rservice; $rservice =~ /(.*):(.*)/; $rservice = $1; $virtual_str =~ /(.*):(.*)/; $rservice .= ":" . $2; $or=$ov->{"real"}->{$rservice}; # If this doesn't exist either, use the original service. # Otherwise if masq and quiescence is in use, the # real server is not local, and it has an alternate port to # the virtual server, using the mapped service will # result in a quiescent service being created on the # virtual server's port, which is not wanted. if(!defined($or)) { $rservice = $old_rservice; $old_rservice = undef; } } if((!defined($or) and !defined($is_quiescent)) or (defined($is_quiescent) and defined($or) and $or->{"weight"} eq 0 and get_forward_flag($or->{"forward"}) eq $rforw)){ return; } $ipvsadm_args = "$$v{proto} " . &get_virtual_option($v) . " -r $rservice"; $log_args = "$tag server: $rservice "; if(defined($old_rservice)) { $log_args .= "mapped from $old_rservice " } $log_args .= "($virtual_str)"; my $server_str=$rservice . " " . $virtual_str; my $currenttime=time(); if(defined($is_quiescent)) { if (defined($or)) { &system_wrapper("$IPVSADM -e " . "$ipvsadm_args $rforw -w 0"); } else { &system_wrapper("$IPVSADM -a " . "$ipvsadm_args $rforw -w 0"); } &ld_log("Quiescent $log_args (Weight set to 0)"); &ld_emailalert_send("Quiescent $log_args (Weight set to 0)", $v, $rservice, $currenttime); } else { &system_wrapper("$IPVSADM -d $ipvsadm_args"); &ld_log("Deleted $log_args"); &ld_emailalert_send("Deleted $log_args", $v, $rservice, $currenttime); } } # _restore_service # Make a retore a real server. The opposite of _quiescent_server. # Should be called by _service_up or fallback_on # I.e. If you want to change the state of a real server call service_set. # If you call this function directly then ldirectord will lose track # of the state of real servers. # If the real server exists (which it should) make it quiescent. If it # doesn't exist, just leave it as it will be added by the _service_up code # as appropriate. # pre: v: reference to virtual service to with the real server belongs # rservice: service to restore. Of the form server:port for a tcp or # udp service. Of the form fwmark for a fwm service. # rforw: Forwarding mechanism of service. Sould be one of "-g" "-i" or # "-m" # rwght: Weight of service. Sold be of the form "" # e.g. "1" # tag: Tag to use for logging. Should be either "real" or "fallback" # post: real service is taken up from the respective virtual service # if it is inactive # return: none sub _restore_service { my ($v, $rservice, $rforw, $rwght, $tag) = (@_); my $oldsrv; my $ov; my $or; my $ipvsadm_args; my $log_args; $ipvsadm_args = "$$v{proto} " . &get_virtual_option($v) . " -r $rservice $rforw -w $rwght"; $log_args = "$tag server: $rservice " . "(" #. scalar(%{$v->{real_status}}) . &get_virtual($v) . ")"; #if the server exists then restore its weight # otherwise add the server $oldsrv=&ld_read_ipvsadm(); $ov=$oldsrv->{&get_real_service_str($v)}; if(defined($ov)){ $or=$ov->{"real"}->{$rservice}; } if(defined($or)){ unless($or->{"weight"} eq $rwght and get_forward_flag($or->{"forward"}) eq $rforw){ &system_wrapper("$IPVSADM -e $ipvsadm_args"); &ld_log("Restored $log_args (Weight set to $rwght)"); &ld_emailalert_send("Restored $log_args " . "(Weight set to $rwght)", $v, $rservice, 0); } } else { &system_wrapper("$IPVSADM -a $ipvsadm_args"); &ld_log("Added $log_args (Weight set to $rwght)"); &ld_emailalert_send("Added $log_args (Weight set to $rwght)", $v, $rservice, 0); } } # Check the status of a server # Should only be called from _status_up, _status_down, # _service_up, or _service_down # Returns 1 if the server is up, 0 if down sub _status_check { my ($v, $r, $is_fallback) = (@_); my $virtual_id = get_virtual_id_str($v); my $real_id = get_real_id_str($r, $v); if (defined($is_fallback)) { if (defined($v->{real_status}) or (defined($v->{fallback_status}) and $v->{fallback_status}->{"$real_id"})) { return 1; } } else { if (defined ($v->{real_status}) and $v->{real_status}->{"$real_id"}) { return 1; } } return 0; } # Set the status of a server as up # Should only be called from _service_up or _ld_start sub _status_up { my ($v, $r, $is_fallback) = (@_); my $virtual_id = get_virtual_id_str($v); my $real_id = get_real_id_str($r, $v); return undef if(_status_check($v, $r, $is_fallback)); $r->{virtual_status}->{"$virtual_id"} = 1; if (defined $is_fallback) { $v->{fallback_status}->{"$real_id"} = 1; } else { $v->{real_status}->{"$real_id"} = 1; } return 1; } # Set the status of a server as down # Should only be called from _service_down or ld_stop sub _status_down { my ($v, $r, $is_fallback) = (@_); my $virtual_id = get_virtual_id_str($v); my $real_id = get_real_id_str($r, $v); return undef if (!_status_check($v, $r, $is_fallback)); if (defined($is_fallback)) { delete $v->{fallback_status}->{"$real_id"}; if (! %{$v->{fallback_status}}) { $v->{fallback_status} = undef; } } else { delete $v->{real_status}->{"$real_id"}; if (! %{$v->{real_status}}) { $v->{real_status} = undef; } } delete $r->{virtual_status}->{"$virtual_id"}; if (! %{$r->{virtual_status}}) { $r->{virtual_status} = undef; } return 1; } # _service_up # Bring a real service up if it is down # Should be called by service_set only # I.e. If you want to change the state of a real server call service_set. # If you call this function directly then ldirectord will lose track # of the state of real servers. # pre: v: reference to virtual service to with the real server belongs # r: reference to the real server to take down # post: real service is taken up from the respective virtual service # if it is inactive # return: none sub _service_up { my ($v, $r, $force) = (@_); if ($r->{failcount} > 0) { ld_log("Resetting soft failure count: " . $r->{server} . ":" . $r->{port} . " (" . get_virtual_id_str($v) . ")"); } $r->{failcount} = 0; if (! _status_up($v, $r) and ! defined($force)) { return; } &_restore_service($v, $r->{server} . ":" . $r->{port}, $r->{forw}, $r->{weight}, "real"); &fallback_off($v); } # _service_down # Bring a real service down if it is up # Should be called by service_set only # I.e. if you want to change the state of a real server call service_set. # If you call this function directly then ldirectord will lose track # of the state of real servers. # pre: v: reference to virtual service to with the real server belongs # r: reference to the real server to take down # post: real service is taken down from the respective virtual service # if it is active # return: none sub _service_down { my ($v, $r, $force) = @_; if (!_status_check($v, $r) and !defined($force)) { return; } $r->{failcount}++; if (!defined($force) and _status_check($v, $r) and ($r->{failcount} < $v->{failurecount})) { ld_log("Soft failure real server: " . $r->{server} . ":" . $r->{port} . " (" . get_virtual_id_str($v) . ") failure " . $r->{failcount} . "/" . $v->{failurecount}); return; } _status_down($v, $r); &_remove_service($v, $r->{server} . ":" . $r->{port}, $r->{forw}, "real"); &fallback_on($v); } # fallback_on # Turn on the fallback server for a virtual service if it is inactive # pre: v: virtual to turn fallback service on for # post: fallback server is turned on if it was inactive # return: none sub fallback_on { my ($v, $force) = (@_); my $fallback=&fallback_find($v); if (defined($fallback) and (_status_up($v, $fallback, "fallback") or defined($force))) { &_restore_service($v, $fallback->{server} . ":" . $fallback->{port}, get_forward_flag($fallback->{forward}), "1", "fallback"); } if (!defined ($v->{real_status})) { &do_fallback_command($v, "start"); } } # fallback_off # Turn off the fallback server for a virtual service if it is active # pre: v: virtual to turn fallback service off for # post: fallback server is turned off if it was active # return: none sub fallback_off { my ($v, $force) = (@_); my $fallback=&fallback_find($v); if (defined($fallback) and (_status_down($v, $fallback, "fallback") or defined($force))) { &_remove_service($v, $fallback->{server} . ":" . $fallback->{port}, get_forward_flag($fallback->{forward}), "fallback"); } if (defined ($v->{real_status})) { &do_fallback_command($v, "stop"); } } # fallback_find # Determine the fallback for a virtual service # pre: virtual: reference to a virtual service # post: none # return: $virtual->{"fallback"} if defined # else $FALLBACK->{$virtual->{"protocol"}} if defined # else undef sub fallback_find { my ($virtual) = (@_); my($global_fallback_ptr); # fallback pointer my $ipv6p = ($virtual->{addressfamily} == AF_INET6) ? 1 : 0; if( defined $virtual->{"fallback"} ) { return($virtual->{"fallback"}); } elsif ( not defined($FALLBACK) and not $ipv6p ) { return undef; } elsif ( not defined($FALLBACK6) and $ipv6p ) { return undef; } if ($ipv6p) { # IPv6 $global_fallback_ptr = $FALLBACK6; } else { $global_fallback_ptr = $FALLBACK; } # If the global fallback has a port, it can be used as is if (defined($global_fallback_ptr->{$virtual->{"protocol"}}->{"port"})) { return $global_fallback_ptr->{$virtual->{"protocol"}}; } # Else create an anonymous fallback my %anon_fallback = %{$global_fallback_ptr->{$virtual->{"protocol"}}}; $anon_fallback{"port"} = $virtual->{"port"}; return \%anon_fallback; } # fallback_command # Execute the fallback command with the given status if it wasn't executed # with this status already for the supplied virtual service. sub do_fallback_command { my ($v, $status) = (@_); if (defined $v->{fallbackcommand_status} and $v->{fallbackcommand_status} eq $status) { return; } $v->{fallbackcommand_status} = $status; if (defined($v->{fallbackcommand})) { &system_wrapper($v->{fallbackcommand} . " " . $status); } elsif (defined($FALLBACKCOMMAND)) { &system_wrapper($FALLBACKCOMMAND . " " . $status); } } # Used during stop, start and reload to remove stale real servers from LVS sub purge_untracked_service { my ($v, $rservice, $tag) = (@_); my $log_arg = "Purged real server ($tag): $rservice (" . &get_virtual($v) . ")"; &system_wrapper("$IPVSADM -d $v->{proto} " . &get_virtual_option($v) . " -r $rservice"); &ld_log($log_arg); &ld_emailalert_send($log_arg, $v, $rservice, 0); } # Used during stop, start and reload to remove stale real servers from LVS sub purge_service { my ($v, $r, $tag) = (@_); purge_untracked_service($v, "$r->{server}:$r->{port}", $tag); _status_down($v, $r); } # Used during stop, start and reload to remove stale virtual services from LVS sub purge_virtual { my ($v, $tag) = (@_); &system_wrapper("$IPVSADM -D $v->{proto} " . &get_virtual_option($v)); &ld_log("Purged virtual server ($tag): " . &get_virtual($v)); } sub check_cfgfile { my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime) = stat($CONFIG); my ($status); return if ($stattime==$mtime); $stattime = $mtime; use Digest::MD5 qw(md5 md5_hex); my $ctx = Digest::MD5->new; unless (open(CFGFILE, "<$CONFIG")) { &config_warn(0, "can not open file $CONFIG for checking"); return 0; } $ctx->addfile(*CFGFILE); close(CFGFILE); my $digest = $ctx->hexdigest; if (defined $checksum && $checksum ne $digest) { &ld_log("Configuration file '$CONFIG' has changed on disk"); if ($AUTOCHECK eq "yes") { &ld_log(" - reread new configuration"); &reread_config(); } else { &ld_log(" - ignore new configuration\n"); } if (defined($CALLBACK) and -x $CALLBACK) { &system_wrapper("$CALLBACK $CONFIG"); } $status = 1; } $checksum = $digest; return $status; } # ld_openlog # Open logger # make log rotation work # pre: none # post: If logger is a file, it opened and closed again as a test # If logger is syslog, it is opened so it can be used without # needing to be opened again. # Otherwise, nothing is done. # return: 0 on success # 1 on error sub ld_openlog { if ($opt_d or $SUPERVISED eq "yes") { # Instantly do nothing return(0); } if( $LDIRLOG =~ /^\/(.*)/ ) { # Open and close the file as a test. # We open the file each time we want to log to it unless (open(LOGFILE, ">>$LDIRLOG") and close(LOGFILE)) { return 1; } } else { # Assume LDIRLOG is a logfacility, log to syslog setlogsock( "unix" ); openlog( "ldirectord", "pid", "$LDIRLOG" ); } return(0); } # ld_log # Log a message. # pre: message: Message to write # post: message and timetsamp is written to loged # If logger is a file, it is opened and closed again as a # primitive means to make log rotation work # return: 0 on success # 1 on error sub ld_log { my ($message) = (@_); my $now = localtime(); &ld_debug(2, $message); chomp $message; if ($opt_d) { print STDERR "$message\n"; } elsif ($SUPERVISED eq "yes") { print "[$now] $message\n"; } elsif ( $LDIRLOG =~ /^\/(.*)/ ) { unless (open(LOGFILE, ">>$LDIRLOG") and print LOGFILE "[$now|$CFGNAME|$$] $message\n" and close(LOGFILE)) { print STDERR "$message\n"; return 1; } } else { # Assume LDIRLOG is a logfacility, log to syslog syslog( "info", "$message" ); } return(0); } sub daemon_status_str { if ($DAEMON_STATUS == $DAEMON_STATUS_STARTING) { return "starting"; } elsif ($DAEMON_STATUS == $DAEMON_STATUS_RUNNING) { return "running"; } elsif ($DAEMON_STATUS == $DAEMON_STATUS_STOPPING) { return "stopping"; } elsif ($DAEMON_STATUS == $DAEMON_STATUS_RELOADING) { return "reloading"; } return "UNKNOWN"; } # ld_emailalert_send # Send email alerts per virtual server # pre: message: Message to email # post: message is emailed if emailalert defined for virtualserver # return: 0 on success # 1 on error sub ld_emailalert_send { my ($subject, $v, $rserver, $currenttime) = (@_); my $status = 0; my $to_addr; my $frequency; my $virtual_str; my $id; my $statusfilter; my $smtp_server; $frequency = defined $v->{emailalertfreq} ? $v->{emailalertfreq} : $EMAILALERTFREQ; $virtual_str = &get_virtual($v); $id = "$rserver ($virtual_str)"; if ($currenttime == 0 or $frequency == 0) { delete $EMAILSTATUS{"$id"}; } else { $EMAILSTATUS{$id}->{v} = $v; $EMAILSTATUS{$id}->{alerttime} = $currenttime; } $statusfilter = defined $v->{emailalertstatus} ? $v->{emailalertstatus} : $EMAILALERTSTATUS; if (($DAEMON_STATUS & $statusfilter) == 0) { return 0; } $to_addr = defined $v->{emailalert} ? $v->{emailalert} : $EMAILALERT; if ($to_addr eq "") { return 0; } $smtp_server = defined $v->{smtp} ? $v->{smtp} : $SMTP; &ld_log("emailalert: $subject"); if (defined $smtp_server) { $status = &ld_emailalert_net_smtp($smtp_server, $to_addr, $subject); } else { $status = &ld_emailalert_mail_send($to_addr, $subject); } return($status); } # ld_emailalert_net_smtp # Send email alerts via SMTP server # pre: smtp: SMTP server defined # post: message is emailed if SMTP server is valid and working # return: 0 on success # 1 on error sub ld_emailalert_net_smtp { my ($smtp_server, $to_addr, $subject) = (@_); my $status = 0; use Net::SMTP; use Sys::Hostname; my $hostname = hostname; my $smtp = Net::SMTP->new($smtp_server); if ($smtp) { $smtp->mail("$ENV{USER}\@$hostname"); $smtp->to($to_addr); $smtp->data(); if($EMAILALERTFROM) { $smtp->datasend("From: $EMAILALERTFROM\n"); } else { $smtp->datasend("From: $ENV{USER}\@$hostname\n"); } $smtp->datasend("To: $to_addr\n"); $smtp->datasend("Subject: $subject\n\n"); $smtp->datasend("ldirectord host: $hostname\n" . "Log-Message: $subject\n" . "Daemon-Status: " . &daemon_status_str() . "\n"); $smtp->dataend(); $smtp->quit; } else { &ld_log("failed to send SMTP email message\n"); $status = 1; } return($status); } # ld_emailalert_mail_send # Send email alerts via Mail::Send # pre: smtp: SMTP server not defined # post: message is emailed if one of the Mail::Send methods works # return: 0 on success # 1 on error sub ld_emailalert_mail_send { my ($to_addr, $subject) = (@_); my $emailmsg; my $emailfh; my $status = 0; use Mail::Send; $emailmsg = new Mail::Send Subject=>$subject, To=>$to_addr; $emailmsg->set('From', $EMAILALERTFROM) if ($EMAILALERTFROM); $emailfh = $emailmsg->open; print $emailfh "ldirectord host: " . hostname() . "\n" . "Log-Message: $subject\n" . "Daemon-Status: " . &daemon_status_str() . "\n"; unless ($emailfh->close) { &ld_log("failed to send email message\n"); $status = 1; } return($status); } # ld_emailalert_resend # Resend email alerts as necessary # pre: none # post: EMAILSTATUS array is updated and alerts are sent as necessary # return: none sub ld_emailalert_resend { my $currenttime = time(); my $es; my $id; my $rserver; my $frequency; foreach $id (keys %EMAILSTATUS) { $es = $EMAILSTATUS{$id}; $frequency = defined $es->{v}->{emailalertfreq} ? $es->{v}->{emailalertfreq} : $EMAILALERTFREQ; $id =~ m/(.*) /; $rserver = $1; if ($currenttime - $es->{alerttime} < $frequency) { next; } &ld_emailalert_send("Inaccessible real server: $id", $es->{v}, $rserver, $currenttime); } } # ld_debug # Log a message to a STDOUT. # pre: priority: priority of message # message: Message to write # post: message is written to STDOUT if $DEBUG >= priority # return: none sub ld_debug { my ($priority, $message) = (@_); if ( $DEBUG >= $priority ) { chomp $message; print STDERR "DEBUG${priority}: $message\n"; } } # system_wrapper # Wrapper around system() to log errors # # WARNING: Do not use alarm() together with this function. A internal # pipe will not be reclaimed (at least with Perl 5.8.8). This can # cause ldirectord to run out of file handles. # # pre: LIST: arguments to pass to system() # post: system() is called and if it returns non-zero a failure # message is logged # return: return value of system() sub system_wrapper { my (@args)=(@_); my $status; &ld_log("Running system(@args)") if $DEBUG>2; $status = system(@args); if($status != 0) { &ld_log("system(@args) failed: $!"); } return($status) } # system_timeout # Emulate system() with timeout via fork(), exec(), and waitpid() and # TERMinate the child on timeout. Set an alarm() for the timeout. # # This function does not suffer the deficiencies of system_wrapper() # of leaving pipes unreclaimed. Zombies are reaped by ld_handler_chld # and the related code. # # pre: timeout: timeout in seconds # LIST: arguments to pass to exec() # return: >= 0 exit status of the child process # 127 exec failed # -1 timeout # -2 fork failed sub system_timeout { my $timeout = shift; my (@args) = (@_); my $status; &ld_log("Running system_timeout($timeout, @args)") if $DEBUG>2; my $childpid = fork(); if (!defined($childpid)) { &ld_log("fork failed: $!"); return(-2); } elsif ($childpid) { # parent eval { local $SIG{'ALRM'} = sub { die "timeout\n"; }; alarm $timeout; waitpid($childpid, 0); $status = $? >> 8; # When die()-ing in the SIGALRM handler we # will never reach this point. Child/Zombie # is left behind. The grim reaper # (ld_handler_chld + ld_process_chld) will # take care of the zombie. }; alarm 0; if ($@) { # timeout if ($@ ne "timeout\n") { # log unexpected errors &ld_log("system_timeout($timeout, @args) " . "unexpected error: $@"); } else { &ld_log("system_timeout($timeout, @args) " . "timed out, kill -TERM child"); } # TERMinate child kill 15, $childpid; return(-1); } else { # did not timeout return($status); } } else { # child exec(@args) or &ld_exit(127, "exec(@args) failed: $!"); die "ld_exit() broken?, stopped"; } } # exec_wrapper # Wrapper around exec() to log errors # pre: LIST: arguments to pass to exec() # post: exec() is called and if it returns non-zero a failure # message is logged # return: return value of exec() on failure # does not return on success sub exec_wrapper { my (@args)=(@_); my $status; &ld_log("Running exec(@args)") if $DEBUG>2; $status = exec(@args) or &ld_log("exec(@args) failed"); return($status) } # ld_rm_file # Remove a file, symink, or anything that isn't a directory # and exists # pre: filename: file to delete # post: If filename does not exist or is a directory an # error state is reached # Else filename is delete # If $DEBUG >=2 errors are logged # return: 0 on success # -1 on error sub ld_rm_file { my ($filename)=(@_); my ($status); if(-d "$filename"){ &ld_debug(2, "ld_rm_file: $filename is a directory, skipping"); return(-1); } if(! -e "$filename"){ &ld_debug(2, "ld_rm_file: $filename doesn't exist, skipping"); return(-1); } $status = unlink($filename); if($status!=1){ &ld_debug(2, "ld_rm_file: Error deleting: $filename: $!"); } return(($status==1)?0:-1) } # is_octet # See if a number is an octet, that is >=0 and <=255 # pre: alleged_octet: the octet to test # post: alleged_octet is checked to see if it is valid # return: 1 if the alleged_octet is an octet # 0 otherwise sub is_octet { my ($alleged_octet)=(@_); if($alleged_octet<0){ return 0; } if($alleged_octet>255){ return 0; } return(1); } # is_ip # Check that a given string is an IP address # pre: alleged_ip: string representing ip address # post: alleged_ip is checked to see if it is valid # return: 1 if alleged_ip is a valid ip address # 0 otherwise sub is_ip { my ($alleged_ip)=(@_); if ($alleged_ip =~ /:/) { unless(inet_pton(AF_INET6,$alleged_ip)){ return 0; } return(1); } #If we don't have four, . delimited numbers then we have no hope unless($alleged_ip=~m/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) { return 0; } #Each octet mist be >=0 and <=255 unless(&is_octet($1)){ return 0; } unless(&is_octet($2)){ return 0; } unless(&is_octet($3)){ return 0; } unless(&is_octet($4)){ return 0; } return(1); } # ip_to_int # Turn an IP address given as a dotted quad into an integer # pre: ip_address: string representing IP address # post: post ip_address is converted to an integer # return: -1 if an error occurs # integer representation of IP address otherwise sub ip_to_int { my ($ip_address)=(@_); unless(&is_ip($ip_address)){ return(-1); } unless($ip_address=~m/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/){ return(-1); } return(((((($1<<8)+$2)<<8)+$3)<<8)+$4); } # int_to_ip # Turn an IP address given as a dotted quad into an integer # pre: ip_address: string representing IP address # post: Decimal is converted to a dotted quad # return: -1 if an error occurs # integer representation of IP address otherwise sub int_to_ip { my ($ip_address)=(@_); my $result = ""; return(sprintf( "%d.%d.%d.%d", ($ip_address>>24)&255, ($ip_address>>16)&255, ($ip_address>>8)&255, $ip_address&255 )); } # get_virtual # Get the service for a virtual # pre: nv: virtual to get the service for # post: none # return: fwmark of service if it is a fwm service # ip_address:port otherwise sub get_virtual { my ($nv) = (@_); if ($nv->{"protocol"} eq "fwm"){ return $nv->{"fwm"}; } else { return $nv->{"server"} . ":" . $nv->{"port"}; } } # get_virtual_option # Get the ipvsadm option corresponding to a virtual service # pre: nv: virtual to get the service for # post: none # return: fwmark of service if it is a fwm service # fwmark of service + "-6" if it is a fwm service and the address family is AF_INET6 # ip_address:port otherwise sub get_virtual_option { my ($nv) = (@_); my ($cmdline) = &get_virtual($nv); if ($nv->{"protocol"} eq "fwm" && $nv->{addressfamily} == AF_INET6) { $cmdline .= " -6"; } return $cmdline; } # get_real_id_str # Get an id string for a real server # pre: r: Real service. # protocol: protocol of the real service # tcp or udp # service: type of service # post: none # return: Id string for the real server sub get_real_id_str { my ($r, $v) = (@_); my $request = ""; my $receive = ""; my $checkport = ""; my $virtualhost = ""; my $check; my $real; if(defined($r->{"request"})) { $request = $r->{"request"}; } else { $request = $v->{"request"}; } if(defined($r->{"receive"})) { $receive = $r->{"receive"}; } else { $receive = $v->{"receive"}; } if($v->{"checktype"} eq "negotiate" or $v->{"checktype"} eq "combined") { $check = $v->{"checktype"} . ":" . $v->{"service"}; } elsif($v->{"checktype"} eq "external" or $v->{"checktype"} eq "external-perl") { $check = $v->{"checktype"} . ":" . $v->{"checkcommand"}; } else { $check = $v->{"checktype"}; } if(defined($v->{"checkport"})) { $checkport = $v->{"checkport"}; } if(defined($v->{"virtualhost"})) { $virtualhost = $v->{"virtualhost"}; } $real = $check . ":" . $v->{"protocol"} . ":" . $r->{"server"} . ":" . $r->{"port"} . ":" . $virtualhost . ":" . $checkport . ":" . $r->{"weight"} . ":" . $r->{"forward"} . ":" . quotemeta($request) . ":" . quotemeta($receive); } # get_virtual_id_str # Get an id string for a virtual service # pre: v: Virtual service # post: none # return: Id string for the virtual service sub get_virtual_id_str { my ($v) = (@_); if ($v->{"protocol"} eq "fwm") { - return $v->{"protocol"} . ($v->{addressfamily} == AF_INET6?"6":"") . ":" . &get_virtual($v); + return $v->{"protocol"} . (($v->{addressfamily} == AF_INET6)?"6":"") . ":" . &get_virtual($v); } else { return $v->{"protocol"} . ":" . &get_virtual($v); } } # get_forward_flag # Get the ipvsadm flag corresponding to a forwarding mechanism # pre: forward: Name of forwarding mechanism. u # Should be one of ipip, masq or gate # post: none # return: ipvsadm flag corresponding to the forwarding mechanism # " " if $forward is unknown sub get_forward_flag { my ($forward) = (@_); unless(defined($forward)) { return(" "); } if ($forward eq "masq") { return("-m"); } elsif ($forward eq "gate") { return("-g"); } elsif ($forward eq "ipip") { return("-i"); } return(" "); } # ld_exit # Exit and log a message # pre: exit_status: Integer exit status to exit with # 0 will be used if parameter is omitted # message: Message to log when exiting. May be omitted # post: If exit_status is non-zero or $DEBUG>2 then # message logged. # Programme exits with exit_status # return: does not return sub ld_exit { my ($exit_status, $message)=(@_); unless(defined($exit_status)) { $exit_status=0; } unless(defined($message)) { $message=""; } if ($exit_status!=0 or $DEBUG>2) { &ld_log("Exiting with exit_status $exit_status: $message"); } exit($exit_status); } # ld_open_socket # Open a socket connection # pre: remote: IP address as a dotted quad of remote host to connect to # port: port to connect to # protocol: Protocol to use. Should be either "tcp" or "udp" # post: A Socket connection is opened to the remote host # return: Open socket # undef on error sub ld_open_socket { my ($remote, $port, $protocol) = @_; my ($iaddr, $paddr, $pro, $result, $pf); local *SOCK; $remote = &ld_strip_brackets($remote); if (inet_pton(AF_INET6,$remote)) { $iaddr = inet_pton(AF_INET6,$remote); $paddr = pack_sockaddr_in6($port, $iaddr); $pf = PF_INET6; } else { $iaddr = inet_aton($remote) || die "no host: $remote"; $paddr = sockaddr_in($port, $iaddr); $pf = PF_INET; } $pro = getprotobyname($protocol); if ($protocol eq "udp") { socket(SOCK, $pf, SOCK_DGRAM, $pro) || die "socket: $!"; } else { socket(SOCK, $pf, SOCK_STREAM, $pro) || die "socket: $!"; } $result = connect(SOCK, $paddr); unless ($result) { return undef; } return *SOCK; } # daemon # Close and fork to become a daemon. # # Notes from unix programmer faq # http://www.landfield.com/faqs/unix-faq/programmer/faq/ # # Almost none of this is necessary (or advisable) if your daemon is being # started by `inetd'. In that case, stdin, stdout and stderr are all set up # for you to refer to the network connection, and the `fork()'s and session # manipulation should *not* be done (to avoid confusing `inetd'). Only the # `chdir()' step remains useful. # # Gratuitously over documented, because it can be # # Written by Horms, horms@verge.net.au for an unrelated project while # working for Zip World, http://www.zipworld.com.au/, 1997-1999. sub ld_daemon { # `fork()' so the parent can exit, this returns control to the command # line or shell invoking your program. This step is required so that # the new process is guaranteed not to be a process group leader. The # next step, `setsid()', fails if you're a process group leader. &ld_daemon_become_child(); # setsid()' to become a process group and session group leader. Since a # controlling terminal is associated with a session, and this new # session has not yet acquired a controlling terminal our process now # has no controlling terminal, which is a Good Thing for daemons. if(POSIX::setsid()<0){ &ld_exit(1, "ld_daemon: Could not setsid"); } # fork()' again so the parent, (the session group leader), can exit. # This means that we, as a non-session group leader, can never regain a # controlling terminal. &ld_daemon_become_child(); # `chdir("/")' to ensure that our process doesn't keep any directory in # use. Failure to do this could make it so that an administrator # couldn't unmount a filesystem, because it was our current directory. if(chdir("/")<0){ &ld_exit(1, "ld_daemon: Could not chdir"); } # `close()' fds 0, 1, and 2. This releases the standard in, out, and # error we inherited from our parent process. We have no way of knowing # where these fds might have been redirected to. Note that many daemons # use `sysconf()' to determine the limit `_SC_OPEN_MAX'. `_SC_OPEN_MAX' # tells you the maximum open files/process. Then in a loop, the daemon # can close all possible file descriptors. You have to decide if you # need to do this or not. If you think that there might be # file-descriptors open you should close them, since there's a limit on # number of concurrent file descriptors. close(STDIN); close(STDOUT); close(STDERR); # Establish new open descriptors for stdin, stdout and stderr. Even if # you don't plan to use them, it is still a good idea to have them open. # The precise handling of these is a matter of taste; if you have a # logfile, for example, you might wish to open it as stdout or stderr, # and open `/dev/null' as stdin; alternatively, you could open # `/dev/console' as stderr and/or stdout, and `/dev/null' as stdin, or # any other combination that makes sense for your particular daemon. # # This code used to open /dev/console for STDOUT and STDERR, # but that was changed to /dev/null to stop the code hanging in # the case where /dev/console is unavailable for some reason # http://www.osdl.org/developer_bugzilla/show_bug.cgi?id=1180 if(open(STDIN, ">/dev/null")<0){ &ld_exit(-1, "ld_daemon: Could not open /dev/null"); } if(open(STDERR, ">>/dev/null")<0){ &ld_exit(-1, "ld_daemon: Could not open /dev/null"); } } # ld_daemon_become_child # Fork, kill parent and return child process # pre: none # post: process forks and parent exits # All process exit with exit status -1 if an error occurs # return: parent: exits # child: none (this is the process that returns) # Written by Horms, horms@verge.net.au for an unrelated project while # working for Zip World, http://www.zipworld.com.au/, 1997-1999. sub ld_daemon_become_child { my($status); $status = fork(); if ($status<0){ &ld_exit(-1, "ld_daemon_become_child: Could not fork: $!"); } if ($status>0){ &ld_exit(0, "ld_daemon_become_child: Parent exiting as it should"); } } # ld_gethostbyname # Wrapper to gethostbyname. Look up the/an IP address of a hostname # If an IP address is given is it returned # pre: name: Hostname of IP address to lookup # af: Address Family: AF_INET etc.. # post: gethostbyname is called to find an IP address for $name # This is converted to a string # return: IP address # undef on error sub ld_gethostbyname { my ($name, $af)=(@_); if ($name =~ /\[(.*)\]/) { $name = $1; } my @host = getaddrinfo($name, 0, $af); if (!defined($host[3])) { return undef; } my @ret = getnameinfo($host[3], NI_NUMERICHOST | NI_NUMERICSERV); if ($host[0] == AF_INET6) { return "[$ret[0]]"; } else { return $ret[0]; } } # ld_gethostbyaddr # Wrapper to gethostbyaddr. Look up the hostname from an IP address. # If no reverse DNS record is found, return undef # pre: ip: IP address of host to lookup # post: gethostbyaddr is called to find a hostname for IP $ip # return: hostname # undef on error sub ld_gethostbyaddr { my ($ip)=(@_); $ip = &ld_strip_brackets($ip); my @host = getaddrinfo($ip,0); if (!defined($host[3])) { return undef; } my @ret = getnameinfo($host[3], NI_NAMEREQD); return undef unless(scalar(@ret) == 2); return $ret[0]; } # ld_getservbyname # Wrapper for getservbyname. Look up the port for a service name # If a port is given it is returned. # pre: name: Port or Service name to look up # post: if $name is a number # if 0<=$name<=65536 $name is returned # else undef is returned # else getservbyname is called to look up the port for the service # return: Port # undef on error sub ld_getservbyname { my ($name, $protocol)=(@_); if($name=~/^[0-9]+$/){ return(($name>=0 and $name<65536)?$name:undef); } my @serv=getservbyname($name, $protocol); return((@serv and defined($serv[2]))?$serv[2]:undef); } # ld_getservhostbyname # Wrapper for ld_gethostbyname and ld_getservbyname. Given a server of the # form ip_address|hostname[:port|servicename] return ip_address[:port] # pre: hostserv: Servver of the form ip_address|hostname[:port|servicename] # protocol: Protocol for service. Should be either "tcp" or "udp" # af: Address Family: AF_INET etc.. # post: lookups performed as per ld_getservbyname and ld_gethostbyname # return: ip_address[:port] # undef on error sub ld_gethostservbyname{ my ($hostserv, $protocol, $af) = (@_); my $ip; my $port; if ($hostserv =~ /(:(\d+|[A-Za-z0-9-_]+))?$/) { $port = $2; $ip = $hostserv; $ip =~ s/(:(\d+|[A-Za-z0-9-_]+))?$//; } else { $ip = $hostserv; } $ip=&ld_gethostbyname($ip, $af) or return(undef); if(defined($port)){ $port=&ld_getservbyname($port, $protocol); if (defined($port)) { return("$ip:$port"); } else { return(undef); } } return($ip); } # ld_find_cmd_path # Find executable in path # pre: cmd: command to find # path: ':' delimited paths to check # relative: if set, allow cmd to be a relative path, # which is checked first # return: path to command # undef if not found sub ld_find_cmd_path { my ($cmd, $path, $relative) = (@_); if (defined $relative and $relative and -f "$cmd" ) { return $cmd; } if ($cmd =~ /^\// and -x "$cmd" ) { return $cmd; } if ($cmd =~ /\//) { return undef; } for my $p (split /:/, $path) { if ( -x "$p/$cmd" ) { return "$p/$cmd"; } } return undef; } # ld_find_cmd_path # Find executable in $ENV{'PATH'} # pre: cmd: command to find # relative: if set, allow cmd to be a relative path, # which is checked first # return: path to command # undef if not found sub ld_find_cmd { return ld_find_cmd_path($_[0], $ENV{'PATH'}, $_[1]); } # ld_get_addrport # Get address string and port number from a given socket. # pre: socket # return: (address, port) # undef if cannot get sub ld_get_addrport { my($sock) = @_; my ($s_addr_str, $s_port, $s_addr, $len); my $s_sockaddr = getsockname($sock); $len = length($s_sockaddr); if ($len == 28) { # IPv6 ($s_port, $s_addr) = unpack_sockaddr_in6($s_sockaddr); $s_addr_str = inet_ntop(AF_INET6, $s_addr); $s_addr_str = "[$s_addr_str]"; } elsif ($len == 16) { # IPv4 ($s_port, $s_addr) = unpack_sockaddr_in($s_sockaddr); $s_addr_str = inet_ntop(AF_INET, $s_addr); } else { die "unexpected length of sockaddr\n"; } return ($s_addr_str, $s_port); } # ld_strip_brackets # Strip brackets in the string # pre: string # return: string sub ld_strip_brackets { my($str) = @_; $str =~ s/[\[\]]//g; return $str; } diff --git a/resource-agents.spec.in b/resource-agents.spec.in index b78d30868..15c6aeb58 100644 --- a/resource-agents.spec.in +++ b/resource-agents.spec.in @@ -1,305 +1,303 @@ # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed # upon. The license for this file, and modifications and additions to the # file, is the same license as for the pristine package itself (unless the # license for the pristine package is not an Open Source License, in which # case the license is the MIT License). An "Open Source License" is a # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. # %global rcver @rcver@ %global alphatag @alphatag@ %global numcomm @numcomm@ %global dirty @dirty@ # # Since this spec file supports multiple distributions, ensure we # use the correct group for each. # # SSLeay (required by ldirectord) %if 0%{?suse_version} %global SSLeay perl-Net_SSLeay %else %global SSLeay perl-Net-SSLeay %endif # determine the ras-set to process based on configure invokation %bcond_@rgmanager@ rgmanager %bcond_@linux-ha@ linuxha Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: @version@ Release: @specver@%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: GPLv2+ and LGPLv2+ URL: http://to.be.defined.com/ %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} Group: System Environment/Base %else Group: Productivity/Clustering/HA %endif Source0: %{name}-%{version}%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}.tar.bz2 Obsoletes: heartbeat-resources <= %{version} Provides: heartbeat-resources = %{version} ## Setup/build bits BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) # Build dependencies BuildRequires: automake autoconf pkgconfig BuildRequires: perl python-devel BuildRequires: libxslt glib2-devel BuildRequires: which %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} BuildRequires: cluster-glue-libs-devel BuildRequires: docbook-style-xsl docbook-dtds %if 0%{?rhel} == 0 BuildRequires: libnet-devel %endif %endif %if 0%{?suse_version} %if 0%{?suse_version} >= 1140 BuildRequires: libnet1 %else BuildRequires: libnet %endif BuildRequires: libglue-devel BuildRequires: libxslt docbook_4 docbook-xsl-stylesheets %endif ## Runtime deps ## These apply to rgmanager agents only to guarantee agents ## are functional %if %{with rgmanager} # system tools shared by several agents Requires: /bin/bash /bin/grep /bin/sed /bin/gawk Requires: /bin/ps /usr/bin/pkill /bin/hostname Requires: /sbin/fuser Requires: /sbin/findfs /bin/mount # fs.sh Requires: /sbin/quotaon /sbin/quotacheck Requires: /sbin/fsck Requires: /sbin/fsck.ext2 /sbin/fsck.ext3 /sbin/fsck.ext4 Requires: /sbin/fsck.xfs # ip.sh Requires: /sbin/ip /usr/sbin/ethtool Requires: /sbin/rdisc /usr/sbin/arping /bin/ping /bin/ping6 # lvm.sh Requires: /sbin/lvm # netfs.sh Requires: /sbin/mount.nfs /sbin/mount.nfs4 /sbin/mount.cifs Requires: /usr/sbin/rpc.nfsd /sbin/rpc.statd /usr/sbin/rpc.mountd %endif %description A set of scripts to interface with several services to operate in a High Availability environment for both Pacemaker and rgmanager service managers. %if %{with linuxha} %package -n ldirectord License: GPLv2+ Summary: A Monitoring Daemon for Maintaining High Availability Resources %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} Group: System Environment/Daemons %else Group: Productivity/Clustering/HA %endif Obsoletes: heartbeat-ldirectord <= %{version} Provides: heartbeat-ldirectord = %{version} Requires: %{SSLeay} perl-libwww-perl perl-MailTools Requires: ipvsadm logrotate %if 0%{?fedora_version} Requires: perl-Net-IMAP-Simple-SSL Requires(post): /sbin/chkconfig Requires(preun):/sbin/chkconfig %endif %description -n ldirectord The Linux Director Daemon (ldirectord) was written by Jacob Rief. ldirectord is a stand alone daemon for monitoring the services on real servers. Currently, HTTP, HTTPS, and FTP services are supported. lditrecord is simple to install and works with the heartbeat code (http://www.linux-ha.org/). See 'ldirectord -h' and linux-ha/doc/ldirectord for more information. %endif %prep %if 0%{?suse_version} == 0 && 0%{?fedora} == 0 && 0%{?centos_version} == 0 && 0%{?rhel} == 0 %{error:Unable to determine the distribution/version. This is generally caused by missing /etc/rpm/macros.dist. Please install the correct build packages or define the required macros manually.} exit 1 %endif %setup -q -n %{name}-%{version}%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}} %build if [ ! -f configure ]; then ./autogen.sh fi %if 0%{?fedora} >= 11 || 0%{?centos_version} > 5 || 0%{?rhel} > 5 CFLAGS="$(echo '%{optflags}')" %global conf_opt_fatal "--enable-fatal-warnings=no" %else CFLAGS="${CFLAGS} ${RPM_OPT_FLAGS}" %global conf_opt_fatal "--enable-fatal-warnings=yes" %endif %if %{with rgmanager} %global rasset rgmanager %endif %if %{with linuxha} %global rasset linux-ha %endif %if %{with rgmanager} && %{with linuxha} %global rasset all %endif export CFLAGS %configure \ %{?conf_opt_rsctmpdir:%conf_opt_rsctmpdir} \ %{conf_opt_fatal} \ --with-pkg-name=%{name} \ --with-ras-set=%{rasset} %if %{defined jobs} JFLAGS="$(echo '-j%{jobs}')" %else JFLAGS="$(echo '%{_smp_mflags}')" %endif make $JFLAGS %install rm -rf %{buildroot} make install DESTDIR=%{buildroot} ## tree fixup # remove docs (there is only one and they should come from doc sections in files) rm -rf %{buildroot}/usr/share/doc/resource-agents %if %{with linuxha} %if 0%{?suse_version} test -d %{buildroot}/sbin || mkdir %{buildroot}/sbin ( cd %{buildroot}/sbin ln -sf /%{_sysconfdir}/init.d/ldirectord rcldirectord ) || true %endif %endif %clean rm -rf %{buildroot} %files %defattr(-,root,root) %doc AUTHORS COPYING COPYING.GPLv3 ChangeLog %if %{with linuxha} %doc doc/README.webapps %doc %{_datadir}/%{name}/ra-api-1.dtd %endif %if %{with rgmanager} %{_datadir}/cluster %{_sbindir}/rhev-check.sh %endif %if %{with linuxha} %dir /usr/lib/ocf %dir /usr/lib/ocf/resource.d %dir /usr/lib/ocf/lib /usr/lib/ocf/lib/heartbeat /usr/lib/ocf/resource.d/heartbeat %if %{with rgmanager} /usr/lib/ocf/resource.d/redhat %endif %dir %{_datadir}/%{name} %dir %{_datadir}/%{name}/ocft %{_datadir}/%{name}/ocft/configs %{_datadir}/%{name}/ocft/caselib %{_datadir}/%{name}/ocft/README %{_datadir}/%{name}/ocft/README.zh_CN %{_sbindir}/ocf-tester %{_sbindir}/ocft %{_sbindir}/sfex_init %{_sbindir}/sfex_stat %{_includedir}/heartbeat %dir %attr (1755, root, root) %{_var}/run/resource-agents %{_mandir}/man7/*.7* %{_mandir}/man8/ocf-tester.8* %{_mandir}/man8/sfex_init.8* # For compatability with pre-existing agents %dir %{_sysconfdir}/ha.d %{_sysconfdir}/ha.d/shellfuncs %{_libdir}/heartbeat -%if %{with rgmanager} %post -n resource-agents +if [ $1 = 2 ]; then + if [ -d %{_var}/run/heartbeat/rsctmp ]; then + cp -fpr %{_var}/run/heartbeat/rsctmp/* %{_var}/run/resource-agents/ 1>/dev/null 2>&1 + rm -fr %{_var}/run/heartbeat/rsctmp + fi +fi +%if %{with rgmanager} ccs_update_schema > /dev/null 2>&1 ||: %endif %if 0%{?suse_version} %preun -n ldirectord %stop_on_removal ldirectord %postun -n ldirectord %insserv_cleanup %endif %if 0%{?fedora} %preun -n ldirectord /sbin/chkconfig --del ldirectord %postun -n ldirectord -p /sbin/ldconfig %post -n ldirectord /sbin/chkconfig --add ldirectord %endif %files -n ldirectord %defattr(-,root,root) %{_sbindir}/ldirectord %doc ldirectord/ldirectord.cf COPYING %{_mandir}/man8/ldirectord.8* %config(noreplace) %{_sysconfdir}/logrotate.d/ldirectord %dir %{_sysconfdir}/ha.d %dir %{_sysconfdir}/ha.d/resource.d %{_sysconfdir}/ha.d/resource.d/ldirectord %{_sysconfdir}/init.d/ldirectord %if 0%{?suse_version} /sbin/rcldirectord %endif %if 0%{?fedora} /usr/lib/ocf/resource.d/heartbeat/ldirectord %endif %endif -%post -if [ $1 = 2 ]; then -if [ -d %{_var}/run/heartbeat/rsctmp ]; then -cp -fpr %{_var}/run/heartbeat/rsctmp/* %{_var}/run/resource-agents/ -rm -fr %{_var}/run/heartbeat/rsctmp -fi -fi - %changelog * @date@ Autotools generated version - @version@-@specver@-@numcomm@.@alphatag@.@dirty@ - Autotools generated version diff --git a/rgmanager/src/resources/apache.sh b/rgmanager/src/resources/apache.sh index dd58d63cd..a42878054 100755 --- a/rgmanager/src/resources/apache.sh +++ b/rgmanager/src/resources/apache.sh @@ -1,286 +1,290 @@ #!/bin/bash # # Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. # Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # export LC_ALL=C export LANG=C export PATH=/bin:/sbin:/usr/bin:/usr/sbin . $(dirname $0)/ocf-shellfuncs . $(dirname $0)/utils/config-utils.sh . $(dirname $0)/utils/messages.sh . $(dirname $0)/utils/ra-skelet.sh if [ -x /usr/sbin/httpd ]; then declare APACHE_HTTPD=/usr/sbin/httpd elif [ -x /usr/sbin/apache2 ]; then declare APACHE_HTTPD=/usr/sbin/apache2 fi declare APACHE_serverConfigFile declare APACHE_pid_file="`generate_name_for_pid_file`" declare APACHE_conf_dir="`generate_name_for_conf_dir`" declare APACHE_genConfig="$APACHE_conf_dir/httpd.conf" declare APACHE_parseConfig=$(dirname $0)/utils/httpd-parse-config.pl apache_serverConfigFile() { if $(echo $OCF_RESKEY_config_file | grep -q "^/"); then APACHE_serverConfigFile="$OCF_RESKEY_config_file" else APACHE_serverConfigFile="$OCF_RESKEY_server_root/$OCF_RESKEY_config_file" fi return; } verify_all() { clog_service_verify $CLOG_INIT if [ -z "$OCF_RESKEY_name" ]; then clog_service_verify $CLOG_FAILED "Invalid Name Of Service" return $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_service_name" ]; then clog_service_verify $CLOG_FAILED_NOT_CHILD return $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_server_root" ]; then clog_service_verify $CLOG_FAILED "Invalid ServerRoot" return $OCF_ERR_ARGS fi if [ ! -d "$OCF_RESKEY_server_root" ]; then clog_service_verify $CLOG_FAILED "ServerRoot Directory Is Missing" return $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_config_file" ]; then clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_config_file" return $OCF_ERR_ARGS fi if [ ! -r "$APACHE_serverConfigFile" ]; then clog_check_file_exist $CLOG_FAILED_NOT_READABLE "$APACHE_serverConfigFile" return $OCF_ERR_ARGS fi if [ -z "$APACHE_pid_file" ]; then clog_service_verify $CLOG_FAILED "Invalid name of PID file" return $OCF_ERR_ARGS fi clog_check_syntax $CLOG_INIT "$APACHE_serverConfigFile" "$APACHE_HTTPD" -t \ -D"$OCF_RESKEY_name" \ -d "$OCF_RESKEY_server_root" \ -f "$APACHE_serverConfigFile" \ $OCF_RESKEY_httpd_options &> /dev/null if [ $? -ne 0 ]; then clog_check_syntax $CLOG_FAILED "$APACHE_serverConfigFile" return $OCF_ERR_GENERIC fi clog_check_syntax $CLOG_SUCCEED "$APACHE_serverConfigFile" return 0 } generate_configFile() { declare originalConfigFile=$1; declare generatedConfigFile=$2; declare ip_addresses=$3; if [ -f "$generatedConfigFile" ]; then sha1_verify "$generatedConfigFile" if [ $? -ne 0 ]; then clog_check_sha1 $CLOG_FAILED return 0 fi fi clog_generate_config $CLOG_INIT "$originalConfigFile" "$generatedConfigFile" generate_configTemplate "$generatedConfigFile" "$1" cat >> "$generatedConfigFile" << EOT # From a cluster perspective, the key fields are: # Listen - must be set to service floating IP address. # ServerRoot - path to the ServerRoot (initial value is set in service conf) # EOT IFS_old="$IFS" IFS=$'\n' for i in `"$APACHE_parseConfig" -D"$OCF_RESKEY_name" < "$originalConfigFile" | grep -P '(^Listen)|(^Port)' `; do port=`echo $i | sed 's/^Listen \(.*\)/\1/;s/^Port \(.*\)/\1/'`; testcond=`echo $port|grep :` if [ $testcond ]; then port=`echo $port|awk -F : '{print $2}'` fi IFS=$' '; - for z in $ip_addresses; do - echo "Listen $z:$port" >> "$generatedConfigFile"; + for z in $ip_addresses; do + if [ "${z//:/}" != "$z" ]; then + echo "Listen [$z]:$port" >> "$generatedConfigFile"; + else + echo "Listen $z:$port" >> "$generatedConfigFile"; + fi done IFS=$'\n'; done; IFS="$IFS_old" echo "PidFile \"$APACHE_pid_file\"" >> "$generatedConfigFile"; echo >> "$generatedConfigFile" cat "$originalConfigFile" | sed 's/^Listen/### Listen/;s/^Port/### Port/;s/^PidFile/### PidFile/' | \ "$APACHE_parseConfig" -D"$OCF_RESKEY_name" >> "$generatedConfigFile" sha1_addToFile "$generatedConfigFile" clog_generate_config $CLOG_SUCCEED "$originalConfigFile" "$generatedConfigFile" } start() { if status; then ocf_log info "Starting Service $OCF_RESOURCE_INSTANCE > Already running" return $OCF_SUCCESS fi declare ip_addresses clog_service_start $CLOG_INIT create_pid_directory create_conf_directory "$APACHE_conf_dir" check_pid_file "$APACHE_pid_file" if [ $? -ne 0 ]; then clog_check_pid $CLOG_FAILED "$APACHE_pid_file" clog_service_start $CLOG_FAILED return $OCF_ERR_GENERIC fi clog_looking_for $CLOG_INIT "IP Addresses" get_service_ip_keys "$OCF_RESKEY_service_name" ip_addresses=`build_ip_list` if [ -z "$ip_addresses" ]; then clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Addresses" return $OCF_ERR_GENERIC fi clog_looking_for $CLOG_SUCCEED "IP Addresses" generate_configFile "$APACHE_serverConfigFile" "$APACHE_genConfig" "$ip_addresses" "$APACHE_HTTPD" \ "-D$OCF_RESKEY_name" \ -d "$OCF_RESKEY_server_root" \ -f "$APACHE_genConfig" \ $OCF_RESKEY_httpd_options \ -k start if [ $? -ne 0 ]; then clog_service_start $CLOG_FAILED return $OCF_ERR_GENERIC else clog_service_start $CLOG_SUCCEED fi return 0; } stop() { clog_service_stop $CLOG_INIT stop_generic "$APACHE_pid_file" "$OCF_RESKEY_shutdown_wait" if [ $? -ne 0 ]; then clog_service_stop $CLOG_FAILED return $OCF_ERR_GENERIC fi clog_service_stop $CLOG_SUCCEED return 0; } status() { clog_service_status $CLOG_INIT status_check_pid "$APACHE_pid_file" case $? in $OCF_NOT_RUNNING) clog_service_status $CLOG_FAILED "$APACHE_pid_file" return $OCF_NOT_RUNNING ;; 0) clog_service_status $CLOG_SUCCEED exit 0 ;; *) clog_service_status $CLOG_FAILED "$APACHE_pid_file" return $OCF_ERR_GENERIC ;; esac } if [ "$1" != "meta-data" ]; then apache_serverConfigFile fi; case $1 in meta-data) cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` exit 0 ;; validate-all|verify-all) verify_all exit $? ;; start) verify_all && start exit $? ;; stop) verify_all && stop exit $? ;; status|monitor) verify_all status exit $? ;; restart) verify_all stop start exit $? ;; *) echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/rgmanager/src/resources/lvm.sh b/rgmanager/src/resources/lvm.sh index 111563fc7..f18321fd5 100644 --- a/rgmanager/src/resources/lvm.sh +++ b/rgmanager/src/resources/lvm.sh @@ -1,152 +1,175 @@ #!/bin/bash # # LVM Failover Script. # NOTE: Changes to /etc/lvm/lvm.conf are required for proper operation. # # Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. # Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # LC_ALL=C LANG=C PATH=/bin:/sbin:/usr/bin:/usr/sbin export LC_ALL LANG PATH . $(dirname $0)/ocf-shellfuncs . $(dirname $0)/utils/member_util.sh . $(dirname $0)/lvm_by_lv.sh . $(dirname $0)/lvm_by_vg.sh rv=0 ################################################################################ # ha_lvm_proper_setup_check # ################################################################################ function ha_lvm_proper_setup_check { + ## + # Does the Volume Group exist? + # 1) User may have forgotten to create it + # 2) User may have misspelled it in the config file + ## + if ! vgs $OCF_RESKEY_vg_name --config 'global{locking_type=0}'>& /dev/null; then + ocf_log err "HA LVM: Unable to get volume group attributes for $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + ## + # Are we using the "tagging" or "CLVM" variant? + # The CLVM variant will have the cluster attribute set + ## + if [[ $(vgs -o attr --noheadings --config 'global{locking_type=0}' $OCF_RESKEY_vg_name 2>/dev/null) =~ .....c ]]; then + # Is clvmd running? + if ! ps -C clvmd >& /dev/null; then + ocf_log err "HA LVM: $OCF_RESKEY_vg_name has the cluster attribute set, but 'clvmd' is not running" + return $OCF_ERR_GENERIC + fi + return $OCF_SUCCESS + fi + + ## + # The "tagging" variant is being used if we have gotten this far. + ## + ## # The default for lvm.conf:activation/volume_list is empty, # this must be changed for HA LVM. ## if ! lvm dumpconfig activation/volume_list >& /dev/null; then ocf_log err "HA LVM: Improper setup detected" ocf_log err "* \"volume_list\" not specified in lvm.conf." return $OCF_ERR_GENERIC fi ## # Machine's cluster node name must be present as # a tag in lvm.conf:activation/volume_list ## if ! lvm dumpconfig activation/volume_list | grep $(local_node_name); then ocf_log err "HA LVM: Improper setup detected" ocf_log err "* @$(local_node_name) missing from \"volume_list\" in lvm.conf" return $OCF_ERR_GENERIC fi ## # The volume group to be failed over must NOT be in # lvm.conf:activation/volume_list; otherwise, machines # will be able to activate the VG regardless of the tags ## if lvm dumpconfig activation/volume_list | grep "\"$OCF_RESKEY_vg_name\""; then ocf_log err "HA LVM: Improper setup detected" ocf_log err "* $OCF_RESKEY_vg_name found in \"volume_list\" in lvm.conf" return $OCF_ERR_GENERIC fi ## # Next, we need to ensure that their initrd has been updated # If not, the machine could boot and activate the VG outside # the control of rgmanager ## # Fixme: we might be able to perform a better check... if [ "$(find /boot -name *.img -newer /etc/lvm/lvm.conf)" == "" ]; then ocf_log err "HA LVM: Improper setup detected" ocf_log err "* initrd image needs to be newer than lvm.conf" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } ################################################################################ # MAIN ################################################################################ case $1 in start) - if ! [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then - ha_lvm_proper_setup_check || exit 1 - fi + ha_lvm_proper_setup_check || exit 1 if [ -z $OCF_RESKEY_lv_name ]; then vg_start || exit 1 else lv_start || exit 1 fi ;; status|monitor) ocf_log notice "Getting status" if [ -z $OCF_RESKEY_lv_name ]; then vg_status || exit 1 else lv_status || exit 1 fi ;; stop) - if ! [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then - if ! ha_lvm_proper_setup_check; then - ocf_log err "WARNING: An improper setup can cause data corruption!" - fi + if ! ha_lvm_proper_setup_check; then + ocf_log err "WARNING: An improper setup can cause data corruption!" fi if [ -z $OCF_RESKEY_lv_name ]; then vg_stop || exit 1 else lv_stop || exit 1 fi ;; recover|restart) $0 stop || exit $OCF_ERR_GENERIC $0 start || exit $OCF_ERR_GENERIC ;; meta-data) cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` ;; validate-all|verify-all) if [ -z $OCF_RESKEY_lv_name ]; then vg_verify || exit 1 else lv_verify || exit 1 fi ;; *) echo "usage: $0 {start|status|monitor|stop|restart|meta-data|validate-all}" exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $rv diff --git a/rgmanager/src/resources/lvm_by_lv.sh b/rgmanager/src/resources/lvm_by_lv.sh index 49a954279..2cabd13c2 100644 --- a/rgmanager/src/resources/lvm_by_lv.sh +++ b/rgmanager/src/resources/lvm_by_lv.sh @@ -1,429 +1,441 @@ #!/bin/bash # # Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. # Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # lv_verify # # Verify the parameters passed in # lv_verify() { # Anything to verify? Perhaps the names? return $OCF_SUCCESS } # lv_exec_resilient # # Sometimes, devices can come back. Their metadata will conflict # with the good devices that remain. This function filters out those # failed devices when executing the given command # # Finishing with vgscan resets the cache/filter lv_exec_resilient() { declare command=$1 declare all_pvs ocf_log notice "Making resilient : $command" if [ -z $command ]; then ocf_log err "lv_exec_resilient: Arguments not supplied" return $OCF_ERR_ARGS fi # pvs will print out only those devices that are valid # If a device dies and comes back, it will not appear # in pvs output (but you will get a Warning). all_pvs=(`pvs --noheadings -o pv_name | grep -v Warning`) # Now we use those valid devices in a filter which we set up. # The device will then be activated because there are no # metadata conflicts. command=$command" --config devices{filter=[" for i in ${all_pvs[*]}; do command=$command'"a|'$i'|",' done command=$command"\"r|.*|\"]}" ocf_log notice "Resilient command: $command" if ! $command ; then ocf_log err "lv_exec_resilient failed" vgscan return $OCF_ERR_GENERIC else vgscan return $OCF_SUCCESS fi } # lv_activate_resilient # # Sometimes, devices can come back. Their metadata will conflict # with the good devices that remain. We must filter out those # failed devices when trying to reactivate lv_activate_resilient() { declare action=$1 declare lv_path=$2 declare op="-ay" if [ -z $action ] || [ -z $lv_path ]; then ocf_log err "lv_activate_resilient: Arguments not supplied" return $OCF_ERR_ARGS fi if [ $action != "start" ]; then op="-an" fi if ! lv_exec_resilient "lvchange $op $lv_path" ; then ocf_log err "lv_activate_resilient $action failed on $lv_path" return $OCF_ERR_GENERIC else return $OCF_SUCCESS fi } lv_status_clustered() { # # Check if device is active # if [[ ! $(lvs -o attr --noheadings $lv_path) =~ ....a. ]]; then return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # lv_status # # Is the LV active? lv_status_single() { declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" declare dev="/dev/$lv_path" declare realdev declare owner declare my_name # # Check if device is active # if [[ ! $(lvs -o attr --noheadings $lv_path) =~ ....a. ]]; then return $OCF_ERR_GENERIC fi if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..." return $OCF_SUCCESS fi # # Check if all links/device nodes are present # if [ -h "$dev" ]; then realdev=$(readlink -f $dev) if [ $? -ne 0 ]; then ocf_log err "Failed to follow link, $dev" return $OCF_ERR_ARGS fi if [ ! -b $realdev ]; then ocf_log err "Device node for $lv_path is not present" return $OCF_ERR_GENERIC fi else ocf_log err "Symbolic link for $lv_path is not present" return $OCF_ERR_GENERIC fi # # Verify that we are the correct owner # owner=`lvs -o tags --noheadings $lv_path` my_name=$(local_node_name) if [ -z $my_name ]; then ocf_log err "Unable to determine local machine name" # FIXME: I don't really want to fail on 1st offense return $OCF_SUCCESS fi if [ -z $owner ] || [ $my_name != $owner ]; then ocf_log err "WARNING: $lv_path should not be active" ocf_log err "WARNING: $my_name does not own $lv_path" ocf_log err "WARNING: Attempting shutdown of $lv_path" lv_activate_resilient "stop" $lv_path return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } function lv_status { # We pass in the VG name to see of the logical volume is clustered if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then lv_status_clustered else lv_status_single fi } # lv_activate_and_tag lv_activate_and_tag() { declare action=$1 declare tag=$2 declare lv_path=$3 typeset self_fence="" case ${OCF_RESKEY_self_fence} in "yes") self_fence=1 ;; 1) self_fence=1 ;; *) self_fence="" ;; esac if [ -z $action ] || [ -z $tag ] || [ -z $lv_path ]; then ocf_log err "Supplied args: 1) $action, 2) $tag, 3) $lv_path" return $OCF_ERR_ARGS fi if [ $action == "start" ]; then ocf_log notice "Activating $lv_path" lvchange --addtag $tag $lv_path if [ $? -ne 0 ]; then ocf_log err "Unable to add tag to $lv_path" return $OCF_ERR_GENERIC fi if ! lv_activate_resilient $action $lv_path; then ocf_log err "Unable to activate $lv_path" return $OCF_ERR_GENERIC fi else ocf_log notice "Deactivating $lv_path" if ! lv_activate_resilient $action $lv_path; then if [ "$self_fence" ]; then ocf_log err "Unable to deactivate $lv_path: REBOOTING" sync reboot -fn else ocf_log err "Unable to deactivate $lv_path" fi return $OCF_ERR_GENERIC fi ocf_log notice "Removing ownership tag ($tag) from $lv_path" lvchange --deltag $tag $lv_path if [ $? -ne 0 ]; then ocf_log err "Unable to delete tag from $lv_path" - return $OCF_ERR_GENERIC + + # Newer versions of LVM require the missing PVs to + # be removed from the VG via a separate call before + # the tag can be removed. + ocf_log err "Attempting volume group clean-up and retry" + vgreduce --removemissing --force $OCF_RESKEY_vg_name + + # Retry tag deletion + lvchange --deltag $tag $lv_path + if [ $? -ne 0 ]; then + ocf_log err "Failed to delete tag from $lv_path" + return $OCF_ERR_GENERIC + fi fi if [ `lvs --noheadings -o lv_tags $lv_path` == $tag ]; then ocf_log notice "Removing ownership tag ($tag) from $lv_path" lvchange --deltag $tag $lv_path if [ $? -ne 0 ]; then ocf_log err "Unable to delete tag from $lv_path" return $OCF_ERR_GENERIC fi fi fi return $OCF_SUCCESS } # lv_activate # $1: start/stop only # # Basically, if we want to [de]activate an LVM volume, # we must own it. That means that our tag must be on it. # This requires a change to /etc/lvm/lvm.conf: # volume_list = [ "root_volume", "@my_hostname" ] # where "root_volume" is your root volume group and # "my_hostname" is $(local_node_name) # # If there is a node failure, we may wish to "steal" the # LV. For that, we need to check if the node that owns # it is still part of the cluster. We use the tag to # determine who owns the volume then query for their # liveness. If they are dead, we can steal. lv_activate() { declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" declare owner=`lvs -o tags --noheadings $lv_path` declare my_name=$(local_node_name) if [ -z $my_name ]; then ocf_log err "Unable to determine cluster node name" return $OCF_ERR_GENERIC fi # # FIXME: This code block is repeated below... might be # nice to put it in a function # if [ ! -z $owner ] && [ $owner != $my_name ]; then if is_node_member_clustat $owner ; then ocf_log err "$owner owns $lv_path unable to $1" return $OCF_ERR_GENERIC fi ocf_log notice "Owner of $lv_path is not in the cluster" ocf_log notice "Stealing $lv_path" lvchange --deltag $owner $lv_path if [ $? -ne 0 ]; then ocf_log err "Failed to steal $lv_path from $owner" return $OCF_ERR_GENERIC fi # Warning --deltag doesn't always result in failure if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then ocf_log err "Failed to steal $lv_path from $owner." return $OCF_ERR_GENERIC fi fi if ! lv_activate_and_tag $1 $my_name $lv_path; then ocf_log err "Failed to $1 $lv_path" ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name" if vgreduce --removemissing --force --config \ "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \ $OCF_RESKEY_vg_name; then ocf_log notice "$OCF_RESKEY_vg_name now consistent" owner=`lvs -o tags --noheadings $lv_path` if [ ! -z $owner ] && [ $owner != $my_name ]; then if is_node_member_clustat $owner ; then ocf_log err "$owner owns $lv_path unable to $1" return $OCF_ERR_GENERIC fi ocf_log notice "Owner of $lv_path is not in the cluster" ocf_log notice "Stealing $lv_path" lvchange --deltag $owner $lv_path if [ $? -ne 0 ]; then ocf_log err "Failed to steal $lv_path from $owner" return $OCF_ERR_GENERIC fi # Warning --deltag doesn't always result in failure if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then ocf_log err "Failed to steal $lv_path from $owner." return $OCF_ERR_GENERIC fi fi if ! lv_activate_and_tag $1 $my_name $lv_path; then ocf_log err "Failed second attempt to $1 $lv_path" return $OCF_ERR_GENERIC else ocf_log notice "Second attempt to $1 $lv_path successful" return $OCF_SUCCESS fi else ocf_log err "Failed to $1 $lv_path" return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS } function lv_start_clustered { if ! lvchange -aey $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then ocf_log err "Failed to activate logical volume, $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" if ! lvconvert --repair --use-policies $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then ocf_log err "Failed to cleanup $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" return $OCF_ERR_GENERIC fi if ! lvchange -aey $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" return $OCF_ERR_GENERIC fi ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name successful" return $OCF_SUCCESS fi return $OCF_SUCCESS } function lv_start_single { if ! lvs $OCF_RESKEY_vg_name >& /dev/null; then lv_count=0 else lv_count=`lvs --noheadings -o name $OCF_RESKEY_vg_name | grep -v _mlog | grep -v _mimage | grep -v nconsistent | wc -l` fi if [ $lv_count -gt 1 ]; then ocf_log err "HA LVM requires Only one logical volume per volume group." ocf_log err "There are currently $lv_count logical volumes in $OCF_RESKEY_vg_name" ocf_log err "Failing HA LVM start of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" exit $OCF_ERR_GENERIC fi if ! lv_activate start; then return 1 fi return 0 } function lv_start { # We pass in the VG name to see of the logical volume is clustered if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then lv_start_clustered else lv_start_single fi } function lv_stop_clustered { lvchange -aln $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name } function lv_stop_single { if ! lv_activate stop; then return 1 fi return 0 } function lv_stop { # We pass in the VG name to see of the logical volume is clustered if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then lv_stop_clustered else lv_stop_single fi } diff --git a/rgmanager/src/resources/lvm_by_vg.sh b/rgmanager/src/resources/lvm_by_vg.sh index 72433a3d3..317b1f381 100644 --- a/rgmanager/src/resources/lvm_by_vg.sh +++ b/rgmanager/src/resources/lvm_by_vg.sh @@ -1,419 +1,438 @@ #!/bin/bash # # Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. # Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # vg_owner # # Returns: # 1 == We are the owner # 2 == We can claim it # 0 == Owned by someone else function vg_owner { local owner=`vgs -o tags --noheadings $OCF_RESKEY_vg_name` local my_name=$(local_node_name) if [ -z $my_name ]; then ocf_log err "Unable to determine cluster node name" return 0 fi if [ -z $owner ]; then # No-one owns this VG yet, so we can claim it return 2 fi if [ $owner != $my_name ]; then if is_node_member_clustat $owner ; then return 0 fi return 2 fi return 1 } -function strip_tags +function _strip_tags { local i for i in `vgs --noheadings -o tags $OCF_RESKEY_vg_name | sed s/","/" "/g`; do ocf_log info "Stripping tag, $i" vgchange --deltag $i $OCF_RESKEY_vg_name done if [ ! -z `vgs -o tags --noheadings $OCF_RESKEY_vg_name` ]; then ocf_log err "Failed to remove ownership tags from $OCF_RESKEY_vg_name" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } +function strip_tags +{ + if ! _strip_tags; then + ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name" + + if ! vgreduce --removemissing --force --config \ + "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \ + $OCF_RESKEY_vg_name; then + + ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent" + return $OCF_ERR_GENERIC + fi + + ocf_log notice "Cleanup of $OCF_RESKEY_vg_name successful" + fi + if ! _strip_tags; then + ocf_log err "Failed 2nd attempt to remove tags from, $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + function strip_and_add_tag { if ! strip_tags; then ocf_log err "Failed to remove tags from volume group, $OCF_RESKEY_vg_name" return $OCF_ERR_GENERIC fi vgchange --addtag $(local_node_name) $OCF_RESKEY_vg_name if [ $? -ne 0 ]; then ocf_log err "Failed to add ownership tag to $OCF_RESKEY_vg_name" return $OCF_ERR_GENERIC fi ocf_log info "New tag \"$(local_node_name)\" added to $OCF_RESKEY_vg_name" return $OCF_SUCCESS } function vg_status_clustered { return $OCF_SUCCESS } # vg_status # # Are all the LVs active? function vg_status_single { local i local dev local my_name=$(local_node_name) # # Check that all LVs are active # for i in `lvs $OCF_RESKEY_vg_name --noheadings -o attr`; do if [[ ! $i =~ ....a. ]]; then return $OCF_ERR_GENERIC fi done # # Check if all links/device nodes are present # for i in `lvs $OCF_RESKEY_vg_name --noheadings -o name`; do dev="/dev/$OCF_RESKEY_vg_name/$i" if [ -h $dev ]; then realdev=$(readlink -f $dev) if [ $? -ne 0 ]; then ocf_log err "Failed to follow link, $dev" return $OCF_ERR_GENERIC fi if [ ! -b $realdev ]; then ocf_log err "Device node for $dev is not present" return $OCF_ERR_GENERIC fi else ocf_log err "Symbolic link for $lv_path is not present" return $OCF_ERR_GENERIC fi done # # Verify that we are the correct owner # vg_owner if [ $? -ne 1 ]; then ocf_log err "WARNING: $OCF_RESKEY_vg_name should not be active" ocf_log err "WARNING: $my_name does not own $OCF_RESKEY_vg_name" ocf_log err "WARNING: Attempting shutdown of $OCF_RESKEY_vg_name" # FIXME: may need more force to shut this down vgchange -an $OCF_RESKEY_vg_name return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } ## # Main status function for volume groups ## function vg_status { if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then vg_status_clustered else vg_status_single fi } function vg_verify { # Anything to verify? return $OCF_SUCCESS } function vg_start_clustered { local a local results local all_pvs local resilience ocf_log info "Starting volume group, $OCF_RESKEY_vg_name" if ! vgchange -aey $OCF_RESKEY_vg_name; then ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name" ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name" if ! vgreduce --removemissing --force $OCF_RESKEY_vg_name; then ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent" return $OCF_ERR_GENERIC fi if ! vgchange -aey $OCF_RESKEY_vg_name; then ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name" return $OCF_ERR_GENERIC fi ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name successful" return $OCF_SUCCESS else # The activation commands succeeded, but did they do anything? # Make sure all the logical volumes are active results=(`lvs -o name,attr --noheadings 2> /dev/null $OCF_RESKEY_vg_name`) a=0 while [ ! -z ${results[$a]} ]; do if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then all_pvs=(`pvs --noheadings -o name 2> /dev/null`) resilience=" --config devices{filter=[" for i in ${all_pvs[*]}; do resilience=$resilience'"a|'$i'|",' done resilience=$resilience"\"r|.*|\"]}" vgchange -aey $OCF_RESKEY_vg_name $resilience break fi a=$(($a + 2)) done # We need to check the LVs again if we made the command resilient if [ ! -z $resilience ]; then results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name $resilience 2> /dev/null`) a=0 while [ ! -z ${results[$a]} ]; do if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then ocf_log err "Failed to activate $OCF_RESKEY_vg_name" return $OCF_ERR_GENERIC fi a=$(($a + 2)) done ocf_log err "Orphan storage device in $OCF_RESKEY_vg_name slowing operations" fi fi return $OCF_SUCCESS } function vg_start_single { local a local results local all_pvs local resilience ocf_log info "Starting volume group, $OCF_RESKEY_vg_name" vg_owner case $? in 0) ocf_log info "Someone else owns this volume group" return $OCF_ERR_GENERIC ;; 1) ocf_log info "I own this volume group" ;; 2) ocf_log info "I can claim this volume group" ;; esac if ! strip_and_add_tag || ! vgchange -ay $OCF_RESKEY_vg_name; then ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name" ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name" if ! vgreduce --removemissing --force --config \ "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \ $OCF_RESKEY_vg_name; then ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent" return $OCF_ERR_GENERIC fi - vg_owner - if [ $? -eq 0 ]; then - ocf_log err "Unable to claim ownership of $OCF_RESKEY_vg_name" - return $OCF_ERR_GENERIC - fi + ocf_log notice "Cleanup of $OCF_RESKEY_vg_name successful" if ! strip_and_add_tag || ! vgchange -ay $OCF_RESKEY_vg_name; then ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name" return $OCF_ERR_GENERIC fi ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name successful" return $OCF_SUCCESS else # The activation commands succeeded, but did they do anything? # Make sure all the logical volumes are active results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`) a=0 while [ ! -z ${results[$a]} ]; do if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then all_pvs=(`pvs --noheadings -o name 2> /dev/null`) resilience=" --config devices{filter=[" for i in ${all_pvs[*]}; do resilience=$resilience'"a|'$i'|",' done resilience=$resilience"\"r|.*|\"]}" vgchange -ay $OCF_RESKEY_vg_name $resilience break fi a=$(($a + 2)) done # We need to check the LVs again if we made the command resilient if [ ! -z $resilience ]; then results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name $resilience 2> /dev/null`) a=0 while [ ! -z ${results[$a]} ]; do if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then ocf_log err "Failed to activate $OCF_RESKEY_vg_name" return $OCF_ERR_GENERIC fi a=$(($a + 2)) done ocf_log err "Orphan storage device in $OCF_RESKEY_vg_name slowing operations" fi fi return $OCF_SUCCESS } ## # Main start function for volume groups ## function vg_start { if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then vg_start_clustered else vg_start_single fi } function vg_stop_clustered { local a local results typeset self_fence="" case ${OCF_RESKEY_self_fence} in "yes") self_fence=1 ;; 1) self_fence=1 ;; *) self_fence="" ;; esac # Shut down the volume group # Do we need to make this resilient? vgchange -aln $OCF_RESKEY_vg_name # Make sure all the logical volumes are inactive results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`) a=0 while [ ! -z ${results[$a]} ]; do if [[ ${results[$(($a + 1))]} =~ ....a. ]]; then if [ "$self_fence" ]; then ocf_log err "Unable to deactivate $lv_path REBOOT" sync reboot -fn else ocf_log err "Logical volume $OCF_RESKEY_vg_name/${results[$a]} failed to shutdown" fi return $OCF_ERR_GENERIC fi a=$(($a + 2)) done return $OCF_SUCCESS } function vg_stop_single { local a local results typeset self_fence="" case ${OCF_RESKEY_self_fence} in "yes") self_fence=1 ;; 1) self_fence=1 ;; *) self_fence="" ;; esac # Shut down the volume group # Do we need to make this resilient? vgchange -an $OCF_RESKEY_vg_name # Make sure all the logical volumes are inactive results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`) a=0 while [ ! -z ${results[$a]} ]; do if [[ ${results[$(($a + 1))]} =~ ....a. ]]; then if [ "$self_fence" ]; then ocf_log err "Unable to deactivate $lv_path REBOOT" sync reboot -fn else ocf_log err "Logical volume $OCF_RESKEY_vg_name/${results[$a]} failed to shutdown" fi return $OCF_ERR_GENERIC fi a=$(($a + 2)) done # Make sure we are the owner before we strip the tags vg_owner if [ $? -ne 0 ]; then strip_tags fi return $OCF_SUCCESS } ## # Main stop function for volume groups ## function vg_stop { if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then vg_stop_clustered else vg_stop_single fi } diff --git a/rgmanager/src/resources/utils/Makefile.am b/rgmanager/src/resources/utils/Makefile.am index 1cd320de9..48b0b6acf 100644 --- a/rgmanager/src/resources/utils/Makefile.am +++ b/rgmanager/src/resources/utils/Makefile.am @@ -1,50 +1,37 @@ # # Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in -TARGET = config-utils.sh - commonscripts = fs-lib.sh \ httpd-parse-config.pl \ member_util.sh \ messages.sh \ named-parse-config.pl \ ra-skelet.sh \ - tomcat-parse-config.pl + tomcat-parse-config.pl \ + config-utils.sh EXTRA_DIST = $(commonscripts) \ - $(TARGET).in \ rhev-check.sh sbin_SCRIPTS = rhev-check.sh rasutilsdir = ${CLUSTERDATA}/utils -rasutils_SCRIPTS = $(commonscripts) \ - $(TARGET) - - -$(TARGET): $(TARGET).in - cat $^ | sed \ - -e 's#@''CONFDIR@#${CONFDIR}#g' \ - -e 's#@''CONFFILE@#${CONFFILE}#g' \ - > $@ - -clean-local: - rm -f $(TARGET) +rasutils_SCRIPTS = $(commonscripts) diff --git a/rgmanager/src/resources/utils/config-utils.sh.in b/rgmanager/src/resources/utils/config-utils.sh similarity index 99% rename from rgmanager/src/resources/utils/config-utils.sh.in rename to rgmanager/src/resources/utils/config-utils.sh index 299537a64..55e60a645 100644 --- a/rgmanager/src/resources/utils/config-utils.sh.in +++ b/rgmanager/src/resources/utils/config-utils.sh @@ -1,283 +1,283 @@ #!/bin/bash # # Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. # Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # declare RA_COMMON_pid_dir=/var/run/cluster -declare RA_COMMON_conf_dir=@CONFDIR@ +declare RA_COMMON_conf_dir=/etc/cluster declare -i FAIL=255 declare -a ip_keys generate_configTemplate() { cat > "$1" << EOT # # "$1" was created from the "$2" # # This template configuration was automatically generated, and will be # automatically regenerated if removed. Once this file has been altered, # automatic re-generation will stop. Remember to copy this file to all # other cluster members after making changes, or your service will not # operate correctly. # EOT } generate_configTemplateXML() { cat > "$1" << EOT EOT } sha1_addToFile() { declare sha1line="# rgmanager-sha1 $(sha1sum "$1")" echo $sha1line >> "$1" } sha1_addToFileXML() { declare sha1line="" echo $sha1line >> "$1" } sha1_verify() { declare sha1_new sha1_old declare oldFile=$1 ocf_log debug "Checking: SHA1 checksum of config file $oldFile" sha1_new=`cat "$oldFile" | grep -v "# rgmanager-sha1" | sha1sum | sed 's/^\([a-z0-9]\+\) .*$/\1/'` sha1_old=`tail -n 1 "$oldFile" | sed 's/^\( EOT } build_virsh_cmdline() { declare cmdline="" declare operation=$1 if [ -n "$OCF_RESKEY_hypervisor_uri" ]; then cmdline="$cmdline -c $OCF_RESKEY_hypervisor_uri" fi cmdline="$cmdline $operation $OCF_RESKEY_name" echo $cmdline } # this is only used on startup build_xm_cmdline() { declare operation=$1 # # Virtual domains should never restart themselves when # controlled externally; the external monitoring app # should. # declare cmdline="on_shutdown=\"destroy\" on_reboot=\"destroy\" on_crash=\"destroy\"" if [ -n "$OCF_RESKEY_path" ]; then operation="$operation --path=\"$OCF_RESKEY_path\"" fi if [ -n "$OCF_RESKEY_name" ]; then cmdline="$operation $OCF_RESKEY_name $cmdline" fi echo $cmdline } do_xm_start() { # Use /dev/null for the configuration file, if xmdefconfig # doesn't exist... # declare cmdline echo -n "Virtual machine $OCF_RESKEY_name is " do_status && return 0 cmdline="`build_xm_cmdline create`" ocf_log debug "xm $cmdline" eval xm $cmdline return $? } get_timeout() { declare -i default_timeout=60 declare -i tout=60 if [ -n "$OCF_RESKEY_RGMANAGER_meta_timeout" ]; then tout=$OCF_RESKEY_RGMANAGER_meta_timeout elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then tout=$OCF_RESKEY_CRM_meta_timeout fi if [ $tout -eq 0 ]; then echo $default_timeout return 0 fi if [ $tout -lt 0 ]; then echo $default_timeout return 0 fi echo $tout return 0 } # # Start a virtual machine given the parameters from # the environment. # do_virsh_start() { declare cmdline declare snapshotimage echo -n "Virtual machine $OCF_RESKEY_name is " do_status && return 0 snapshotimage="$OCF_RESKEY_snapshot/$OCF_RESKEY_name" if [ -n "$OCF_RESKEY_snapshot" -a -f "$snapshotimage" ]; then eval virsh restore $snapshotimage if [ $? -eq 0 ]; then rm -f $snapshotimage return 0 fi return 1 fi if [ -n "$OCF_RESKEY_xmlfile" -a -f "$OCF_RESKEY_xmlfile" ]; then # TODO: try to use build_virsh_cmdline for the hypervisor_uri cmdline="virsh create $OCF_RESKEY_xmlfile" else cmdline="virsh $(build_virsh_cmdline start)" fi ocf_log debug "$cmdline" $cmdline return $? } do_xm_stop() { declare -i timeout=60 declare -i ret=1 declare st for op in $*; do echo "CMD: xm $op $OCF_RESKEY_name" xm $op $OCF_RESKEY_name timeout=60 while [ $timeout -gt 0 ]; do sleep 5 ((timeout -= 5)) do_status&>/dev/null || return 0 while read dom state; do # # State is "stopped". Kill it. # if [ "$dom" != "$OCF_RESKEY_name" ]; then continue fi if [ "$state" != "---s-" ]; then continue fi xm destroy $OCF_RESKEY_name done < <(xm list | awk '{print $1, $5}') done done return 1 } # # Stop a VM. Try to shut it down. Wait a bit, and if it # doesn't shut down, destroy it. # do_virsh_stop() { declare -i timeout=$(get_timeout) declare -i ret=1 declare state state=$(do_status) [ $? -eq 0 ] || return 0 if [ -n "$OCF_RESKEY_snapshot" ]; then virsh save $OCF_RESKEY_name "$OCF_RESKEY_snapshot/$OCF_RESKEY_name" fi for op in $*; do echo virsh $op $OCF_RESKEY_name ... virsh $op $OCF_RESKEY_name timeout=$(get_timeout) while [ $timeout -gt 0 ]; do sleep 5 ((timeout -= 5)) state=$(do_status) [ $? -eq 0 ] || return 0 if [ "$state" = "paused" ]; then virsh destroy $OCF_RESKEY_name fi done done return 1 } do_start() { if [ "$OCF_RESKEY_use_virsh" = "1" ]; then do_virsh_start $* return $? fi do_xm_start $* return $? } do_stop() { declare domstate rv domstate=$(do_status) rv=$? ocf_log debug "Virtual machine $OCF_RESKEY_name is $domstate" if [ $rv -eq $OCF_APP_ERR_INDETERMINATE ]; then ocf_log crit "xend/libvirtd is dead; cannot stop $OCF_RESKEY_name" return 1 fi if [ "$OCF_RESKEY_use_virsh" = "1" ]; then do_virsh_stop $* return $? fi do_xm_stop $* return $? } # # Reconfigure a running VM. # reconfigure() { return 0 } xm_status() { service xend status &> /dev/null if [ $? -ne 0 ]; then # if xend died echo indeterminate return $OCF_APP_ERR_INDETERMINATE fi xm list $OCF_RESKEY_name &> /dev/null if [ $? -eq 0 ]; then echo "running" return 0 fi xm list migrating-$OCF_RESKEY_name &> /dev/null if [ $? -eq 0 ]; then echo "running" return 0 fi echo "not running" return $OCF_NOT_RUNNING } virsh_status() { declare state pid if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then service xend status &> /dev/null if [ $? -ne 0 ]; then echo indeterminate return $OCF_APP_ERR_INDETERMINATE fi fi # # libvirtd is required when using virsh even though # not specifically when also using Xen. This is because # libvirtd is required for migration. # pid=$(pidof libvirtd) if [ -z "$pid" ]; then echo indeterminate return $OCF_APP_ERR_INDETERMINATE fi state=$(virsh domstate $OCF_RESKEY_name) echo $state if [ "$state" = "running" ] || [ "$state" = "paused" ] || [ "$state" = "no state" ] || [ "$state" = "idle" ]; then return 0 fi if [ "$state" = "shut off" ]; then return $OCF_NOT_RUNNING fi return $OCF_ERR_GENERIC } # # Simple status check: Find the VM in the list of running # VMs # do_status() { if [ "$OCF_RESKEY_use_virsh" = "1" ]; then virsh_status return $? fi xm_status return $? } # # virsh "path" attribute support # check_config_file() { declare path=$1 if [ -f "$path/$OCF_RESKEY_name" ]; then echo $path/$OCF_RESKEY_name return 2 elif [ -f "$path/$OCF_RESKEY_name.xml" ]; then echo $path/$OCF_RESKEY_name.xml return 2 fi return 0 } parse_input() { declare delim=$1 declare input=$2 declare func=$3 declare inp declare value while [ -n "$input" ]; do value=${input/$delim*/} if [ -n "$value" ]; then eval $func $value if [ $? -eq 2 ]; then return 0 fi fi inp=${input/$value$delim/} if [ "$input" = "$inp" ]; then inp=${input/$value/} fi input=$inp done } search_config_path() { declare config_file=$(parse_input ":" "$OCF_RESKEY_path" check_config_file) if [ -n "$config_file" ]; then export OCF_RESKEY_xmlfile=$config_file return 0 fi return 1 } choose_management_tool() { declare -i is_xml # # Don't override user value for use_virsh if one is given # if [ -n "$OCF_RESKEY_use_virsh" ]; then return 0 fi which xmllint &> /dev/null if [ $? -ne 0 ]; then ocf_log warning "Could not find xmllint; assuming virsh mode" export OCF_RESKEY_use_virsh=1 unset OCF_RESKEY_path return 0 fi xmllint $OCF_RESKEY_xmlfile &> /dev/null is_xml=$? if [ $is_xml -eq 0 ]; then ocf_log debug "$OCF_RESKEY_xmlfile is XML; using virsh" export OCF_RESKEY_use_virsh=1 unset OCF_RESKEY_path else ocf_log debug "$OCF_RESKEY_xmlfile is not XML; using xm" export OCF_RESKEY_use_virsh=0 unset OCF_RESKEY_xmlfile fi return 0 } validate_all() { if [ "$(id -u)" != "0" ]; then ocf_log err "Cannot control VMs. as non-root user." return 1 fi # # If someone selects a hypervisor, honor it. # Otherwise, ask virsh what the hypervisor is. # if [ -z "$OCF_RESKEY_hypervisor" ] || [ "$OCF_RESKEY_hypervisor" = "auto" ]; then export OCF_RESKEY_hypervisor="`virsh version | grep \"Running hypervisor:\" | awk '{print $3}' | tr A-Z a-z`" if [ -z "$OCF_RESKEY_hypervisor" ]; then ocf_log err "Could not determine Hypervisor" return $OCF_ERR_ARGS fi echo Hypervisor: $OCF_RESKEY_hypervisor fi # # Xen hypervisor only for when use_virsh = 0. # if [ "$OCF_RESKEY_use_virsh" = "0" ]; then if [ "$OCF_RESKEY_hypervisor" != "xen" ]; then ocf_log err "Cannot use $OCF_RESKEY_hypervisor hypervisor without using virsh" return $OCF_ERR_ARGS fi if [ -n "$OCF_RESKEY_xmlfile" ]; then ocf_log err "Cannot use xmlfile if use_virsh is set to 0" return $OCF_ERR_ARGS fi else # # Virsh path support. # if [ -n "$OCF_RESKEY_path" ] && [ "$OCF_RESKEY_path" != "/etc/xen" ]; then if [ -n "$OCF_RESKEY_xmlfile" ]; then ocf_log warning "Using $OCF_RESKEY_xmlfile instead of searching $OCF_RESKEY_path" else search_config_path if [ $? -ne 0 ]; then ocf_log warning "Could not find $OCF_RESKEY_name or $OCF_RESKEY_name.xml in search path $OCF_RESKEY_path" unset OCF_RESKEY_xmlfile else ocf_log debug "Using $OCF_RESKEY_xmlfile" fi choose_management_tool fi else export OCF_RESKEY_use_virsh=1 fi fi if [ "$OCF_RESKEY_use_virsh" = "0" ]; then echo "Management tool: xm" which xm &> /dev/null if [ $? -ne 0 ]; then ocf_log err "Cannot find 'xm'; is it installed?" return $OCF_ERR_INSTALLED fi if [ "$OCF_RESKEY_hypervisor" != "xen" ]; then ocf_log err "Cannot use $OCF_RESKEY_hypervisor hypervisor without using virsh" return $OCF_ERR_ARGS fi else echo "Management tool: virsh" which virsh &> /dev/null if [ $? -ne 0 ]; then ocf_log err "Cannot find 'virsh'; is it installed?" return $OCF_ERR_INSTALLED fi fi # # Set the hypervisor URI # if [ -z "$OCF_RESKEY_hypervisor_uri" -o "$OCF_RESKEY_hypervisor_uri" = "auto" ] && [ "$OCF_RESKEY_use_virsh" = "1" ]; then # Virsh makes it easier to do this. Really. if [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then OCF_RESKEY_hypervisor_uri="qemu:///system" fi # I just need to believe in it more. if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then OCF_RESKEY_hypervisor_uri="xen:///" fi echo Hypervisor URI: $OCF_RESKEY_hypervisor_uri fi # # Set the migration URI # if [ -z "$OCF_RESKEY_migration_uri" -o "$OCF_RESKEY_migration_uri" = "auto" ] && [ "$OCF_RESKEY_use_virsh" = "1" ]; then # Virsh makes it easier to do this. Really. if [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then export OCF_RESKEY_migration_uri="qemu+ssh://%s/system" fi # I just need to believe in it more. if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then export OCF_RESKEY_migration_uri="xenmigr://%s/" fi [ -n "$OCF_RESKEY_migration_uri" ] && echo Migration URI format: $(printf $OCF_RESKEY_migration_uri target_host) fi if [ -z "$OCF_RESKEY_name" ]; then echo No domain name specified return $OCF_ERR_ARGS fi if [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then export migrateuriopt="tcp:%s" fi #virsh list --all | awk '{print $2}' | grep -q "^$OCF_RESKEY_name\$" return $? } virsh_migrate() { declare target=$1 declare rv=1 # # Xen and qemu have different migration mechanisms # if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then cmd="virsh migrate $migrate_opt $OCF_RESKEY_name $OCF_RESKEY_hypervisor_uri $(printf $OCF_RESKEY_migration_uri $target)" ocf_log debug "$cmd" err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]}) rv=$? elif [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then - cmd="virsh migrate $migrate_opt $OCF_RESKEY_name $(printf $OCF_RESKEY_migration_uri $target) $(printf $migrateuriopt $target)" + if [ -z "$tunnelled_opt" ]; then + cmd="virsh migrate $tunnelled_opt $migrate_opt $OCF_RESKEY_name $(printf $OCF_RESKEY_migration_uri $target) $(printf $migrateuriopt $target)" + else + cmd="virsh migrate $tunnelled_opt $migrate_opt $OCF_RESKEY_name $(printf $OCF_RESKEY_migration_uri $target)" + fi ocf_log debug "$cmd" err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]}) rv=$? fi if [ $rv -ne 0 ]; then ocf_log err "Migrate $OCF_RESKEY_name to $target failed:" ocf_log err "$err" if [ "$err" != "${err/does not exist/}" ]; then return $OCF_ERR_CONFIGURED fi if [ "$err" != "${err/Domain not found/}" ]; then return $OCF_ERR_CONFIGURED fi return $OCF_ERR_GENERIC fi return $rv } # # XM migrate # xm_migrate() { declare target=$1 declare errstr rv migrate_opt cmd rv=1 if [ "$OCF_RESKEY_migrate" = "live" ]; then migrate_opt="-l" fi # migrate() function sets target using migration_mapping; # no need to do it here anymore cmd="xm migrate $migrate_opt $OCF_RESKEY_name $target" ocf_log debug "$cmd" err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]}) rv=$? if [ $rv -ne 0 ]; then ocf_log err "Migrate $OCF_RESKEY_name to $target failed:" ocf_log err "$err" if [ "$err" != "${err/does not exist/}" ]; then return $OCF_NOT_RUNNING fi if [ "$err" != "${err/Connection refused/}" ]; then return $OCF_ERR_CONFIGURED fi return $OCF_ERR_GENERIC fi return $? } # # Virsh migrate # migrate() { declare target=$1 declare rv migrate_opt + declare tunnelled_opt if [ "$OCF_RESKEY_migrate" = "live" ]; then migrate_opt="--live" fi + case "$OCF_RESKEY_tunnelled" in + yes|true|1|YES|TRUE|on|ON) + tunnelled_opt="--tunnelled --p2p" + ;; + esac + # Patch from Marcelo Azevedo to migrate over private # LANs instead of public LANs if [ -n "$OCF_RESKEY_migration_mapping" ] ; then target=${OCF_RESKEY_migration_mapping#*$target:} target=${target%%,*} fi if [ "$OCF_RESKEY_use_virsh" = "1" ]; then virsh_migrate $target rv=$? else xm_migrate $target rv=$? fi return $rv } wait_start() { declare -i timeout_remaining=$(get_timeout) declare -i start_time declare -i end_time declare -i delta declare -i sleep_time if [ -z "$OCF_RESKEY_status_program" ]; then return 0 fi while [ $timeout_remaining -gt 0 ]; do start_time=$(date +%s) bash -c "$OCF_RESKEY_status_program" if [ $? -eq 0 ]; then return 0 fi end_time=$(date +%s) delta=$(((end_time - start_time))) sleep_time=$(((5 - delta))) ((timeout_remaining -= $delta)) if [ $sleep_time -gt 0 ]; then sleep $sleep_time ((timeout_remaining -= $sleep_time)) fi done ocf_log err "Start of $OCF_RESOURCE_INSTANCE has failed" ocf_log err "Timeout exceeded while waiting for \"$OCF_RESKEY_status_program\"" return 1 } # # # case $1 in start) validate_all || exit $OCF_ERR_ARGS do_start rv=$? if [ $rv -ne 0 ]; then exit $rv fi wait_start exit $? ;; stop) validate_all || exit $OCF_ERR_ARGS do_stop shutdown destroy exit $? ;; kill) validate_all || exit $OCF_ERR_ARGS do_stop destroy exit $? ;; recover|restart) exit 0 ;; status|monitor) validate_all || exit $OCF_ERR_ARGS echo -n "Virtual machine $OCF_RESKEY_name is " do_status rv=$? if [ $rv -ne 0 ]; then exit $rv fi [ -z "$OCF_RESKEY_status_program" ] && exit 0 [ -z "$OCF_CHECK_LEVEL" ] && exit 0 [ $OCF_CHECK_LEVEL -lt 10 ] && exit 0 bash -c "$OCF_RESKEY_status_program" &> /dev/null exit $? ;; migrate) validate_all || exit $OCF_ERR_ARGS migrate $2 # Send VM to this node rv=$? if [ $rv -eq $OCF_ERR_GENERIC ]; then # Catch-all: If migration failed with # an unhandled error, do a status check # to see if the VM is really dead. # # If the VM is still in good health, return # a value to rgmanager to indicate the # non-critical error # # OCF states that codes 150-199 are reserved # for application use, so we'll use 150 # do_status > /dev/null if [ $? -eq 0 ]; then rv=150 fi fi exit $rv ;; reload) exit 0 ;; reconfig) validate_all || exit $OCF_ERR_ARGS echo "$0 RECONFIGURING $OCF_RESKEY_memory" reconfigure exit $? ;; meta-data) meta_data exit 0 ;; validate-all) validate_all exit $? ;; *) echo "usage: $0 {start|stop|restart|status|reload|reconfig|meta-data|validate-all}" exit 1 ;; esac diff --git a/tools/findif.c b/tools/findif.c index 72452f74b..b911ae24e 100644 --- a/tools/findif.c +++ b/tools/findif.c @@ -1,831 +1,832 @@ /* * findif.c: Finds an interface which can route a given address * * It's really simple to write in C, but hard to write in the shell... * * This code is dependent on IPV4 addressing conventions... * Sorry. * * Copyright (C) 2000 Alan Robertson * Copyright (C) 2001 Matt Soffen * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * *********************************************************** * * All our arguments come through the environment as OCF * environment variables as below: * * OCF_RESKEY_ip * OCF_RESKEY_broadcast * OCF_RESKEY_nic * OCF_RESKEY_cidr_netmask * * If the CIDR netmask is omitted, we choose the netmask associated with * the route we selected. * * If the broadcast address was omitted, we assume the highest address * in the subnet. * * If the interface is omitted, we choose the interface associated with * the route we selected. * * * See http://www.doom.net/docs/netmask.html for a table explaining * CIDR address format and their relationship to life, the universe * and everything. * */ #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_SOCKET_H #include #endif #ifdef HAVE_SYS_SOCKIO_H #include #endif #include #include #include #ifdef __linux__ #undef __OPTIMIZE__ /* * This gets rid of some silly -Wtraditional warnings on Linux * because the netinet header has some slightly funky constants * in it. */ #endif /* __linux__ */ #include #include #include #include #define DEBUG 0 #define EOS '\0' #define PROCROUTE "/proc/net/route" #define ROUTEPARM "-n get" #ifndef HAVE_STRNLEN /* Any system that don't provide strnlen() only has itself to blame */ #define strnlen(str, max) strlen(str) #endif /* * "route -n get iii.jjj.kkk.lll" can, on Solaris at least, * return the word "default" as the value from "mask" and "dest", * typically if the host is remote, reached over a default route. * We should probably treat such a mask as "0.0.0.0". * * Define "MASK_DEFAULT_TO_ZERO" to enable this interpretation. * * This is better for Solaris and is probably suitable (or irrelevant) * for others OSes also. But if it breaks another OS, then reduce the * "hash-if 1" below to exclude that OS. * (David Lee, Jan 2006) */ #if 1 # define MASK_DEFAULT_TO_ZERO #endif static int OutputInCIDR=0; /* * Different OSes offer different mechnisms to obtain this information. * Not all this can be determined at configure-time; need a run-time element. * * typedef ... SearchRoute ...: * For routines that interface on these mechanisms. * Return code: * <0: mechanism invalid, so try next mechanism * 0: mechanism worked: good answer * >0: mechanism worked: bad answer * On non-zero, errmsg may have been filled with an error message */ typedef int SearchRoute (char *address, struct in_addr *in , struct in_addr *addr_out, char *best_if, size_t best_iflen , unsigned long *best_netmask, char *errmsg , int errmsglen); static SearchRoute SearchUsingProcRoute; static SearchRoute SearchUsingRouteCmd; static SearchRoute *search_mechs[] = { &SearchUsingProcRoute, &SearchUsingRouteCmd, NULL }; void GetAddress (char **address, char **netmaskbits , char **bcast_arg, char **if_specified); void ValidateNetmaskBits (char *netmaskbits, unsigned long *netmask); int ValidateIFName (const char *ifname, struct ifreq *ifr); int netmask_bits (unsigned long netmask); char * get_first_loopback_netdev(char * ifname); int is_loopback_interface(char * ifname); char * get_ifname(char * buf, char * ifname); int ConvertQuadToInt(char *dest); static const char *cmdname = "findif"; #define OCF_SUCCESS 0 #define OCF_ERR_GENERIC 1 #define OCF_ERR_ARGS 2 #define OCF_ERR_UNIMPLEMENTED 3 #define OCF_ERR_PERM 4 #define OCF_ERR_INSTALLED 5 #define OCF_ERR_CONFIGURED 6 #define OCF_NOT_RUNNING 7 void usage(int ec); #define PATH_PROC_NET_DEV "/proc/net/dev" #define DELIM '/' #define BAD_BROADCAST (0L) #define MAXSTR 128 static int SearchUsingProcRoute (char *address, struct in_addr *in , struct in_addr *addr_out, char *best_if, size_t best_iflen , unsigned long *best_netmask , char *errmsg, int errmsglen) { unsigned long flags, refcnt, use, gw, mask; unsigned long dest; long metric = LONG_MAX; long best_metric = LONG_MAX; int rc = OCF_SUCCESS; char buf[2048]; char interface[MAXSTR]; FILE *routefd = NULL; if ((routefd = fopen(PROCROUTE, "r")) == NULL) { snprintf(errmsg, errmsglen , "Cannot open %s for reading" , PROCROUTE); rc = OCF_ERR_GENERIC; goto out; } /* Skip first (header) line */ if (fgets(buf, sizeof(buf), routefd) == NULL) { snprintf(errmsg, errmsglen , "Cannot skip first line from %s" , PROCROUTE); rc = OCF_ERR_GENERIC; goto out; } + *best_netmask = 0; while (fgets(buf, sizeof(buf), routefd) != NULL) { if (sscanf(buf, "%[^\t]\t%lx%lx%lx%lx%lx%lx%lx" , interface, &dest, &gw, &flags, &refcnt, &use , &metric, &mask) != 8) { snprintf(errmsg, errmsglen, "Bad line in %s: %s" , PROCROUTE, buf); rc = OCF_ERR_GENERIC; goto out; } if ( (in->s_addr&mask) == (in_addr_t)(dest&mask) - && metric < best_metric) { + && metric <= best_metric && mask >= *best_netmask) { best_metric = metric; *best_netmask = mask; strncpy(best_if, interface, best_iflen); } } if (best_metric == LONG_MAX) { snprintf(errmsg, errmsglen, "No route to %s\n", address); rc = OCF_ERR_GENERIC; } out: if (routefd) { fclose(routefd); } return(rc); } static int SearchUsingRouteCmd (char *address, struct in_addr *in , struct in_addr *addr_out, char *best_if, size_t best_iflen , unsigned long *best_netmask , char *errmsg, int errmsglen) { char mask[20]; char routecmd[MAXSTR]; int best_metric = INT_MAX; char buf[2048]; char interface[MAXSTR]; char *cp, *sp; int done = 0; FILE *routefd = NULL; uint32_t maskbits; /* Open route and get the information */ snprintf (routecmd, sizeof(routecmd), "%s %s %s" , ROUTE, ROUTEPARM, address); routefd = popen (routecmd, "r"); if (routefd == NULL) return (OCF_ERR_GENERIC); mask[0] = EOS; interface[0] = EOS; while ((done < 3) && fgets(buf, sizeof(buf), routefd)) { int buflen = strnlen(buf, sizeof(buf)); cp = buf; sp = buf + buflen; while (sp!=buf && isspace((int)*(sp-1))) { --sp; } *sp = EOS; buf[buflen] = EOS; if (strstr (buf, "mask:")) { /*strsep(&cp, ":");cp++;*/ cp = strtok(buf, ":"); cp = strtok(NULL, ":");cp++; strncpy(mask, cp, sizeof(mask)); done++; } if (strstr (buf, "interface:")) { /*strsep(&cp, ":");cp++;*/ cp = strtok(buf, ":"); cp = strtok(NULL, ":");cp++; strncpy(interface, cp, sizeof(interface)); done++; } } fclose(routefd); /* * Check to see if mask isn't available. It may not be * returned if multiple IP's are defined. * use 255.255.255.255 for mask then */ /* I'm pretty sure this is the wrong behavior... * I think the right behavior is to declare an error and give up. * The admin didn't define his routes correctly. Fix them. * It's useless to take over an IP address with no way to * return packets to the originator. Without the right subnet * mask, you can't reply to any packets you receive. */ if (strnlen(mask, sizeof(mask)) == 0) { strncpy (mask, "255.255.255.255", sizeof(mask)); } /* * Solaris (at least) can return the word "default" for mask and dest. * For the moment, let's interpret this as: * mask: 0.0.0.0 * This was manifesting itself under "BasicSanityCheck", which tries * to use a remote IP number; these typically use the "default" route. * Better schemes are warmly invited... */ #ifdef MASK_DEFAULT_TO_ZERO if (strncmp(mask, "default", sizeof("default")) == 0) { strncpy (mask, "0.0.0.0", sizeof(mask)); } #endif if (inet_pton(AF_INET, mask, &maskbits) <= 0) { snprintf(errmsg, errmsglen, "mask [%s] not valid.", mask); return(OCF_ERR_CONFIGURED); } if (inet_pton(AF_INET, address, addr_out) <= 0) { snprintf(errmsg, errmsglen , "IP address [%s] not valid.", address); return(OCF_ERR_CONFIGURED); } if ((in->s_addr & maskbits) == (addr_out->s_addr & maskbits)) { if (interface[0] == EOS) { snprintf(errmsg, errmsglen, "No interface found."); return(OCF_ERR_GENERIC); } best_metric = 0; *best_netmask = maskbits; strncpy(best_if, interface, best_iflen); } if (best_metric == INT_MAX) { snprintf(errmsg, errmsglen, "No route to %s\n", address); return(OCF_ERR_GENERIC); } return (OCF_SUCCESS); } /* * Getaddress gets all its real parameters from the OCF environment * variables that its callers already use. */ void GetAddress (char **address, char **netmaskbits , char **bcast_arg, char **if_specified) { /* * Here are out input environment variables: * * OCF_RESKEY_ip ip address * OCF_RESKEY_cidr_netmask netmask of interface * OCF_RESKEY_broadcast broadcast address for interface * OCF_RESKEY_nic interface to assign to * */ *address = getenv("OCF_RESKEY_ip"); *netmaskbits = getenv("OCF_RESKEY_cidr_netmask"); if (*netmaskbits == NULL || **netmaskbits == EOS) { *netmaskbits = getenv("OCF_RESKEY_netmask"); } *bcast_arg = getenv("OCF_RESKEY_broadcast"); *if_specified = getenv("OCF_RESKEY_nic"); } void ValidateNetmaskBits (char *netmaskbits, unsigned long *netmask) { if (netmaskbits != NULL && *netmaskbits != EOS) { size_t nmblen = strnlen(netmaskbits, 3); /* Maximum netmask is 32 */ if (nmblen > 2 || nmblen == 0 || (strspn(netmaskbits, "0123456789") != nmblen)) { fprintf(stderr, "Invalid netmask specification" " [%s]", netmaskbits); usage(OCF_ERR_CONFIGURED); /*not reached */ }else{ unsigned long bits = atoi(netmaskbits); if (bits < 1 || bits > 32) { fprintf(stderr , "Invalid netmask specification [%s]" , netmaskbits); usage(OCF_ERR_CONFIGURED); /*not reached */ exit(1); } bits = 32 - bits; *netmask = (1L<<(bits))-1L; *netmask = ((~(*netmask))&0xffffffffUL); *netmask = htonl(*netmask); } } } int ValidateIFName(const char *ifname, struct ifreq *ifr) { int skfd = -1; char *colonptr; if ( (skfd = socket(PF_INET, SOCK_DGRAM, 0)) == -1 ) { fprintf(stderr, "%s\n", strerror(errno)); return -2; } strncpy(ifr->ifr_name, ifname, IFNAMSIZ); /* Contain a ":"? Probably an error, but treat as warning at present */ if ((colonptr = strchr(ifname, ':')) != NULL) { fprintf(stderr, "%s: warning: name may be invalid\n", ifr->ifr_name); } if (ioctl(skfd, SIOCGIFFLAGS, ifr) < 0) { fprintf(stderr, "%s: unknown interface: %s\n" , ifr->ifr_name, strerror(errno)); close(skfd); /* return -1 only if ifname is known to be invalid */ return -1; } close(skfd); return 0; } int netmask_bits(unsigned long netmask) { int j; netmask = netmask & 0xFFFFFFFFUL; for (j=0; j <= 32; ++j) { if ((netmask >> j)&0x1) { break; } } return 32 - j; } char * get_first_loopback_netdev(char * output) { char buf[512]; FILE * fd = NULL; char *rc = NULL; if (!output) { fprintf(stderr, "output buf is a null pointer.\n"); goto out; } fd = fopen(PATH_PROC_NET_DEV, "r"); if (!fd) { fprintf(stderr, "Warning: cannot open %s (%s).\n", PATH_PROC_NET_DEV, strerror(errno)); goto out; } /* Skip the first two lines */ if (!fgets(buf, sizeof(buf), fd) || !fgets(buf, sizeof(buf), fd)) { fprintf(stderr, "Warning: cannot read header from %s.\n", PATH_PROC_NET_DEV); goto out; } while (fgets(buf, sizeof(buf), fd)) { char name[IFNAMSIZ]; if (NULL == get_ifname(buf, name)) { /* Maybe somethin is wrong, anyway continue */ continue; } if (is_loopback_interface(name)) { strncpy(output, name, IFNAMSIZ); rc = output; goto out; } } out: if (fd) { fclose(fd); } return rc; } int is_loopback_interface(char * ifname) { struct ifreq ifr; memset(&ifr, 0, sizeof(ifr)); if (ValidateIFName(ifname, &ifr) < 0) return 0; if (ifr.ifr_flags & IFF_LOOPBACK) { /* this is a loopback device. */ return 1; } else { return 0; } } char * get_ifname(char * buf, char * ifname) { char * start, * end, * buf_border; buf_border = buf + strnlen(buf, 512); start = buf; while (isspace((int) *start) && (start != buf_border)) { start++; } end = start; while ((*end != ':') && (end != buf_border)) { end++; } if ( start == buf_border || end == buf_border ) { /* Over the border of buf */ return NULL; } *end = '\0'; strncpy(ifname, start, IFNAMSIZ); return ifname; } int ConvertQuadToInt(char *dest) { struct in_addr ad; int bits, j; inet_pton(AF_INET, dest, &ad); bits = 0; j = ntohl(ad.s_addr); while(j != 0){ bits++; j <<= 1; } return (bits); } int main(int argc, char ** argv) { char * address = NULL; char * bcast_arg = NULL; char * netmaskbits = NULL; struct in_addr in; struct in_addr addr_out; unsigned long netmask; char best_if[MAXSTR]; char * if_specified = NULL; struct ifreq ifr; unsigned long best_netmask = INT_MAX; int argerrs = 0; cmdname=argv[0]; memset(&addr_out, 0, sizeof(addr_out)); memset(&in, 0, sizeof(in)); memset(&ifr, 0, sizeof(ifr)); switch (argc) { case 1: /* No -C argument */ break; case 2: /* Hopefully a -C argument */ if (strncmp(argv[1], "-C", sizeof("-C")) != 0) { argerrs=1; } OutputInCIDR=1; break; default: argerrs=1; break; } if (argerrs) { usage(OCF_ERR_ARGS); /* not reached */ return(1); } GetAddress (&address, &netmaskbits, &bcast_arg , &if_specified); if (address == NULL || *address == EOS) { fprintf(stderr, "ERROR: IP address parameter is mandatory."); usage(OCF_ERR_CONFIGURED); /* not reached */ } /* Is the IP address we're supposed to find valid? */ if (inet_pton(AF_INET, address, (void *)&in) <= 0) { fprintf(stderr, "IP address [%s] not valid.", address); usage(OCF_ERR_CONFIGURED); /* not reached */ } if(netmaskbits != NULL && *netmaskbits != EOS && strchr(netmaskbits, '.') != NULL) { int len = strlen(netmaskbits); snprintf(netmaskbits, len, "%d", ConvertQuadToInt(netmaskbits)); fprintf(stderr, "Converted dotted-quad netmask to CIDR as: %s\n", netmaskbits); } /* Validate the netmaskbits field */ ValidateNetmaskBits (netmaskbits, &netmask); if (if_specified != NULL && *if_specified != EOS) { if(ValidateIFName(if_specified, &ifr) < 0) { usage(OCF_ERR_CONFIGURED); /* not reached */ } strncpy(best_if, if_specified, sizeof(best_if)); *(best_if + sizeof(best_if) - 1) = '\0'; }else{ SearchRoute **sr = search_mechs; char errmsg[MAXSTR] = "No valid mecahnisms"; int rc = OCF_ERR_GENERIC; strcpy(best_if, "UNKNOWN"); while (*sr) { errmsg[0] = '\0'; rc = (*sr) (address, &in, &addr_out, best_if , sizeof(best_if) , &best_netmask, errmsg, sizeof(errmsg)); if (!rc) { /* Mechanism worked */ break; } sr++; } if (rc != 0) { /* No route, or all mechanisms failed */ if (*errmsg) { fprintf(stderr, "%s", errmsg); } return(rc); } } if (netmaskbits) { best_netmask = netmask; }else if (best_netmask == 0L) { /* On some distirbutions, there is no loopback related route item, this leads to the error here. My fix may be not good enough, please FIXME */ if (0 == strncmp(address, "127", 3)) { if (NULL != get_first_loopback_netdev(best_if)) { best_netmask = 0x000000ff; } else { fprintf(stderr, "No loopback interface found.\n"); return(OCF_ERR_GENERIC); } } else { fprintf(stderr , "ERROR: Cannot use default route w/o netmask [%s]\n" , address); return(OCF_ERR_GENERIC); } } /* Did they tell us the broadcast address? */ if (bcast_arg && *bcast_arg != EOS) { /* Yes, they gave us a broadcast address. * It at least should be a valid IP address */ struct in_addr bcast_addr; if (inet_pton(AF_INET, bcast_arg, (void *)&bcast_addr) <= 0) { fprintf(stderr, "Invalid broadcast address [%s].", bcast_arg); usage(OCF_ERR_CONFIGURED); /* not reached */ } best_netmask = htonl(best_netmask); if (!OutputInCIDR) { printf("%s\tnetmask %d.%d.%d.%d\tbroadcast %s\n" , best_if , (int)((best_netmask>>24) & 0xff) , (int)((best_netmask>>16) & 0xff) , (int)((best_netmask>>8) & 0xff) , (int)(best_netmask & 0xff) , bcast_arg); }else{ printf("%s\tnetmask %d\tbroadcast %s\n" , best_if , netmask_bits(best_netmask) , bcast_arg); } }else{ /* No, we use a common broadcast address convention */ unsigned long def_bcast; /* Common broadcast address */ def_bcast = (in.s_addr | (~best_netmask)); #if DEBUG fprintf(stderr, "best_netmask = %08lx, def_bcast = %08lx\n" , best_netmask, def_bcast); #endif /* Make things a bit more machine-independent */ best_netmask = htonl(best_netmask); def_bcast = htonl(def_bcast); if (!OutputInCIDR) { printf("%s\tnetmask %d.%d.%d.%d\tbroadcast %d.%d.%d.%d\n" , best_if , (int)((best_netmask>>24) & 0xff) , (int)((best_netmask>>16) & 0xff) , (int)((best_netmask>>8) & 0xff) , (int)(best_netmask & 0xff) , (int)((def_bcast>>24) & 0xff) , (int)((def_bcast>>16) & 0xff) , (int)((def_bcast>>8) & 0xff) , (int)(def_bcast & 0xff)); }else{ printf("%s\tnetmask %d\tbroadcast %d.%d.%d.%d\n" , best_if , netmask_bits(best_netmask) , (int)((def_bcast>>24) & 0xff) , (int)((def_bcast>>16) & 0xff) , (int)((def_bcast>>8) & 0xff) , (int)(def_bcast & 0xff)); } } return(0); } void usage(int ec) { fprintf(stderr, "\n" "%s version 2.99.1 Copyright Alan Robertson\n" "\n" "Usage: %s [-C]\n" "Options:\n" " -C: Output netmask as the number of bits rather " "than as 4 octets.\n" "Environment variables:\n" "OCF_RESKEY_ip ip address (mandatory!)\n" "OCF_RESKEY_cidr_netmask netmask of interface\n" "OCF_RESKEY_broadcast broadcast address for interface\n" "OCF_RESKEY_nic interface to assign to\n" , cmdname, cmdname); exit(ec); } /* Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT eth0 33D60987 00000000 0005 0 0 0 FFFFFFFF 0 0 0 eth0 00D60987 00000000 0001 0 0 0 00FFFFFF 0 0 0 lo 0000007F 00000000 0001 0 0 0 000000FF 0 0 0 eth0 00000000 FED60987 0003 0 0 0 00000000 0 0 0 netstat -rn outpug from RedHat Linux 6.0 Kernel IP routing table Destination Gateway Genmask Flags MSS Window irtt Iface 192.168.85.2 0.0.0.0 255.255.255.255 UH 0 0 0 eth1 10.0.0.2 0.0.0.0 255.255.255.255 UH 0 0 0 eth2 208.132.134.61 0.0.0.0 255.255.255.255 UH 0 0 0 eth0 208.132.134.32 0.0.0.0 255.255.255.224 U 0 0 0 eth0 192.168.85.0 0.0.0.0 255.255.255.0 U 0 0 0 eth1 10.0.0.0 0.0.0.0 255.255.255.0 U 0 0 0 eth2 127.0.0.0 0.0.0.0 255.0.0.0 U 0 0 0 lo 0.0.0.0 208.132.134.33 0.0.0.0 UG 0 0 0 eth0 |-------------------------------------------------------------------------------- netstat -rn output from FreeBSD 3.3 Routing tables Internet: Destination Gateway Flags Refs Use Netif Expire default 209.61.94.161 UGSc 3 8 pn0 192.168 link#1 UC 0 0 xl0 192.168.0.2 0:60:8:a4:91:fd UHLW 0 38 lo0 192.168.0.255 ff:ff:ff:ff:ff:ff UHLWb 1 7877 xl0 209.61.94.160/29 link#2 UC 0 0 pn0 209.61.94.161 0:a0:cc:26:c2:ea UHLW 6 17265 pn0 1105 209.61.94.162 0:a0:cc:27:1c:fb UHLW 1 568 pn0 1098 209.61.94.163 0:a0:cc:29:1f:86 UHLW 0 4749 pn0 1095 209.61.94.166 0:a0:cc:27:2d:e1 UHLW 0 12 lo0 209.61.94.167 ff:ff:ff:ff:ff:ff UHLWb 0 10578 pn0 |-------------------------------------------------------------------------------- netstat -rn output from FreeBSD 4.2 Routing tables Internet: Destination Gateway Flags Refs Use Netif Expire default 64.65.195.1 UGSc 1 11 dc0 64.65.195/24 link#1 UC 0 0 dc0 => 64.65.195.1 0:3:42:3b:0:dd UHLW 2 0 dc0 1131 64.65.195.184 0:a0:cc:29:1f:86 UHLW 2 18098 dc0 1119 64.65.195.194 0:a0:cc:27:2d:e1 UHLW 3 335161 dc0 943 64.65.195.200 52:54:0:db:33:b3 UHLW 0 13 dc0 406 64.65.195.255 ff:ff:ff:ff:ff:ff UHLWb 1 584 dc0 127.0.0.1 127.0.0.1 UH 0 0 lo0 192.168/16 link#2 UC 0 0 vx0 => 192.168.0.1 0:20:af:e2:f0:36 UHLW 0 2 lo0 192.168.255.255 ff:ff:ff:ff:ff:ff UHLWb 0 1 vx0 Internet6: Destination Gateway Flags Netif Expire ::1 ::1 UH lo0 fe80::%dc0/64 link#1 UC dc0 fe80::%vx0/64 link#2 UC vx0 fe80::%lo0/64 fe80::1%lo0 Uc lo0 ff01::/32 ::1 U lo0 ff02::%dc0/32 link#1 UC dc0 ff02::%vx0/32 link#2 UC vx0 ff02::%lo0/32 fe80::1%lo0 UC lo0 */ diff --git a/tools/ocft/Filesystem b/tools/ocft/Filesystem index 47700a653..ec77a27be 100644 --- a/tools/ocft/Filesystem +++ b/tools/ocft/Filesystem @@ -1,110 +1,110 @@ # Filesystem # by dejan@suse.de on # Tue Feb 15 18:50:04 CET 2011 CONFIG Agent Filesystem AgentRoot /usr/lib/ocf/resource.d/heartbeat HangTimeout 20 VARIABLE OCFT_fs=/var/run/resource-agents/ocft-Filesystem-fs - OCFT_loop=/dev/loop0 + OCFT_loop=/dev/loop7 OCFT_dir=/var/run/resource-agents/ocft-Filesystem-mnt SETUP-AGENT losetup $OCFT_loop 2>/dev/null && exit 1 rmdir $OCFT_dir 2>/dev/null || true mkdir $OCFT_dir - dd if=/dev/zero of=$OCFT_fs bs=1024k count=1 2>/dev/null + dd if=/dev/zero of=$OCFT_fs bs=1 count=0 seek=16M 2>/dev/null mke2fs -Fq $OCFT_fs losetup $OCFT_loop $OCFT_fs CLEANUP-AGENT rmdir $OCFT_dir rm $OCFT_fs losetup -d $OCFT_loop CASE-BLOCK required_args Env OCF_RESKEY_device=$OCFT_loop Env OCF_RESKEY_fstype=ext2 Env OCF_RESKEY_directory=$OCFT_dir CASE-BLOCK default_status AgentRun stop CASE-BLOCK prepare Include required_args Include default_status CASE "check base env" Include prepare AgentRun start OCF_SUCCESS CASE "check base env: invalid 'OCF_RESKEY_device'" Include prepare Env OCF_RESKEY_device=/dev/no_such_device AgentRun start OCF_ERR_INSTALLED CASE "check base env: unset 'OCF_RESKEY_device'" Include prepare Unenv OCF_RESKEY_device AgentRun start OCF_ERR_CONFIGURED CASE "normal start" Include prepare AgentRun start OCF_SUCCESS CASE "normal stop" Include prepare AgentRun start AgentRun stop OCF_SUCCESS CASE "double start" Include prepare AgentRun start AgentRun start OCF_SUCCESS CASE "double stop" Include prepare AgentRun stop OCF_SUCCESS CASE "monitor when running" Include prepare AgentRun start AgentRun monitor OCF_SUCCESS CASE "monitor when not running" Include prepare AgentRun monitor OCF_NOT_RUNNING CASE "monitor depth 10 when running" Include prepare AgentRun start Env OCF_CHECK_LEVEL=10 AgentRun monitor OCF_SUCCESS CASE "monitor depth 20 with running" Include prepare AgentRun start Env OCF_CHECK_LEVEL=20 AgentRun monitor OCF_SUCCESS CASE "start insert failure (remove device)" Include prepare Bash losetup -d $OCFT_loop BashAtExit losetup $OCFT_loop $OCFT_fs AgentRun start OCF_ERR_GENERIC CASE "monitor depth 20 insert failure (r/o fs)" Include prepare AgentRun start Bash mount -o remount,ro $OCFT_dir BashAtExit mount -o remount,rw $OCFT_dir Env OCF_CHECK_LEVEL=20 AgentRun monitor OCF_ERR_GENERIC CASE "unimplemented command" Include prepare AgentRun no_cmd OCF_ERR_UNIMPLEMENTED diff --git a/tools/ocft/IPv6addr b/tools/ocft/IPv6addr new file mode 100644 index 000000000..36a964294 --- /dev/null +++ b/tools/ocft/IPv6addr @@ -0,0 +1,150 @@ +# IPv6addr + +# Note: This test case uses two NICs(eth0, eth1) and +# a IPv6 address prefix (2001:db8::/32, RFC3849). +# Adjust them according to your environment at VARIABLE section if needed. + +CONFIG + Agent IPv6addr + AgentRoot /usr/lib/ocf/resource.d/heartbeat + HangTimeout 20 + +VARIABLE + OCFT_target_ipv6addr=2001:db8:1234::2 + OCFT_target_nic=eth0 + OCFT_target_prefix=64 + OCFT_target_netaddr=2001:db8:1234::1/$OCFT_target_prefix + OCFT_target_linklocal=fe80::2 + OCFT_wrong_ipv6addr=2001:db8:5678::2 + OCFT_force_nic=eth1 + OCFT_force_prefix=80 + +SETUP-AGENT + ip addr add $OCFT_target_netaddr dev $OCFT_target_nic + +CLEANUP-AGENT + ip addr del $OCFT_target_netaddr dev $OCFT_target_nic + +CASE-BLOCK required_args + Env OCF_RESKEY_ipv6addr=$OCFT_target_ipv6addr + Env OCFT_check_ipv6addr=$OCFT_target_ipv6addr + Env OCFT_check_prefix=$OCFT_target_prefix + Env OCFT_check_nic=$OCFT_target_nic + +CASE-BLOCK check_ip_assigned + Bash ip -6 -o addr show $OCFT_check_nic | grep -w $OCFT_check_ipv6addr/$OCFT_check_prefix >/dev/null # checking if the IPv6 address was assigned correctly + +CASE-BLOCK check_ip_removed + Bash ! ip -6 -o addr show $OCFT_check_nic | grep -w $OCFT_check_ipv6addr/$OCFT_check_prefix >/dev/null # checking if the IPv6 address was removed correctly + +CASE-BLOCK default_status + AgentRun stop + +CASE-BLOCK prepare + Include required_args + Include default_status + + +CASE "normal start" + Include prepare + AgentRun start OCF_SUCCESS + Include check_ip_assigned + +CASE "normal stop" + Include prepare + AgentRun start + AgentRun stop OCF_SUCCESS + Include check_ip_removed + +CASE "double start" + Include prepare + AgentRun start + AgentRun start OCF_SUCCESS + +CASE "double stop" + Include prepare + AgentRun stop OCF_SUCCESS + +CASE "monitor with running" + Include prepare + AgentRun start + AgentRun monitor OCF_SUCCESS + +CASE "monitor with not running" + Include prepare + AgentRun monitor OCF_NOT_RUNNING + +CASE "params with nic, no cidr_netmask" + Include prepare + Env OCF_RESKEY_nic=$OCFT_target_nic + AgentRun start OCF_SUCCESS + Include check_ip_assigned + AgentRun monitor OCF_SUCCESS + AgentRun stop OCF_SUCCESS + Include check_ip_removed + +CASE "params with nic, cidr_netmask" + Include prepare + Env OCF_RESKEY_nic=$OCFT_target_nic + Env OCF_RESKEY_cidr_netmask=$OCFT_target_prefix + AgentRun start OCF_SUCCESS + Include check_ip_assigned + AgentRun monitor OCF_SUCCESS + AgentRun stop OCF_SUCCESS + Include check_ip_removed + +CASE "normal usage for a link-local IPv6 address" + Include prepare + Env OCF_RESKEY_ipv6addr=$OCFT_target_linklocal + Env OCFT_check_ipv6addr=$OCFT_target_linklocal + # nic is mandatory for a link-local address + Env OCF_RESKEY_nic=$OCFT_target_nic + AgentRun start OCF_SUCCESS + Include check_ip_assigned + AgentRun monitor OCF_SUCCESS + AgentRun stop OCF_SUCCESS + Include check_ip_removed + +CASE "error start for a link-local IPv6 address when no nic" + Include prepare + Env OCF_RESKEY_ipv6addr=$OCFT_target_linklocal + # nic is mandatory for a link-local address + Unenv OCF_RESKEY_nic + AgentRun start OCF_ERR_GENERIC + Include check_ip_removed + +CASE "error params with wrong ipv6addr" + Include prepare + Env OCF_RESKEY_ipv6addr=$OCFT_wrong_ipv6addr + AgentRun start OCF_ERR_GENERIC + +# Note: this result is different from IPaddr2/findif +# IPaddr2 succeeds if the ip matched based on the netmask of the subnet +# or fails if it did not match to any. +# Recommended to always specify both nic and cidr_netmask when you needed. +CASE "error params with wrong cidr_netmask" + Include prepare + Env OCF_RESKEY_cidr_netmask=$OCFT_force_prefix + AgentRun start OCF_ERR_GENERIC + +# Note: this result is different from IPaddr2/findif +# IPaddr2 succeeds but it uses /32 as a guessed cidr_netmask which +# does not seem to be expected. +# Recommended to always specify both nic and cidr_netmask when you needed. +CASE "error params with wrong nic" + Include prepare + Env OCF_RESKEY_nic=$OCFT_force_nic + AgentRun start OCF_ERR_GENERIC + +# Note: This use case is now valid. It was not allowed until v3.9.2. +CASE "force to use the specified nic and cidr_netmask" + Include prepare + Env OCF_RESKEY_nic=$OCFT_force_nic + Env OCF_RESKEY_cidr_netmask=$OCFT_force_prefix + Env OCFT_check_nic=$OCFT_force_nic + Env OCFT_check_prefix=$OCFT_force_prefix + AgentRun start OCF_SUCCESS + Include check_ip_assigned + AgentRun monitor OCF_SUCCESS + AgentRun stop OCF_SUCCESS + Include check_ip_removed diff --git a/tools/ocft/LVM b/tools/ocft/LVM index f7154ae58..4bd2b221b 100644 --- a/tools/ocft/LVM +++ b/tools/ocft/LVM @@ -1,85 +1,86 @@ # LVM # by dejan@suse.de on # Wed Feb 16 13:15:01 CET 2011 CONFIG Agent LVM AgentRoot /usr/lib/ocf/resource.d/heartbeat HangTimeout 20 VARIABLE OCFT_pv=/var/run/resource-agents/ocft-LVM-pv OCFT_vg=ocft-vg OCFT_lv=ocft-lv - OCFT_loop=/dev/loop0 + OCFT_loop=/dev/loop7 SETUP-AGENT - losetup -d $OCFT_loop 2>/dev/null || true - dd if=/dev/zero of=$OCFT_pv bs=1024k count=1 2>/dev/null + losetup $OCFT_loop 2>/dev/null && exit 1 + dd if=/dev/zero of=$OCFT_pv bs=1 count=0 seek=16M 2>/dev/null losetup $OCFT_loop $OCFT_pv pvcreate $OCFT_loop vgcreate -s 4K $OCFT_vg $OCFT_loop lvcreate -n $OCFT_lv -L 600K $OCFT_vg CLEANUP-AGENT vgchange -an $OCFT_vg lvremove -f /dev/$OCFT_vg/$OCFT_lv vgremove -f $OCFT_vg + pvremove $OCFT_loop losetup -d $OCFT_loop rm $OCFT_pv CASE-BLOCK required_args Env OCF_RESKEY_volgrpname=$OCFT_vg CASE-BLOCK default_status AgentRun stop CASE-BLOCK prepare Include required_args Include default_status CASE "check base env" Include prepare AgentRun start OCF_SUCCESS CASE "check base env: invalid 'OCF_RESKEY_volgrpname'" Include prepare Env OCF_RESKEY_volgrpname=/dev/no_such_device AgentRun start OCF_ERR_GENERIC CASE "check base env: unset 'OCF_RESKEY_volgrpname'" Include prepare Unenv OCF_RESKEY_volgrpname AgentRun start OCF_ERR_CONFIGURED CASE "normal start" Include prepare AgentRun start OCF_SUCCESS CASE "normal stop" Include prepare AgentRun start AgentRun stop OCF_SUCCESS CASE "double start" Include prepare AgentRun start AgentRun start OCF_SUCCESS CASE "double stop" Include prepare AgentRun stop OCF_SUCCESS CASE "monitor when running" Include prepare AgentRun start AgentRun monitor OCF_SUCCESS CASE "monitor when not running" Include prepare AgentRun monitor OCF_NOT_RUNNING CASE "unimplemented command" Include prepare AgentRun no_cmd OCF_ERR_UNIMPLEMENTED diff --git a/tools/ocft/Makefile.am b/tools/ocft/Makefile.am index fb54e7690..c91f2388a 100644 --- a/tools/ocft/Makefile.am +++ b/tools/ocft/Makefile.am @@ -1,46 +1,48 @@ # Author: John Shi # jshi@suse.de # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(ocftcfgs_DATA) $(ocft_DATA) sbin_SCRIPTS = ocft ocftcfgsdir = $(datadir)/$(PACKAGE_NAME)/ocft/configs ocftcfgs_DATA = apache \ IPaddr2 \ + IPv6addr \ Filesystem \ LVM \ IPsrcaddr \ MailTo \ mysql \ pgsql \ db2 \ drbd.linbit \ nfsserver \ portblock \ iscsi \ named \ postfix \ + Xinetd \ SendArp ocftdir = $(datadir)/$(PACKAGE_NAME)/ocft ocft_DATA = README \ README.zh_CN \ caselib diff --git a/tools/ocft/Xinetd b/tools/ocft/Xinetd new file mode 100644 index 000000000..ba9c85c7f --- /dev/null +++ b/tools/ocft/Xinetd @@ -0,0 +1,56 @@ +# Xinetd + +CONFIG + Agent Xinetd + AgentRoot /usr/lib/ocf/resource.d/heartbeat + InstallPackage xinetd + +CASE-BLOCK required_args + Env OCF_RESKEY_service=discard + +CASE-BLOCK default_status + AgentRun stop + +CASE-BLOCK prepare + Include required_args + Include default_status + +CASE "check base env" + Include prepare + AgentRun start OCF_SUCCESS + +CASE "check base env: unset 'OCF_RESKEY_protocol'" + Include prepare + Unenv OCF_RESKEY_service + AgentRun start OCF_ERR_CONFIGURED + +CASE "normal start" + Include prepare + AgentRun start OCF_SUCCESS + +CASE "normal stop" + Include prepare + AgentRun start + AgentRun stop OCF_SUCCESS + +CASE "double start" + Include prepare + AgentRun start + AgentRun start OCF_SUCCESS + +CASE "double stop" + Include prepare + AgentRun stop OCF_SUCCESS + +CASE "monitor with running" + Include prepare + AgentRun start + AgentRun monitor OCF_SUCCESS + +CASE "monitor with not running" + Include prepare + AgentRun monitor OCF_NOT_RUNNING + +CASE "unimplemented command" + Include prepare + AgentRun no_cmd OCF_ERR_UNIMPLEMENTED diff --git a/tools/ocft/ocft.in b/tools/ocft/ocft.in index 4be27b9ce..a3da5910a 100644 --- a/tools/ocft/ocft.in +++ b/tools/ocft/ocft.in @@ -1,796 +1,788 @@ #!/bin/bash # Copyright (c) 2010-2011 Novell Inc, John Shi # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. die() { local str str="$1" echo "ERROR: $str" >&2 exit 1 } warn() { local str str="$1" echo "WARNING: $str" >&2 } parse_die() { local str str="$1" die "${agent}: line ${line_num}: ${str}" } # add quotes to string for Here Documents add_quotes() { local typ str a b typ="$1" str="$2" case "$typ" in 1) a=\'; b=\";; 2) a=\"; b=\';; esac echo "$str" | sed "s/$a/$a$b$a$b$a/g; 1 s/^/$a/; $ s/$/$a/" } # split strings explode() { local str str="$1" echo "$str" | awk -F'"' '{ if (NF > 0 && NF%2 == 0) exit(1); for (i=1; i<=NF; i++) { if (i%2 == 0) print $i; else { len = split($i, str, /[ \t]+/); for (j=1; j<=len; j++) { sb = sub(/#.*/, "", str[j]); if (str[j] != "") print str[j]; if (sb) exit(0); } } } }' } # phase 1: parse the string to 'command' and 'argument collection'. line2trunk() { trunk[0]="${line%%[[:blank:]]*}" trunk[1]="${line#*[[:blank:]]}" } # phase 2: split the argument collection. trunk2branch() { local IFS # Some of statements need one parameter at least. if [ "$line" = "${trunk[0]}" ]; then parse_die "missing parameter." fi IFS=$'\n' branch=($(explode "${trunk[1]}")) if [ $? -ne 0 ]; then parse_die "missing '\"'." fi } preparse_cfg() { local agent line trunk branch macro num host agent="$1" if [ ! -r "$opt_cfgsdir/$agent" ]; then die "${agent}: configuration file not found." fi line_num=0 while read -r line; do let line_num++ num=" $line_num" case "$line" in ""|\#*) continue;; esac line2trunk case "${trunk[0]}" in CASE-BLOCK) trunk2branch macro="$CASES_DIR/${agent}_macro.${branch[0]}" continue ;; Include|Include@*) host=$(echo "${trunk[0]}" | awk -F@ '{print $2}') trunk2branch if [ ! -r "$CASES_DIR/${agent}_macro.${branch[0]}" ]; then parse_die "Macro '${branch[0]}' not found." fi if [ -n "$host" ]; then line="$(sed -e 's/^\([^[:blank:]]*\)@[^[:blank:]]*/\1/' -e "s/^[^[:blank:]]*/&@$host/" "$CASES_DIR/${agent}_macro.${branch[0]}")" else line="$(<"$CASES_DIR/${agent}_macro.${branch[0]}")" fi num= ;; *[!A-Z-]*) : ;; *) macro= ;; esac if [ -n "$macro" ]; then echo "$line$num" >>"$macro" else echo "$line$num" >>"$CASES_DIR/${agent}_preparse" fi done <"$opt_cfgsdir/$agent" } case_finish() { local host if [ -n "$sh" ]; then cat >>$sh <>$sh done echo "quit 0" >>$sh fi atexit_num=0 hosts= sh= } agent_finish() { rm -f $CASES_DIR/${agent}_preparse rm -f $CASES_DIR/${agent}_macro.* rm -f $CASES_DIR/${agent}_setup rm -f $CASES_DIR/${agent}_cleanup rm -f $CASES_DIR/${agent}_var rm -f $CASES_DIR/${agent}_hosts cfg_agent= cfg_agent_root= cfg_install_package=() cfg_hang_timeout=20 } parse_cfg() { local agents i line stat sh trunk branch atexit_num host hosts if [ $# -eq 0 ]; then agents=($opt_cfgsdir/*) else agents=("$@") fi for agent in "${agents[@]}"; do rm -f $CASES_DIR/*_${agent}.sh agent_finish i=0 agent="$(basename "$agent")" echo "Making '$agent': " preparse_cfg "$agent" while read -r line; do line_num="${line##* }" line="${line% *}" line2trunk # state switch case "${trunk[0]}" in CONFIG) case_finish stat=1 continue ;; VARIABLE) case_finish stat=2 continue ;; SETUP-AGENT) case_finish stat=3 continue ;; CLEANUP-AGENT) case_finish stat=4 continue ;; CASE) case_finish trunk2branch echo " - case ${i}: ${branch[0]}" sh="$CASES_DIR/${i}_${agent}.sh" cat >$sh <>$CASES_DIR/${agent}_var ;; 3) echo "$line" >>$CASES_DIR/${agent}_setup ;; 4) echo "$line" >>$CASES_DIR/${agent}_cleanup ;; 5) host=$(echo ${trunk[0]} | awk -F@ '{print $2}') if [ -n "$host" ]; then if ! echo "$hosts" | grep -q "$host"; then echo "$host" >>$CASES_DIR/${agent}_hosts hosts=$hosts$'\n'$host cat >>$sh <>$sh if [ -n "$host" ]; then echo "backbash $host <<'CMD'" >>$sh fi case "${trunk[0]}" in Env|Env@*) cat >>$sh <>$sh <>$sh <>$sh <>$sh <>$sh <>$sh <>$sh <>$sh <>$sh fi ;; *) parse_die "unimplemented statement: ${trunk[0]}" ;; esac done <$CASES_DIR/${agent}_preparse if [ -r "$CASES_DIR/${agent}_setup" ]; then cat >$CASES_DIR/setup_${agent}.sh <>$CASES_DIR/setup_${agent}.sh <>$CASES_DIR/setup_${agent}.sh <$CASES_DIR/cleanup_${agent}.sh <>$CASES_DIR/cleanup_${agent}.sh <>$CASES_DIR/cleanup_${agent}.sh </dev/null 2>&1; then die "cases directory not found." fi export __OCFT__VERBOSE=$opt_verbose if [ $# -eq 0 ]; then agents=($(ls -1 *.sh 2>/dev/null | sed 's/.*_\([^_]*\)\.sh$/\1/' | sort | uniq)) else agents=("$@") fi for shs in "${agents[@]}"; do testsh="setup_${shs}.sh $(ls -1 [0-9]*_${shs}.sh 2>/dev/null | sort -n) cleanup_${shs}.sh" for sh in $testsh; do if [ -r "$sh" ]; then ./$sh ret=$? if [ $ret -eq 3 ]; then die "core function failed, break all tests." fi if [ $ret -eq 2 ]; then warn "core function failed, break all tests of '$shs'." break fi fi done done 2>&1 | while read -r line; do echo "$line" echo "$(date '+%F %T'): $line" | cat -A | sed -r 's/\^\[\[[0-9]+m|\^I|.$//g' >>ocft.log done } delete_cases() { local shs if [ $# -eq 0 ]; then rm -f $CASES_DIR/*.sh else for shs in "$@"; do rm -f $CASES_DIR/*_${shs}.sh done fi } usage() { cat <