diff --git a/configure.ac b/configure.ac index 919b1b25b..f624e8e6d 100644 --- a/configure.ac +++ b/configure.ac @@ -1,916 +1,917 @@ dnl dnl autoconf for Agents dnl dnl License: GNU General Public License (GPL) dnl =============================================== dnl Bootstrap dnl =============================================== AC_PREREQ(2.63) dnl Suggested structure: dnl information on the package dnl checks for programs dnl checks for libraries dnl checks for header files dnl checks for types dnl checks for structures dnl checks for compiler characteristics dnl checks for library functions dnl checks for system services AC_INIT([resource-agents], m4_esyscmd([make/git-version-gen .tarball-version]), [to_be_defined@foobar.org]) AC_USE_SYSTEM_EXTENSIONS CRM_DTD_VERSION="1.0" PKG_FEATURES="" AC_CONFIG_AUX_DIR(.) AC_CANONICAL_HOST dnl Where #defines go (e.g. `AC_CHECK_HEADERS' below) dnl dnl Internal header: include/config.h dnl - Contains ALL defines dnl - include/config.h.in is generated automatically by autoheader dnl - NOT to be included in any header files except lha_internal.h dnl (which is also not to be included in any other header files) dnl dnl External header: include/agent_config.h dnl - Contains a subset of defines checked here dnl - Manually edit include/agent_config.h.in to have configure include new defines dnl - Should not include HAVE_* defines dnl - Safe to include anywhere AM_CONFIG_HEADER(include/config.h include/agent_config.h) ALL_LINGUAS="en fr" AC_ARG_WITH(version, [ --with-version=version Override package version (if you're a packager needing to pretend) ], [ PACKAGE_VERSION="$withval" ]) AC_ARG_WITH(pkg-name, [ --with-pkg-name=name Override package name (if you're a packager needing to pretend) ], [ PACKAGE_NAME="$withval" ]) dnl dnl AM_INIT_AUTOMAKE([1.11.1 foreign dist-bzip2 dist-xz]) dnl AM_INIT_AUTOMAKE([1.10.1 foreign dist-bzip2]) AC_DEFINE_UNQUOTED(AGENTS_VERSION, "$PACKAGE_VERSION", Current agents version) CC_IN_CONFIGURE=yes export 
CC_IN_CONFIGURE LDD=ldd dnl ======================================================================== dnl Compiler characteristics dnl ======================================================================== # check stolen from gnulib/m4/gnu-make.m4 if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then AC_MSG_ERROR([you don't seem to have GNU make; it is required]) fi AC_PROG_CC dnl Can force other with environment variable "CC". AM_PROG_CC_C_O AC_PROG_CC_STDC AC_PROG_AWK AC_PROG_LN_S AC_PROG_INSTALL AC_PROG_MAKE_SET AC_C_STRINGIZE AC_C_INLINE AC_TYPE_SIZE_T AC_TYPE_SSIZE_T AC_TYPE_UID_T AC_TYPE_UINT16_T AC_TYPE_UINT8_T AC_TYPE_UINT32_T AC_CHECK_SIZEOF(char) AC_CHECK_SIZEOF(short) AC_CHECK_SIZEOF(int) AC_CHECK_SIZEOF(long) AC_CHECK_SIZEOF(long long) AC_STRUCT_TIMEZONE dnl =============================================== dnl Helpers dnl =============================================== cc_supports_flag() { local CFLAGS="$@" AC_MSG_CHECKING(whether $CC supports "$@") AC_COMPILE_IFELSE([int main(){return 0;}] ,[RC=0; AC_MSG_RESULT(yes)],[RC=1; AC_MSG_RESULT(no)]) return $RC } extract_header_define() { AC_MSG_CHECKING(for $2 in $1) Cfile=/tmp/extract_define.$2.${$} printf "#include \n" > ${Cfile}.c printf "#include <%s>\n" $1 >> ${Cfile}.c printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" $2 >> ${Cfile}.c $CC $CFLAGS ${Cfile}.c -o ${Cfile} value=`${Cfile}` AC_MSG_RESULT($value) printf $value rm -f ${Cfile}.c ${Cfile} } dnl =============================================== dnl Configure Options dnl =============================================== dnl Some systems, like Solaris require a custom package name AC_ARG_WITH(pkgname, [ --with-pkgname=name name for pkg (typically for Solaris) ], [ PKGNAME="$withval" ], [ PKGNAME="LXHAhb" ], ) AC_SUBST(PKGNAME) AC_ARG_ENABLE([ansi], [ --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers. 
[default=yes]]) AC_ARG_ENABLE([fatal-warnings], [ --enable-fatal-warnings very pedantic and fatal warnings for gcc [default=yes]]) INITDIR="" AC_ARG_WITH(initdir, [ --with-initdir=DIR directory for init (rc) scripts [${INITDIR}]], [ INITDIR="$withval" ]) OCF_ROOT_DIR="/usr/lib/ocf" AC_ARG_WITH(ocf-root, [ --with-ocf-root=DIR directory for OCF scripts [${OCF_ROOT_DIR}]], [ if test x"$withval" = xprefix; then OCF_ROOT_DIR=${prefix}; else OCF_ROOT_DIR="$withval"; fi ]) HA_RSCTMPDIR=${localstatedir}/run/resource-agents AC_ARG_WITH(rsctmpdir, [ --with-rsctmpdir=DIR directory for resource agents state files [${HA_RSCTMPDIR}]], [ if test x"$withval" = xprefix; then HA_RSCTMPDIR=${prefix}; else HA_RSCTMPDIR="$withval"; fi ]) AC_ARG_ENABLE([libnet], [ --enable-libnet Use libnet for ARP based funcationality, [default=try]], [enable_libnet="$enableval"], [enable_libnet=try]) BUILD_RGMANAGER=0 BUILD_LINUX_HA=0 RASSET=all AC_ARG_WITH(ras-set, [ --with-ras-set=SET build/install only linux-ha or rgmanager resource-agents [default: all]], [ RASSET="$withval" ]) if test x$RASSET = xyes || test x$RASSET = xall ; then BUILD_RGMANAGER=1 BUILD_LINUX_HA=1 fi if test x$RASSET = xlinux-ha; then BUILD_LINUX_HA=1 fi if test x$RASSET = xrgmanager; then BUILD_RGMANAGER=1 fi if test $BUILD_LINUX_HA -eq 0 && test $BUILD_RGMANAGER -eq 0; then AC_MSG_ERROR([Are you really sure you want this package?]) exit 1 fi AM_CONDITIONAL(BUILD_LINUX_HA, test $BUILD_LINUX_HA -eq 1) AM_CONDITIONAL(BUILD_RGMANAGER, test $BUILD_RGMANAGER -eq 1) dnl =============================================== dnl General Processing dnl =============================================== INIT_EXT="" echo Our Host OS: $host_os/$host AC_MSG_NOTICE(Sanitizing prefix: ${prefix}) case $prefix in NONE) prefix=/usr;; esac AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix}) case $exec_prefix in dnl For consistency with Heartbeat, map NONE->$prefix NONE) exec_prefix=$prefix;; prefix) exec_prefix=$prefix;; esac AC_MSG_NOTICE(Sanitizing 
INITDIR: ${INITDIR}) case $INITDIR in prefix) INITDIR=$prefix;; "") AC_MSG_CHECKING(which init (rc) directory to use) for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \ /usr/local/etc/rc.d /etc/rc.d do if test -d $initdir then INITDIR=$initdir break fi done if test -z $INITDIR then INITDIR=${sysconfdir}/init.d fi AC_MSG_RESULT($INITDIR);; esac AC_SUBST(INITDIR) if test "${prefix}" = "/usr"; then INITDIRPREFIX="$INITDIR" else INITDIRPREFIX="${prefix}/$INITDIR" fi AC_SUBST(INITDIRPREFIX) AC_MSG_NOTICE(Sanitizing libdir: ${libdir}) case $libdir in dnl For consistency with Heartbeat, map NONE->$prefix *prefix*|NONE) AC_MSG_CHECKING(which lib directory to use) for aDir in lib64 lib do trydir="${exec_prefix}/${aDir}" if test -d ${trydir} then libdir=${trydir} break fi done AC_MSG_RESULT($libdir); ;; esac dnl Expand autoconf variables so that we dont end up with '${prefix}' dnl in #defines and python scripts dnl NOTE: Autoconf deliberately leaves them unexpanded to allow dnl make exec_prefix=/foo install dnl No longer being able to do this seems like no great loss to me... 
dnl Expand every standard installation directory in place.  The inner
dnl "eval echo" resolves references such as ${prefix} that are still
dnl embedded in a value; the outer eval stores the expanded text back
dnl into the same variable.  prefix and exec_prefix are handled first
dnl because the later variables may refer to them.
eval prefix="`eval echo ${prefix}`"
eval exec_prefix="`eval echo ${exec_prefix}`"
eval bindir="`eval echo ${bindir}`"
eval sbindir="`eval echo ${sbindir}`"
eval libexecdir="`eval echo ${libexecdir}`"
eval datadir="`eval echo ${datadir}`"
eval sysconfdir="`eval echo ${sysconfdir}`"
eval sharedstatedir="`eval echo ${sharedstatedir}`"
eval localstatedir="`eval echo ${localstatedir}`"
eval libdir="`eval echo ${libdir}`"
eval includedir="`eval echo ${includedir}`"
eval oldincludedir="`eval echo ${oldincludedir}`"
eval infodir="`eval echo ${infodir}`"
eval mandir="`eval echo ${mandir}`"

dnl docdir is a recent addition to autotools
eval docdir="`eval echo ${docdir}`"
dnl Fall back to <datadir>/doc when docdir expanded to nothing.
if test "x$docdir" = "x"; then
   docdir="`eval echo ${datadir}/doc`"
fi
AC_SUBST(docdir)

dnl Home-grown variables
dnl Re-evaluate INITDIR once so that any variable reference in it is expanded.
eval INITDIR="${INITDIR}"

dnl Sanity pass: warn (do not fail) about every expanded directory that
dnl does not exist on the machine running configure.
for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \
    sharedstatedir localstatedir libdir includedir oldincludedir infodir \
    mandir INITDIR docdir
do
  dnl Indirect lookup: fetch the value of the variable whose name is in $j.
  dirname=`eval echo '${'${j}'}'`
  if
    test ! -d "$dirname"
  then
    AC_MSG_WARN([$j directory ($dirname) does not exist!])
  fi
done

dnl This OS-based decision-making is poor autotools practice;
dnl feature-based mechanisms are strongly preferred.
dnl
dnl So keep this section to a bare minimum; regard as a "necessary evil".
dnl Default options for the reboot and poweroff commands.  The host_os
dnl switch below replaces them on platforms that need different ones.
REBOOT_OPTIONS="-f"
POWEROFF_OPTIONS="-f"

case "$host_os" in
*bsd*)
	dnl Append to LIBS instead of assigning it, so that LIBS supplied by
	dnl the user (or set earlier in configure) is not thrown away.  This
	dnl matches how CPPFLAGS is extended here and how the darwin arm
	dnl below extends LIBS.
	LIBS="$LIBS -L/usr/local/lib"
	CPPFLAGS="$CPPFLAGS -I/usr/local/include"
	INIT_EXT=".sh"
	;;
*solaris*)
	REBOOT_OPTIONS="-n"
	POWEROFF_OPTIONS="-n"
	;;
*linux*)
	AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform)
	POWEROFF_OPTIONS="-nf"
	REBOOT_OPTIONS="-nf"
	;;
darwin*)
	AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform)
	LIBS="$LIBS -L${prefix}/lib"
	CFLAGS="$CFLAGS -I${prefix}/include"
	;;
esac

AC_SUBST(INIT_EXT)
AC_DEFINE_UNQUOTED(HA_LOG_FACILITY, LOG_DAEMON, Default logging facility)

AC_MSG_NOTICE(Host CPU: $host_cpu)

dnl On 64-bit PowerPC add -m64 for GCC, unless CFLAGS already mentions
dnl powerpc64.
case "$host_cpu" in
  ppc64|powerpc64)
    case $CFLAGS in
      *powerpc64*)
	;;
      *)
	if test "$GCC" = yes; then
	  CFLAGS="$CFLAGS -m64"
	fi
	;;
    esac
esac

dnl Choose the printf conversion used for uint64_t values.  s390x gets
dnl its own arm because its name does not match the *64* pattern.
AC_MSG_CHECKING(which format is needed to print uint64_t)
case "$host_cpu" in
  s390x) U64T="%lu";;
  *64*)  U64T="%lu";;
  *)     U64T="%llu";;
esac
AC_MSG_RESULT($U64T)
AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t)

dnl Variables needed for substitution
dnl When heartbeat/glue_config.h is available, take OCF_ROOT_DIR from it
dnl using the extract_header_define shell helper defined earlier in this
dnl file; otherwise libnet support is switched off.
AC_CHECK_HEADERS(heartbeat/glue_config.h)
if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then
  OCF_ROOT_DIR=`extract_header_define heartbeat/glue_config.h OCF_ROOT_DIR`
else
  enable_libnet=no
fi

AC_DEFINE_UNQUOTED(OCF_ROOT_DIR,"$OCF_ROOT_DIR", OCF root directory - specified by the OCF standard)
AC_SUBST(OCF_ROOT_DIR)

GLUE_STATE_DIR=${localstatedir}/run
AC_DEFINE_UNQUOTED(GLUE_STATE_DIR,"$GLUE_STATE_DIR", Where to keep state files and sockets)
AC_SUBST(GLUE_STATE_DIR)

dnl HA_VARRUNDIR is the older name for the same directory as GLUE_STATE_DIR.
AC_DEFINE_UNQUOTED(HA_VARRUNDIR,"$GLUE_STATE_DIR", Where Heartbeat keeps state files and sockets - old name)
HA_VARRUNDIR="$GLUE_STATE_DIR"
AC_SUBST(HA_VARRUNDIR)

# Expand $prefix
eval HA_RSCTMPDIR="`eval echo ${HA_RSCTMPDIR}`"
AC_DEFINE_UNQUOTED(HA_RSCTMPDIR,"$HA_RSCTMPDIR", Where Resouce agents keep state files)
AC_SUBST(HA_RSCTMPDIR)

dnl Eventually move out of the heartbeat dir tree and create symlinks when needed
HA_VARLIBHBDIR=${localstatedir}/lib/heartbeat
AC_DEFINE_UNQUOTED(HA_VARLIBHBDIR,"$HA_VARLIBHBDIR", Whatever this used to mean) AC_SUBST(HA_VARLIBHBDIR) OCF_RA_DIR="${OCF_ROOT_DIR}/resource.d/" AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs) AC_SUBST(OCF_RA_DIR) if test "${prefix}" = "/usr"; then OCF_RA_DIR_PREFIX="$OCF_RA_DIR" else OCF_RA_DIR_PREFIX="${prefix}/$OCF_RA_DIR" fi AC_SUBST(OCF_RA_DIR_PREFIX) OCF_LIB_DIR="${OCF_ROOT_DIR}/lib/" AC_DEFINE_UNQUOTED(OCF_LIB_DIR,"$OCF_LIB_DIR", Location for shared code for OCF RAs) AC_SUBST(OCF_LIB_DIR) if test "${prefix}" = "/usr"; then OCF_LIB_DIR_PREFIX="$OCF_LIB_DIR" else OCF_LIB_DIR_PREFIX="${prefix}/$OCF_LIB_DIR" fi AC_SUBST(OCF_LIB_DIR_PREFIX) dnl =============================================== dnl rgmanager ras bits dnl =============================================== LOGDIR=${localstatedir}/log/cluster CLUSTERDATA=${datadir}/cluster AC_SUBST([LOGDIR]) AC_SUBST([CLUSTERDATA]) dnl =============================================== dnl Program Paths dnl =============================================== PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin" export PATH AM_PATH_PYTHON AC_CHECK_PROGS(MAKE, gmake make) AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh) AC_PATH_PROGS(SCP, scp, /usr/bin/scp) AC_PATH_PROGS(TAR, tar) AC_PATH_PROGS(MD5, md5) AC_PATH_PROGS(TEST, test) AC_PATH_PROGS(PING, ping, /bin/ping) AC_PATH_PROGS(IFCONFIG, ifconfig, /sbin/ifconfig) AC_PATH_PROGS(MAILCMD, mailx mail, mail) AC_PATH_PROGS(EGREP, egrep) AC_PATH_PROGS(PKGCONFIG, pkg-config) AC_SUBST(MAILCMD) AC_SUBST(EGREP) AC_SUBST(SHELL) AC_SUBST(PING) AC_SUBST(TEST) AC_PATH_PROGS(ROUTE, route) AC_DEFINE_UNQUOTED(ROUTE, "$ROUTE", path to route command) AC_MSG_CHECKING(ifconfig option to list interfaces) for IFCONFIG_A_OPT in "-A" "-a" "" do $IFCONFIG $IFCONFIG_A_OPT > /dev/null 2>&1 if test "$?" 
= 0 then AC_DEFINE_UNQUOTED(IFCONFIG_A_OPT, "$IFCONFIG_A_OPT", option for ifconfig command) AC_MSG_RESULT($IFCONFIG_A_OPT) break fi done AC_SUBST(IFCONFIG_A_OPT) if test x"${MAKE}" = x""; then AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE}) fi dnl =============================================== dnl Libraries dnl =============================================== AC_CHECK_LIB(socket, socket) AC_CHECK_LIB(gnugetopt, getopt_long) dnl if available if test x"${PKGCONFIG}" = x""; then AC_MSG_ERROR(You need pkgconfig installed in order to build ${PACKAGE}) fi if test "x${enable_thread_safe}" = "xyes"; then GPKGNAME="gthread-2.0" else GPKGNAME="glib-2.0" fi if $PKGCONFIG --exists $GPKGNAME then GLIBCONFIG="$PKGCONFIG $GPKGNAME" else set -x echo PKG_CONFIG_PATH=$PKG_CONFIG_PATH $PKGCONFIG --exists $GPKGNAME; echo $? $PKGCONFIG --cflags $GPKGNAME; echo $? $PKGCONFIG $GPKGNAME; echo $? set +x AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE}) fi AC_MSG_RESULT(using $GLIBCONFIG) if test "X$GLIBCONFIG" != X; then AC_MSG_CHECKING(for special glib includes: ) GLIBHEAD=`$GLIBCONFIG --cflags` AC_MSG_RESULT($GLIBHEAD) CPPFLAGS="$CPPFLAGS $GLIBHEAD" AC_MSG_CHECKING(for glib library flags) GLIBLIB=`$GLIBCONFIG --libs` AC_MSG_RESULT($GLIBLIB) LIBS="$LIBS $GLIBLIB" fi dnl ======================================================================== dnl Headers dnl ======================================================================== AC_HEADER_STDC AC_CHECK_HEADERS(sys/socket.h) AC_CHECK_HEADERS(sys/sockio.h) AC_CHECK_HEADERS([arpa/inet.h]) AC_CHECK_HEADERS([fcntl.h]) AC_CHECK_HEADERS([limits.h]) AC_CHECK_HEADERS([malloc.h]) AC_CHECK_HEADERS([netdb.h]) AC_CHECK_HEADERS([netinet/in.h]) AC_CHECK_HEADERS([sys/file.h]) AC_CHECK_HEADERS([sys/ioctl.h]) AC_CHECK_HEADERS([sys/param.h]) AC_CHECK_HEADERS([sys/time.h]) AC_CHECK_HEADERS([syslog.h]) dnl ======================================================================== dnl Functions dnl 
======================================================================== AC_FUNC_FORK AC_FUNC_STRNLEN AC_CHECK_FUNCS([alarm gettimeofday inet_ntoa memset mkdir socket uname]) AC_CHECK_FUNCS([strcasecmp strchr strdup strerror strrchr strspn strstr strtol strtoul]) dnl 'reboot()' system call: one argument (e.g. Linux) or two (e.g. Solaris)? dnl AC_CACHE_CHECK([number of arguments in reboot system call], ac_cv_REBOOT_ARGS,[ AC_TRY_COMPILE( [#include ], [(void)reboot(0);], ac_cv_REBOOT_ARGS=1, [AC_TRY_COMPILE( [#include ], [(void)reboot(0,(void *)0);], ac_cv_REBOOT_ARGS=2, ac_cv_REBOOT_ARGS=0 )], ac_cv_REBOOT_ARGS=0 ) ] ) dnl Argument count of 0 suggests no known 'reboot()' call. if test "$ac_cv_REBOOT_ARGS" -ge "1"; then AC_DEFINE_UNQUOTED(REBOOT_ARGS,$ac_cv_REBOOT_ARGS,[number of arguments for reboot system call]) fi AC_PATH_PROGS(REBOOT, reboot, /sbin/reboot) AC_SUBST(REBOOT) AC_SUBST(REBOOT_OPTIONS) AC_DEFINE_UNQUOTED(REBOOT, "$REBOOT", path to the reboot command) AC_DEFINE_UNQUOTED(REBOOT_OPTIONS, "$REBOOT_OPTIONS", reboot options) AC_PATH_PROGS(POWEROFF_CMD, poweroff, /sbin/poweroff) AC_SUBST(POWEROFF_CMD) AC_SUBST(POWEROFF_OPTIONS) AC_DEFINE_UNQUOTED(POWEROFF_CMD, "$POWEROFF_CMD", path to the poweroff command) AC_DEFINE_UNQUOTED(POWEROFF_OPTIONS, "$POWEROFF_OPTIONS", poweroff options) AC_PATH_PROGS(XSLTPROC, xsltproc) AM_CONDITIONAL(BUILD_DOC, test "x$XSLTPROC" != "x" ) if test "x$XSLTPROC" = "x"; then AC_MSG_WARN([xsltproc not installed, unable to (re-)build manual pages]) fi AC_SUBST(XSLTPROC) AC_PATH_PROGS(POD2MAN, pod2man) AM_CONDITIONAL(BUILD_POD_DOC, test "x$POD2MAN" != "x" ) if test "x$POD2MAN" = "x"; then AC_MSG_WARN([pod2man not installed, unable to (re-)build ldirector manual page]) fi AC_SUBST(POD2MAN) dnl ======================================================================== dnl Functions dnl ======================================================================== AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT, 1, [Have getopt function])) dnl 
======================================================================== dnl sfex dnl ======================================================================== build_sfex=no case $host_os in *Linux*|*linux*) if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then build_sfex=yes fi ;; esac AM_CONDITIONAL(BUILD_SFEX, test "$build_sfex" = "yes" ) dnl ======================================================================== dnl tickle (needs port to BSD platforms) dnl ======================================================================== AC_CHECK_MEMBERS([struct iphdr.saddr],,,[[#include ]]) AM_CONDITIONAL(BUILD_TICKLE, test "$ac_cv_member_struct_iphdr_saddr" = "yes" ) dnl ======================================================================== dnl libnet dnl ======================================================================== libnet="" libnet_version="none" LIBNETLIBS="" LIBNETDEFINES="" AC_MSG_CHECKING(if libnet is required) libnet_fatal=$enable_libnet case $enable_libnet in no) ;; yes|libnet10|libnet11|10|11) libnet_fatal=yes;; try) case $host_os in *Linux*|*linux*) libnet_fatal=no;; *) libnet_fatal=yes;; dnl legacy behavior esac ;; *) libnet_fatal=yes; enable_libnet=try;; esac AC_MSG_RESULT($libnet_fatal) if test "x$enable_libnet" != "xno"; then AC_PATH_PROGS(LIBNETCONFIG, libnet-config) AC_CHECK_LIB(nsl, t_open) dnl -lnsl AC_CHECK_LIB(socket, socket) dnl -lsocket AC_CHECK_LIB(net, libnet_get_hwaddr, LIBNETLIBS=" -lnet", []) fi AC_MSG_CHECKING(for libnet) if test "x$LIBNETLIBS" != "x" -o "x$enable_libnet" = "xlibnet11"; then LIBNETDEFINES="" if test "$ac_cv_lib_nsl_t_open" = yes; then LIBNETLIBS="-lnsl $LIBNETLIBS" fi if test "$ac_cv_lib_socket_socket" = yes; then LIBNETLIBS="-lsocket $LIBNETLIBS" fi libnet=net libnet_version="libnet1.1" fi if test "x$enable_libnet" = "xtry" -o "x$enable_libnet" = "xlibnet10"; then if test "x$LIBNETLIBS" = x -a "x${LIBNETCONFIG}" != "x" ; then LIBNETDEFINES="`$LIBNETCONFIG --defines` `$LIBNETCONFIG --cflags`"; 
LIBNETLIBS="`$LIBNETCONFIG --libs`"; libnet_version="libnet1.0 (old)" case $LIBNETLIBS in *-l*) libnet=`echo $LIBNETLIBS | sed 's%.*-l%%'`;; *) libnet_version=none;; esac CPPFLAGS="$CPPFLAGS $LIBNETDEFINES" AC_CHECK_HEADERS(libnet.h) if test "$ac_cv_header_libnet_h" = no; then libnet_version=none fi fi fi AC_MSG_RESULT(found $libnet_version) if test "$libnet_version" = none; then LIBNETLIBS="" LIBNETDEFINES="" if test $libnet_fatal = yes; then AC_MSG_ERROR(libnet not found) fi else AC_CHECK_LIB($libnet,libnet_init, [new_libnet=yes; AC_DEFINE(HAVE_LIBNET_1_1_API, 1, Libnet 1.1 API)], [new_libnet=no; AC_DEFINE(HAVE_LIBNET_1_0_API, 1, Libnet 1.0 API)],$LIBNETLIBS) AC_SUBST(LIBNETLIBS) fi if test "$new_libnet" = yes; then AC_MSG_CHECKING(for libnet API 1.1.4: ) save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -fgnu89-inline -Wall -Werror" AC_COMPILE_IFELSE([#include int main(){libnet_t *l=NULL; libnet_pblock_record_ip_offset(l, l->total_size); return(0); }], [AC_MSG_RESULT(no)], [AC_DEFINE(HAVE_LIBNET_1_1_4_API, 1, Libnet 1.1.4 API) AC_MSG_RESULT(yes)]) CFLAGS="$save_CFLAGS" fi sendarp_linux=0 case $host_os in *Linux*|*linux*) sendarp_linux=1;; esac AC_SUBST(LIBNETLIBS) AC_SUBST(LIBNETDEFINES) AM_CONDITIONAL(SENDARP_LINUX, test $sendarp_linux = 1 ) AM_CONDITIONAL(USE_LIBNET, test "x$libnet_version" != "xnone" ) dnl ************************************************************************ dnl * Check for netinet/icmp6.h to enable the IPv6addr resource agent AC_CHECK_HEADERS(netinet/icmp6.h,[],[],[#include ]) AM_CONDITIONAL(USE_IPV6ADDR, test "$ac_cv_header_netinet_icmp6_h" = yes ) dnl ======================================================================== dnl Compiler flags dnl ======================================================================== dnl Make sure that CFLAGS is not exported. If the user did dnl not have CFLAGS in their environment then this should have dnl no effect. 
dnl However if CFLAGS was exported from the user's
dnl environment, then the new CFLAGS will also be exported
dnl to sub processes.

CC_ERRORS=""
CC_EXTRAS=""

dnl Only filled in when compiling with GCC.  It starts out empty so that
dnl the GCC_4 conditional further down is still defined (as false) for
dnl other compilers and is not influenced by the caller's environment.
GCC_MAJOR=""

dnl Un-export CFLAGS if the caller exported it, keeping its value.
if export | fgrep " CFLAGS=" > /dev/null; then
	export -n CFLAGS || true # We don't want to bomb out if this fails
	SAVED_CFLAGS="$CFLAGS"
	unset CFLAGS
	CFLAGS="$SAVED_CFLAGS"
	unset SAVED_CFLAGS
fi

if test "$GCC" != yes; then
	dnl Non-GCC compiler: plain debug info, and never use -Werror.
	CFLAGS="$CFLAGS -g"
	enable_fatal_warnings=no
else
	CFLAGS="$CFLAGS -ggdb3"

	# We had to eliminate -Wnested-externs because of libtool changes
	# Also remove -Waggregate-return because we use one libnet
	# call which returns a struct
	EXTRA_FLAGS="-fgnu89-inline
		-fstack-protector-all
		-Wall
		-Wbad-function-cast
		-Wcast-qual
		-Wcast-align
		-Wdeclaration-after-statement
		-Wendif-labels
		-Wfloat-equal
		-Wformat=2
		-Wformat-security
		-Wformat-nonliteral
		-Winline
		-Wmissing-prototypes
		-Wmissing-declarations
		-Wmissing-format-attribute
		-Wnested-externs
		-Wno-long-long
		-Wno-strict-aliasing
		-Wpointer-arith
		-Wstrict-prototypes
		-Wunsigned-char
		-Wwrite-strings"

	# Additional warnings it might be nice to enable one day
	#		-Wshadow
	#		-Wunreachable-code

	dnl Keep only the candidate flags that the compiler accepts.
	for j in $EXTRA_FLAGS
	do
	  if
	    cc_supports_flag $j
	  then
	    CC_EXTRAS="$CC_EXTRAS $j"
	  fi
	done

	dnl In lib/ais/Makefile.am there's a gcc option available as of v4.x
	GCC_MAJOR=`gcc -v 2>&1 | awk 'END{print $3}' | sed 's/[[.]].*//'`

	dnl System specific options
	case "$host_os" in
	*linux*|*bsd*)
		if test "${enable_fatal_warnings}" = "unknown"; then
			enable_fatal_warnings=yes
		fi
		;;
	esac

	dnl Fatal warnings stay on unless the user disabled them or the
	dnl compiler rejects -Werror.
	if test "x${enable_fatal_warnings}" != xno && cc_supports_flag -Werror ; then
		enable_fatal_warnings=yes
	else
		enable_fatal_warnings=no
	fi

	if test "x${enable_ansi}" != xno && cc_supports_flag -std=iso9899:199409 ; then
		AC_MSG_NOTICE(Enabling ANSI Compatibility)
		CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY"
	fi

	AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS})
fi

dnl AM_CONDITIONAL has to be invoked on every configure run.  It used to
dnl live inside the GCC-only branch above, which left GCC_4 undefined for
dnl any other compiler and made configure abort with "conditional GCC_4
dnl was never defined".  GCC_MAJOR is empty unless the GCC branch ran, so
dnl the result for GCC builds is the same as before.
AM_CONDITIONAL(GCC_4, test "${GCC_MAJOR}" = 4)

CFLAGS="$CFLAGS $CC_EXTRAS"

NON_FATAL_CFLAGS="$CFLAGS"
AC_SUBST(NON_FATAL_CFLAGS)

dnl
dnl We
reset CFLAGS to include our warnings *after* all function dnl checking goes on, so that our warning flags don't keep the dnl AC_*FUNCS() calls above from working. In particular, -Werror will dnl *always* cause us troubles if we set it before here. dnl dnl if test "x${enable_fatal_warnings}" = xyes ; then AC_MSG_NOTICE(Enabling Fatal Warnings) CFLAGS="$CFLAGS -Werror" fi AC_SUBST(CFLAGS) dnl This is useful for use in Makefiles that need to remove one specific flag CFLAGS_COPY="$CFLAGS" AC_SUBST(CFLAGS_COPY) AC_SUBST(LOCALE) AC_SUBST(CC) AC_SUBST(MAKE) dnl The Makefiles and shell scripts we output AC_CONFIG_FILES(Makefile \ include/Makefile \ heartbeat/Makefile \ heartbeat/ocf-binaries \ heartbeat/ocf-directories \ heartbeat/ocf-shellfuncs \ heartbeat/shellfuncs \ tools/Makefile \ tools/ocf-tester \ tools/ocft/Makefile \ tools/ocft/ocft \ tools/ocft/caselib \ tools/ocft/README \ tools/ocft/README.zh_CN \ ldirectord/Makefile \ ldirectord/ldirectord \ ldirectord/init.d/Makefile \ ldirectord/init.d/ldirectord \ ldirectord/init.d/ldirectord.debian \ ldirectord/init.d/ldirectord.debian.default \ ldirectord/logrotate.d/Makefile \ ldirectord/OCF/Makefile \ ldirectord/OCF/ldirectord \ doc/Makefile \ + doc/man/Makefile \ rgmanager/Makefile \ rgmanager/src/Makefile \ rgmanager/src/resources/Makefile \ rgmanager/src/resources/utils/Makefile \ ) dnl Now process the entire list of files added by previous dnl calls to AC_CONFIG_FILES() AC_OUTPUT() dnl ***************** dnl Configure summary dnl ***************** AC_MSG_RESULT([]) AC_MSG_RESULT([$PACKAGE configuration:]) AC_MSG_RESULT([ Version = ${VERSION}]) AC_MSG_RESULT([ Build Version = $Format:%H$]) AC_MSG_RESULT([ Features =${PKG_FEATURES}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ Prefix = ${prefix}]) AC_MSG_RESULT([ Executables = ${sbindir}]) AC_MSG_RESULT([ Man pages = ${mandir}]) AC_MSG_RESULT([ Libraries = ${libdir}]) AC_MSG_RESULT([ Header files = ${includedir}]) AC_MSG_RESULT([ Arch-independent files = ${datadir}]) 
AC_MSG_RESULT([ Documentation = ${docdir}]) AC_MSG_RESULT([ State information = ${localstatedir}]) AC_MSG_RESULT([ System configuration = ${sysconfdir}]) AC_MSG_RESULT([ RA state files = ${HA_RSCTMPDIR}]) AC_MSG_RESULT([ AIS Plugins = ${LCRSODIR}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ CFLAGS = ${CFLAGS}]) AC_MSG_RESULT([ Libraries = ${LIBS}]) AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}]) diff --git a/doc/Makefile.am b/doc/Makefile.am index 3801a16e3..0f06b2fa4 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,162 +1,25 @@ # # doc: Linux-HA resource agents # # Copyright (C) 2009 Florian Haas # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# +SUBDIRS = man MAINTAINERCLEANFILES = Makefile.in -EXTRA_DIST = $(doc_DATA) $(REFENTRY_STYLESHEET) \ - mkappendix.sh ralist.sh - -CLEANFILES = $(man_MANS) $(xmlfiles) metadata-*.xml - doc_DATA = README.webapps - -STYLESHEET_PREFIX ?= http://docbook.sourceforge.net/release/xsl/current -MANPAGES_STYLESHEET ?= $(STYLESHEET_PREFIX)/manpages/docbook.xsl -HTML_STYLESHEET ?= $(STYLESHEET_PREFIX)/xhtml/docbook.xsl -FO_STYLESHEET ?= $(STYLESHEET_PREFIX)/fo/docbook.xsl -REFENTRY_STYLESHEET ?= ra2refentry.xsl - -XSLTPROC_OPTIONS ?= --xinclude -XSLTPROC_MANPAGES_OPTIONS ?= $(XSLTPROC_OPTIONS) -XSLTPROC_HTML_OPTIONS ?= $(XSLTPROC_OPTIONS) -XSLTPROC_FO_OPTIONS ?= $(XSLTPROC_OPTIONS) - -radir = $(top_srcdir)/heartbeat - -# OCF_ROOT=. is necessary due to a sanity check in ocf-shellfuncs -# (which tests whether $OCF_ROOT points to a directory -metadata-%.xml: $(radir)/% - OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ - -metadata-IPv6addr.xml: ../heartbeat/IPv6addr - OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ - -# Please note: we can't name the man pages -# ocf:heartbeat:. Believe me, I've tried. It looks like it -# works, but then it doesn't. While make can deal correctly with -# colons in target names (when properly escaped), it royally messes up -# when it is deals with _dependencies_ that contain colons. See Bug -# 12126 on savannah.gnu.org. But, maybe it gets fixed soon, it was -# first reported in 1995 and added to Savannah in in 2005... 
-if BUILD_DOC -man_MANS = ocf_heartbeat_AoEtarget.7 \ - ocf_heartbeat_AudibleAlarm.7 \ - ocf_heartbeat_ClusterMon.7 \ - ocf_heartbeat_CTDB.7 \ - ocf_heartbeat_Delay.7 \ - ocf_heartbeat_Dummy.7 \ - ocf_heartbeat_EvmsSCC.7 \ - ocf_heartbeat_Evmsd.7 \ - ocf_heartbeat_Filesystem.7 \ - ocf_heartbeat_ICP.7 \ - ocf_heartbeat_IPaddr.7 \ - ocf_heartbeat_IPaddr2.7 \ - ocf_heartbeat_IPsrcaddr.7 \ - ocf_heartbeat_LVM.7 \ - ocf_heartbeat_LinuxSCSI.7 \ - ocf_heartbeat_MailTo.7 \ - ocf_heartbeat_ManageRAID.7 \ - ocf_heartbeat_ManageVE.7 \ - ocf_heartbeat_Pure-FTPd.7 \ - ocf_heartbeat_Raid1.7 \ - ocf_heartbeat_Route.7 \ - ocf_heartbeat_SAPDatabase.7 \ - ocf_heartbeat_SAPInstance.7 \ - ocf_heartbeat_SendArp.7 \ - ocf_heartbeat_ServeRAID.7 \ - ocf_heartbeat_SphinxSearchDaemon.7 \ - ocf_heartbeat_Squid.7 \ - ocf_heartbeat_Stateful.7 \ - ocf_heartbeat_SysInfo.7 \ - ocf_heartbeat_VIPArip.7 \ - ocf_heartbeat_VirtualDomain.7 \ - ocf_heartbeat_WAS.7 \ - ocf_heartbeat_WAS6.7 \ - ocf_heartbeat_WinPopup.7 \ - ocf_heartbeat_Xen.7 \ - ocf_heartbeat_Xinetd.7 \ - ocf_heartbeat_anything.7 \ - ocf_heartbeat_apache.7 \ - ocf_heartbeat_asterisk.7 \ - ocf_heartbeat_conntrackd.7 \ - ocf_heartbeat_db2.7 \ - ocf_heartbeat_drbd.7 \ - ocf_heartbeat_eDir88.7 \ - ocf_heartbeat_ethmonitor.7 \ - ocf_heartbeat_exportfs.7 \ - ocf_heartbeat_fio.7 \ - ocf_heartbeat_iSCSILogicalUnit.7 \ - ocf_heartbeat_iSCSITarget.7 \ - ocf_heartbeat_ids.7 \ - ocf_heartbeat_iscsi.7 \ - ocf_heartbeat_jboss.7 \ - ocf_heartbeat_lxc.7 \ - ocf_heartbeat_mysql.7 \ - ocf_heartbeat_mysql-proxy.7 \ - ocf_heartbeat_named.7 \ - ocf_heartbeat_nfsserver.7 \ - ocf_heartbeat_nginx.7 \ - ocf_heartbeat_oracle.7 \ - ocf_heartbeat_oralsnr.7 \ - ocf_heartbeat_pgsql.7 \ - ocf_heartbeat_pingd.7 \ - ocf_heartbeat_portblock.7 \ - ocf_heartbeat_postfix.7 \ - ocf_heartbeat_proftpd.7 \ - ocf_heartbeat_rsyncd.7 \ - ocf_heartbeat_rsyslog.7 \ - ocf_heartbeat_scsi2reservation.7 \ - ocf_heartbeat_sfex.7 \ - ocf_heartbeat_slapd.7 \ - ocf_heartbeat_symlink.7 \ - 
ocf_heartbeat_syslog-ng.7 \ - ocf_heartbeat_tomcat.7 \ - ocf_heartbeat_vmware.7 - -if USE_IPV6ADDR -man_MANS += ocf_heartbeat_IPv6addr.7 -endif - -xmlfiles = $(man_MANS:.7=.xml) - -%.1 %.5 %.7 %.8: %.xml - $(XSLTPROC) \ - $(XSLTPROC_MANPAGES_OPTIONS) \ - $(MANPAGES_STYLESHEET) $< - -ocf_heartbeat_%.xml: metadata-%.xml $(srcdir)/$(REFENTRY_STYLESHEET) - $(XSLTPROC) --novalid \ - --stringparam package $(PACKAGE_NAME) \ - --stringparam version $(VERSION) \ - --output $@ \ - $(srcdir)/$(REFENTRY_STYLESHEET) $< - -ocf_resource_agents.xml: $(xmlfiles) mkappendix.sh - ./mkappendix.sh $(xmlfiles) > $@ - -%.html: %.xml - $(XSLTPROC) \ - $(XSLTPROC_HTML_OPTIONS) \ - --output $@ \ - $(HTML_STYLESHEET) $< - -xml: ocf_resource_agents.xml -endif diff --git a/doc/dev-guides/ra-dev-guide-docinfo.xml b/doc/dev-guides/ra-dev-guide-docinfo.xml new file mode 100644 index 000000000..9e5113583 --- /dev/null +++ b/doc/dev-guides/ra-dev-guide-docinfo.xml @@ -0,0 +1,89 @@ + + Florian + Haas + + hastexo + + florian.haas@hastexo.com + + + John + Shi + + SUSE + + Original ocft README + jshi@suse.com + + + Dejan + Muhamedagic + + SUSE + + ocft documentation rewrite + dmuhamedagic@suse.com + + + 2010 + 2011 + + LINBIT HA-Solutions GmbH + + + + 2011 + + Novell, Inc. + + + + 2011 + + SUSE Linux GmbH + + + + 2011 + + hastexo Professional Services GmbH + + + + License information + The text of and illustrations in this document are licensed + under a Creative Commons Attribution–Share Alike 3.0 Unported + license ("CC-BY-SA"). + + + A summary of CC-BY-SA is available at . + + + The full license text is available at . + + + In accordance with CC-BY-SA, if you distribute this document + or an adaptation of it, you must provide the URL for the original + version. 
+ + + + + + 1.0.2 + November 16, 2011 + FGH + + + 1.0.1 + January 3, 2011 + FGH + + + 1.0.0 + December 13, 2010 + FGH + + diff --git a/doc/dev-guides/ra-dev-guide.txt b/doc/dev-guides/ra-dev-guide.txt new file mode 100644 index 000000000..cf828726d --- /dev/null +++ b/doc/dev-guides/ra-dev-guide.txt @@ -0,0 +1,1945 @@ += The OCF Resource Agent Developer's Guide + +== Introduction + +This document is to serve as a guide and reference for all developers, +maintainers, and contributors working on OCF (Open Cluster Framework) +compliant cluster resource agents. It explains the anatomy and general +functionality of a resource agent, illustrates the resource agent API, +and provides valuable hints and tips to resource agent authors. + +=== What is a resource agent? + +A resource agent is an executable that manages a cluster resource. No +formal definition of a cluster resource exists, other than "anything a +cluster manages is a resource." Cluster resources can be as diverse as +IP addresses, file systems, database services, and entire virtual +machines -- to name just a few examples. + +=== Who or what uses a resource agent? + +Any Open Cluster Framework (OCF) compliant cluster management +application is capable of managing resources using the resource agents +described in this document. At the time of writing, two OCF compliant +cluster management applications exist for the Linux platform: + +* _Pacemaker_, a cluster manager supporting both the Corosync and + Heartbeat cluster messaging frameworks. Pacemaker evolved out of the + Linux-HA project. +* _RGmanager_, the cluster manager bundled in Red Hat Cluster + Suite. It supports the Corosync cluster messaging framework + exclusively. + +=== Which language is a resource agent written in? + +An OCF compliant resource agent can be implemented in _any_ +programming language. The API is not language specific. 
However, most +resource agents are implemented as shell scripts, which is why this +guide primarily uses example code written in shell language. + +== API definitions + +=== Environment variables + +A resource agent receives all configuration information about the +resource it manages via environment variables. The names of these +environment variables are always the name of the resource parameter, +prefixed with +OCF_RESKEY_+. For example, if the resource has an +ip+ +parameter set to +192.168.1.1+, then the resource agent will have +access to an environment variable +OCF_RESKEY_ip+ holding that value. + +For any resource parameter that is not required to be set by the user +-- that is, its parameter definition in the resource agent metadata +does not specify +required="true"+ -- then the resource agent must + +* Provide a reasonable default. This should be advertised in the + metadata. By convention, the resource agent uses a variable named + +OCF_RESKEY_<parametername>_default+ that holds this default. +* Alternatively, cater correctly for the value being empty. + +In addition, the cluster manager may also support _meta_ resource +parameters. These do not apply directly to the resource configuration, +but rather specify _how_ the cluster resource manager is expected to manage +the resource. For example, the Pacemaker cluster manager uses the ++target-role+ meta parameter to specify whether the resource should be +started or stopped. + +Meta parameters are passed into the resource agent in the ++OCF_RESKEY_CRM_meta_+ namespace, with any hyphens converted to +underscores. Thus, the +target-role+ attribute maps to an environment +variable named +OCF_RESKEY_CRM_meta_target_role+. + +=== Actions + +Any resource agent must support one command-line argument which +specifies the action the resource agent is about to execute. The +following actions must be supported by any resource agent: + +* +start+ -- starts the resource. +* +stop+ -- shuts down the resource.
+* +monitor+ -- queries the resource for its state. +* +meta-data+ -- dumps the resource agent metadata. + +In addition, resource agents may optionally support the following +actions: + +* +promote+ -- turns a resource into the +Master+ role (Master/Slave + resources only). +* +demote+ -- turns a resource into the +Slave+ role (Master/Slave + resources only). +* +migrate_to+ and +migrate_from+ -- implement live migration of + resources. +* +validate-all+ -- validates a resource's configuration. +* +usage+ or +help+ -- displays a usage message when the resource + agent is invoked from the command line, rather than by the cluster + manager. +* +status+ -- historical (deprecated) synonym for +monitor+. + +=== Timeouts + +Action timeouts are enforced outside the resource agent proper. It is +the cluster manager's responsibility to monitor how long a resource +agent action has been running, and terminate it if it does not meet +its completion deadline. Thus, resource agents need not themselves +check for any timeout expiry. + +Resource agents can, however, _advise_ the user of sensible timeout +values (which, when correctly set, will be duly enforced by the +cluster manager). See <<_metadata,the following section>> for details +on how a resource agent advertises its suggested timeouts. + +=== Metadata + +Every resource agent must describe its own purpose and supported +parameters in a set of XML metadata. This metadata is used by cluster +management applications for on-line help, and resource agent man pages +are generated from it as well. The following is a fictitious set of +metadata from an imaginary resource agent: + +[source,xml] +-------------------------------------------------------------------------- + + + + 0.1 + +This is a fictitious example resource agent written for the +OCF Resource Agent Developers Guide. 
+ + Example resource agent + for budding OCF RA developers + + + + Number of eggs, an example numeric parameter + + Number of eggs + + + + + Enable superfrobnication, an example boolean parameter + + Enable superfrobnication + + + + + Data directory, an example string parameter + + Data directory + + + + + + + + + + + + + + +-------------------------------------------------------------------------- + +The +resource-agent+ element, of which there must only be one per +resource agent, defines the resource agent +name+ and +version+. + +The +longdesc+ and +shortdesc+ elements in +resource-agent+ provide a +long and short description of the resource agent's +functionality. While +shortdesc+ is a one-line description of what +the resource agent does and is usually used in terse listings, ++longdesc+ should give a full-blown description of the resource agent +in as much detail as possible. + +The +parameters+ element describes the resource agent parameters, and +should hold any number of +parameter+ children -- one for each +parameter that the resource agent supports. + +Every +parameter+ should, like the +resource-agent+ as a whole, come +with a +shortdesc+ and a +longdesc+, and also a +content+ child that +describes the parameter's expected content. + +On the +content+ element, there may be four different attributes: + +* +type+ describes the parameter type (+string+, +integer+, or + +boolean+). If unset, +type+ defaults to +string+. + +* +required+ indicates whether setting the parameter is mandatory + (+required="true"+) or optional (+required="false"+). + +* For optional parameters, it is customary to provide a sensible + default via the +default+ attribute. + +* Finally, the +unique+ attribute (allowed values: +true+ or +false+) + indicates that a specific value must be unique across the cluster, + for this parameter of this particular resource type. 
For example, a + highly available floating IP address is declared +unique+ -- as that + one IP address should run only once throughout the cluster, avoiding + duplicates. + +The +actions+ list defines the actions that the resource agent +advertises as supported. + +Every +action+ should list its own +timeout+ value. This is a +hint to the user what _minimal_ timeout should be configured for the +action. This is meant to cater for the fact that some resources are +quick to start and stop (IP addresses or filesystems, for example), +while others may take several minutes to do so (such as databases). + +In addition, recurring actions (such as +monitor+) should also specify +a recommended minimum +interval+, which is the time between two +consecutive invocations of the same action. Like +timeout+, this value +does not constitute a default -- it is merely a hint for the user +which action interval to configure, at minimum. + +== Return codes + +For any invocation, resource agents must exit with a defined return +code that informs the caller of the outcome of the invoked +action. The return codes are explained in detail in the following +subsections. + +=== +OCF_SUCCESS+ (0) + +The action completed successfully. This is the expected return code +for any successful +start+, +stop+, +promote+, +demote+, ++migrate_from+, +migrate_to+, +meta-data+, +help+, and +usage+ action. + +For +monitor+ (and its deprecated alias, +status+), however, a +modified convention applies: + +* For primitive (stateless) resources, +OCF_SUCCESS+ from +monitor+ + means that the resource is running. Non-running and gracefully + shut-down resources must instead return +OCF_NOT_RUNNING+. + +* For master/slave (stateful) resources, +OCF_SUCCESS+ from +monitor+ + means that the resource is running _in Slave mode_. Resources + running in Master mode must instead return +OCF_RUNNING_MASTER+, and + gracefully shut-down resources must instead return + +OCF_NOT_RUNNING+.
+ +=== +OCF_ERR_GENERIC+ (1) + +The action returned a generic error. A resource agent should use this +exit code only when none of the more specific error codes, defined +below, accurately describes the problem. + +The cluster resource manager interprets this exit code as a _soft_ +error. This means that unless specifically configured otherwise, the +resource manager will attempt to recover a resource which failed with ++OCF_ERR_GENERIC+ in-place -- usually by restarting the resource on +the same node. + +=== +OCF_ERR_ARGS+ (2) + +The resource agent was invoked with incorrect arguments. This is a +safety net "can't happen" error which the resource agent should only +return when invoked with, for example, an incorrect number of command +line arguments. + +NOTE: The resource agent should not return this error when instructed +to perform an action that it does not support. Instead, under those +circumstances, it should return +OCF_ERR_UNIMPLEMENTED+. + +=== +OCF_ERR_UNIMPLEMENTED+ (3) + +The resource agent was instructed to execute an action that the agent +does not implement. + +Not all resource agent actions are mandatory. +promote+, +demote+, ++migrate_to+, +migrate_from+, and +notify+, are all optional actions +which the resource agent may or may not implement. When a non-stateful +resource agent is misconfigured as a master/slave resource, for +example, then the resource agent should alert the user about this +misconfiguration by returning +OCF_ERR_UNIMPLEMENTED+ on the +promote+ +and +demote+ actions. + +=== +OCF_ERR_PERM+ (4) + +The action failed due to insufficient permissions. This may be due to +the agent not being able to open a certain file, to listen on a +specific socket, to write to a directory, or similar. + +The cluster resource manager interprets this exit code as a _hard_ +error. 
This means that unless specifically configured otherwise, the +resource manager will attempt to recover a resource which failed with +this error by restarting the resource on a different node (where the +permission problem may not exist). + +=== +OCF_ERR_INSTALLED+ (5) + +The action failed because a required component is missing on the node +where the action was executed. This may be due to a required binary +not being executable, or a vital configuration file being unreadable. + +The cluster resource manager interprets this exit code as a _hard_ +error. This means that unless specifically configured otherwise, the +resource manager will attempt to recover a resource which failed with +this error by restarting the resource on a different node (where the +required files or binaries may be present). + +=== +OCF_ERR_CONFIGURED+ (6) + +The action failed because the user misconfigured the resource. For +example, the user may have configured an alphanumeric string for a +parameter that really should be an integer. + +The cluster resource manager interprets this exit code as a _fatal_ +error. Since this is a configuration error that is present +cluster-wide, it would make no sense to recover such a resource on a +different node, let alone in-place. When a resource fails with this +error, the cluster manager will attempt to shut down the resource, and +wait for administrator intervention. + +=== +OCF_NOT_RUNNING+ (7) + +The resource was found not to be running. This is an exit code that +may be returned by the +monitor+ action exclusively. Note that this +implies that the resource has either _gracefully_ shut down, or has +never been started. + +If the resource is not running due to an error condition, the ++monitor+ action should instead return one of the +OCF_ERR_+ exit +codes or +OCF_FAILED_MASTER+. + +=== +OCF_RUNNING_MASTER+ (8) + +The resource was found to be running in the +Master+ role. 
This +applies only to stateful (Master/Slave) resources, and only to +their +monitor+ action. + +Note that there is no specific exit code for "running in slave +mode". This is because there is no functional distinction between a +primitive resource running normally, and a stateful resource running +as a slave. The +monitor+ action of a stateful resource running +normally in the +Slave+ role should simply return +OCF_SUCCESS+. + +=== +OCF_FAILED_MASTER+ (9) + +The resource was found to have failed in the +Master+ role. This +applies only to stateful (Master/Slave) resources, and only to their ++monitor+ action. + +The cluster resource manager interprets this exit code as a _soft_ +error. This means that unless specifically configured otherwise, the +resource manager will attempt to recover a resource which failed with ++$OCF_FAILED_MASTER+ in-place -- usually by demoting, stopping, +starting and then promoting the resource on the same node. + + +== Resource agent structure + +A typical (shell-based) resource agent contains standard structural +items, in the order as listed in this section. It describes the +expected behavior of a resource agent with respect to the various +actions it supports, using a fictitious resource agent named +foobar+ +as an example. + +=== Resource agent interpreter + +Any resource agent implemented as a script must specify its +interpreter using standard "shebang" (+#!+) header syntax. + +[source,bash] +-------------------------------------------------------------------------- +#!/bin/sh +-------------------------------------------------------------------------- + +If a resource agent is written in shell, specifying the generic shell +interpreter (+#!/bin/sh+) is generally preferred, though not +required. Resource agents declared as +/bin/sh+ compatible must not +use constructs native to a specific shell (such as, for example, ++${!variable}+ syntax native to +bash+).
It is advisable to +occasionally run such resource agents through a sanitization utility +such as +checkbashisms+. + +It is considered a regression to introduce a patch that will make a +previously +sh+ compatible resource agent suitable only for +bash+, ++ksh+, or any other non-generic shell. It is, however, perfectly +acceptable for a new resource agent to explicitly define a specific +shell, such as +/bin/bash+, as its interpreter. + +=== Author and license information + +The resource agent should contain a comment listing the resource agent +author(s) and/or copyright holder(s), and stating the license that +applies to the resource agent: + +[source,bash] +-------------------------------------------------------------------------- +# +# Resource Agent for managing foobar resources. +# +# License: GNU General Public License (GPL) +# (c) 2008-2010 John Doe, Jane Roe, +# and Linux-HA contributors +-------------------------------------------------------------------------- + +When a resource agent refers to a license for which multiple versions +exist, it is assumed that the current version applies. + +=== Initialization + +Any shell resource agent should source the +.ocf-shellfuncs+ function +library. With the syntax below, this is done in terms of ++$OCF_FUNCTIONS_DIR+, which -- for testing purposes, and also for +generating documentation -- may be overridden from the command line. + +[source,bash] +-------------------------------------------------------------------------- +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat} +. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs +-------------------------------------------------------------------------- + +=== Functions implementing resource agent actions + +What follows next are the functions implementing the resource agent's +advertised actions. The individual actions are described in detail in +<<_resource_agent_actions>>. 
+ +=== Execution block + +This is the part of the resource agent that actually executes when the +resource agent is invoked. It typically follows a fairly standard +structure: + +[source,bash] +-------------------------------------------------------------------------- +# Make sure meta-data and usage always succeed +case $__OCF_ACTION in +meta-data) foobar_meta_data + exit $OCF_SUCCESS + ;; +usage|help) foobar_usage + exit $OCF_SUCCESS + ;; +esac + +# Anything other than meta-data and usage must pass validation +foobar_validate || exit $? + +# Translate each action into the appropriate function call +case $__OCF_ACTION in +start) foobar_start;; +stop) foobar_stop;; +status|monitor) foobar_monitor;; +promote) foobar_promote;; +demote) foobar_demote;; +reload) ocf_log info "Reloading..." + foobar_start + ;; +validate-all) ;; +*) foobar_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? + +# The resource agent may optionally log a debug message +ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" +exit $rc +-------------------------------------------------------------------------- + + +== Resource agent actions + +Each action is typically implemented in a separate function or method +in the resource agent. By convention, these are usually named ++<agent>_<action>+, so the function implementing the +start+ action in ++foobar+ would be named +foobar_start()+. + +As a general rule, whenever the resource agent encounters an error +that it is not able to recover from, it is permitted to immediately exit, +throw an exception, or otherwise cease execution. Examples for this +include configuration issues, missing binaries, permission problems, +etc. It is not necessary to pass these errors up the call stack. + +It is the cluster manager's responsibility to initiate the appropriate +recovery action based on the user's configuration. The resource agent +should not guess at said configuration.
+ +=== +start+ action + +When invoked with the +start+ action, the resource agent must start +the resource if it is not yet running. This means that the agent must +verify the resource's configuration, query its state, and then start +it only if it is not running. A common way of doing this would be to +invoke the +validate_all+ and +monitor+ function first, as in the +following example: + +[source,bash] +-------------------------------------------------------------------------- +foobar_start() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # if resource is already running, bail out early + if foobar_monitor; then + ocf_log info "Resource is already running" + return $OCF_SUCCESS + fi + + # actually start up the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ... + + # After the resource has been started, check whether it started up + # correctly. If the resource starts asynchronously, the agent may + # spin on the monitor function here -- if the resource does not + # start up within the defined timeout, the cluster manager will + # consider the start action failed + while ! foobar_monitor; do + ocf_log debug "Resource has not started yet, waiting" + sleep 1 + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + + +=== +stop+ action + +When invoked with the +stop+ action, the resource agent must stop the +resource, if it is running. This means that the agent must verify the +resource configuration, query its state, and then stop it only if it +is currently running. A common way of doing this would be to invoke +the +validate_all+ and +monitor+ function first. 
It is important to +understand that +stop+ is a force operation -- the resource agent must +do everything in its power to shut down the resource, short of +rebooting the node or shutting it off. Consider the following example: + +[source,bash] +-------------------------------------------------------------------------- +foobar_stop() { + local rc + + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + foobar_monitor + rc=$? + case "$rc" in + "$OCF_SUCCESS") + # Currently running. Normal, expected behavior. + ocf_log debug "Resource is currently running" + ;; + "$OCF_RUNNING_MASTER") + # Running as a Master. Need to demote before stopping. + ocf_log info "Resource is currently running as Master" + foobar_demote || \ + ocf_log warn "Demote failed, trying to stop anyway" + ;; + "$OCF_NOT_RUNNING") + # Currently not running. Nothing to do. + ocf_log info "Resource is already stopped" + return $OCF_SUCCESS + ;; + esac + + # actually shut down the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ... + + # After the resource has been stopped, check whether it shut down + # correctly. If the resource stops asynchronously, the agent may + # spin on the monitor function here -- if the resource does not + # shut down within the defined timeout, the cluster manager will + # consider the stop action failed + while foobar_monitor; do + ocf_log debug "Resource has not stopped yet, waiting" + sleep 1 + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS + +} +-------------------------------------------------------------------------- + +NOTE: The expected exit code for a successful stop operation is ++$OCF_SUCCESS+, _not_ +$OCF_NOT_RUNNING+. + +IMPORTANT: A failed stop operation is a potentially dangerous +situation which the cluster manager will almost invariably try to +resolve by means of node fencing.
In other words, the cluster manager +will forcibly evict from the cluster a node on which a stop operation +has failed. While this measure serves ultimately to protect data, it +does cause disruption to applications and their users. Thus, a +resource agent should make sure that it exits with an error only if +all avenues for proper resource shutdown have been exhausted. + +=== +monitor+ action + +The +monitor+ action queries the current status of a resource. It must +discern between three different states: + +* resource is currently running (return +$OCF_SUCCESS+); +* resource has stopped gracefully (return +$OCF_NOT_RUNNING+); +* resource has run into a problem and must be considered failed + (return the appropriate +$OCF_ERR_+ code to indicate the nature of the + problem). + + +[source,bash] +-------------------------------------------------------------------------- +foobar_monitor() { + local rc + + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + ocf_run frobnicate --test + + # This example assumes the following exit code convention + # for frobnicate: + # 0: running, and fully caught up with master + # 1: gracefully stopped + # any other: error + case "$?" in + 0) + rc=$OCF_SUCCESS + ocf_log debug "Resource is running" + ;; + 1) + rc=$OCF_NOT_RUNNING + ocf_log debug "Resource is not running" + ;; + *) + ocf_log err "Resource has failed" + exit $OCF_ERR_GENERIC + esac + + return $rc +} +-------------------------------------------------------------------------- + +Stateful (master/slave) resource agents may use a more elaborate +monitoring scheme where they can provide "hints" to the cluster +manager identifying which instance is best suited to assume the ++Master+ role. <<_specifying_a_master_preference>> explains the +details. + +NOTE: The cluster manager may invoke the +monitor+ action for a +_probe_, which is a test whether the resource is currently +running. 
Normally, the monitor operation would behave exactly the same +during a probe and a "real" monitor action. If a specific resource +does require special treatment for probes, however, the +ocf_is_probe+ +convenience function is available in the OCF shell functions library +for that purpose. + +=== +validate-all+ action + +The +validate-all+ action tests for correct resource agent +configuration and a working environment. +validate-all+ should exit +with one of the following return codes: + +* +$OCF_SUCCESS+ -- all is well, the configuration is valid and + usable. +* +$OCF_ERR_CONFIGURED+ -- the user has misconfigured the resource. +* +$OCF_ERR_INSTALLED+ -- the resource has possibly been configured + correctly, but a vital component is missing on the node where + +validate-all+ is being executed. +* +$OCF_ERR_PERM+ -- the resource is configured correctly and is not + missing any required components, but is suffering from a permission + issue (such as not being able to create a necessary file). + ++validate-all+ is usually wrapped in a function that is not only +called when explicitly invoking the corresponding action, but also -- +as a sanity check -- from just about any other function. Therefore, +the resource agent author must keep in mind that the function may be +invoked during the +start+, +stop+, and +monitor+ operations, and also +during probes. + +Probes pose a separate challenge for validation. During a probe (when +the cluster manager may expect the resource _not_ to be running on the +node where the probe is executed), some required components may be +_expected_ to not be available on the affected node. For example, this +includes any shared data on storage devices not available for reading +during the probe. 
The +validate-all+ function may thus need to treat +probes specially, using the +ocf_is_probe+ convenience function: + +[source,bash] +-------------------------------------------------------------------------- +foobar_validate_all() { + # Test for configuration errors first + if ! ocf_is_decimal $OCF_RESKEY_eggs; then + ocf_log err "eggs is not numeric!" + exit $OCF_ERR_CONFIGURED + fi + + # Test for required binaries + check_binary frobnicate + + # Check for data directory (this may be on shared storage, so + # disable this test during probes) + if ! ocf_is_probe; then + if ! [ -d $OCF_RESKEY_datadir ]; then + ocf_log err "$OCF_RESKEY_datadir does not exist or is not a directory!" + exit $OCF_ERR_INSTALLED + fi + fi + + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +=== +meta-data+ action + +The +meta-data+ action dumps the resource agent metadata to standard +output. The output must follow the metadata format as specified in +<<_metadata>>. + +[source,bash] +-------------------------------------------------------------------------- +foobar_meta_data() { + cat <<EOF +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="foobar" version="0.1"> + <version>0.1</version> + <longdesc lang="en"> +... +EOF +} +-------------------------------------------------------------------------- + +=== +promote+ action + +The +promote+ action is optional. It must only be supported by +_stateful_ resource agents, which means agents that discern between +two distinct _roles_: +Master+ and +Slave+. +Slave+ is functionally +identical to the +Started+ state in a stateless resource agent. Thus, +while a regular (stateless) resource agent only needs to implement ++start+ and +stop+, a stateful resource agent must also support the ++promote+ action to be able to make a transition between the +Started+ +(+Slave+) and +Master+ roles. + +[source,bash] +-------------------------------------------------------------------------- +foobar_promote() { + local rc + + # exit immediately if configuration is not valid + foobar_validate_all || exit $?
+ + # test the resource's current state + foobar_monitor + rc=$? + case "$rc" in + "$OCF_SUCCESS") + # Running as slave. Normal, expected behavior. + ocf_log debug "Resource is currently running as Slave" + ;; + "$OCF_RUNNING_MASTER") + # Already a master. Unexpected, but not a problem. + ocf_log info "Resource is already running as Master" + return $OCF_SUCCESS + ;; + "$OCF_NOT_RUNNING") + # Currently not running. Need to start before promoting. + ocf_log info "Resource is currently not running" + foobar_start + ;; + *) + # Failed resource. Let the cluster manager recover. + ocf_log err "Unexpected error, cannot promote" + exit $rc + ;; + esac + + # actually promote the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ocf_run frobnicate --master-mode || exit $OCF_ERR_GENERIC + + # After the resource has been promoted, check whether the + # promotion worked. If the resource promotion is asynchronous, the + # agent may spin on the monitor function here -- if the resource + # does not assume the Master role within the defined timeout, the + # cluster manager will consider the promote action failed. + while true; do + foobar_monitor + if [ $? -eq $OCF_RUNNING_MASTER ]; then + ocf_log debug "Resource promoted" + break + else + ocf_log debug "Resource still awaiting promotion" + sleep 1 + fi + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +=== +demote+ action + +The +demote+ action is optional. It must only be supported by +_stateful_ resource agents, which means agents that discern between +two distinct _roles_: +Master+ and +Slave+. +Slave+ is functionally +identical to the +Started+ state in a stateless resource agent.
Thus, +while a regular (stateless) resource agent only needs to implement ++start+ and +stop+, a stateful resource agent must also support the ++demote+ action to be able to make a transition between the +Master+ +and +Started+ (+Slave+) roles. + +[source,bash] +-------------------------------------------------------------------------- +foobar_demote() { + local rc + + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # test the resource's current state + foobar_monitor + rc=$? + case "$rc" in + "$OCF_RUNNING_MASTER") + # Running as master. Normal, expected behavior. + ocf_log debug "Resource is currently running as Master" + ;; + "$OCF_SUCCESS") + # Already running as slave. Nothing to do. + ocf_log debug "Resource is currently running as Slave" + return $OCF_SUCCESS + ;; + "$OCF_NOT_RUNNING") + # Currently not running. Getting a demote action + # in this state is unexpected. Exit with an error + # and let the cluster manager recover. + ocf_log err "Resource is currently not running" + exit $OCF_ERR_GENERIC + ;; + *) + # Failed resource. Let the cluster manager recover. + ocf_log err "Unexpected error, cannot demote" + exit $rc + ;; + esac + + # actually demote the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ocf_run frobnicate --unset-master-mode || exit $OCF_ERR_GENERIC + + # After the resource has been demoted, check whether the + # demotion worked. If the resource demotion is asynchronous, the + # agent may spin on the monitor function here -- if the resource + # does not assume the Slave role within the defined timeout, the + # cluster manager will consider the demote action failed. + while true; do + foobar_monitor + if [ $?
-eq $OCF_RUNNING_MASTER ]; then + ocf_log debug "Resource still awaiting demotion" + sleep 1 + else + ocf_log debug "Resource demoted" + break + fi + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +=== +migrate_to+ action + +The +migrate_to+ action can serve one of two purposes: + +* Initiate a native _push_ type migration for the resource. In other + words, instruct the resource to move _to_ a specific node from the + node it is currently running on. The resource agent knows about its + destination node via the +$OCF_RESKEY_CRM_meta_migrate_target+ environment + variable. + +* Freeze the resource in a _freeze/thaw_ (also known as + _suspend/resume_) type migration. In this mode, the resource does + not need any information about its destination node at this point. + +The example below illustrates a push type migration: + +[source,bash] +-------------------------------------------------------------------------- +foobar_migrate_to() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # if resource is not running, bail out early + if ! foobar_monitor; then + ocf_log err "Resource is not running" + exit $OCF_ERR_GENERIC + fi + + # actually migrate the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ocf_run frobnicate --migrate \ + --dest=$OCF_RESKEY_CRM_meta_migrate_target \ + || exit $OCF_ERR_GENERIC + ...
+ + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +In contrast, a freeze/thaw type migration may implement its freeze +operation like this: + +[source,bash] +-------------------------------------------------------------------------- +foobar_migrate_to() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # if resource is not running, bail out early + if ! foobar_monitor; then + ocf_log err "Resource is not running" + exit $OCF_ERR_GENERIC + fi + + # actually freeze the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ocf_run frobnicate --freeze || exit $OCF_ERR_GENERIC + ... + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + + +=== +migrate_from+ action + +The +migrate_from+ action can serve one of two purposes: + +* Complete a native _push_ type migration for the resource. In other + words, check whether the migration has succeeded properly, and the + resource is running on the local node. The resource agent knows + about the migration source via the + +$OCF_RESKEY_CRM_meta_migrate_source+ environment variable. + +* Thaw the resource in a _freeze/thaw_ (also known as + _suspend/resume_) type migration. In this mode, the resource usually + does not need any information about its source node at this point. + +The example below illustrates a push type migration: + +[source,bash] +-------------------------------------------------------------------------- +foobar_migrate_from() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # After the resource has been migrated, check whether it resumed + # correctly.
If the resource starts asynchronously, the agent may + # spin on the monitor function here -- if the resource does not + # run within the defined timeout, the cluster manager will + # consider the migrate_from action failed + while ! foobar_monitor; do + ocf_log debug "Resource has not yet migrated, waiting" + sleep 1 + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +In contrast, a freeze/thaw type migration may implement its thaw +operation like this: + +[source,bash] +-------------------------------------------------------------------------- +foobar_migrate_from() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # actually thaw the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ocf_run frobnicate --thaw || exit $OCF_ERR_GENERIC + + # After the resource has been migrated, check whether it resumed + # correctly. If the resource starts asynchronously, the agent may + # spin on the monitor function here -- if the resource does not + # run within the defined timeout, the cluster manager will + # consider the migrate_from action failed + while ! foobar_monitor; do + ocf_log debug "Resource has not yet migrated, waiting" + sleep 1 + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + + +=== +notify+ action + +With notifications, instances of clones (and of master/slave +resources, which are an extended kind of clones) can inform each other +about their state. When notifications are enabled, any action on any +instance of a clone carries a +pre+ and +post+ notification. Then, the +cluster manager invokes the +notify+ operation on _all_ clone +instances.
For +notify+ operations, additional environment variables +are passed into the resource agent during execution: + +* +$OCF_RESKEY_CRM_meta_notify_type+ -- the notification type (+pre+ + or +post+) + +* +$OCF_RESKEY_CRM_meta_notify_operation+ -- the operation (action) + that the notification is about (+start+, +stop+, +promote+, +demote+ + etc.) + +* +$OCF_RESKEY_CRM_meta_notify_start_uname+ -- node name of the node + where the resource is being started (+start+ notifications only) + +* +$OCF_RESKEY_CRM_meta_notify_stop_uname+ -- node name of the node + where the resource is being stopped (+stop+ notifications only) + +* +$OCF_RESKEY_CRM_meta_notify_master_uname+ -- node name of the node + where the resource currently _is in_ the Master role + +* +$OCF_RESKEY_CRM_meta_notify_promote_uname+ -- node name of the node + where the resource currently _is being promoted to_ the Master role + (+promote+ notifications only) + +* +$OCF_RESKEY_CRM_meta_notify_demote_uname+ -- node name of the node + where the resource currently _is being demoted to_ the Slave role + (+demote+ notifications only) + +Notifications come in particularly handy for master/slave resources +using a "pull" scheme, where the master is a publisher and the slave a +subscriber. Since the master is obviously only available as such when +a promotion has occurred, the slaves can use a "pre-promote" +notification to configure themselves to subscribe to the right +publisher. + +Likewise, the subscribers may want to unsubscribe from the publisher +after it has relinquished its master status, and a "post-demote" +notification can be used for that purpose. + +Consider the example below to illustrate the concept. + +[source,bash] +-------------------------------------------------------------------------- +foobar_notify() { + local type_op + type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" + + ocf_log debug "Received $type_op notification." 
+ case "$type_op" in + 'pre-promote') + ocf_run frobnicate --slave-mode \ + --master=$OCF_RESKEY_CRM_meta_notify_promote_uname \ + || exit $OCF_ERR_GENERIC + ;; + 'post-demote') + ocf_run frobnicate --unset-slave-mode || exit $OCF_ERR_GENERIC + ;; + esac + + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +NOTE: A master/slave resource agent may support a _multi-master_ +configuration, where there is possibly more than one master at any +given time. If that is the case, then the ++$OCF_RESKEY_CRM_meta_notify_*_uname+ variables may each contain a +space-separated list of hostnames, rather than a single host name as +shown in the example. Under those circumstances the resource agent +would have to properly iterate over this list. + +== Script variables + +This section outlines variables typically available to resource agents, +primarily for convenience purposes. For additional variables +available while the agent is being executed, refer to +<<_environment_variables>> and <<_return_codes>>. + +=== +$OCF_ROOT+ + +The root of the OCF resource agent hierarchy. This should never be +changed by a resource agent. This is usually +/usr/lib/ocf+. + +=== +$OCF_FUNCTIONS_DIR+ + +The directory where the resource agent shell function library, ++.ocf-shellfuncs+, resides. This is usually defined in terms of ++$OCF_ROOT+ and should never be changed by a resource agent. This +variable may, however, be overridden from the command line while +testing a new or modified resource agent. + +=== +$OCF_RESOURCE_INSTANCE+ + +The resource instance name. For primitive (non-clone, non-stateful) +resources, this is simply the resource name. For clones and stateful +resources, this is the primitive name, followed by a colon and the +clone instance number (such as +p_foobar:0+). + +=== +$__OCF_ACTION+ + +The currently invoked action.
This is exactly the first command-line +argument that the cluster manager specifies when it invokes the +resource agent. + +=== +$__SCRIPT_NAME+ + +The name of the resource agent. This is exactly the base name of the +resource agent script, with leading directory names removed. + +=== +$HA_RSCTMP+ + +A temporary directory for use by resource agents. The system startup +sequence (on any LSB compliant Linux distribution) guarantees that +this directory is emptied on system startup, so this directory will +not contain any stale data after a node reboot. + +== Convenience functions + +=== Logging: +ocf_log+ + +Resource agents should use the +ocf_log+ function for logging +purposes. This convenient logging wrapper is invoked as follows: + +[source,bash] +-------------------------------------------------------------------------- +ocf_log <severity> "Log message" +-------------------------------------------------------------------------- + +It supports the following severity levels: + +* +debug+ -- for debugging messages. Most logging configurations + suppress this level by default. +* +info+ -- for informational messages about the agent's behavior or + status. +* +warn+ -- for warnings. This is for any messages which reflect + unexpected behavior that does _not_ constitute an unrecoverable + error. +* +err+ -- for errors. As a general rule, this logging level should + only be used immediately prior to an +exit+ with the appropriate + error code. +* +crit+ -- for critical errors. As with +err+, this logging level + should not be used unless the resource agent also exits with an + error code. Very rarely used. + +=== Testing for binaries: +have_binary+ and +check_binary+ + +A resource agent may need to test for the availability of a specific +executable. The +have_binary+ convenience function comes in handy +here: + +[source,bash] +-------------------------------------------------------------------------- +if !
have_binary frobnicate; then + ocf_log warn "Missing frobnicate binary, frobnication disabled!" +fi +-------------------------------------------------------------------------- + +If a missing binary is a fatal problem for the resource, then the ++check_binary+ function should be used: + +[source,bash] +-------------------------------------------------------------------------- +check_binary frobnicate +-------------------------------------------------------------------------- + +Using +check_binary+ is a shorthand method for testing for the +existence (and executability) of the specified binary, and exiting +with +$OCF_ERR_INSTALLED+ if it cannot be found or executed. + +NOTE: Both +have_binary+ and +check_binary+ honor +$PATH+ when the +binary to test for is not specified as a full path. It is usually wise +to _not_ test for a full path, as binary installation paths may vary +by distribution or user policy. + +=== Executing commands and capturing their output: +ocf_run+ + +Whenever a resource agent needs to execute a command and capture its +output, it should use the +ocf_run+ convenience function, invoked as +in this example: + +[source,bash] +-------------------------------------------------------------------------- +ocf_run "frobnicate --spam=eggs" || exit $OCF_ERR_GENERIC +-------------------------------------------------------------------------- + +With the command specified above, the resource agent will invoke ++frobnicate --spam=eggs+ and capture its output and +exit code. If the exit code is nonzero (indicating an error), ++ocf_run+ logs the command output with the +err+ logging severity, and +the resource agent subsequently exits. If the exit code is zero +(indicating success), any command output will be logged with the +info+ +logging severity. + +If the resource agent wishes to ignore the output of a successful +command execution, it can use the +-q+ flag with +ocf_run+.
In the +example below, +ocf_run+ will only log output if the command exit code +is nonzero. + +[source,bash] +-------------------------------------------------------------------------- +ocf_run -q "frobnicate --spam=eggs" || exit $OCF_ERR_GENERIC +-------------------------------------------------------------------------- + +Finally, if the resource agent wants to log the output of a command +with a nonzero exit code with a severity _other_ than error, it may do +so by adding the +-info+ or +-warn+ option to +ocf_run+: + +[source,bash] +-------------------------------------------------------------------------- +ocf_run -warn "frobnicate --spam=eggs" +-------------------------------------------------------------------------- + +=== Locks: +ocf_take_lock+ and +ocf_release_lock_on_exit+ + +Occasionally, there may be different resources of the same type in a +cluster configuration that should not execute actions in +parallel. When a resource agent needs to guard against parallel +execution on the same machine, it can use the +ocf_take_lock+ and ++ocf_release_lock_on_exit+ convenience functions: + +[source,bash] +-------------------------------------------------------------------------- +LOCKFILE=${HA_RSCTMP}/foobar +ocf_release_lock_on_exit $LOCKFILE + +foobar_start() { + ... + ocf_take_lock $LOCKFILE + ... +} +-------------------------------------------------------------------------- + ++ocf_take_lock+ attempts to acquire the designated +$LOCKFILE+. When +it is unavailable, it sleeps a random amount of time between 0 and 1 +seconds, and retries. +ocf_release_lock_on_exit+ releases the lock +file when the agent exits (for any reason). + +=== Testing for numerical values: +ocf_is_decimal+ + +Specifically for parameter validation, it can be helpful to test +whether a given value is numeric. The +ocf_is_decimal+ function exists +for that purpose: +-------------------------------------------------------------------------- +foobar_validate_all() { + if ! 
ocf_is_decimal $OCF_RESKEY_eggs; then + ocf_log err "eggs is not numeric!" + exit $OCF_ERR_CONFIGURED + fi + ... +} +-------------------------------------------------------------------------- + +=== Testing for boolean values: +ocf_is_true+ + +When a resource agent defines a boolean parameter, the value +for this parameter may be specified by the user as +1+/+0+, ++true+/+false+, or +on+/+off+. Since it is tedious to test for all +these values from within the resource agent, the agent should instead +use the +ocf_is_true+ convenience function: + +[source,bash] +-------------------------------------------------------------------------- +if ocf_is_true $OCF_RESKEY_superfrobnicate; then + ocf_run "frobnicate --super" +fi +-------------------------------------------------------------------------- + +NOTE: If +ocf_is_true+ is used against an empty or non-existent +variable, it always returns an exit code of +1+, which is equivalent +to +false+. + +=== Pseudo resources: +ha_pseudo_resource+ + +"Pseudo resources" are those where the resource agent in fact does not +actually start or stop something akin to a runnable process, but +merely executes a single action and then needs some form of tracking +whether that action has been executed or not. The +portblock+ resource +agent is an example of this. + +Resource agents for pseudo resources can use a convenience function, ++ha_pseudo_resource+, which makes use of _tracking files_ to keep tabs +on the status of a resource. If +foobar+ was designed to manage a +pseudo resource, then its +start+ action could look like this: + +[source,bash] +-------------------------------------------------------------------------- +foobar_start() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $?
+ + # if resource is already running, bail out early + if foobar_monitor; then + ocf_log info "Resource is already running" + return $OCF_SUCCESS + fi + + # start the pseudo resource + ha_pseudo_resource ${OCF_RESOURCE_INSTANCE} start + + # After the resource has been started, check whether it started up + # correctly. If the resource starts asynchronously, the agent may + # spin on the monitor function here -- if the resource does not + # start up within the defined timeout, the cluster manager will + # consider the start action failed + while ! foobar_monitor; do + ocf_log debug "Resource has not started yet, waiting" + sleep 1 + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + + +== Conventions + +This section contains a collection of conventions that have emerged in +the resource agent repositories over the years. Following these +conventions is by no means mandatory for resource agent authors, but +it is a good idea based on the +http://en.wikipedia.org/wiki/Principle_of_least_surprise[Principle of +Least Surprise] -- resource agents following these conventions will be +easier to understand, review, and use than those that do not. + +=== Well-known parameter names + +Several parameter names are supported by a number of resource +agents. 
For new resource agents, following these examples is generally +a good idea: + +* +binary+ -- the name of a binary that principally manages the + resource, such as a server daemon +* +config+ -- the full path to a configuration file +* +pid+ -- the full path to a file holding a process ID (PID) +* +log+ -- the full path to a log file +* +socket+ -- the full path to a UNIX socket that the resource manages +* +ip+ -- an IP address that a daemon binds to +* +port+ -- a TCP or UDP port that a daemon binds to + +Needless to say, resource agents should only implement any of these +parameters if they are sensible to use in the agent's context. + +=== Parameter defaults + +Defaults for resource agent parameters should be set by initializing +variables with the suffix +_default+: + +[source,bash] +-------------------------------------------------------------------------- +# Defaults +OCF_RESKEY_superfrobnicate_default=0 + +: ${OCF_RESKEY_superfrobnicate=${OCF_RESKEY_superfrobnicate_default}} +-------------------------------------------------------------------------- + +NOTE: The resource agent should make sure that it sets a default for +any parameter not marked as +required+ in the metadata. + + +=== Honoring +PATH+ for binaries + +When a resource agent supports a parameter designed to hold the name +of a binary (such as a daemon, or a client utility for querying +status), then that parameter should honor the +PATH+ environment +variable. Do not supply full paths. 
Thus, the following approach: + +[source,bash] +-------------------------------------------------------------------------- +# Good example -- do it this way +OCF_RESKEY_frobnicate_default="frobnicate" +: ${OCF_RESKEY_frobnicate="${OCF_RESKEY_frobnicate_default}"} +-------------------------------------------------------------------------- + +is much preferred over specifying a full path, as shown here: + +[source,bash] +-------------------------------------------------------------------------- +# Bad example -- avoid if you can +OCF_RESKEY_frobnicate_default="/usr/local/sbin/frobnicate" +: ${OCF_RESKEY_frobnicate="${OCF_RESKEY_frobnicate_default}"} +-------------------------------------------------------------------------- + +This rule holds for defaults, as well. + + + +== Special considerations + +=== Licensing + +Whenever possible, resource agent contributors are _encouraged_ to use +the GNU General Public License (GPL), version 2 and later, for any new +resource agents. The shell functions library does not strictly mandate +this, however, as it is licensed under the GNU Lesser General Public +License (LGPL), version 2.1 and later (so it can be used by non-GPL +agents). + +The resource agent _must_ explicitly state its own license in the +agent source code. + + +=== Locale settings + +When sourcing +.ocf-shellfuncs+ as explained in <<_initialization>>, +any resource agent automatically sets +LANG+ and +LC_ALL+ to the +C+ +locale. Resource agents can thus expect to always operate in the +C+ +locale, and need not reset +LANG+ or any of the +LC_+ environment +variables themselves. 
+ + +=== Testing for running processes + +For testing whether a particular process (with a known process ID) is +currently running, a frequently found method is to send it a +0+ +signal and catch errors, similar to this example: + +[source,bash] +-------------------------------------------------------------------------- +if kill -s 0 `cat $daemon_pid_file`; then + ocf_log debug "Process is currently running" +else + ocf_log warn "Process is dead, removing pid file" + rm -f $daemon_pid_file +fi +-------------------------------------------------------------------------- + +IMPORTANT: An approach far superior to this example is to instead test +the _functionality_ of the daemon by connecting to it with a client +process, as shown in the example in +<<_literal_monitor_literal_action>>. + + +=== Specifying a master preference + +Stateful (master/slave) resources must set their own _master +preference_ -- they can thus provide hints to the cluster manager +which is the best instance to promote to the +Master+ role. + +IMPORTANT: It is acceptable for multiple instances to have identical +positive master preferences. In that case, the cluster resource +manager will automatically select a resource agent to +promote. However, if _all_ instances have the (default) master score +of zero, the cluster manager will not promote any instance at +all. Thus, it is crucial that at least one instance has a positive +master score. + +For this purpose, +crm_master+ comes in handy. This convenience +wrapper around the +crm_attribute+ sets a node attribute named ++master-<<_literal_ocf_resource_instance_literal,$OCF_RESOURCE_INSTANCE>>+ +for the node it is being executed on, and fills this attribute with +the specified value. The cluster manager is then expected to translate +this into a promotion score for the corresponding instance, and base +its promotion preference on that score.
+ +Stateful resource agents typically execute +crm_master+ during the +<<_literal_monitor_literal_action,+monitor+>> and/or +<<_literal_notify_literal_action,+notify+>> action. + +The following example assumes that the +foobar+ resource agent can +test the application's status by executing a binary that returns +certain exit codes based on whether + +* the resource is either in the master role, or is a slave that is + fully caught up with the master (at any rate, it has current data), + or +* the resource is in the slave role, but through some form of + asynchronous replication has "fallen behind" the master, or +* the resource has gracefully stopped, or +* the resource has unexpectedly failed. + +[source,bash] +-------------------------------------------------------------------------- +foobar_monitor() { + local rc + + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + ocf_run frobnicate --test + + # This example assumes the following exit code convention + # for frobnicate: + # 0: running, and fully caught up with master + # 1: gracefully stopped + # 2: running, but lagging behind master + # any other: error + case "$?" in + 0) + rc=$OCF_SUCCESS + ocf_log debug "Resource is running" + # Set a high master preference. The current master + # will always get this, plus 1. Any current slaves + # will get a high preference so that if the master + # fails, they are next in line to take over. 
+ crm_master -l reboot -v 100 + ;; + 1) + rc=$OCF_NOT_RUNNING + ocf_log debug "Resource is not running" + # Remove the master preference for this node + crm_master -l reboot -D + ;; + 2) + rc=$OCF_SUCCESS + ocf_log debug "Resource is lagging behind master" + # Set a low master preference: if the master fails + # right now, and there is another slave that does + # not lag behind the master, its higher master + # preference will win and that slave will become + # the new master + crm_master -l reboot -v 5 + ;; + *) + ocf_log err "Resource has failed" + exit $OCF_ERR_GENERIC + esac + + return $rc +} +-------------------------------------------------------------------------- + + +== Testing resource agents + +This section discusses automated testing for resource agents. Testing +is a vital aspect of development; it is crucial both for creating new +resource agents, and for modifying existing ones. + + +=== Testing with +ocf-tester+ + +The resource agents repository (and hence, any installed resource +agents package) contains a utility named +ocf-tester+. This shell +script allows you to conveniently and easily test the functionality of +your resource agent. + ++ocf-tester+ is commonly invoked, as +root+, like this: + +-------------------------------------------------------------------------- +ocf-tester -n <name> [-o <param>=<value> ... ] <resource agent> +-------------------------------------------------------------------------- + +* +<name>+ is an arbitrary resource name. + +* You may set any number of +<param>=<value>+ with the +-o+ option, + corresponding to any resource parameters you wish to set for + testing. + +* +<resource agent>+ is the full path to your resource agent. + +When invoked, +ocf-tester+ executes all mandatory actions and enforces +action behavior as explained in <<_resource_agent_actions>>. + +It also tests for optional actions. Optional actions must behave as +expected when advertised, but do not cause +ocf-tester+ to flag an +error if not implemented.
+ +IMPORTANT: +ocf-tester+ does not initiate "dry runs" of actions, nor +does it create resource dummies of any kind. Instead, it exercises the +actual resource agent as-is, whether that may include opening and +closing databases, mounting file systems, starting or stopping virtual +machines, etc. Use with care. + +For example, you could run +ocf-tester+ on the +foobar+ resource agent +as follows: + +-------------------------------------------------------------------------- +# ocf-tester -n foobartest \ + -o superfrobnicate=true \ + -o datadir=/tmp \ + /home/johndoe/ra-dev/foobar +Beginning tests for /home/johndoe/ra-dev/foobar... +* Your agent does not support the notify action (optional) +* Your agent does not support the reload action (optional) +/home/johndoe/ra-dev/foobar passed all tests +-------------------------------------------------------------------------- + + +=== Testing with +ocft+ + ++ocft+ is a testing tool for resource agents. The main difference +to +ocf-tester+ is that +ocft+ can automate creating complex +testing environments. That includes package installation and +arbitrary shell scripting. + +==== +ocft+ components + ++ocft+ consists of the following components: + +* A test case generator (+/usr/sbin/ocft+) -- generates shell + scripts from test case configuration files + +* Configuration files (+/usr/share/resource-agents/ocft/configs/+) -- + a configuration file contains environment setup and test cases + for one resource agent + +* The testing scripts are stored in +/var/lib/resource-agents/ocft/cases/+, + but normally there is no need to inspect them + +==== Customizing the testing environment + ++ocft+ modifies the runtime environment of the resource agent +either by changing environment variables (through the interface +defined by OCF) or by running ad-hoc shell scripts which can for +instance change permissions of a file or unmount a file system. + +==== How to test + +You need to know the software (resource) you want to test. 
Draw a +sketch of all interesting scenarios, with all expected and +unexpected conditions and how the resource agent should react to +them. Then you need to encode these conditions and the expected +outcomes as +ocft+ test cases. Running ocft is then simple: + +--------------------------------------- +# ocft make +# ocft test +--------------------------------------- + +The first subcommand generates the scripts for your test cases +whereas the second runs them and checks the outcome. + +==== +ocft+ configuration file syntax + +There are four top level options each of which can contain +one or more sub-options. + +===== +CONFIG+ (top level option) + +This option is global and influences every test case. + + ** +AgentRoot+ (sub-option) +--------------------------------------- +AgentRoot /usr/lib/ocf/resource.d/xxx +--------------------------------------- + +Normally, we assume that the resource agent lives under the ++heartbeat+ provider. Use `AgentRoot` to test agent which is +distributed by another vendor. + + ** +InstallPackage+ (sub-option) +--------------------------------------- +InstallPackage package [package2 [...]] +--------------------------------------- + +Install packages necessary for testing. The installation is +skipped if the packages have already been installed. + + ** 'HangTimeout' (sub-option) +--------------------------------------- +HangTimeout secs +--------------------------------------- + +The maximum time allowed for a single RA action. If this timer +expires, the action is considered as failed. + +===== +SETUP-AGENT+ (top level option) +--------------------------------------- +SETUP-AGENT + bash commands +--------------------------------------- + +If the RA needs to be initialized before testing, you can put +bash code here for that purpose. The initialization is done only +once. If you need to reinitialize then delete the ++/tmp/.[AGENT_NAME]_set+ stamp file. 
+ +===== +CASE+ (top level option) +--------------------------------------- +CASE "description" +--------------------------------------- + +This is the main building block of the test suite. Each test +case is to be described in one +CASE+ top level option. + +One case consists of several suboptions typically followed by the ++RunAgent+ suboption. + + ** +Var+ (sub-option) +--------------------------------------- +Var VARIABLE=value +--------------------------------------- + +Sets an environment variable for the resource agent. These variables +are usually of the form +OCF_RESKEY_xxx+. Note that there must be no +blanks on either side of the "=". + + ** +Unvar+ (sub-option) +--------------------------------------- +Unvar VARIABLE [VARIABLE2 [...]] +--------------------------------------- + +Remove the environment variable. + + ** +Include+ (sub-option) +--------------------------------------- +Include macro_name +--------------------------------------- + +Include statements in 'macro_name'. See below for description of ++CASE-BLOCK+. + +** +Bash+ (sub-option) +--------------------------------------- +Bash bash_codes +--------------------------------------- + +This option sets up the OS environment: you can insert arbitrary +bash code here to customize the system. Take care not to cause +unrecoverable damage to the system. + +** +BashAtExit+ (sub-option) +--------------------------------------- +BashAtExit bash_codes +--------------------------------------- + +This option restores the OS environment so that the next test case +can run correctly. Of course, you can also use the +Bash+ option to +restore it. However, if an error occurs in the process, the script will +quit immediately instead of running your recovery code. In that case, +you should use +BashAtExit+, which restores the system environment +before the script quits.
+ +** +RunAgent+ (sub-option) +--------------------------------------- +RunAgent cmd [ret_value] +--------------------------------------- + +This option runs the resource agent. "cmd" is the action passed to the +resource agent, such as "start", "status" or "stop". The second +parameter is optional. If it is given, the value actually returned by +the resource agent is compared with the expected value. If they +differ, a bug has been found. + +It is also possible to execute a suboption on a remote host +instead of locally. The protocol used is ssh and the command is +run in the background. Just add the +@<ipaddr>+ suffix to the +suboption name. For instance: + +--------------------------------------- +Bash@192.168.1.100 date +--------------------------------------- + +would run the date program. Remote commands are run in +background. + +NB: Not clear how can ssh be automated as we don't know in +advance the environment. Perhaps use "well-known" host names such +as "node2"? Also, if the command runs in the background, it's not +clear how is the exit code checked. Finally, does Var@node make +sense? Or is the current environment somehow copied over? We +probably need an example here. + +Need examples in general. + +===== +CASE-BLOCK+ (top level option) +--------------------------------------- +CASE-BLOCK macro_name +--------------------------------------- + +The +CASE-BLOCK+ option defines a macro which can be +Include+d +in any +CASE+. All +CASE+ suboptions are valid in +CASE-BLOCK+. + + +== Installing and packaging resource agents + +This section discusses what to do with your resource agent once it is +done and tested -- where to install it, and how to include it in either +your own application package or in the Linux-HA resource agents +repository. + +=== Installing resource agents + +If you choose to include your resource agent in your own project, make +sure it installs into the correct location.
Resource agents should +install into the +/usr/lib/ocf/resource.d/<provider>+ directory, where ++<provider>+ is the name of your project or any other name you wish to +identify the resource agent with. + +For example, if your +foobar+ resource agent is being packaged as part +of a project named +fortytwo+, then the correct full path to your +resource agent would be ++/usr/lib/ocf/resource.d/fortytwo/foobar+. Make sure your resource +agent installs with +0755+ (+-rwxr-xr-x+) permission bits. + +When installed this way, OCF-compliant cluster resource managers will +be able to properly identify, parse, and execute your resource +agent. The Pacemaker cluster manager, for example, would map the +above-mentioned installation path to the +ocf:fortytwo:foobar+ +resource type identifier. + +=== Packaging resource agents + +When you package resource agents as part of your own project, you +should apply the considerations outlined in this section. + +NOTE: If you instead prefer to submit your resource agent to the +Linux-HA resource agents repository, see +<<_submitting_resource_agents>> for information on doing so. + +==== RPM packaging + +It is recommended to put your OCF resource agent(s) in an RPM +sub-package, with the name +<toppackage>-resource-agents+. Ensure that +the package owns its provider directory, and depends on the upstream ++resource-agents+ package which lays out the directory hierarchy and +provides convenience shell functions. An example RPM spec snippet is +given below: + +-------------------------------------------------------------------------- +%package resource-agents +Summary: OCF resource agent for Foobar +Group: System Environment/Base +Requires: %{name} = %{version}-%{release}, resource-agents + +%description resource-agents +This package contains the OCF-compliant resource agents for Foobar.
+ +%files resource-agents +%defattr(755,root,root,-) +%dir %{_prefix}/lib/ocf/resource.d/fortytwo +%{_prefix}/lib/ocf/resource.d/fortytwo/foobar +-------------------------------------------------------------------------- + +NOTE: If an RPM spec file contains a +%package+ declaration, then RPM +considers this a sub-package which inherits top-level fields such as ++Name+, +Version+, +License+, etc. Sub-packages have the top-level +package name automatically prepended to their own name. Thus the snippet +above would create a sub-package named +foobar-resource-agents+ +(presuming the package +Name+ is +foobar+). + +==== Debian packaging + +For Debian packages, like for <<_rpm_packaging,RPMs>>, it is +recommended to create a separate package holding your resource agents, +which then should depend on the +cluster-agents+ package. + +NOTE: This section assumes that you are packaging with +debhelper+. + +An example +debian/control+ snippet is given below: + +-------------------------------------------------------------------------- +Package: foobar-cluster-agents +Priority: extra +Architecture: all +Depends: cluster-agents +Description: OCF-compliant resource agents for Foobar +-------------------------------------------------------------------------- + +You will also create a separate +.install+ file. 
Sticking with the +example of installing the +foobar+ resource agent as a sub-package of ++fortytwo+, the +debian/fortytwo-cluster-agents.install+ file could +consist of the following content: + +-------------------------------------------------------------------------- +usr/lib/ocf/resource.d/fortytwo/foobar +-------------------------------------------------------------------------- + +=== Submitting resource agents + +If you choose not to bundle your resource agent with your own package, +but instead wish to submit it to the upstream resource agent +repository hosted on http://hg.linux-ha.org/agents[the Linux-HA +Mercurial server], please follow the steps outlined in this section. + +Create a working copy (a Mercurial _clone_) of the upstream repository +with the following command: + +-------------------------------------------------------------------------- +hg clone http://hg.linux-ha.org/agents resource-agents +-------------------------------------------------------------------------- + +Create a new Mercurial queue, and a new patchset: +------------------------------------------------------------------------- +cd resource-agents +hg qinit +hg qnew --edit foobar-ra +-------------------------------------------------------------------------- + +In your patch message, be sure to include a meaningful description, +for example: +-------------------------------------------------------------------------- +High: foobar: new resource agent + +This new resource agent adds functionality to manage a foobar service. +It supports being configured as a primitive or as a master/slave set, +and also optionally supports superfrobnication. +-------------------------------------------------------------------------- + +Then, copy your resource agent into the +heartbeat+ subdirectory: +-------------------------------------------------------------------------- +cd heartbeat +cp /path/to/your/local/copy/of/foobar . +chmod 0755 foobar +hg add foobar +cd .. 
+-------------------------------------------------------------------------- + +Next, modify the +Makefile.am+ file in +resource-agents/heartbeat+ and +add your new resource agent to the +ocf_SCRIPTS+ list. This will make +sure the agent is properly installed. + +Lastly, open +Makefile.am+ in +resource-agents/doc/man+ and add ++ocf_heartbeat_<name>.7+ to the +man_MANS+ variable. This will +automatically generate a resource agent manual page from its metadata, +and then install that man page into the correct location. + +Once all that is done, you can update your patch set: +-------------------------------------------------------------------------- +hg qrefresh +-------------------------------------------------------------------------- + +Now the patch set is good for review on the mailing list: +-------------------------------------------------------------------------- +hg email --to=linux-ha-dev@lists.linux-ha.org foobar-ra +-------------------------------------------------------------------------- + +Once your new resource agent has been accepted for merging, one of the +upstream developers will push your patch into the upstream +repository. At that point, you can update your checkout from upstream, +and remove your own patch set. + +-------------------------------------------------------------------------- +hg qpop -a +hg pull --update +hg qdelete foobar-ra +-------------------------------------------------------------------------- diff --git a/doc/Makefile.am b/doc/man/Makefile.am similarity index 99% copy from doc/Makefile.am copy to doc/man/Makefile.am index 3801a16e3..1b64f00ef 100644 --- a/doc/Makefile.am +++ b/doc/man/Makefile.am @@ -1,162 +1,160 @@ # # doc: Linux-HA resource agents # # Copyright (C) 2009 Florian Haas # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version.
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(REFENTRY_STYLESHEET) \ mkappendix.sh ralist.sh CLEANFILES = $(man_MANS) $(xmlfiles) metadata-*.xml -doc_DATA = README.webapps - STYLESHEET_PREFIX ?= http://docbook.sourceforge.net/release/xsl/current MANPAGES_STYLESHEET ?= $(STYLESHEET_PREFIX)/manpages/docbook.xsl HTML_STYLESHEET ?= $(STYLESHEET_PREFIX)/xhtml/docbook.xsl FO_STYLESHEET ?= $(STYLESHEET_PREFIX)/fo/docbook.xsl REFENTRY_STYLESHEET ?= ra2refentry.xsl XSLTPROC_OPTIONS ?= --xinclude XSLTPROC_MANPAGES_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_HTML_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_FO_OPTIONS ?= $(XSLTPROC_OPTIONS) radir = $(top_srcdir)/heartbeat # Each agent is run with the "meta-data" action and its output is captured # as metadata-<name>.xml. # OCF_ROOT=. is necessary due to a sanity check in ocf-shellfuncs # (which tests whether $OCF_ROOT points to a directory) metadata-%.xml: $(radir)/% OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ # IPv6addr is taken from the build tree rather than from $(radir) # (presumably because it is a built program -- confirm). Use # $(top_builddir) so the path stays valid from doc/man; a bare # ../heartbeat would resolve to doc/heartbeat here. metadata-IPv6addr.xml: $(top_builddir)/heartbeat/IPv6addr OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ # Please note: we can't name the man pages # ocf:heartbeat:<name>. Believe me, I've tried. It looks like it # works, but then it doesn't. While make can deal correctly with # colons in target names (when properly escaped), it royally messes up # when it deals with _dependencies_ that contain colons. See Bug # 12126 on savannah.gnu.org. But, maybe it gets fixed soon, it was # first reported in 1995 and added to Savannah in 2005...
if BUILD_DOC man_MANS = ocf_heartbeat_AoEtarget.7 \ ocf_heartbeat_AudibleAlarm.7 \ ocf_heartbeat_ClusterMon.7 \ ocf_heartbeat_CTDB.7 \ ocf_heartbeat_Delay.7 \ ocf_heartbeat_Dummy.7 \ ocf_heartbeat_EvmsSCC.7 \ ocf_heartbeat_Evmsd.7 \ ocf_heartbeat_Filesystem.7 \ ocf_heartbeat_ICP.7 \ ocf_heartbeat_IPaddr.7 \ ocf_heartbeat_IPaddr2.7 \ ocf_heartbeat_IPsrcaddr.7 \ ocf_heartbeat_LVM.7 \ ocf_heartbeat_LinuxSCSI.7 \ ocf_heartbeat_MailTo.7 \ ocf_heartbeat_ManageRAID.7 \ ocf_heartbeat_ManageVE.7 \ ocf_heartbeat_Pure-FTPd.7 \ ocf_heartbeat_Raid1.7 \ ocf_heartbeat_Route.7 \ ocf_heartbeat_SAPDatabase.7 \ ocf_heartbeat_SAPInstance.7 \ ocf_heartbeat_SendArp.7 \ ocf_heartbeat_ServeRAID.7 \ ocf_heartbeat_SphinxSearchDaemon.7 \ ocf_heartbeat_Squid.7 \ ocf_heartbeat_Stateful.7 \ ocf_heartbeat_SysInfo.7 \ ocf_heartbeat_VIPArip.7 \ ocf_heartbeat_VirtualDomain.7 \ ocf_heartbeat_WAS.7 \ ocf_heartbeat_WAS6.7 \ ocf_heartbeat_WinPopup.7 \ ocf_heartbeat_Xen.7 \ ocf_heartbeat_Xinetd.7 \ ocf_heartbeat_anything.7 \ ocf_heartbeat_apache.7 \ ocf_heartbeat_asterisk.7 \ ocf_heartbeat_conntrackd.7 \ ocf_heartbeat_db2.7 \ ocf_heartbeat_drbd.7 \ ocf_heartbeat_eDir88.7 \ ocf_heartbeat_ethmonitor.7 \ ocf_heartbeat_exportfs.7 \ ocf_heartbeat_fio.7 \ ocf_heartbeat_iSCSILogicalUnit.7 \ ocf_heartbeat_iSCSITarget.7 \ ocf_heartbeat_ids.7 \ ocf_heartbeat_iscsi.7 \ ocf_heartbeat_jboss.7 \ ocf_heartbeat_lxc.7 \ ocf_heartbeat_mysql.7 \ ocf_heartbeat_mysql-proxy.7 \ ocf_heartbeat_named.7 \ ocf_heartbeat_nfsserver.7 \ ocf_heartbeat_nginx.7 \ ocf_heartbeat_oracle.7 \ ocf_heartbeat_oralsnr.7 \ ocf_heartbeat_pgsql.7 \ ocf_heartbeat_pingd.7 \ ocf_heartbeat_portblock.7 \ ocf_heartbeat_postfix.7 \ ocf_heartbeat_proftpd.7 \ ocf_heartbeat_rsyncd.7 \ ocf_heartbeat_rsyslog.7 \ ocf_heartbeat_scsi2reservation.7 \ ocf_heartbeat_sfex.7 \ ocf_heartbeat_slapd.7 \ ocf_heartbeat_symlink.7 \ ocf_heartbeat_syslog-ng.7 \ ocf_heartbeat_tomcat.7 \ ocf_heartbeat_vmware.7 if USE_IPV6ADDR man_MANS += ocf_heartbeat_IPv6addr.7 endif xmlfiles
= $(man_MANS:.7=.xml) # Render a refentry XML file into a man page using the manpages stylesheet. %.1 %.5 %.7 %.8: %.xml $(XSLTPROC) \ $(XSLTPROC_MANPAGES_OPTIONS) \ $(MANPAGES_STYLESHEET) $< # Convert an agent's captured meta-data into a refentry XML file. ocf_heartbeat_%.xml: metadata-%.xml $(srcdir)/$(REFENTRY_STYLESHEET) $(XSLTPROC) --novalid \ --stringparam package $(PACKAGE_NAME) \ --stringparam version $(VERSION) \ --output $@ \ $(srcdir)/$(REFENTRY_STYLESHEET) $< # Run the helper from $(srcdir) so that out-of-tree (VPATH) builds can find # it; the script is shipped in the source tree via EXTRA_DIST. ocf_resource_agents.xml: $(xmlfiles) mkappendix.sh $(srcdir)/mkappendix.sh $(xmlfiles) > $@ %.html: %.xml $(XSLTPROC) \ $(XSLTPROC_HTML_OPTIONS) \ --output $@ \ $(HTML_STYLESHEET) $< xml: ocf_resource_agents.xml endif diff --git a/doc/mkappendix.sh b/doc/man/mkappendix.sh similarity index 100% rename from doc/mkappendix.sh rename to doc/man/mkappendix.sh diff --git a/doc/ra2refentry.xsl b/doc/man/ra2refentry.xsl similarity index 100% rename from doc/ra2refentry.xsl rename to doc/man/ra2refentry.xsl diff --git a/doc/ralist.sh b/doc/man/ralist.sh similarity index 100% rename from doc/ralist.sh rename to doc/man/ralist.sh