diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am index 35b4b01ea..1276386c9 100644 --- a/heartbeat/Makefile.am +++ b/heartbeat/Makefile.am @@ -1,176 +1,177 @@ # Makefile.am for OCF RAs # # Author: Sun Jing Dong # Copyright (C) 2004 IBM # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(ocf_SCRIPTS) $(ocfcommon_DATA) \ $(common_DATA) $(hb_DATA) $(dtd_DATA) \ README AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/linux-ha halibdir = $(libexecdir)/heartbeat ocfdir = $(OCF_RA_DIR_PREFIX)/heartbeat dtddir = $(datadir)/$(PACKAGE_NAME) dtd_DATA = ra-api-1.dtd metadata.rng if USE_IPV6ADDR_AGENT ocf_PROGRAMS = IPv6addr else ocf_PROGRAMS = endif if IPV6ADDR_COMPATIBLE halib_PROGRAMS = send_ua else halib_PROGRAMS = endif IPv6addr_SOURCES = IPv6addr.c IPv6addr_utils.c send_ua_SOURCES = send_ua.c IPv6addr_utils.c IPv6addr_LDADD = -lplumb $(LIBNETLIBS) send_ua_LDADD = $(LIBNETLIBS) ocf_SCRIPTS = AoEtarget \ AudibleAlarm \ ClusterMon \ CTDB \ Delay \ Dummy \ EvmsSCC \ Evmsd \ Filesystem \ ICP \ IPaddr \ IPaddr2 \ IPsrcaddr \ LVM \ LinuxSCSI \ + lvmlockd \ MailTo \ ManageRAID \ ManageVE \ NodeUtilization \ Pure-FTPd \ Raid1 \ Route \ SAPDatabase \ SAPInstance \ SendArp \ ServeRAID \ SphinxSearchDaemon \ Squid \ Stateful \ SysInfo \ VIPArip \ VirtualDomain \ WAS \ WAS6 \ WinPopup \ Xen \ Xinetd \ ZFS \ anything \ apache \ asterisk \ aws-vpc-route53 \ awseip \ awsvip \ clvm \ conntrackd \ db2 \ dhcpd \ dnsupdate \ docker \ eDir88 \ ethmonitor \ exportfs \ fio \ galera \ garbd \ iSCSILogicalUnit \ iSCSITarget \ ids \ iface-bridge \ iface-vlan \ iscsi \ jboss \ kamailio \ lxc \ minio \ mysql \ mysql-proxy \ nagios \ named \ nfsnotify \ nfsserver \ nginx \ oraasm \ oracle \ oralsnr \ pgagent \ pgsql \ pingd \ portblock \ postfix \ pound \ proftpd \ rabbitmq-cluster \ redis \ rkt \ rsyncd \ rsyslog \ scsi2reservation \ sfex \ sg_persist \ slapd \ symlink \ syslog-ng \ tomcat \ varnish \ vmware \ vsftpd \ zabbixserver ocfcommondir = $(OCF_LIB_DIR_PREFIX)/heartbeat ocfcommon_DATA = ocf-shellfuncs \ ocf-binaries \ ocf-directories \ ocf-returncodes \ ocf-rarun \ ocf-distro \ apache-conf.sh \ http-mon.sh \ sapdb-nosha.sh \ sapdb.sh \ ora-common.sh \ mysql-common.sh \ nfsserver-redhat.sh \ findif.sh # Legacy locations hbdir = $(sysconfdir)/ha.d hb_DATA = shellfuncs check: $(ocf_SCRIPTS:=.check) %.check: % OCF_ROOT=$(abs_srcdir) OCF_FUNCTIONS_DIR=$(abs_srcdir) ./$< meta-data | xmllint --path $(abs_srcdir) --noout --relaxng $(abs_srcdir)/metadata.rng - diff --git a/heartbeat/lvmlockd b/heartbeat/lvmlockd new file mode 100755 index 000000000..8d112e09f --- /dev/null +++ b/heartbeat/lvmlockd @@ -0,0 +1,438 @@ +#!/bin/bash +# +# +# lvmlockd OCF Resource Agent +# +# Copyright (c) 2017 SUSE LINUX, Eric Ren +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +This agent manages the lvmlockd daemon. "lvmlockd" is like "clvmd". Both +are used by LVM commands to coordinate access to shared storage, but with +different design and implementations. "lvmlockd" can use two lock managers: +dlm and sanlock. This agent only supports "dlm + lvmlockd". If dlm (or corosync) +are already being used by other cluster software, you are advised to select +dlm, then configure "controld" resource agent for dlm and this agent for "lvmlockd". +Otherwise, consider sanlock for "lvmlockd" if dlm/corosync is not required. +For more information, refer to manpage lvmlockd.8. + +This agent manages the lvmlockd daemon + + + +pid file +pid file + + + + +Set the socket path to listen on. +socket path + + + + +Write log messages from this level up to syslog. +syslog priority + + + + + +Adopt locks from a previous instance of lvmlockd. + +Adopt locks from a previous instance of lvmlockd + + + + + +Whether or not to activate all shared volume groups after starting +the lvmlockd. Note that shared volume groups will always be deactivated +before the lvmlockd stops regardless of what this option is set to. + +Activate volume groups + + + + + + + + + + + + + +END +} + +####################################################################### + +: ${OCF_RESKEY_pidfile:="/run/lvmlockd.pid"} +: ${OCF_RESKEY_activate_vgs:="true"} + +LOCKD="lvmlockd" +# 0.5s sleep each count +TIMEOUT_COUNT=20 + +usage() { + cat </dev/null 2>&1 + fi +} + +silent_status() +{ + local pid=$(get_pid) + + if [ -n "$pid" ] ; then + daemon_is_running "$pid" + else + # No pid file + false + fi +} + +shared_vgs() +{ + local vg_list=() + # the 6th attr bit is either (c)lustered or (s)hared + local offset=5 + + while read -r vg attr ; do + if [ "${attr:$offset:1}" = "s" ] ; then + vg_list=(${vg_list[@]} $vg) + fi + done <<< "$(vgs --noheadings -o name,attr 2>/dev/null)" + + echo "${vg_list[@]}" +} + +# Immdiately exit shell on error, if it retures, it must be successful +deactivate_all_vgs() +{ + # Try to deactivate all volume groups, before stop shared VG(s) + # and lvmlockd. If fails, some logical volumes are still being + # used. In such case, we exit immdiately, leaving lvmlockd running. + # We cannot kill lvmlockd forcely because that will leave lockspaces/ + # locks behind. + ocf_log info "Deactivating shared volume groups..." + ocf_run vgchange -an $(shared_vgs) + if [ $? -ne $OCF_SUCCESS ] ; then + ocf_log info "Failed to deactivate VG(s)." + exit $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +activate_all_vgs() +{ + if ! ocf_is_true "$OCF_RESKEY_activate_vgs" ; then + ocf_log info "\"activate_vgs\" is set to $OCF_RESKEY_activate_vgs, skipping activation." + return $OCF_SUCCESS + fi + + ocf_log info "Activating all shared volume groups..." + # FIXME: + # David Teigland (lvmlockd feature author) suggests to use "aay", i.e. + # auto-activation here. I gave it a try. The problem are: + # 1) It will try to activate every VGs if auto_activation_volume_list is + # _not_ defined; the command returns error code if any clustered volume + # on the system; + # 2) The command will get error output like "LV locked by other host: xxx" + # when the LVs have been activated on other nodes using "-aay". + # + # I think it makes sense to only activate shared LVs by this RA. The behavior + # is same as clvmd. If the issues above disappear and someone raises a strong + # reason in the future, we can change this that time. + ocf_run vgchange -asy $(shared_vgs) + if [ $? -ne $OCF_SUCCESS ] ; then + ocf_log info "Failed to activate shared VG(s):" + lvmlockd_stop + exit $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +check_config() +{ + local out="" + local use_lvmlockd="" + local lock_type="" + + # To use lvmlockd, ensure configure lvm.conf: + # locking_type = 1 + # use_lvmlockd = 1 + out=$(lvmconfig 'global/use_lvmlockd') + use_lvmlockd=$(echo "$out" | cut -d'=' -f2) + + out=$(lvmconfig 'global/locking_type') + lock_type=$(echo "$out" | cut -d'=' -f2) + + if [ "$use_lvmlockd" -ne 1 ] ; then + ocf_log info "lvmlockd is not enabled, please ensure \"use_lvmlockd=1\"" + fi + if [ "$lock_type" -ne 1 ] ; then + ocf_log info "locking type is wrong, please ensure \"locking_type=1\"" + fi + + if [ "$use_lvmlockd" -ne 1 ] || [ "$lock_type" -ne 1 ] ; then + ocf_exit_reason "Improper configuration to use lvmlockd." + exit $OCF_ERR_CONFIGURED + fi + + return $OCF_SUCCESS +} + +check_dlm_controld() +{ + local pid="" + + # dlm daemon should have only one instance, but for safe... + pid=$(pgrep dlm_controld | head -n1) + if ! daemon_is_running $pid ; then + ocf_exit_reason "DLM is not running. Is it configured?" + exit $OCF_ERR_CONFIGURED + fi + + return $OCF_SUCCESS +} + +lvmlockd_start() { + local extras="" + + ocf_log info "checking config settings for ${LOCKD}..." + check_config + + ocf_log info "checking if DLM is started first..." + check_dlm_controld + + if silent_status ; then + ocf_log info "${LOCKD} already started (pid=$(get_pid))" + activate_all_vgs + return $OCF_SUCCESS + fi + + if [ ! -z "$OCF_RESKEY_socket_path" ] ; then + extras="$extras -s ${OCF_RESKEY_socket_path}" + fi + if [ ! -z "$OCF_RESKEY_syslog_priority" ] ; then + extras="$extras -S ${OCF_RESKEY_syslog_priority}" + fi + if [ ! -z "$OCF_RESKEY_adopt" ] ; then + extras="$extras -A ${OCF_RESKEY_adopt}" + else + # Inside lvmlockd daemon, this option defaults to 0. But, we + # want it defaults to 1 for resource agent. When RA monitor pulls + # this daemon up, we expect it to adopt locks from a previous + # instance of lvmlockd. + extras="$extras -A 1" + fi + # This client only support "dlm" lock manager + extras="$extras -g dlm" + + ocf_log info "starting ${LOCKD}..." + ocf_run ${LOCKD} -p ${OCF_RESKEY_pidfile} $extras + rc=$? + if [ $rc -ne $OCF_SUCCESS ] ; then + ocf_exit_reason "Failed to start ${LOCKD}, exit code: $rc" + return $OCF_ERR_GENERIC + fi + + # lvmlockd requires shared VGs to be started before they're used + ocf_log info "start lockspaces of shared VG(s)..." + ocf_run vgchange --lockstart $(shared_vgs) + rc=$? + if [ $rc -ne $OCF_SUCCESS ] ; then + ocf_exit_reason "Failed to start shared VG(s), exit code: $rc" + return $OCF_ERR_GENERIC + fi + + activate_all_vgs + return $OCF_SUCCESS +} + +# Each shared VG has its own lockspace. Besides, lvm_global lockspace +# is for global use, and it should be the last one to close. It should +# be enough to only check on lvm_global. +wait_lockspaces_close() +{ + local retries=0 + + ocf_log info "Waiting for all lockspaces to be closed" + while [ $retries -lt "$TIMEOUT_COUNT" ] + do + if ! dlm_tool ls lvm_global | grep -Eqs "^name[[:space:]]+lvm_global" ; then + return $OCF_SUCCESS + fi + + sleep 0.5 + retries=$((retries + 1)) + done + + ocf_exit_reason "Failed to close all lockspaces clearly" + exit $OCF_ERR_GENERIC +} + +kill_stop() +{ + local pid=$1 + local retries=0 + + ocf_log info "Killing ${LOCKD} (pid=$pid)" + while + daemon_is_running $pid && [ $retries -lt "$TIMEOUT_COUNT" ] + do + if [ $retries -ne 0 ] ; then + # don't sleep on the first try + sleep 0.5 + fi + kill -s TERM $pid >/dev/null 2>&1 + retries=$((retries + 1)) + done + +} + +lvmlockd_stop() { + local pid="" + + if ! silent_status ; then + ocf_log info "${LOCKD} is not running" + return $OCF_SUCCESS + fi + + deactivate_all_vgs + + # lvmlockd requires shared VGs to be started before they're used + ocf_log info "stop the lockspaces of shared VG(s)..." + ocf_run vgchange --lockstop $(shared_vgs) + rc=$? + if [ $rc -ne $OCF_SUCCESS ] ; then + ocf_exit_reason "Failed to stop VG(s), exit code: $rc" + return $OCF_ERR_GENERIC + fi + + wait_lockspaces_close + + pid=$(get_pid) + kill_stop $pid + if silent_status ; then + ocf_exit_reason "Failed to stop, ${LOCKD}[$pid] still running." + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +lvmlockd_monitor() { + if silent_status ; then + return $OCF_SUCCESS + fi + + ocf_log info "${LOCKD} not running" + return $OCF_NOT_RUNNING +} + +lvmlockd_validate() { + check_binary ${LOCKD} + check_binary vgchange + check_binary vgs + check_binary lvmconfig + check_binary dlm_tool + check_binary pgrep + + return $OCF_SUCCESS +} + + +# Make sure meta-data and usage always succeed +case $__OCF_ACTION in +meta-data) meta_data + exit $OCF_SUCCESS + ;; +usage|help) usage + exit $OCF_SUCCESS + ;; +esac + +# Anything other than meta-data and usage must pass validation +lvmlockd_validate || exit $? + +# Translate each action into the appropriate function call +case $__OCF_ACTION in +start) lvmlockd_start + ;; +stop) lvmlockd_stop + ;; +monitor) lvmlockd_monitor + ;; +validate-all) lvmlockd_validate + ;; +*) usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? + +ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +exit $rc