diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate new file mode 100755 index 000000000..275a16349 --- /dev/null +++ b/heartbeat/LVM-activate @@ -0,0 +1,849 @@ +#!/bin/sh +# +# +# Copyright (c) 2017 SUSE LINUX, Eric Ren +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# LVM-activate OCF Resource Agent: +# +# Logical volume manager (LVM) provides new features for cluster environment: +# lvmlockd and system ID, which aim to replace clvmd and tagged-exclusive +# activation. Accordingly, we have created a new resource agent named "lvmlockd" +# to manage the lvmlockd daemon. In addition, this new resource agent "LVM-activate" +# is created to take care of LVM activation/deactivation work. This agent supports +# the new features: lvmlockd and system ID, and also supports the old features: +# clvmd and lvm tag. +# +# For LVM activation, we already have an old resource agent "LVM" that supports +# LVM1, clvmd and lvm tag. 
Some reasons why we tend to create a new one rather +# than changing the old: +# 1. "LVM" supports clvmd and some legacy things. The code looks very messy, +# which makes it difficult to extend for new features, and to ensure everything +# works correctly with huge changes. +# +# 2. We want this new RA to do the activation work in a much safer way by +# leveraging the new features and activating on a VG or LV basis depending on +# your choice. +# +# We will leave the old RA alone while the new one is phased in. In order to replace +# the old RA smoothly, this RA also tries to support the functionalities of the old +# RA and inevitably copies some code from the LVM RA. +# +# WARNINGS: +# 1. We support two configuration combinations: lvmlockd+LVM-activate and clvm+LVM-activate. +# But, you cannot use them in your cluster at the same time! +# 2. Please put all "lvmlockd"/"clvmd" volume groups under automatic management by the RA, +# once you choose pacemaker to manage one of them. If you manage some +# by hand, the stop action of lvmlockd RA may fail and the node may get +# fenced consequently, because some DLM lockspaces might be in use and +# cannot be closed automatically. +# +# Thanks David Teigland! He is the author of these LVM features, giving valuable +# ideas/feedback about this resource agent. +############################################################################ + +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# If LV is given, only activate this named LV; otherwise, activate all +# LVs in the named VG. +VG=${OCF_RESKEY_vgname} +LV=${OCF_RESKEY_lvname} + +# How LVM controls access to the VG: +# +# 0: place-holder for any incorrect cases; to be safe, we enforce that the VG +# must use one of the following protection methods in cluster environment. 
+# 1: vg is shared - lvmlockd (new) +# 2: vg is clustered - clvmd (old) +# 3: vg has system_id (new) +# 4: vg has tagging (old) +VG_access_mode=${OCF_RESKEY_vg_access_mode} + +# Activate LV(s) with "shared" lock for cluster fs +# or "exclusive" lock for local fs +LV_activation_mode=${OCF_RESKEY_activation_mode:-exclusive} + +# For system ID feature +SYSTEM_ID="" + +# For tagging activation mode +OUR_TAG=${OCF_RESKEY_tag:-pacemaker} + +####################################################################### + +meta_data() { + cat < + + + + +1.0 + + +This agent manages LVM activation/deactivation work for a given volume group. +It supports both new features: lvmlockd and system ID, and old features: +clvmd and lvm tag. Please use the new features as possible as you can! + +This agent activates/deactivates logical volumes. + + + + +The volume group name. + +The volume group name + + + + + +If set, only the specified LV will be activated. + +Only activate the given LV + + + + + +This option decides which solution will be used to protect the volume group in +cluster environment. Optional solutions are: lvmlockd, clvmd, system_id and +tagging. + +The VG access mode + + + + + +The activation mode decides the visibility of logical volumes in the cluster. There +are two different modes: "shared" for cluster filesystem and "exclusive" for local +filesystem. With "shared", an LV can be activated concurrently from multiple nodes. +With "exclusive", an LV can be activated by one node at a time. + +This option only has effect on "lvmlockd"/"clvmd" vg_access_mode. For "system_id" +and "tagging", they always mean exclusive activation. + +Logical volume activation mode + + + + + +The tag used for tagging activation mode. 
+ +The tag used for tagging activation mode + + + + + + + + + + + + + +END +} + +####################################################################### + +usage() { + cat </dev/null | tr -d \') + export ${kvs} + vg_locktype=${LVM2_VG_LOCK_TYPE} + vg_clustered=${LVM2_VG_CLUSTERED} + vg_systemid=${LVM2_VG_SYSTEMID} + vg_tags=${LVM2_VG_TAGS} + + # We know this VG is using lvmlockd if the lock type is dlm. + if [ "$vg_locktype" = "dlm" ]; then + access_mode=1 + elif [ "$vg_clustered" = "clustered" ]; then + access_mode=2 + elif [ -n "$vg_systemid" ]; then + SYSTEM_ID=$(lvm systemid 2>/dev/null | cut -d':' -f2 | tr -d '[:blank:]') + access_mode=3 + elif [ -n "$vg_tags" ]; then + # TODO: + # We don't have reliable way to test if tagging activation is used. + access_mode=4 + else + access_mode=0 + fi + + return $access_mode +} + +# TODO: All tagging activation code is almost copied from LVM RA!!! +# But, the old LVM RA just uses the ordinary tags, not the "hosttag" feature +# which may be a better method for active-inactive cluster scenario. +# +# We have two choices: +# 1. Continue to use the LVM way, which may work well on old system. +# 2. Change to use the real hosttag feature, but it looks very same +# to systemID. +# Anyway, we can easily change this if anyone requests with good reasons. + +# Does this VG have our tag? +# Returns 0 if the VG carries exactly OUR_TAG, 1 if it has no tag at all, +# and 2 if some other tag is set. +check_tags() +{ + local owner=$(vgs -o tags --noheadings ${VG} | tr -d '[:blank:]') + + if [ -z "$owner" ]; then + # No-one owns this VG yet + return 1 + fi + + if [ "$OUR_TAG" = "$owner" ]; then + # yep, this is ours + return 0 + fi + + # some other tag is set on this vg + return 2 +} + +# Remove every tag currently set on ${VG}. +strip_tags() +{ + local tag + + # List the tags of our own VG only. ${VG} comes from the "vgname" parameter; + # without a VG argument vgs would report the tags of every VG on the system. + for tag in $(vgs --noheadings -o tags ${VG} | sed s/","/" "/g); do + ocf_log info "Stripping tag, $tag" + + # LVM version 2.02.98 allows changing tags if PARTIAL + vgchange --deltag "$tag" ${VG} + done + + if [ ! 
-z "$(vgs -o tags --noheadings ${VG} | tr -d '[:blank:]')" ]; then + ocf_exit_reason "Failed to remove ownership tags from ${VG}" + exit $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +# Make sure ${VG} carries our ownership tag (OUR_TAG), stripping any other +# tags first. Returns OCF_SUCCESS once the tag is set and OCF_ERR_GENERIC +# if tagging fails. +set_tags() +{ + # check_tags reports ownership through its exit status: + # 0 = already ours, 1 = untagged, 2 = tagged by someone else. + check_tags + case $? in + 0) + # we already own it. + return $OCF_SUCCESS + ;; + 2) + # other tags are set, strip them before setting + if ! strip_tags; then + return $OCF_ERR_GENERIC + fi + ;; + *) + : ;; + esac + + if ! vgchange --addtag $OUR_TAG ${VG} ; then + ocf_log err "Failed to add ownership tag to ${VG}" + return $OCF_ERR_GENERIC + fi + + ocf_log info "New tag \"${OUR_TAG}\" added to ${VG}" + return $OCF_SUCCESS +} + +# Parameters: +# 1st: config item name +# 2nd: expected config item value +config_verify() +{ + local name=$1 + local expect=$2 + local real="" + + real=$(lvmconfig "$name" | cut -d'=' -f2) + if [ "$real" != "$expect" ]; then + ocf_exit_reason "config item $name: expect=$expect but real=$real" + exit $OCF_ERR_CONFIGURED + + fi + + return $OCF_SUCCESS +} + +lvmlockd_check() +{ + config_verify "global/use_lvmlockd" "1" + config_verify "global/locking_type" "1" + + # We recommend to activate one LV at a time so that this specific volume + # binds to a proper filesystem to protect the data + # TODO: + # Will this warn message be too noisy? + if [ -z "$LV" ]; then + ocf_log warn "You are recommended to activate one LV at a time or use exclusive activation mode." + fi + + # Good: lvmlockd is running, and clvmd is not running + if ! pgrep lvmlockd >/dev/null 2>&1 ; then + ocf_exit_reason "lvmlockd daemon is not running!" + exit $OCF_ERR_CONFIGURED + fi + + if pgrep clvmd >/dev/null 2>&1 ; then + ocf_exit_reason "clvmd daemon is running unexpectedly." + exit $OCF_ERR_CONFIGURED + fi + + return $OCF_SUCCESS +} + +clvmd_check() +{ + config_verify "global/use_lvmetad" "0" + config_verify "global/use_lvmlockd" "0" + config_verify "global/locking_type" "3" + + # TODO: + # David asked a question: does lvchange -aey works well enough with clvmd? 
+ # + # Corey said: I think it does work well enough. We do a fair amount of exclusive + # activation clvm testing, and my experience is you'll get the LV activated on + # the node you ran the command on. But, I think the specific scenario and issue + # that surprised us all was when the LV was *already* exclusively active on say + # nodeA, and nodeB then attempts to also exclusively activate it as well. Instead + # of failing, the activation succeeds even though nodeB activation didn't occur. + # This is documented in the following bug: + # https://bugzilla.redhat.com/show_bug.cgi?id=1191724#c8 + # Technically, you're not guaranteed to have it activated on the node you run + # the cmd on, but again, that's not been my experience. + # + # Eric: Put the interesting discussion here so that we can be more careful on this. + + # Good: clvmd is running, and lvmlockd is not running + if ! pgrep clvmd >/dev/null 2>&1 ; then + ocf_exit_reason "clvmd daemon is not running!" + exit $OCF_ERR_CONFIGURED + fi + + if pgrep lvmetad >/dev/null 2>&1 ; then + ocf_exit_reason "Please stop lvmetad daemon when clvmd is running." + exit $OCF_ERR_CONFIGURED + fi + + if pgrep lvmlockd >/dev/null 2>&1 ; then + ocf_exit_reason "lvmlockd daemon is running unexpectedly." + exit $OCF_ERR_CONFIGURED + fi + + return $OCF_SUCCESS +} + +systemid_check() +{ + local source + + # system_id_source is set in lvm.conf + source=$(lvmconfig 'global/system_id_source' 2>/dev/null | cut -d"=" -f2) + if [ "$source" = "" ] || [ "$source" = "none" ]; then + ocf_exit_reason "system_id_source in lvm.conf is not set correctly!" + exit $OCF_ERR_CONFIGURED + fi + + if [ -z ${SYSTEM_ID} ]; then + ocf_exit_reason "local/system_id is not set!" + exit $OCF_ERR_CONFIGURED + fi + + return $OCF_SUCCESS +} + +# Verify tags setup +tagging_check() +{ + # The volume_list must be initialized to something in order to + # guarantee our tag will be filtered on startup + if ! 
lvm dumpconfig activation/volume_list; then + ocf_log err "LVM: Improper setup detected" + ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd" + exit $OCF_ERR_CONFIGURED + fi + + # Our tag must _NOT_ be in the volume_list. This agent + # overrides the volume_list during activation using the + # special tag reserved for cluster activation + if lvm dumpconfig activation/volume_list | grep -e "\"@${OUR_TAG}\"" -e "\"${VG}\""; then + ocf_log err "LVM: Improper setup detected" + ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"${OUR_TAG}\", or volume group, ${VG}" + exit $OCF_ERR_CONFIGURED + fi + + return $OCF_SUCCESS +} + +lvm_validate() { + local lv_count + local mode + + check_binary pgrep + # Every LVM command is just symlink to lvm binary + check_binary lvm + check_binary dmsetup + + if ! vgs --foreign ${VG} >/dev/null 2>&1 ; then + # stop action exits successfully if the VG cannot be accessed... + if [ $__OCF_ACTION = "stop" ]; then + ocf_log warn "VG [${VG}] cannot be accessed, stop action exits successfully." + exit $OCF_SUCCESS + fi + + ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!" + exit $OCF_ERR_CONFIGURED + fi + + # Get the access mode from VG metadata and check if it matches the input + # value. Skip to check "tagging" mode because there's no reliable way to + # automatically check if "tagging" mode is being used. + get_VG_access_mode + mode=$? + if [ $VG_access_mode -ne 4 ] && [ $mode -ne $VG_access_mode ]; then + ocf_exit_reason "The specified vg_access_mode doesn't match the mode on VG metadata!" + exit $OCF_ERR_ARGS + fi + + # Nothing to do if the VG has no logical volume + lv_count=$(vgs --foreign -o lv_count --noheadings ${VG} 2>/dev/null) + if [ $lv_count -lt 1 ]; then + ocf_exit_reason "Volume group [$VG] doesn't contain any logical volume!" 
+ exit $OCF_ERR_CONFIGURED + fi + + # Check if the given $LV is in the $VG + if [ -n "$LV" ]; then + OUT=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1) + if [ $? -ne 0 ]; then + ocf_log err "lvs: ${OUT}" + ocf_exit_reason "LV ($LV) is not in the given VG ($VG)." + exit $OCF_ERR_ARGS + fi + fi + + # VG_access_mode specific checking goes here + case ${VG_access_mode} in + 1) + lvmlockd_check + ;; + 2) + clvmd_check + ;; + 3) + systemid_check + ;; + + 4) + tagging_check + ;; + *) + ocf_exit_reason "Incorrect VG access mode detected!" + exit $OCF_ERR_CONFIGURED + esac + + if [ $? -ne $OCF_SUCCESS ]; then + ocf_exit_reason "Improper configuration issue is detected!" + exit $OCF_ERR_CONFIGURED + fi + + return $OCF_SUCCESS +} + +# To activate LV(s) with different "activation mode" parameters +do_activate() { + local activate_opt=$1 + + # Only activate the specific LV if it's given + if [ -n "$LV" ]; then + ocf_run lvchange $activate_opt ${VG}/${LV} + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + else + ocf_run lvchange $activate_opt ${VG} + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + fi + + return $OCF_SUCCESS +} + +lvmlockd_activate() { + # activation opt + local activate_opt + + if [ "$LV_activation_mode" = "shared" ]; then + activate_opt="-asy" + else + activate_opt="-aey" + fi + + # lvmlockd requires shared VGs to be started before they're used + ocf_run vgchange --lockstart ${VG} + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_log err "Failed to start shared VG(s), exit code: $rc" + return $OCF_ERR_GENERIC + fi + + do_activate "$activate_opt" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +# clvmd must be running to activate clustered VG +clvmd_activate() { + local activate_opt + + if [ "$LV_activation_mode" = "shared" ]; then + activate_opt="-asy" + else + activate_opt="-aey" + fi + + do_activate "$activate_opt" + if [ $? 
-ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +systemid_activate() { + local cur_systemid + + pvscan --cache + cur_systemid=$(vgs --foreign --noheadings -o systemid ${VG} | tr -d '[:blank:]') + + # Put our system ID on the VG + vgchange -y --config "local/extra_system_ids=[\"${cur_systemid}\"]" \ + --systemid ${SYSTEM_ID} ${VG} + + do_activate "-ay" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +tagging_activate() { + if ! set_tags ; then + ocf_log err "Failed to set tags on ${VG}." + return $OCF_ERR_GENERIC + fi + + do_activate "-ay --config activation{volume_list=[\"@${OUR_TAG}\"]}" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +# Deactivate the LV(s) and, once no LV of ${VG} is active any more, stop the +# VG lockspace. Returns OCF_SUCCESS or OCF_ERR_GENERIC. +lvmlockd_deactivate() { + do_activate "-an" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + OUT=$(lvs --noheadings -S lv_active=active ${VG} 2>/dev/null) + # Other LVs of this VG are still active: keep the lockspace open. + # Plain POSIX test is used because this agent runs under /bin/sh. + [ -n "$OUT" ] && return $OCF_SUCCESS + + # Close the lockspace of this VG if there is no active LV + ocf_run vgchange --lockstop ${VG} + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_log err "Failed to close the shared VG lockspace, exit code: $rc" + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +clvmd_deactivate() { + do_activate "-an" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +systemid_deactivate() { + do_activate "-an" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +tagging_deactivate() { + do_activate "-an --config activation{volume_list=[\"@${OUR_TAG}\"]}" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + if ! strip_tags ; then + ocf_log err "Failed to remove tags on ${VG}." + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +# TODO: +# How can we accurately check if LVs in the given VG are all active? 
+# +# David: +# If we wanted to check that all LVs in the VG are active, then we would +# probably need to use the lvs/lv_live_table command here since dmsetup +# won't know about inactive LVs that should be active. +# +# Eric: +# But, lvs/lv_live_table command doesn't work well now. I tried the following +# method: +# +# lv_count=$(vgs --foreign -o lv_count --noheadings ${VG} 2>/dev/null | tr -d '[:blank:]') +# dm_count=$(dmsetup --noheadings info -c -S "vgname=${VG}" 2>/dev/null | grep -c "${VG}-") +# test $lv_count -eq $dm_count +# +# It works, but we cannot afford to use LVM command in lvm_status. LVM command is expensive +# because it may potentially scan all disks on the system, update the metadata even using +# lvs/vgs when the metadata is somehow inconsistent. +# +# So, we have to make compromise that the VG is assumably active if any LV of the VG is active. +lvm_status() { + local dm_count + + if [ -n "${LV}" ]; then + # dmsetup ls? It cannot accept device name. It's + # too heavy to list all DM devices. + dmsetup info --noheadings --noflush -c -S "vgname=${VG} && lvname=${LV}" \ + | grep -Eq "${VG}-+${LV}" + else + dm_count=$(dmsetup --noheadings info -c -S "vgname=${VG}" 2>/dev/null | grep -c "${VG}-") + test $dm_count -gt 0 + fi + + if [ $? -ne 0 ]; then + return $OCF_NOT_RUNNING + fi + + return $OCF_SUCCESS +} + +# Activate the configured LV (or every LV of the VG) using the method selected +# by VG_access_mode, then confirm the result with lvm_status. +lvm_start() { + local rc + local vol + + # Name the target first so that every log message below can refer to it. + [ -z "${LV}" ] && vol=${VG} || vol=${VG}/${LV} + + if lvm_status ; then + ocf_log info "${vol}: is already active." + return $OCF_SUCCESS + fi + + ocf_log info "Activating ${vol}" + + case ${VG_access_mode} in + 1) + lvmlockd_activate + ;; + 2) + clvmd_activate + ;; + 3) + systemid_activate + ;; + 4) + tagging_activate + ;; + *) + ocf_exit_reason "VG [${VG}] is not properly configured in cluster. It's unsafe!" + exit $OCF_ERR_CONFIGURED + ;; + esac + + rc=$? + if lvm_status ; then + ocf_log info "${vol}: activated successfully." + return $OCF_SUCCESS + else + ocf_exit_reason "${vol}: failed to activate." 
+ # Never report success while the volume is inactive, even if the + # activation command itself returned 0. + [ "$rc" -ne "$OCF_SUCCESS" ] || rc=$OCF_ERR_GENERIC + return $rc + fi +} + +# Deactivate LVM volume(s) +# Returns OCF_SUCCESS when the volume is (already) inactive or the VG cannot +# be found, OCF_ERR_GENERIC when it is still active after deactivation. +lvm_stop() { + local vol + + # Plain POSIX test is used because this agent runs under /bin/sh. + [ -z "${LV}" ] && vol=${VG} || vol=${VG}/${LV} + + if ! lvm_status ; then + ocf_log info "${vol}: has already been deactivated." + return $OCF_SUCCESS + fi + + ocf_log info "Deactivating ${vol}" + + if ! vgs ${VG} >/dev/null 2>&1 ; then + ocf_log info "Volume group ${VG} not found. Nothing to deactivate." + return $OCF_SUCCESS + fi + + case ${VG_access_mode} in + 1) + lvmlockd_deactivate + ;; + 2) + clvmd_deactivate + ;; + 3) + systemid_deactivate + ;; + 4) + tagging_deactivate + ;; + *) + ocf_exit_reason "VG [${VG}] is not properly configured in cluster. It's unsafe!" + exit $OCF_ERR_CONFIGURED + ;; + esac + + if ! lvm_status ; then + ocf_log info "${vol}: deactivated successfully." + return $OCF_SUCCESS + else + ocf_exit_reason "${vol}: failed to deactivate." + return $OCF_ERR_GENERIC + fi +} + +# +# MAIN +# + +case $__OCF_ACTION in +meta-data) meta_data + exit $OCF_SUCCESS + ;; +usage|help) usage + exit $OCF_SUCCESS + ;; +esac + +# Parameters checking +if [ -z "$VG" ] +then + ocf_exit_reason "You must identify the volume group name!" + exit $OCF_ERR_ARGS +fi + +if [ "$LV_activation_mode" != "shared" ] && [ "$LV_activation_mode" != "exclusive" ] +then + ocf_exit_reason "Invalid value for activation_mode: $LV_activation_mode" + exit $OCF_ERR_ARGS +fi + +# Convert VG_access_mode from string to index +case ${VG_access_mode} in +lvmlockd) + VG_access_mode=1 + ;; +clvmd) + VG_access_mode=2 + ;; +system_id) + VG_access_mode=3 + ;; +tagging) + VG_access_mode=4 + ;; +*) + ocf_exit_reason "You specified an invalid value for vg_access_mode: $VG_access_mode" + exit $OCF_ERR_ARGS + ;; +esac + +# Translate each action into the appropriate function call +case $__OCF_ACTION in +start) + lvm_validate + lvm_start + ;; + +stop) + lvm_validate + lvm_stop + ;; +monitor) + lvm_status + ;; +validate-all) + lvm_validate + ;; +*) + usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? 
+ +ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +exit $rc diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am index 0cb3864ca..1d71ecf0f 100644 --- a/heartbeat/Makefile.am +++ b/heartbeat/Makefile.am @@ -1,179 +1,180 @@ # Makefile.am for OCF RAs # # Author: Sun Jing Dong # Copyright (C) 2004 IBM # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(ocf_SCRIPTS) $(ocfcommon_DATA) \ $(common_DATA) $(hb_DATA) $(dtd_DATA) \ README AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/linux-ha halibdir = $(libexecdir)/heartbeat ocfdir = $(OCF_RA_DIR_PREFIX)/heartbeat dtddir = $(datadir)/$(PACKAGE_NAME) dtd_DATA = ra-api-1.dtd metadata.rng if USE_IPV6ADDR_AGENT ocf_PROGRAMS = IPv6addr else ocf_PROGRAMS = endif if IPV6ADDR_COMPATIBLE halib_PROGRAMS = send_ua else halib_PROGRAMS = endif IPv6addr_SOURCES = IPv6addr.c IPv6addr_utils.c send_ua_SOURCES = send_ua.c IPv6addr_utils.c IPv6addr_LDADD = -lplumb $(LIBNETLIBS) send_ua_LDADD = $(LIBNETLIBS) ocf_SCRIPTS = AoEtarget \ AudibleAlarm \ ClusterMon \ CTDB \ Delay \ Dummy \ EvmsSCC \ Evmsd \ Filesystem \ ICP \ IPaddr \ IPaddr2 \ IPsrcaddr \ LVM \ LinuxSCSI \ lvmlockd \ + LVM-activate \ MailTo \ ManageRAID \ ManageVE \ NodeUtilization \ Pure-FTPd \ Raid1 \ Route \ SAPDatabase \ SAPInstance \ SendArp \ ServeRAID \ SphinxSearchDaemon \ Squid \ Stateful \ SysInfo \ VIPArip \ VirtualDomain \ WAS \ WAS6 \ WinPopup \ Xen \ Xinetd \ ZFS \ anything \ apache \ asterisk \ aws-vpc-move-ip \ aws-vpc-route53 \ awseip \ awsvip \ clvm \ conntrackd \ db2 \ dhcpd \ dnsupdate \ docker \ eDir88 \ ethmonitor \ exportfs \ fio \ galera \ garbd \ iSCSILogicalUnit \ iSCSITarget \ ids \ iface-bridge \ iface-vlan \ iscsi \ jboss \ kamailio \ lxc \ minio \ mysql \ mysql-proxy \ nagios \ named \ nfsnotify \ nfsserver \ nginx \ oraasm \ oracle \ oralsnr \ ovsmonitor \ pgagent \ pgsql \ pingd \ portblock \ postfix \ pound \ proftpd \ rabbitmq-cluster \ redis \ rkt \ rsyncd \ rsyslog \ scsi2reservation \ sfex \ sg_persist \ slapd \ symlink \ syslog-ng \ tomcat \ varnish \ vmware \ vsftpd \ zabbixserver ocfcommondir = $(OCF_LIB_DIR_PREFIX)/heartbeat ocfcommon_DATA = ocf-shellfuncs \ ocf-binaries \ ocf-directories \ ocf-returncodes \ ocf-rarun \ ocf-distro \ apache-conf.sh \ http-mon.sh \ sapdb-nosha.sh \ sapdb.sh \ ora-common.sh \ mysql-common.sh \ 
nfsserver-redhat.sh \ findif.sh # Legacy locations hbdir = $(sysconfdir)/ha.d hb_DATA = shellfuncs check: $(ocf_SCRIPTS:=.check) %.check: % OCF_ROOT=$(abs_srcdir) OCF_FUNCTIONS_DIR=$(abs_srcdir) ./$< meta-data | xmllint --path $(abs_srcdir) --noout --relaxng $(abs_srcdir)/metadata.rng - diff --git a/heartbeat/lvmlockd b/heartbeat/lvmlockd index 8d112e09f..645cf81e7 100755 --- a/heartbeat/lvmlockd +++ b/heartbeat/lvmlockd @@ -1,438 +1,352 @@ -#!/bin/bash +#!/bin/sh # # # lvmlockd OCF Resource Agent # # Copyright (c) 2017 SUSE LINUX, Eric Ren # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 This agent manages the lvmlockd daemon. "lvmlockd" is like "clvmd". 
Both are used by LVM commands to coordinate access to shared storage, but with different design and implementations. "lvmlockd" can use two lock managers: dlm and sanlock. This agent only supports "dlm + lvmlockd". If dlm (or corosync) are already being used by other cluster software, you are advised to select dlm, then configure "controld" resource agent for dlm and this agent for "lvmlockd". Otherwise, consider sanlock for "lvmlockd" if dlm/corosync is not required. For more information, refer to manpage lvmlockd.8. This agent manages the lvmlockd daemon pid file pid file Set the socket path to listen on. socket path Write log messages from this level up to syslog. syslog priority Adopt locks from a previous instance of lvmlockd. Adopt locks from a previous instance of lvmlockd - - - -Whether or not to activate all shared volume groups after starting -the lvmlockd. Note that shared volume groups will always be deactivated -before the lvmlockd stops regardless of what this option is set to. - -Activate volume groups - - - END } ####################################################################### : ${OCF_RESKEY_pidfile:="/run/lvmlockd.pid"} -: ${OCF_RESKEY_activate_vgs:="true"} LOCKD="lvmlockd" # 0.5s sleep each count TIMEOUT_COUNT=20 usage() { cat </dev/null 2>&1 fi } silent_status() { local pid=$(get_pid) if [ -n "$pid" ] ; then daemon_is_running "$pid" else # No pid file false fi } -shared_vgs() -{ - local vg_list=() - # the 6th attr bit is either (c)lustered or (s)hared - local offset=5 - - while read -r vg attr ; do - if [ "${attr:$offset:1}" = "s" ] ; then - vg_list=(${vg_list[@]} $vg) - fi - done <<< "$(vgs --noheadings -o name,attr 2>/dev/null)" - - echo "${vg_list[@]}" -} - -# Immdiately exit shell on error, if it retures, it must be successful -deactivate_all_vgs() -{ - # Try to deactivate all volume groups, before stop shared VG(s) - # and lvmlockd. If fails, some logical volumes are still being - # used. 
In such case, we exit immdiately, leaving lvmlockd running. - # We cannot kill lvmlockd forcely because that will leave lockspaces/ - # locks behind. - ocf_log info "Deactivating shared volume groups..." - ocf_run vgchange -an $(shared_vgs) - if [ $? -ne $OCF_SUCCESS ] ; then - ocf_log info "Failed to deactivate VG(s)." - exit $OCF_ERR_GENERIC - fi - - return $OCF_SUCCESS -} - -activate_all_vgs() -{ - if ! ocf_is_true "$OCF_RESKEY_activate_vgs" ; then - ocf_log info "\"activate_vgs\" is set to $OCF_RESKEY_activate_vgs, skipping activation." - return $OCF_SUCCESS - fi - - ocf_log info "Activating all shared volume groups..." - # FIXME: - # David Teigland (lvmlockd feature author) suggests to use "aay", i.e. - # auto-activation here. I gave it a try. The problem are: - # 1) It will try to activate every VGs if auto_activation_volume_list is - # _not_ defined; the command returns error code if any clustered volume - # on the system; - # 2) The command will get error output like "LV locked by other host: xxx" - # when the LVs have been activated on other nodes using "-aay". - # - # I think it makes sense to only activate shared LVs by this RA. The behavior - # is same as clvmd. If the issues above disappear and someone raises a strong - # reason in the future, we can change this that time. - ocf_run vgchange -asy $(shared_vgs) - if [ $? 
-ne $OCF_SUCCESS ] ; then - ocf_log info "Failed to activate shared VG(s):" - lvmlockd_stop - exit $OCF_ERR_GENERIC - fi - - return $OCF_SUCCESS -} - check_config() { local out="" local use_lvmlockd="" local lock_type="" # To use lvmlockd, ensure configure lvm.conf: # locking_type = 1 # use_lvmlockd = 1 out=$(lvmconfig 'global/use_lvmlockd') use_lvmlockd=$(echo "$out" | cut -d'=' -f2) out=$(lvmconfig 'global/locking_type') lock_type=$(echo "$out" | cut -d'=' -f2) if [ "$use_lvmlockd" -ne 1 ] ; then ocf_log info "lvmlockd is not enabled, please ensure \"use_lvmlockd=1\"" fi if [ "$lock_type" -ne 1 ] ; then ocf_log info "locking type is wrong, please ensure \"locking_type=1\"" fi if [ "$use_lvmlockd" -ne 1 ] || [ "$lock_type" -ne 1 ] ; then ocf_exit_reason "Improper configuration to use lvmlockd." exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } check_dlm_controld() { local pid="" # dlm daemon should have only one instance, but for safe... pid=$(pgrep dlm_controld | head -n1) if ! daemon_is_running $pid ; then ocf_exit_reason "DLM is not running. Is it configured?" exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } lvmlockd_start() { local extras="" ocf_log info "checking config settings for ${LOCKD}..." check_config ocf_log info "checking if DLM is started first..." check_dlm_controld if silent_status ; then ocf_log info "${LOCKD} already started (pid=$(get_pid))" - activate_all_vgs return $OCF_SUCCESS fi if [ ! -z "$OCF_RESKEY_socket_path" ] ; then extras="$extras -s ${OCF_RESKEY_socket_path}" fi if [ ! -z "$OCF_RESKEY_syslog_priority" ] ; then extras="$extras -S ${OCF_RESKEY_syslog_priority}" fi if [ ! -z "$OCF_RESKEY_adopt" ] ; then extras="$extras -A ${OCF_RESKEY_adopt}" else # Inside lvmlockd daemon, this option defaults to 0. But, we # want it defaults to 1 for resource agent. When RA monitor pulls # this daemon up, we expect it to adopt locks from a previous # instance of lvmlockd. 
extras="$extras -A 1" fi # This client only support "dlm" lock manager extras="$extras -g dlm" ocf_log info "starting ${LOCKD}..." ocf_run ${LOCKD} -p ${OCF_RESKEY_pidfile} $extras rc=$? if [ $rc -ne $OCF_SUCCESS ] ; then ocf_exit_reason "Failed to start ${LOCKD}, exit code: $rc" return $OCF_ERR_GENERIC fi - # lvmlockd requires shared VGs to be started before they're used - ocf_log info "start lockspaces of shared VG(s)..." - ocf_run vgchange --lockstart $(shared_vgs) - rc=$? - if [ $rc -ne $OCF_SUCCESS ] ; then - ocf_exit_reason "Failed to start shared VG(s), exit code: $rc" - return $OCF_ERR_GENERIC - fi - - activate_all_vgs return $OCF_SUCCESS } # Each shared VG has its own lockspace. Besides, lvm_global lockspace # is for global use, and it should be the last one to close. It should # be enough to only check on lvm_global. wait_lockspaces_close() { local retries=0 ocf_log info "Waiting for all lockspaces to be closed" while [ $retries -lt "$TIMEOUT_COUNT" ] do if ! dlm_tool ls lvm_global | grep -Eqs "^name[[:space:]]+lvm_global" ; then return $OCF_SUCCESS fi sleep 0.5 retries=$((retries + 1)) done ocf_exit_reason "Failed to close all lockspaces clearly" exit $OCF_ERR_GENERIC } kill_stop() { local pid=$1 local retries=0 ocf_log info "Killing ${LOCKD} (pid=$pid)" while daemon_is_running $pid && [ $retries -lt "$TIMEOUT_COUNT" ] do if [ $retries -ne 0 ] ; then # don't sleep on the first try sleep 0.5 fi kill -s TERM $pid >/dev/null 2>&1 retries=$((retries + 1)) done } lvmlockd_stop() { local pid="" if ! silent_status ; then ocf_log info "${LOCKD} is not running" return $OCF_SUCCESS fi - deactivate_all_vgs - - # lvmlockd requires shared VGs to be started before they're used - ocf_log info "stop the lockspaces of shared VG(s)..." - ocf_run vgchange --lockstop $(shared_vgs) - rc=$? 
- if [ $rc -ne $OCF_SUCCESS ] ; then - ocf_exit_reason "Failed to stop VG(s), exit code: $rc" - return $OCF_ERR_GENERIC + if [ -n "$(dlm_tool ls)" ]; then + # We are going to stop lvmlockd, at this moment, we hope all shared VG have + # been deactivated, otherwise we are in trouble: the stop action will fail! + ocf_log info "stop the lockspaces of shared VG(s)..." + ocf_run lvmlockctl --stop-lockspaces + rc=$? + if [ $rc -ne $OCF_SUCCESS ] ; then + ocf_exit_reason "Failed to close lockspace, exit code: $rc" + return $OCF_ERR_GENERIC + fi fi wait_lockspaces_close pid=$(get_pid) kill_stop $pid if silent_status ; then ocf_exit_reason "Failed to stop, ${LOCKD}[$pid] still running." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } lvmlockd_monitor() { if silent_status ; then return $OCF_SUCCESS fi ocf_log info "${LOCKD} not running" return $OCF_NOT_RUNNING } lvmlockd_validate() { check_binary ${LOCKD} - check_binary vgchange - check_binary vgs - check_binary lvmconfig + check_binary lvm check_binary dlm_tool check_binary pgrep + check_binary lvmlockctl return $OCF_SUCCESS } # Make sure meta-data and usage always succeed case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) usage exit $OCF_SUCCESS ;; esac # Anything other than meta-data and usage must pass validation lvmlockd_validate || exit $? # Translate each action into the appropriate function call case $__OCF_ACTION in start) lvmlockd_start ;; stop) lvmlockd_stop ;; monitor) lvmlockd_monitor ;; validate-all) lvmlockd_validate ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc