diff --git a/rgmanager/src/resources/lvm_by_lv.sh b/rgmanager/src/resources/lvm_by_lv.sh
index 6e164cb29..915a7e29d 100644
--- a/rgmanager/src/resources/lvm_by_lv.sh
+++ b/rgmanager/src/resources/lvm_by_lv.sh
@@ -1,441 +1,504 @@
#!/bin/bash
#
# Copyright (C) 1997-2003 Sistina Software, Inc.  All rights reserved.
# Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

# lv_verify
#
# Verify the parameters passed in
#
lv_verify()
{
    # Anything to verify?  Perhaps the names?
    return $OCF_SUCCESS
}

# restore_transient_failed_pvs
#
# Scan the PVs belonging to $OCF_RESKEY_vg_name and run
# 'vgextend --restoremissing' on any PV whose attr field flags it as
# missing ("..m") but whose device has reappeared (lines still showing
# 'unknown device' are skipped).  Restore failures are logged and
# otherwise ignored; the caller proceeds with activation either way.
restore_transient_failed_pvs()
{
    local a=0
    local -a results

    # Each PV contributes three consecutive array slots:
    # name, vg_name, attr.
    # BUGFIX: quote the VG name so an empty or unusual value cannot
    # break the grep invocation.
    results=(`pvs -o name,vg_name,attr --noheadings | grep "$OCF_RESKEY_vg_name" | grep -v 'unknown device'`)

    while [ ! -z "${results[$a]}" ] ; do
        # attr "..m" == PV marked missing; also require an exact VG
        # match (the grep above matches substrings anywhere in the line).
        if [[ ${results[$(($a + 2))]} =~ ..m ]] &&
           [ "$OCF_RESKEY_vg_name" == "${results[$(($a + 1))]}" ]; then
            ocf_log notice "Attempting to restore missing PV, ${results[$a]} in $OCF_RESKEY_vg_name"
            vgextend --restoremissing $OCF_RESKEY_vg_name ${results[$a]}
            if [ $? -ne 0 ]; then
                ocf_log notice "Failed to restore ${results[$a]}"
            else
                ocf_log notice "  ${results[$a]} restored"
            fi
        fi
        a=$(($a + 3))
    done
}

# lv_exec_resilient
#
# Sometimes, devices can come back.  Their metadata will conflict
# with the good devices that remain.
# This function filters out those failed devices when executing
# the given command.
#
# Finishing with vgscan resets the cache/filter
lv_exec_resilient()
{
    declare command=$1
    declare all_pvs

    ocf_log notice "Making resilient : $command"

    if [ -z "$command" ]; then
        ocf_log err "lv_exec_resilient: Arguments not supplied"
        return $OCF_ERR_ARGS
    fi

    # pvs will print out only those devices that are valid
    # If a device dies and comes back, it will not appear
    # in pvs output (but you will get a Warning).
    all_pvs=(`pvs --noheadings -o pv_name | grep -v Warning`)

    # Now we use those valid devices in a filter which we set up.
    # The device will then be activated because there are no
    # metadata conflicts.
    command=$command" --config devices{filter=["
    for i in ${all_pvs[*]}; do
        command=$command'"a|'$i'|",'
    done
    command=$command"\"r|.*|\"]}"

    ocf_log notice "Resilient command: $command"
    if ! $command ; then
        ocf_log err "lv_exec_resilient failed"
        vgscan
        return $OCF_ERR_GENERIC
    else
        vgscan
        return $OCF_SUCCESS
    fi
}

# lv_activate_resilient
#
# Sometimes, devices can come back.  Their metadata will conflict
# with the good devices that remain.  We must filter out those
# failed devices when trying to reactivate
lv_activate_resilient()
{
    declare action=$1
    declare lv_path=$2
    declare op="-ay"

    if [ -z "$action" ] || [ -z "$lv_path" ]; then
        ocf_log err "lv_activate_resilient: Arguments not supplied"
        return $OCF_ERR_ARGS
    fi

    if [ $action != "start" ]; then
        op="-an"
    elif [[ "$(lvs -o attr --noheadings $lv_path)" =~ r.......p ]] ||
         [[ "$(lvs -o attr --noheadings $lv_path)" =~ R.......p ]]; then
        # Attr 'r'/'R' + 9th char 'p' == partial RAID LV.
        # We can activate partial RAID LVs and run just fine.
        ocf_log notice "Attempting activation of partial RAID LV, $lv_path"
        op="-ay --partial"
    fi

    if ! lv_exec_resilient "lvchange $op $lv_path" ; then
        ocf_log err "lv_activate_resilient $action failed on $lv_path"
        return $OCF_ERR_GENERIC
    else
        return $OCF_SUCCESS
    fi
}

lv_status_clustered()
{
    # BUGFIX: $lv_path was never set in this function, so 'lvs' was
    # run with no argument and reported every LV on the system.
    # Derive the path from the resource keys like the other status
    # helpers do.
    declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"

    #
    # Check if device is active
    #
    if [[ ! "$(lvs -o attr --noheadings $lv_path)" =~ ....a. ]]; then
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

# lv_status
#
# Is the LV active?
lv_status_single()
{
    declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
    declare dev="/dev/$lv_path"
    declare realdev
    declare owner
    declare my_name

    #
    # Check if device is active
    #
    if [[ ! "$(lvs -o attr --noheadings $lv_path)" =~ ....a. ]]; then
        return $OCF_ERR_GENERIC
    fi

    if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then
        ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume.  Ignoring..."
        return $OCF_SUCCESS
    fi

    #
    # Check if all links/device nodes are present
    #
    if [ -h "$dev" ]; then
        realdev=$(readlink -f $dev)
        if [ $? -ne 0 ]; then
            ocf_log err "Failed to follow link, $dev"
            return $OCF_ERR_ARGS
        fi

        if [ ! -b $realdev ]; then
            ocf_log err "Device node for $lv_path is not present"
            return $OCF_ERR_GENERIC
        fi
    else
        ocf_log err "Symbolic link for $lv_path is not present"
        return $OCF_ERR_GENERIC
    fi

    #
    # Verify that we are the correct owner
    #
    owner=`lvs -o tags --noheadings $lv_path`
    my_name=$(local_node_name)
    if [ -z "$my_name" ]; then
        ocf_log err "Unable to determine local machine name"
        # FIXME: I don't really want to fail on 1st offense
        return $OCF_SUCCESS
    fi

    if [ -z "$owner" ] || [ "$my_name" != "$owner" ]; then
        ocf_log err "WARNING: $lv_path should not be active"
        ocf_log err "WARNING: $my_name does not own $lv_path"
        ocf_log err "WARNING: Attempting shutdown of $lv_path"

        lv_activate_resilient "stop" $lv_path
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

function lv_status
{
    # We pass in the VG name to see if the logical volume is clustered
    if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then
        lv_status_clustered
    else
        lv_status_single
    fi
}

# lv_activate_and_tag
lv_activate_and_tag()
{
    declare action=$1
    declare tag=$2
    declare lv_path=$3
    typeset self_fence=""

    case ${OCF_RESKEY_self_fence} in
        "yes")    self_fence=1 ;;
        1)        self_fence=1 ;;
        *)        self_fence="" ;;
    esac

    if [ -z "$action" ] || [ -z "$tag" ] || [ -z "$lv_path" ]; then
        ocf_log err "Supplied args: 1) $action, 2) $tag, 3) $lv_path"
        return $OCF_ERR_ARGS
    fi

    if [ "$action" == "start" ]; then
        ocf_log notice "Activating $lv_path"
        lvchange --addtag $tag $lv_path
        if [ $? -ne 0 ]; then
            ocf_log err "Unable to add tag to $lv_path"
            return $OCF_ERR_GENERIC
        fi

        if ! lv_activate_resilient $action $lv_path; then
            ocf_log err "Unable to activate $lv_path"
            return $OCF_ERR_GENERIC
        fi
    else
        ocf_log notice "Deactivating $lv_path"
        if ! lv_activate_resilient $action $lv_path; then
            if [ "$self_fence" ]; then
                ocf_log err "Unable to deactivate $lv_path: REBOOTING"
                sync
                reboot -fn
            else
                ocf_log err "Unable to deactivate $lv_path"
            fi
            return $OCF_ERR_GENERIC
        fi

        ocf_log notice "Removing ownership tag ($tag) from $lv_path"
        lvchange --deltag $tag $lv_path
        if [ $? -ne 0 ]; then
            ocf_log err "Unable to delete tag from $lv_path"

            # Newer versions of LVM require the missing PVs to
            # be removed from the VG via a separate call before
            # the tag can be removed.
            ocf_log err "Attempting volume group clean-up and retry"
            vgreduce --removemissing --force $OCF_RESKEY_vg_name

            # Retry tag deletion
            lvchange --deltag $tag $lv_path
            if [ $? -ne 0 ]; then
                ocf_log err "Failed to delete tag from $lv_path"
                return $OCF_ERR_GENERIC
            fi
        fi

        if [ "`lvs --noheadings -o lv_tags $lv_path`" == $tag ]; then
            ocf_log notice "Removing ownership tag ($tag) from $lv_path"
            lvchange --deltag $tag $lv_path
            if [ $? -ne 0 ]; then
                ocf_log err "Unable to delete tag from $lv_path"
                return $OCF_ERR_GENERIC
            fi
        fi
    fi

    return $OCF_SUCCESS
}

# lv_activate
# $1: start/stop only
#
# Basically, if we want to [de]activate an LVM volume,
# we must own it.  That means that our tag must be on it.
# This requires a change to /etc/lvm/lvm.conf:
#     volume_list = [ "root_volume", "@my_hostname" ]
# where "root_volume" is your root volume group and
# "my_hostname" is $(local_node_name)
#
# If there is a node failure, we may wish to "steal" the
# LV.
# For that, we need to check if the node that owns
# it is still part of the cluster.  We use the tag to
# determine who owns the volume then query for their
# liveness.  If they are dead, we can steal.
lv_activate()
{
    declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
    declare owner=`lvs -o tags --noheadings $lv_path`
    declare my_name=$(local_node_name)

    if [ -z "$my_name" ]; then
        ocf_log err "Unable to determine cluster node name"
        return $OCF_ERR_GENERIC
    fi

    #
    # FIXME: This code block is repeated below... might be
    # nice to put it in a function
    #
    # BUGFIX: quote $owner/$my_name — an empty or multi-word tag
    # made the unquoted '[' tests blow up.
    if [ ! -z "$owner" ] && [ "$owner" != "$my_name" ]; then
        if is_node_member_clustat $owner ; then
            ocf_log err "$owner owns $lv_path unable to $1"
            return $OCF_ERR_GENERIC
        fi
        ocf_log notice "Owner of $lv_path is not in the cluster"
        ocf_log notice "Stealing $lv_path"

        lvchange --deltag $owner $lv_path
        if [ $? -ne 0 ]; then
            ocf_log err "Failed to steal $lv_path from $owner"
            return $OCF_ERR_GENERIC
        fi

        # Warning --deltag doesn't always result in failure
        if [ ! -z "`lvs -o tags --noheadings $lv_path`" ]; then
            ocf_log err "Failed to steal $lv_path from $owner."
            return $OCF_ERR_GENERIC
        fi
    fi

    # If this is a partial VG, attempt to
    # restore any transiently failed PVs
    if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ ...p ]]; then
        ocf_log err "Volume group \"$OCF_RESKEY_vg_name\" has PVs marked as missing"
        restore_transient_failed_pvs
    fi

    if ! lv_activate_and_tag $1 $my_name $lv_path; then
        ocf_log err "Failed to $1 $lv_path"
        ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"

        if vgreduce --removemissing --force --config \
            "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
            $OCF_RESKEY_vg_name; then
            ocf_log notice "$OCF_RESKEY_vg_name now consistent"
            owner=`lvs -o tags --noheadings $lv_path`
            if [ ! -z "$owner" ] && [ "$owner" != "$my_name" ]; then
                if is_node_member_clustat $owner ; then
                    ocf_log err "$owner owns $lv_path unable to $1"
                    return $OCF_ERR_GENERIC
                fi
                ocf_log notice "Owner of $lv_path is not in the cluster"
                ocf_log notice "Stealing $lv_path"

                lvchange --deltag $owner $lv_path
                if [ $? -ne 0 ]; then
                    ocf_log err "Failed to steal $lv_path from $owner"
                    return $OCF_ERR_GENERIC
                fi

                # Warning --deltag doesn't always result in failure
                if [ ! -z "`lvs -o tags --noheadings $lv_path`" ]; then
                    ocf_log err "Failed to steal $lv_path from $owner."
                    return $OCF_ERR_GENERIC
                fi
            fi

            if ! lv_activate_and_tag $1 $my_name $lv_path; then
                ocf_log err "Failed second attempt to $1 $lv_path"
                return $OCF_ERR_GENERIC
            else
                ocf_log notice "Second attempt to $1 $lv_path successful"
                return $OCF_SUCCESS
            fi
        else
            ocf_log err "Failed to $1 $lv_path"
            return $OCF_ERR_GENERIC
        fi
    fi
    return $OCF_SUCCESS
}

function lv_start_clustered
{
    if lvchange -aey $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then
        return $OCF_SUCCESS
    fi

    # FAILED exclusive activation:
    # This can be caused by an LV being active remotely.
    # Before attempting a repair effort, we should attempt
    # to deactivate the LV cluster-wide; but only if the LV
    # is not open.  Otherwise, it is senseless to attempt.
    if ! [[ "$(lvs -o attr --noheadings $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name)" =~ ....ao ]]; then
        # We'll wait a small amount of time for some settling before
        # attempting to deactivate.  Then the deactivate will be
        # immediately followed by another exclusive activation attempt.
        sleep 5
        if ! lvchange -an $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then
            # Someone could have the device open.
            # We can't do anything about that.
            ocf_log err "Unable to perform required deactivation of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name before starting"
            return $OCF_ERR_GENERIC
        fi

        if lvchange -aey $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then
            # Second attempt after deactivation was successful, we now
            # have the lock exclusively
            return $OCF_SUCCESS
        fi
    fi

    # Failed to activate:
    # This could be due to a device failure (or another machine could
    # have snuck in between the deactivation/activation).  We don't yet
    # have a mechanism to check for remote activation, so we will proceed
    # with repair action.
    ocf_log err "Failed to activate logical volume, $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
    ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"

    if ! lvconvert --repair --use-policies $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then
        ocf_log err "Failed to cleanup $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
        return $OCF_ERR_GENERIC
    fi

    if ! lvchange -aey $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then
        ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
        return $OCF_ERR_GENERIC
    fi

    ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name successful"
    return $OCF_SUCCESS
}

function lv_start_single
{
    if ! lvs $OCF_RESKEY_vg_name >& /dev/null; then
        lv_count=0
    else
        lv_count=`lvs --noheadings -o name $OCF_RESKEY_vg_name | grep -v _mlog | grep -v _mimage | grep -v nconsistent | wc -l`
    fi
    if [ $lv_count -gt 1 ]; then
        ocf_log err "HA LVM requires Only one logical volume per volume group."
        ocf_log err "There are currently $lv_count logical volumes in $OCF_RESKEY_vg_name"
        ocf_log err "Failing HA LVM start of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
        exit $OCF_ERR_GENERIC
    fi

    if ! lv_activate start; then
        return 1
    fi

    return 0
}

function lv_start
{
    # We pass in the VG name to see if the logical volume is clustered
    if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then
        lv_start_clustered
    else
        lv_start_single
    fi
}

function lv_stop_clustered
{
    lvchange -aln $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name
}

function lv_stop_single
{
    if ! lv_activate stop; then
        return 1
    fi

    return 0
}

function lv_stop
{
    # We pass in the VG name to see if the logical volume is clustered
    if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then
        lv_stop_clustered
    else
        lv_stop_single
    fi
}
diff --git a/rgmanager/src/resources/lvm_by_vg.sh b/rgmanager/src/resources/lvm_by_vg.sh
index be027e3d1..f32445e91 100644
--- a/rgmanager/src/resources/lvm_by_vg.sh
+++ b/rgmanager/src/resources/lvm_by_vg.sh
@@ -1,438 +1,478 @@
#!/bin/bash
#
# Copyright (C) 1997-2003 Sistina Software, Inc.  All rights reserved.
# Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

# vg_owner
#
# Returns:
#    1 == We are the owner
#    2 == We can claim it
#    0 == Owned by someone else
function vg_owner
{
    local owner=`vgs -o tags --noheadings $OCF_RESKEY_vg_name`
    local my_name=$(local_node_name)

    if [ -z "$my_name" ]; then
        ocf_log err "Unable to determine cluster node name"
        return 0
    fi

    if [ -z "$owner" ]; then
        # No-one owns this VG yet, so we can claim it
        return 2
    fi

    if [ $owner != $my_name ]; then
        if is_node_member_clustat $owner ; then
            return 0
        fi
        return 2
    fi

    return 1
}

function _strip_tags
{
    local i

    for i in `vgs --noheadings -o tags $OCF_RESKEY_vg_name | sed s/","/" "/g`; do
        ocf_log info "Stripping tag, $i"
        vgchange --deltag $i $OCF_RESKEY_vg_name
    done

    if [ ! -z `vgs -o tags --noheadings $OCF_RESKEY_vg_name` ]; then
        ocf_log err "Failed to remove ownership tags from $OCF_RESKEY_vg_name"
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

function strip_tags
{
    if ! _strip_tags; then
        ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"

        if ! vgreduce --removemissing --force --config \
            "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
            $OCF_RESKEY_vg_name; then
            ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
            return $OCF_ERR_GENERIC
        fi

        ocf_log notice "Cleanup of $OCF_RESKEY_vg_name successful"
    fi

    if ! _strip_tags; then
        ocf_log err "Failed 2nd attempt to remove tags from, $OCF_RESKEY_vg_name"
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

function strip_and_add_tag
{
    if ! strip_tags; then
        ocf_log err "Failed to remove tags from volume group, $OCF_RESKEY_vg_name"
        return $OCF_ERR_GENERIC
    fi

    vgchange --addtag $(local_node_name) $OCF_RESKEY_vg_name
    if [ $? -ne 0 ]; then
        ocf_log err "Failed to add ownership tag to $OCF_RESKEY_vg_name"
        return $OCF_ERR_GENERIC
    fi

    ocf_log info "New tag \"$(local_node_name)\" added to $OCF_RESKEY_vg_name"

    return $OCF_SUCCESS
}

function vg_status_clustered
{
    return $OCF_SUCCESS
}

# vg_status
#
# Are all the LVs active?
function vg_status_single
{
    local i
    local dev
    local my_name=$(local_node_name)

    #
    # Check that all LVs are active
    #
    for i in `lvs $OCF_RESKEY_vg_name --noheadings -o attr`; do
        if [[ ! $i =~ ....a. ]]; then
            return $OCF_ERR_GENERIC
        fi
    done

    #
    # Check if all links/device nodes are present
    #
    for i in `lvs $OCF_RESKEY_vg_name --noheadings -o name`; do
        dev="/dev/$OCF_RESKEY_vg_name/$i"

        if [ -h $dev ]; then
            realdev=$(readlink -f $dev)
            if [ $? -ne 0 ]; then
                ocf_log err "Failed to follow link, $dev"
                return $OCF_ERR_GENERIC
            fi

            if [ ! -b $realdev ]; then
                ocf_log err "Device node for $dev is not present"
                return $OCF_ERR_GENERIC
            fi
        else
            ocf_log err "Symbolic link for $lv_path is not present"
            return $OCF_ERR_GENERIC
        fi
    done

    #
    # Verify that we are the correct owner
    #
    vg_owner
    if [ $? -ne 1 ]; then
        ocf_log err "WARNING: $OCF_RESKEY_vg_name should not be active"
        ocf_log err "WARNING: $my_name does not own $OCF_RESKEY_vg_name"
        ocf_log err "WARNING: Attempting shutdown of $OCF_RESKEY_vg_name"

        # FIXME: may need more force to shut this down
        vgchange -an $OCF_RESKEY_vg_name
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

##
# Main status function for volume groups
##
function vg_status
{
    if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then
        vg_status_clustered
    else
        vg_status_single
    fi
}

function vg_verify
{
    # Anything to verify?
    return $OCF_SUCCESS
}

function vg_start_clustered
{
    local a
    local results
    local all_pvs
    local resilience
    local try_again=false

    ocf_log info "Starting volume group, $OCF_RESKEY_vg_name"

    if ! vgchange -aey $OCF_RESKEY_vg_name; then
        try_again=true

        # Failure to activate:
        # This could be caused by a remotely active LV.  Before
        # attempting any repair of the VG, we will first attempt
        # to deactivate the VG cluster-wide.
        # We must check for open LVs though, since these cannot
        # be deactivated.  We have no choice but to go one-by-one.

        # Allow for some settling
        sleep 5

        results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`)
        a=0
        while [ ! -z "${results[$a]}" ]; do
            if [[ ! ${results[$(($a + 1))]} =~ ....ao ]]; then
                if ! lvchange -an $OCF_RESKEY_vg_name/${results[$a]}; then
                    ocf_log err "Unable to perform required deactivation of $OCF_RESKEY_vg_name before starting"
                    return $OCF_ERR_GENERIC
                fi
            fi
            a=$(($a + 2))
        done
    fi

    # BUGFIX: the original 'if try_again && ...' ran 'try_again' as a
    # command (always "command not found"), so this retry/cleanup path
    # could never execute.  Expand the variable ("true"/"false") instead.
    if $try_again && ! vgchange -aey $OCF_RESKEY_vg_name; then
        ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name"
        ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"

        if ! vgreduce --removemissing --force $OCF_RESKEY_vg_name; then
            ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
            return $OCF_ERR_GENERIC
        fi

        if ! vgchange -aey $OCF_RESKEY_vg_name; then
            ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name"
            return $OCF_ERR_GENERIC
        fi

        ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name successful"
        return $OCF_SUCCESS
    else
        # The activation commands succeeded, but did they do anything?
        # Make sure all the logical volumes are active
        results=(`lvs -o name,attr --noheadings 2> /dev/null $OCF_RESKEY_vg_name`)
        a=0
        while [ ! -z "${results[$a]}" ]; do
            if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then
                all_pvs=(`pvs --noheadings -o name 2> /dev/null`)
                resilience=" --config devices{filter=["
                for i in ${all_pvs[*]}; do
                    resilience=$resilience'"a|'$i'|",'
                done
                resilience=$resilience"\"r|.*|\"]}"

                vgchange -aey $OCF_RESKEY_vg_name $resilience
                break
            fi
            a=$(($a + 2))
        done

        # We need to check the LVs again if we made the command resilient
        if [ ! -z "$resilience" ]; then
            results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name $resilience 2> /dev/null`)
            a=0
            while [ ! -z "${results[$a]}" ]; do
                if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then
                    ocf_log err "Failed to activate $OCF_RESKEY_vg_name"
                    return $OCF_ERR_GENERIC
                fi
                a=$(($a + 2))
            done
            ocf_log err "Orphan storage device in $OCF_RESKEY_vg_name slowing operations"
        fi
    fi

    return $OCF_SUCCESS
}

function vg_start_single
{
    local a
    local results
    local all_pvs
    local resilience

    ocf_log info "Starting volume group, $OCF_RESKEY_vg_name"

    vg_owner
    case $? in
    0)
        ocf_log info "Someone else owns this volume group"
        return $OCF_ERR_GENERIC
        ;;
    1)
        ocf_log info "I own this volume group"
        ;;
    2)
        ocf_log info "I can claim this volume group"
        ;;
    esac

    if ! strip_and_add_tag || ! vgchange -ay $OCF_RESKEY_vg_name; then
        ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name"
        ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"

        if ! vgreduce --removemissing --force --config \
            "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
            $OCF_RESKEY_vg_name; then
            ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
            return $OCF_ERR_GENERIC
        fi

        ocf_log notice "Cleanup of $OCF_RESKEY_vg_name successful"

        if ! strip_and_add_tag || ! vgchange -ay $OCF_RESKEY_vg_name; then
            ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name"
            return $OCF_ERR_GENERIC
        fi

        ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name successful"
        return $OCF_SUCCESS
    else
        # The activation commands succeeded, but did they do anything?
        # Make sure all the logical volumes are active
        results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`)
        a=0
        while [ ! -z "${results[$a]}" ]; do
            if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then
                all_pvs=(`pvs --noheadings -o name 2> /dev/null`)
                resilience=" --config devices{filter=["
                for i in ${all_pvs[*]}; do
                    resilience=$resilience'"a|'$i'|",'
                done
                resilience=$resilience"\"r|.*|\"]}"

                vgchange -ay $OCF_RESKEY_vg_name $resilience
                break
            fi
            a=$(($a + 2))
        done

        # We need to check the LVs again if we made the command resilient
        if [ ! -z "$resilience" ]; then
            results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name $resilience 2> /dev/null`)
            a=0
            while [ ! -z "${results[$a]}" ]; do
                if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then
                    ocf_log err "Failed to activate $OCF_RESKEY_vg_name"
                    return $OCF_ERR_GENERIC
                fi
                a=$(($a + 2))
            done
            ocf_log err "Orphan storage device in $OCF_RESKEY_vg_name slowing operations"
        fi
    fi

    return $OCF_SUCCESS
}

##
# Main start function for volume groups
##
function vg_start
{
    local a=0
    local results

    # Whole-VG HA LVM cannot handle RAID LVs; refuse to start if any
    # LV's attr begins with 'r'/'R' (RAID).
    # BUGFIX: the original '[[ ! x =~ ^r ]] || [[ ! x =~ ^R ]]' is true
    # for EVERY attr string (none starts with both letters), which made
    # every start fail.  Error only when the LV really is RAID.
    results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`)
    while [ ! -z "${results[$a]}" ]; do
        if [[ ${results[$(($a + 1))]} =~ ^r ]] ||
           [[ ${results[$(($a + 1))]} =~ ^R ]]; then
            ocf_log err "RAID LVs are not supported without an 'lv_name' specification"
            return $OCF_ERR_GENERIC
        fi
        a=$(($a + 2))
    done

    if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then
        vg_start_clustered
    else
        vg_start_single
    fi
}

function vg_stop_clustered
{
    local a
    local results
    typeset self_fence=""

    case ${OCF_RESKEY_self_fence} in
        "yes")    self_fence=1 ;;
        1)        self_fence=1 ;;
        *)        self_fence="" ;;
    esac

    #  Shut down the volume group
    #  Do we need to make this resilient?
    vgchange -aln $OCF_RESKEY_vg_name

    #  Make sure all the logical volumes are inactive
    results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`)
    a=0
    while [ ! -z "${results[$a]}" ]; do
        if [[ ${results[$(($a + 1))]} =~ ....a. ]]; then
            if [ "$self_fence" ]; then
                ocf_log err "Unable to deactivate $lv_path REBOOT"
                sync
                reboot -fn
            else
                ocf_log err "Logical volume $OCF_RESKEY_vg_name/${results[$a]} failed to shutdown"
            fi
            return $OCF_ERR_GENERIC
        fi
        a=$(($a + 2))
    done

    return $OCF_SUCCESS
}

function vg_stop_single
{
    local a
    local results
    typeset self_fence=""

    case ${OCF_RESKEY_self_fence} in
        "yes")    self_fence=1 ;;
        1)        self_fence=1 ;;
        *)        self_fence="" ;;
    esac

    #  Shut down the volume group
    #  Do we need to make this resilient?
    vgchange -an $OCF_RESKEY_vg_name

    #  Make sure all the logical volumes are inactive
    results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`)
    a=0
    while [ ! -z "${results[$a]}" ]; do
        if [[ ${results[$(($a + 1))]} =~ ....a. ]]; then
            if [ "$self_fence" ]; then
                ocf_log err "Unable to deactivate $lv_path REBOOT"
                sync
                reboot -fn
            else
                ocf_log err "Logical volume $OCF_RESKEY_vg_name/${results[$a]} failed to shutdown"
            fi
            return $OCF_ERR_GENERIC
        fi
        a=$(($a + 2))
    done

    # Make sure we are the owner before we strip the tags
    vg_owner
    if [ $? -ne 0 ]; then
        strip_tags
    fi

    return $OCF_SUCCESS
}

##
# Main stop function for volume groups
##
function vg_stop
{
    if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then
        vg_stop_clustered
    else
        vg_stop_single
    fi
}