#!/bin/bash
#
# Patch provenance (unified-diff header this text was carried in):
#   diff --git a/rgmanager/src/resources/lvm.sh b/rgmanager/src/resources/lvm.sh
#   index 36603d2b6..97ddc5272 100644
#   --- a/rgmanager/src/resources/lvm.sh
#   +++ b/rgmanager/src/resources/lvm.sh
#   @@ -1,178 +1,180 @@
#
# LVM Failover Script.
# NOTE: Changes to /etc/lvm/lvm.conf are required for proper operation.
#
# Copyright (C) 1997-2003 Sistina Software, Inc.  All rights reserved.
# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#

LC_ALL=C
LANG=C
PATH=/bin:/sbin:/usr/bin:/usr/sbin
export LC_ALL LANG PATH

. $(dirname $0)/ocf-shellfuncs
. $(dirname $0)/utils/member_util.sh
. $(dirname $0)/lvm_by_lv.sh
. $(dirname $0)/lvm_by_vg.sh

rv=0

################################################################################
# ha_lvm_proper_setup_check
#
# Sanity-check the host configuration before touching the volume group.
#
# Globals:  OCF_RESKEY_vg_name (read)
# Returns:  $OCF_SUCCESS when the setup looks usable,
#           $OCF_ERR_GENERIC when a hard requirement is not met.
#           A stale initrd is only logged; it does not fail the check.
################################################################################
function ha_lvm_proper_setup_check
{
	local node_name

	##
	# Does the Volume Group exist?
	#  1) User may have forgotten to create it
	#  2) User may have misspelled it in the config file
	##
	if ! vgs "$OCF_RESKEY_vg_name" --config 'global{locking_type=0}' >/dev/null 2>&1; then
		ocf_log err "HA LVM:  Unable to get volume group attributes for $OCF_RESKEY_vg_name"
		return $OCF_ERR_GENERIC
	fi

	##
	# Are we using the "tagging" or "CLVM" variant?
	#  The CLVM variant will have the cluster attribute set
	##
	if [[ "$(vgs -o attr --noheadings --config 'global{locking_type=0}' "$OCF_RESKEY_vg_name" 2>/dev/null)" =~ .....c ]]; then
		# Is clvmd running?
		if ! ps -C clvmd >/dev/null 2>&1; then
			ocf_log err "HA LVM: $OCF_RESKEY_vg_name has the cluster attribute set, but 'clvmd' is not running"
			return $OCF_ERR_GENERIC
		fi
		return $OCF_SUCCESS
	fi

	##
	# The "tagging" variant is being used if we have gotten this far.
	##

	##
	# The default for lvm.conf:activation/volume_list is empty,
	# this must be changed for HA LVM.
	##
	if ! lvm dumpconfig activation/volume_list >/dev/null 2>&1; then
		ocf_log err "HA LVM:  Improper setup detected"
		ocf_log err "* \"volume_list\" not specified in lvm.conf."
		return $OCF_ERR_GENERIC
	fi

	##
	# Machine's cluster node name must be present as
	# a tag in lvm.conf:activation/volume_list
	##
	# An empty node name is treated as "not present": an empty grep
	# pattern would otherwise match every line.  -F keeps the dots of
	# a host name from acting as regex wildcards.
	node_name=$(local_node_name)
	if [ -z "$node_name" ] ||
	   ! lvm dumpconfig activation/volume_list | grep -F -- "$node_name"; then
		ocf_log err "HA LVM:  Improper setup detected"
		ocf_log err "* @$node_name missing from \"volume_list\" in lvm.conf"
		return $OCF_ERR_GENERIC
	fi

	##
	# The volume group to be failed over must NOT be in
	# lvm.conf:activation/volume_list; otherwise, machines
	# will be able to activate the VG regardless of the tags
	##
	if lvm dumpconfig activation/volume_list | grep -F -- "\"$OCF_RESKEY_vg_name\""; then
		ocf_log err "HA LVM:  Improper setup detected"
		ocf_log err "* $OCF_RESKEY_vg_name found in \"volume_list\" in lvm.conf"
		return $OCF_ERR_GENERIC
	fi

	##
	# Next, we need to ensure that their initrd has been updated
	# If not, the machine could boot and activate the VG outside
	# the control of rgmanager
	##
	# Fixme: we might be able to perform a better check...
	# The pattern is quoted so that find, not the shell, expands it.
	if [ -z "$(find /boot -name '*.img' -newer /etc/lvm/lvm.conf)" ]; then
		ocf_log err "HA LVM:  Improper setup detected"
		ocf_log err "* initrd image needs to be newer than lvm.conf"

		# While dangerous if not done the first time, there are many
		# cases where we don't simply want to fail here.  Instead,
		# keep warning until the user remakes the initrd - or has
		# it done for them by upgrading the kernel.
		#return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

################################################################################
# MAIN
################################################################################

case $1 in
start)
	ha_lvm_proper_setup_check || exit 1

	if [ -z "$OCF_RESKEY_lv_name" ]; then
		vg_start || exit 1
	else
		lv_start || exit 1
	fi
	;;

status|monitor)
	ocf_log notice "Getting status"

	# Pass the status function's own return code through unchanged.
	if [ -z "$OCF_RESKEY_lv_name" ]; then
		vg_status
	else
		lv_status
	fi
	exit $?
	;;

stop)
	# Result deliberately ignored: stop proceeds regardless of the check.
	ha_lvm_proper_setup_check

	if [ -z "$OCF_RESKEY_lv_name" ]; then
		vg_stop || exit 1
	else
		lv_stop || exit 1
	fi
	;;

recover|restart)
	"$0" stop || exit $OCF_ERR_GENERIC
	"$0" start || exit $OCF_ERR_GENERIC
	;;

meta-data)
	# The metadata file sits next to this script: <name>.sh -> <name>.metadata
	cat "$(echo "$0" | sed 's/^\(.*\)\.sh$/\1.metadata/')"
	;;

validate-all|verify-all)
	if [ -z "$OCF_RESKEY_lv_name" ]; then
		vg_verify || exit 1
	else
		lv_verify || exit 1
	fi
	;;
*)
	echo "usage: $0 {start|status|monitor|stop|restart|meta-data|validate-all}"
	exit $OCF_ERR_UNIMPLEMENTED
	;;
esac

exit $rv

# Patch provenance (next file in the same diff):
#   diff --git a/rgmanager/src/resources/lvm_by_lv.sh b/rgmanager/src/resources/lvm_by_lv.sh
#   index 915a7e29d..d92a75a95 100644
#   --- a/rgmanager/src/resources/lvm_by_lv.sh
#   +++ b/rgmanager/src/resources/lvm_by_lv.sh
#   @@ -1,504 +1,504 @@
#
# #!/bin/bash
#
# Copyright (C) 1997-2003 Sistina Software, Inc.  All rights reserved.
# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#

# lv_verify
#
# Verify the parameters passed in
#
lv_verify()
{
	# Anything to verify?  Perhaps the names?
	return $OCF_SUCCESS
}

# restore_transient_failed_pvs
#
# Walk the PVs of $OCF_RESKEY_vg_name and try to re-add any that carry
# the 'm' flag in the third column of their pvs attr field.
# Failures are logged only; the function has no explicit return value.
restore_transient_failed_pvs()
{
	local a=0
	local -a results
	local vg=$OCF_RESKEY_vg_name

	# Flat list of (name, vg_name, attr) triples, split on whitespace.
	results=($(pvs -o name,vg_name,attr --noheadings | grep -- "$vg" | grep -v 'unknown device'))
	while [ -n "${results[$a]}" ] ; do
		# The grep above is only a substring filter; confirm the VG exactly.
		if [[ ${results[$(($a + 2))]} =~ ..m ]] &&
		   [ "$vg" == "${results[$(($a + 1))]}" ]; then
			ocf_log notice "Attempting to restore missing PV, ${results[$a]} in $vg"
			if vgextend --restoremissing "$vg" "${results[$a]}"; then
				ocf_log notice " ${results[$a]} restored"
			else
				ocf_log notice "Failed to restore ${results[$a]}"
			fi
		fi
		a=$(($a + 3))
	done
}

# lv_exec_resilient
#
# Sometimes, devices can come back.  Their metadata will conflict
# with the good devices that remain.  This function filters out those
# failed devices when executing the given command
#
# Finishing with vgscan resets the cache/filter
#
# $1: the LVM command line to run (word-split on execution)
# Returns: $OCF_SUCCESS, $OCF_ERR_GENERIC if the command fails,
#          $OCF_ERR_ARGS if no command was given.
lv_exec_resilient()
{
	declare command=$1
	declare all_pvs
	declare i

	ocf_log notice "Making resilient : $command"

	if [ -z "$command" ]; then
		ocf_log err "lv_exec_resilient: Arguments not supplied"
		return $OCF_ERR_ARGS
	fi

	# pvs will print out only those devices that are valid
	# If a device dies and comes back, it will not appear
	# in pvs output (but you will get a Warning).
	all_pvs=($(pvs --noheadings -o pv_name | grep -v Warning))

	# Now we use those valid devices in a filter which we set up.
	# The device will then be activated because there are no
	# metadata conflicts.
	command=$command" --config devices{filter=["
	for i in ${all_pvs[*]}; do
		command=$command'"a|'$i'|",'
	done
	command=$command"\"r|.*|\"]}"

	ocf_log notice "Resilient command: $command"
	# Intentionally unquoted: the string is split into command + arguments.
	if ! $command ; then
		ocf_log err "lv_exec_resilient failed"
		vgscan
		return $OCF_ERR_GENERIC
	fi

	vgscan
	return $OCF_SUCCESS
}

# lv_activate_resilient
#
# Sometimes, devices can come back.  Their metadata will conflict
# with the good devices that remain.  We must filter out those
# failed devices when trying to reactivate
#
# $1: "start" activates; anything else deactivates
# $2: LV path (vg/lv)
lv_activate_resilient()
{
	declare action=$1
	declare lv_path=$2
	declare op="-ay"

	if [ -z "$action" ] || [ -z "$lv_path" ]; then
		ocf_log err "lv_activate_resilient: Arguments not supplied"
		return $OCF_ERR_ARGS
	fi

	if [ "$action" != "start" ]; then
		op="-an"
	elif [[ "$(lvs -o attr --noheadings "$lv_path")" =~ [rR].......p ]]; then
		# We can activate partial RAID LVs and run just fine.
		ocf_log notice "Attempting activation of partial RAID LV, $lv_path"
		op="-ay --partial"
	fi

	if ! lv_exec_resilient "lvchange $op $lv_path" ; then
		ocf_log err "lv_activate_resilient $action failed on $lv_path"
		return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

# lv_status_clustered
#
# Is the (clustered) LV active?
# Returns: $OCF_SUCCESS if active, $OCF_NOT_RUNNING otherwise.
lv_status_clustered()
{
	# The path has to be built here; without it lvs would be run with
	# no LV argument at all.
	declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"

	#
	# Check if device is active
	#
	if [[ ! "$(lvs -o attr --noheadings "$lv_path")" =~ ....a. ]]; then
		return $OCF_NOT_RUNNING
	fi

	return $OCF_SUCCESS
}

# lv_status
#
# Is the LV active?
# Returns: $OCF_SUCCESS, $OCF_NOT_RUNNING when the LV is not active,
#          $OCF_ERR_ARGS / $OCF_ERR_GENERIC on device-node or ownership
#          problems.
lv_status_single()
{
	declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
	declare dev="/dev/$lv_path"
	declare realdev
	declare owner
	declare my_name

	#
	# Check if device is active
	#
	if [[ ! "$(lvs -o attr --noheadings "$lv_path")" =~ ....a. ]]; then
		return $OCF_NOT_RUNNING
	fi

	if [[ "$(vgs -o attr --noheadings "$OCF_RESKEY_vg_name")" =~ .....c ]]; then
		ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..."
		return $OCF_SUCCESS
	fi

	#
	# Check if all links/device nodes are present
	#
	if [ ! -h "$dev" ]; then
		ocf_log err "Symbolic link for $lv_path is not present"
		return $OCF_ERR_GENERIC
	fi

	if ! realdev=$(readlink -f "$dev"); then
		ocf_log err "Failed to follow link, $dev"
		return $OCF_ERR_ARGS
	fi

	if [ ! -b "$realdev" ]; then
		ocf_log err "Device node for $lv_path is not present"
		return $OCF_ERR_GENERIC
	fi

	#
	# Verify that we are the correct owner
	#
	# lvs pads its output; strip the blanks before comparing.
	owner=$(lvs -o tags --noheadings "$lv_path" | tr -d ' ')
	my_name=$(local_node_name)
	if [ -z "$my_name" ]; then
		ocf_log err "Unable to determine local machine name"

		# FIXME: I don't really want to fail on 1st offense
		return $OCF_SUCCESS
	fi

	if [ -z "$owner" ] || [ "$my_name" != "$owner" ]; then
		ocf_log err "WARNING: $lv_path should not be active"
		ocf_log err "WARNING: $my_name does not own $lv_path"
		ocf_log err "WARNING: Attempting shutdown of $lv_path"

		lv_activate_resilient "stop" "$lv_path"
		return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

# lv_status: dispatch on whether the VG has the cluster attribute set.
function lv_status
{
	# We pass in the VG name to see of the logical volume is clustered
	if [[ $(vgs -o attr --noheadings "$OCF_RESKEY_vg_name") =~ .....c ]]; then
		lv_status_clustered
	else
		lv_status_single
	fi
}

# lv_activate_and_tag
#
# $1: "start" tags then activates; anything else deactivates then untags
# $2: ownership tag
# $3: LV path (vg/lv)
# Globals: OCF_RESKEY_self_fence ("yes" or 1 forces a reboot when the
#          deactivation fails), OCF_RESKEY_vg_name
lv_activate_and_tag()
{
	declare action=$1
	declare tag=$2
	declare lv_path=$3
	typeset self_fence=""

	case ${OCF_RESKEY_self_fence} in
		"yes"|1) self_fence=1 ;;
		*)       self_fence="" ;;
	esac

	if [ -z "$action" ] || [ -z "$tag" ] || [ -z "$lv_path" ]; then
		ocf_log err "Supplied args: 1) $action, 2) $tag, 3) $lv_path"
		return $OCF_ERR_ARGS
	fi

	if [ "$action" == "start" ]; then
		ocf_log notice "Activating $lv_path"
		if ! lvchange --addtag "$tag" "$lv_path"; then
			ocf_log err "Unable to add tag to $lv_path"
			return $OCF_ERR_GENERIC
		fi

		if ! lv_activate_resilient "$action" "$lv_path"; then
			ocf_log err "Unable to activate $lv_path"
			return $OCF_ERR_GENERIC
		fi

		return $OCF_SUCCESS
	fi

	ocf_log notice "Deactivating $lv_path"
	if ! lv_activate_resilient "$action" "$lv_path"; then
		if [ "$self_fence" ]; then
			ocf_log err "Unable to deactivate $lv_path: REBOOTING"
			sync
			reboot -fn
		else
			ocf_log err "Unable to deactivate $lv_path"
		fi
		return $OCF_ERR_GENERIC
	fi

	ocf_log notice "Removing ownership tag ($tag) from $lv_path"

	if ! lvchange --deltag "$tag" "$lv_path"; then
		ocf_log err "Unable to delete tag from $lv_path"

		# Newer versions of LVM require the missing PVs to
		# be removed from the VG via a separate call before
		# the tag can be removed.
		ocf_log err "Attempting volume group clean-up and retry"
		vgreduce --removemissing --force "$OCF_RESKEY_vg_name"

		# Retry tag deletion
		if ! lvchange --deltag "$tag" "$lv_path"; then
			ocf_log err "Failed to delete tag from $lv_path"
			return $OCF_ERR_GENERIC
		fi
	fi

	# Verify the tag is really gone.  lvs pads its output with blanks,
	# so strip them or the comparison can never match.
	if [ "$(lvs --noheadings -o lv_tags "$lv_path" | tr -d ' ')" == "$tag" ]; then
		ocf_log notice "Removing ownership tag ($tag) from $lv_path"
		if ! lvchange --deltag "$tag" "$lv_path"; then
			ocf_log err "Unable to delete tag from $lv_path"
			return $OCF_ERR_GENERIC
		fi
	fi

	return $OCF_SUCCESS
}

# lv_activate
# $1: start/stop only
#
# Basically, if we want to [de]activate an LVM volume,
# we must own it.  That means that our tag must be on it.
# This requires a change to /etc/lvm/lvm.conf:
#	volume_list = [ "root_volume", "@my_hostname" ]
# where "root_volume" is your root volume group and
# "my_hostname" is $(local_node_name)
#
# If there is a node failure, we may wish to "steal" the
# LV.  For that, we need to check if the node that owns
# it is still part of the cluster.  We use the tag to
# determine who owns the volume then query for their
# liveness.  If they are dead, we can steal.
lv_activate()
{
	# $1: "start" or "stop".
	# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when the LV is owned by a
	# live cluster member or the [de]activation fails even after a
	# vgreduce clean-up and retry.
	declare action=$1
	declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
	declare owner
	declare my_name

	# lvs pads its output; strip the blanks so the tag compares cleanly.
	owner=$(lvs -o tags --noheadings "$lv_path" | tr -d ' ')
	my_name=$(local_node_name)

	if [ -z "$my_name" ]; then
		ocf_log err "Unable to determine cluster node name"
		return $OCF_ERR_GENERIC
	fi

	_lv_release_stale_owner "$owner" "$my_name" "$lv_path" "$action" ||
		return $OCF_ERR_GENERIC

	# If this is a partial VG, attempt to
	# restore any transiently failed PVs
	if [[ $(vgs -o attr --noheadings "$OCF_RESKEY_vg_name") =~ ...p ]]; then
		ocf_log err "Volume group \"$OCF_RESKEY_vg_name\" has PVs marked as missing"
		restore_transient_failed_pvs
	fi

	if lv_activate_and_tag "$action" "$my_name" "$lv_path"; then
		return $OCF_SUCCESS
	fi

	ocf_log err "Failed to $action $lv_path"
	ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"

	if ! vgreduce --removemissing --force --config \
	    "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
	    "$OCF_RESKEY_vg_name"; then
		ocf_log err "Failed to $action $lv_path"
		return $OCF_ERR_GENERIC
	fi

	ocf_log notice "$OCF_RESKEY_vg_name now consistent"

	# Ownership may look different now that the VG is consistent again.
	owner=$(lvs -o tags --noheadings "$lv_path" | tr -d ' ')
	_lv_release_stale_owner "$owner" "$my_name" "$lv_path" "$action" ||
		return $OCF_ERR_GENERIC

	if ! lv_activate_and_tag "$action" "$my_name" "$lv_path"; then
		ocf_log err "Failed second attempt to $action $lv_path"
		return $OCF_ERR_GENERIC
	fi

	ocf_log notice "Second attempt to $action $lv_path successful"
	return $OCF_SUCCESS
}

# _lv_release_stale_owner <owner> <my_name> <lv_path> <action>
#
# Private helper for lv_activate.  If the LV carries somebody else's tag,
# refuse when that node is still a cluster member, otherwise remove the
# tag ("steal" the LV).  No-op when the LV is untagged or already ours.
# Returns $OCF_SUCCESS when the caller may proceed, else $OCF_ERR_GENERIC.
_lv_release_stale_owner()
{
	declare owner=$1
	declare my_name=$2
	declare lv_path=$3
	declare action=$4

	if [ -z "$owner" ] || [ "$owner" == "$my_name" ]; then
		return $OCF_SUCCESS
	fi

	if is_node_member_clustat "$owner" ; then
		ocf_log err "$owner owns $lv_path unable to $action"
		return $OCF_ERR_GENERIC
	fi

	ocf_log notice "Owner of $lv_path is not in the cluster"
	ocf_log notice "Stealing $lv_path"

	if ! lvchange --deltag "$owner" "$lv_path"; then
		ocf_log err "Failed to steal $lv_path from $owner"
		return $OCF_ERR_GENERIC
	fi

	# Warning --deltag doesn't always result in failure
	if [ -n "$(lvs -o tags --noheadings "$lv_path" | tr -d ' ')" ]; then
		ocf_log err "Failed to steal $lv_path from $owner."
		return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

# lv_start_clustered
#
# Exclusively activate a clustered LV, with one deactivate-and-retry and
# one lvconvert --repair fallback.
function lv_start_clustered
{
	local lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"

	if lvchange -aey "$lv_path"; then
		return $OCF_SUCCESS
	fi

	# FAILED exclusive activation:
	# This can be caused by an LV being active remotely.
	# Before attempting a repair effort, we should attempt
	#  to deactivate the LV cluster-wide; but only if the LV
	#  is not open.  Otherwise, it is senseless to attempt.
	if ! [[ "$(lvs -o attr --noheadings "$lv_path")" =~ ....ao ]]; then
		# We'll wait a small amount of time for some settling before
		# attempting to deactivate.  Then the deactivate will be
		# immediately followed by another exclusive activation attempt.
		sleep 5
		if ! lvchange -an "$lv_path"; then
			# Someone could have the device open.
			# We can't do anything about that.
			ocf_log err "Unable to perform required deactivation of $lv_path before starting"
			return $OCF_ERR_GENERIC
		fi

		if lvchange -aey "$lv_path"; then
			# Second attempt after deactivation was successful, we now
			# have the lock exclusively
			return $OCF_SUCCESS
		fi
	fi

	# Failed to activate:
	# This could be due to a device failure (or another machine could
	# have snuck in between the deactivation/activation).  We don't yet
	# have a mechanism to check for remote activation, so we will proceed
	# with repair action.
	ocf_log err "Failed to activate logical volume, $lv_path"
	ocf_log notice "Attempting cleanup of $lv_path"

	if ! lvconvert --repair --use-policies "$lv_path"; then
		ocf_log err "Failed to cleanup $lv_path"
		return $OCF_ERR_GENERIC
	fi

	if ! lvchange -aey "$lv_path"; then
		ocf_log err "Failed second attempt to activate $lv_path"
		return $OCF_ERR_GENERIC
	fi

	ocf_log notice "Second attempt to activate $lv_path successful"
	return $OCF_SUCCESS
}

# lv_start_single
#
# Tagging variant of start.  Refuses (by exiting the agent) when the VG
# holds more than one LV, not counting names that contain _mlog, _mimage
# or "nconsistent".
function lv_start_single
{
	local lv_count=0

	if lvs "$OCF_RESKEY_vg_name" >/dev/null 2>&1; then
		lv_count=$(lvs --noheadings -o name "$OCF_RESKEY_vg_name" |
			grep -c -v -e _mlog -e _mimage -e nconsistent)
	fi

	if [ "$lv_count" -gt 1 ]; then
		ocf_log err "HA LVM requires Only one logical volume per volume group."
		ocf_log err "There are currently $lv_count logical volumes in $OCF_RESKEY_vg_name"
		ocf_log err "Failing HA LVM start of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
		exit $OCF_ERR_GENERIC
	fi

	if ! lv_activate start; then
		return 1
	fi

	return 0
}

# lv_start: dispatch on whether the VG has the cluster attribute set.
function lv_start
{
	# We pass in the VG name to see of the logical volume is clustered
	if [[ "$(vgs -o attr --noheadings "$OCF_RESKEY_vg_name")" =~ .....c ]]; then
		lv_start_clustered
	else
		lv_start_single
	fi
}

# lv_stop_clustered: deactivate the LV with "lvchange -aln".
function lv_stop_clustered
{
	lvchange -aln "$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name"
}

# lv_stop_single: tagging variant of stop; returns 0 or 1.
function lv_stop_single
{
	if ! lv_activate stop; then
		return 1
	fi

	return 0
}

# lv_stop: dispatch on whether the VG has the cluster attribute set.
function lv_stop
{
	# We pass in the VG name to see of the logical volume is clustered
	if [[ "$(vgs -o attr --noheadings "$OCF_RESKEY_vg_name")" =~ .....c ]]; then
		lv_stop_clustered
	else
		lv_stop_single
	fi
}

# Patch provenance (next file in the same diff):
#   diff --git a/rgmanager/src/resources/lvm_by_vg.sh b/rgmanager/src/resources/lvm_by_vg.sh
#   index f32445e91..99a8aee8f 100644
#   --- a/rgmanager/src/resources/lvm_by_vg.sh
#   +++ b/rgmanager/src/resources/lvm_by_vg.sh
#   @@ -1,478 +1,478 @@
#
# #!/bin/bash
#
# Copyright (C) 1997-2003 Sistina Software, Inc.  All rights reserved.
# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#

# vg_owner
#
# Returns:
#    1 == We are the owner
#    2 == We can claim it
#    0 == Owned by someone else
function vg_owner
{
	local owner
	local my_name

	# vgs pads its output; strip the blanks so the tag compares cleanly.
	owner=$(vgs -o tags --noheadings "$OCF_RESKEY_vg_name" | tr -d ' ')
	my_name=$(local_node_name)

	if [ -z "$my_name" ]; then
		ocf_log err "Unable to determine cluster node name"
		return 0
	fi

	if [ -z "$owner" ]; then
		# No-one owns this VG yet, so we can claim it
		return 2
	fi

	if [ "$owner" == "$my_name" ]; then
		return 1
	fi

	# Tagged by another node: only claimable if that node has left.
	if is_node_member_clustat "$owner" ; then
		return 0
	fi
	return 2
}

# _strip_tags: remove every tag from the VG, then verify none remain.
function _strip_tags
{
	local i

	for i in $(vgs --noheadings -o tags "$OCF_RESKEY_vg_name" | sed s/","/" "/g); do
		ocf_log info "Stripping tag, $i"
		vgchange --deltag "$i" "$OCF_RESKEY_vg_name"
	done

	if [ -n "$(vgs -o tags --noheadings "$OCF_RESKEY_vg_name" | tr -d ' ')" ]; then
		ocf_log err "Failed to remove ownership tags from $OCF_RESKEY_vg_name"
		return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

# strip_tags: _strip_tags with a vgreduce clean-up in between when the
# first pass fails.  The second pass always runs.
function strip_tags
{
	if ! _strip_tags; then
		ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"

		if ! vgreduce --removemissing --force --config \
		    "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
		    "$OCF_RESKEY_vg_name"; then

			ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
			return $OCF_ERR_GENERIC
		fi
		ocf_log notice "Cleanup of $OCF_RESKEY_vg_name successful"
	fi

	if ! _strip_tags; then
		ocf_log err "Failed 2nd attempt to remove tags from, $OCF_RESKEY_vg_name"
		return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

# strip_and_add_tag: replace whatever tags the VG has with our node name.
function strip_and_add_tag
{
	if ! strip_tags; then
		ocf_log err "Failed to remove tags from volume group, $OCF_RESKEY_vg_name"
		return $OCF_ERR_GENERIC
	fi

	if ! vgchange --addtag "$(local_node_name)" "$OCF_RESKEY_vg_name"; then
		ocf_log err "Failed to add ownership tag to $OCF_RESKEY_vg_name"
		return $OCF_ERR_GENERIC
	fi

	ocf_log info "New tag \"$(local_node_name)\" added to $OCF_RESKEY_vg_name"

	return $OCF_SUCCESS
}

# vg_status_clustered: always reports success.
function vg_status_clustered
{
	return $OCF_SUCCESS
}

# vg_status
#
# Are all the LVs active?
# Returns: $OCF_SUCCESS, $OCF_NOT_RUNNING if any LV is inactive,
#          $OCF_ERR_GENERIC on device-node or ownership problems.
function vg_status_single
{
	local i
	local dev
	local realdev
	local my_name=$(local_node_name)

	#
	# Check that all LVs are active
	#
	for i in $(lvs "$OCF_RESKEY_vg_name" --noheadings -o attr); do
		if [[ ! $i =~ ....a. ]]; then
			return $OCF_NOT_RUNNING
		fi
	done

	#
	# Check if all links/device nodes are present
	#
	for i in $(lvs "$OCF_RESKEY_vg_name" --noheadings -o name); do
		dev="/dev/$OCF_RESKEY_vg_name/$i"

		if [ ! -h "$dev" ]; then
			# Report the link that was actually tested.
			ocf_log err "Symbolic link for $dev is not present"
			return $OCF_ERR_GENERIC
		fi

		if ! realdev=$(readlink -f "$dev"); then
			ocf_log err "Failed to follow link, $dev"
			return $OCF_ERR_GENERIC
		fi

		if [ ! -b "$realdev" ]; then
			ocf_log err "Device node for $dev is not present"
			return $OCF_ERR_GENERIC
		fi
	done

	#
	# Verify that we are the correct owner
	#
	vg_owner
	if [ $? -ne 1 ]; then
		ocf_log err "WARNING: $OCF_RESKEY_vg_name should not be active"
		ocf_log err "WARNING: $my_name does not own $OCF_RESKEY_vg_name"
		ocf_log err "WARNING: Attempting shutdown of $OCF_RESKEY_vg_name"

		# FIXME: may need more force to shut this down
		vgchange -an "$OCF_RESKEY_vg_name"
		return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

##
# Main status function for volume groups
##
function vg_status
{
	if [[ "$(vgs -o attr --noheadings "$OCF_RESKEY_vg_name")" =~ .....c ]]; then
		vg_status_clustered
	else
		vg_status_single
	fi
}

function vg_verify
{
	# Anything to verify?
	return $OCF_SUCCESS
}

# _vg_ensure_lvs_active <vgchange-activation-flag>
#
# Private helper shared by vg_start_clustered (-aey) and vg_start_single
# (-ay).  The activation commands succeeded, but did they do anything?
# If any LV is still inactive, re-run vgchange with a device filter that
# accepts only the PVs pvs currently reports, then check again.
# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC if an LV is still inactive
# after the filtered retry.
function _vg_ensure_lvs_active
{
	local activate_flag=$1
	local a
	local i
	local results
	local all_pvs
	local resilience=""

	# Flat list of (name, attr) pairs, split on whitespace.
	results=($(lvs -o name,attr --noheadings "$OCF_RESKEY_vg_name" 2> /dev/null))
	a=0
	while [ -n "${results[$a]}" ]; do
		if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then
			all_pvs=($(pvs --noheadings -o name 2> /dev/null))
			resilience=" --config devices{filter=["
			for i in ${all_pvs[*]}; do
				resilience=$resilience'"a|'$i'|",'
			done
			resilience=$resilience"\"r|.*|\"]}"

			# $resilience is intentionally unquoted (leading blank).
			vgchange $activate_flag "$OCF_RESKEY_vg_name" $resilience
			break
		fi
		a=$(($a + 2))
	done

	if [ -z "$resilience" ]; then
		return $OCF_SUCCESS
	fi

	# We need to check the LVs again if we made the command resilient
	results=($(lvs -o name,attr --noheadings "$OCF_RESKEY_vg_name" $resilience 2> /dev/null))
	a=0
	while [ -n "${results[$a]}" ]; do
		if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then
			ocf_log err "Failed to activate $OCF_RESKEY_vg_name"
			return $OCF_ERR_GENERIC
		fi
		a=$(($a + 2))
	done
	ocf_log err "Orphan storage device in $OCF_RESKEY_vg_name slowing operations"

	return $OCF_SUCCESS
}

# vg_start_clustered
#
# Exclusively activate a clustered VG.  On failure: deactivate every LV
# that is not open, retry, and as a last resort vgreduce and retry again.
function vg_start_clustered
{
	local a
	local results
	local try_again=false

	ocf_log info "Starting volume group, $OCF_RESKEY_vg_name"

	if ! vgchange -aey "$OCF_RESKEY_vg_name"; then
		try_again=true

		# Failure to activate:
		# This could be caused by a remotely active LV.  Before
		# attempting any repair of the VG, we will first attempt
		# to deactivate the VG cluster-wide.
		# We must check for open LVs though, since these cannot
		# be deactivated.  We have no choice but to go one-by-one.

		# Allow for some settling
		sleep 5

		results=($(lvs -o name,attr --noheadings "$OCF_RESKEY_vg_name" 2> /dev/null))
		a=0
		while [ -n "${results[$a]}" ]; do
			if [[ ! ${results[$(($a + 1))]} =~ ....ao ]]; then
				if ! lvchange -an "$OCF_RESKEY_vg_name/${results[$a]}"; then
					ocf_log err "Unable to perform required deactivation of $OCF_RESKEY_vg_name before starting"
					return $OCF_ERR_GENERIC
				fi
			fi
			a=$(($a + 2))
		done
	fi

	# try_again holds the word "true" or "false"; it must be expanded
	# so that the builtin of that name is what gets run.
	if $try_again && ! vgchange -aey "$OCF_RESKEY_vg_name"; then
		ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name"
		ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"

		if ! vgreduce --removemissing --force "$OCF_RESKEY_vg_name"; then
			ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
			return $OCF_ERR_GENERIC
		fi

		if ! vgchange -aey "$OCF_RESKEY_vg_name"; then
			ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name"
			return $OCF_ERR_GENERIC
		fi

		ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name successful"
		return $OCF_SUCCESS
	fi

	_vg_ensure_lvs_active -aey || return $OCF_ERR_GENERIC

	return $OCF_SUCCESS
}

# vg_start_single
#
# Tagging variant: claim the VG with our tag and activate it, with one
# vgreduce clean-up and retry on failure.
function vg_start_single
{
	ocf_log info "Starting volume group, $OCF_RESKEY_vg_name"

	vg_owner
	case $? in
	0)
		ocf_log info "Someone else owns this volume group"
		return $OCF_ERR_GENERIC
		;;
	1)
		ocf_log info "I own this volume group"
		;;
	2)
		ocf_log info "I can claim this volume group"
		;;
	esac

	if ! strip_and_add_tag ||
	   ! vgchange -ay "$OCF_RESKEY_vg_name"; then
		ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name"
		ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"

		if ! vgreduce --removemissing --force --config \
		    "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
		    "$OCF_RESKEY_vg_name"; then

			ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
			return $OCF_ERR_GENERIC
		fi

		ocf_log notice "Cleanup of $OCF_RESKEY_vg_name successful"

		if ! strip_and_add_tag ||
		   ! vgchange -ay "$OCF_RESKEY_vg_name"; then
			ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name"
			return $OCF_ERR_GENERIC
		fi

		ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name successful"
		return $OCF_SUCCESS
	fi

	_vg_ensure_lvs_active -ay || return $OCF_ERR_GENERIC

	return $OCF_SUCCESS
}

##
# Main start function for volume groups
##
function vg_start
{
	local a=0
	local results

	# Refuse only when an LV's attr field starts with r or R.
	results=($(lvs -o name,attr --noheadings "$OCF_RESKEY_vg_name" 2> /dev/null))
	while [ -n "${results[$a]}" ]; do
		if [[ ${results[$(($a + 1))]} =~ ^[rR] ]]; then
			ocf_log err "RAID LVs are not supported without an 'lv_name' specification"
			return $OCF_ERR_GENERIC
		fi
		a=$(($a + 2))
	done

	if [[ "$(vgs -o attr --noheadings "$OCF_RESKEY_vg_name")" =~ .....c ]]; then
		vg_start_clustered
	else
		vg_start_single
	fi
}

# _vg_deactivate <vgchange-deactivation-flag>
#
# Private helper shared by vg_stop_clustered (-aln) and vg_stop_single
# (-an): shut the VG down, then make sure every LV is inactive.
# Globals: OCF_RESKEY_self_fence ("yes" or 1 forces a reboot when an LV
#          is still active).
# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC if an LV is still active.
function _vg_deactivate
{
	local deactivate_flag=$1
	local a=0
	local results
	local self_fence=""

	case ${OCF_RESKEY_self_fence} in
		"yes"|1) self_fence=1 ;;
		*)       self_fence="" ;;
	esac

	# Shut down the volume group
	# Do we need to make this resilient?
	vgchange $deactivate_flag "$OCF_RESKEY_vg_name"

	# Make sure all the logical volumes are inactive
	results=($(lvs -o name,attr --noheadings "$OCF_RESKEY_vg_name" 2> /dev/null))
	while [ -n "${results[$a]}" ]; do
		if [[ ${results[$(($a + 1))]} =~ ....a. ]]; then
			if [ "$self_fence" ]; then
				ocf_log err "Unable to deactivate $OCF_RESKEY_vg_name/${results[$a]} REBOOT"
				sync
				reboot -fn
			else
				ocf_log err "Logical volume $OCF_RESKEY_vg_name/${results[$a]} failed to shutdown"
			fi
			return $OCF_ERR_GENERIC
		fi
		a=$(($a + 2))
	done

	return $OCF_SUCCESS
}

# vg_stop_clustered: deactivate the VG with "vgchange -aln" and verify.
function vg_stop_clustered
{
	_vg_deactivate -aln
}

# vg_stop_single: deactivate, verify, then drop the ownership tags.
function vg_stop_single
{
	_vg_deactivate -an || return $OCF_ERR_GENERIC

	# Make sure we are the owner before we strip the tags
	vg_owner
	if [ $? -ne 0 ]; then
		strip_tags
	fi

	return $OCF_SUCCESS
}

##
# Main stop function for volume groups
##
function vg_stop
{
	if [[ "$(vgs -o attr --noheadings "$OCF_RESKEY_vg_name")" =~ .....c ]]; then
		vg_stop_clustered
	else
		vg_stop_single
	fi
}

# Patch provenance (next file in the same diff):
#   diff --git a/rgmanager/src/resources/oracledb.sh.in b/rgmanager/src/resources/oracledb.sh.in
#   index ae9f16caf..8d3c39e04 100644
#   --- a/rgmanager/src/resources/oracledb.sh.in
#   +++ b/rgmanager/src/resources/oracledb.sh.in
#   @@ -1,913 +1,913 @@
#
# #!/bin/bash
#
# Copyright (C) 1997-2003 Sistina Software, Inc.  All rights reserved.
# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Author(s):
#  Hardy Merrill
#  Lon Hohberger
#  Michael Moon
#
# chkconfig: 345 99 01
# description: Service script for starting/stopping \
#              Oracle(R) Database 10g on \
#              Red Hat Enterprise Linux 5
#
# NOTES:
#
# (1) You can comment out the LOCKFILE declaration below.  This will prevent
#     the need for this script to access anything outside of the ORACLE_HOME
#     path.
#
# (2) You MUST customize ORACLE_USER, ORACLE_HOME, ORACLE_SID, and
#     ORACLE_HOSTNAME to match your installation if not running from within
#     rgmanager.
#
# (3) Do NOT place this script in shared storage; place it in ORACLE_USER's
#     home directory in non-clustered environments and /usr/share/cluster
#     in rgmanager/Red Hat cluster environments.
#
# Oracle is a registered trademark of Oracle Corporation.
# Oracle9i is a trademark of Oracle Corporation.
# Oracle10g is a trademark of Oracle Corporation.
# All other trademarks are property of their respective owners.
#

. /etc/init.d/functions

#
# Source stuff from /etc/sysconfig, but this may be overridden if
# this is being called as a cluster resource agent instead.
#. /etc/sysconfig/oracledb

declare SCRIPT="$(basename $0)"
declare SCRIPTDIR="$(dirname $0)"

# Resource-agent parameters, when supplied, take precedence over whatever
# the environment already provides.
if [ -n "$OCF_RESKEY_user" ]; then ORACLE_USER=$OCF_RESKEY_user; fi
if [ -n "$OCF_RESKEY_home" ]; then ORACLE_HOME=$OCF_RESKEY_home; fi
if [ -n "$OCF_RESKEY_name" ]; then ORACLE_SID=$OCF_RESKEY_name; fi
if [ -n "$OCF_RESKEY_listener_name" ]; then ORACLE_LISTENER=$OCF_RESKEY_listener_name; fi
if [ -n "$OCF_RESKEY_lockfile" ]; then LOCKFILE=$OCF_RESKEY_lockfile; fi
if [ -n "$OCF_RESKEY_type" ]; then ORACLE_TYPE=$OCF_RESKEY_type; fi
if [ -n "$OCF_RESKEY_vhost" ]; then ORACLE_HOSTNAME=$OCF_RESKEY_vhost; fi

######################################################
# Customize these to match your Oracle installation. #
######################################################
#
# 1.  Oracle user.  Must be the same across all cluster members.  In the event
#     that this script is run by the super-user, it will automatically switch
#     to the Oracle user and restart.  Oracle needs to run as the Oracle
#     user, not as root.
#
#[ -n "$ORACLE_USER" ] || ORACLE_USER=oracle

#
# 2.  Oracle home.  This is set up during the installation phase of Oracle.
#     From the perspective of the cluster, this is generally the mount point
#     you intend to use as the mount point for your Oracle Infrastructure
#     service.
#
#[ -n "$ORACLE_HOME" ] || ORACLE_HOME=/mnt/oracle/home

#
# 3.  This is your SID.  This is set up during oracle installation as well.
#
#[ -n "$ORACLE_SID" ] || ORACLE_SID=orcl

#
# 4.  The oracle user probably doesn't have the permission to write to
#     /var/lock/subsys, so use the user's home directory.
#
#[ -n "$LOCKFILE" ] || LOCKFILE="/home/$ORACLE_USER/.oracle-ias.lock"
if [ -z "$LOCKFILE" ]; then
	LOCKFILE="$ORACLE_HOME/.oracle-ias.lock"
fi
#[ -n "$LOCKFILE" ] || LOCKFILE="/var/lock/subsys/oracle-ias" # Watch privileges

#
# 5.  Type of Oracle Database.  Currently supported: 10g 10g-iAS(untested!)
#
if [ -z "$ORACLE_TYPE" ]; then
	ORACLE_TYPE="base-em"
fi

#
# 6.  Oracle virtual hostname.  This is the hostname you gave Oracle during
#     installation.
#
#[ -n "$ORACLE_HOSTNAME" ] || ORACLE_HOSTNAME=svc0.foo.test.com

###########################################################################
# The installation type is compared in lower case from here on.
ORACLE_TYPE=$(echo $ORACLE_TYPE | tr A-Z a-z)
export ORACLE_USER ORACLE_HOME ORACLE_SID LOCKFILE ORACLE_TYPE
export ORACLE_HOSTNAME

##########################
# Set up paths we'll use.  Not all are used by all the different types of
# Oracle installations
#
export LD_LIBRARY_PATH=$ORACLE_HOME/lib:$ORACLE_HOME/opmn/lib
export PATH=$ORACLE_HOME/bin:$ORACLE_HOME/opmn/bin:$ORACLE_HOME/dcm/bin:$PATH

declare -i RESTART_RETRIES=0
declare -r DB_PROCNAMES="pmon"
#declare -r DB_PROCNAMES="pmonXX" # testing
#declare -r DB_PROCNAMES="pmon smon dbw0 lgwr"
declare -r LSNR_PROCNAME="tnslsnr"
#declare -r LSNR_PROCNAME="tnslsnrXX" # testing

##########################################################
# (Hopefully) No user-serviceable parts below this line.
#
##########################################################

#
# Print the resource-agent description on stdout.
#
# NOTE(review): the here-document opener and every XML element of the
# original metadata were lost from this copy; only the text content
# survives.  The body below is that surviving text, one item per line.
# Restore the real <resource-agent> markup from the upstream
# oracledb.sh.in before shipping -- rgmanager cannot parse this as is.
#
meta_data()
{
	cat <<EOT
1.0
Oracle 10g Failover Instance
Oracle 10g Failover Instance
Instance name (SID) of oracle instance
Oracle SID
Oracle Listener Instance Name. If you have multiple instances of Oracle running, it may be necessary to have multiple listeners on the same machine with different names.
Oracle Listener Instance Name
Oracle user name. This is the user name of the Oracle user which the Oracle AS instance runs as.
Oracle User Name
This is the Oracle (application, not user) home directory. This is configured when you install Oracle.
Oracle Home Directory
This is the Oracle installation type: base - Database Instance and Listener only base-em (or 10g) - Database, Listener, Enterprise Manager, and iSQL*Plus ias (or 10g-ias) - Internet Application Server (Infrastructure)
Oracle Installation Type
Virtual Hostname matching the installation hostname of Oracle 10g. Note that during the start/stop of an oracledb resource, your hostname will temporarily be changed to this hostname. As such, it is recommended that oracledb resources be instanced as part of an exclusive service only.
Virtual Hostname
EOT
}

#
# "action"-like macro supporting functions
#
# usage: faction <message> <command> [args...]
#
# Prints <message> without a newline, runs the command, then reports the
# outcome through echo_success / echo_failure.
# Returns 0 if the command succeeded, 1 otherwise.
#
faction()
{
	echo -n "$1"
	shift

	# "$@" keeps every argument intact; $* would re-split arguments that
	# contain whitespace.
	if "$@"; then
		echo_success
		echo
		return 0
	fi

	echo_failure
	echo
	return 1
}

#
# Start Oracle9i (database portion)
#
# Feeds "startup" / "quit" to sqlplus and keeps the output in
# @LOGDIR@/$SCRIPT-start.log in the hope that it is useful when something
# does not work properly.
# Returns 0 on success, 1 if sqlplus failed or logged an ORA- error.
#
start_db()
{
	declare logfile="@LOGDIR@/$SCRIPT-start.log"
	declare -i rv

	# The two commands are piped straight into sqlplus, so there is no
	# temporary file in /tmp to create, check or clean up.
	printf 'startup\nquit\n' | sqlplus "/ as sysdba" &> "$logfile"
	rv=$?

	# Dump logfile to /var/log/messages
	logger -f "$logfile"

	if [ $rv -ne 0 ]; then
		echo "ORACLE_HOME Incorrectly set?"
		echo "See $logfile for more information."
		return 1
	fi

	#
	# If we see:
	# ORA-.....: failure, we failed
	#
	if grep -q "^ORA-" "$logfile"; then
		echo "ORACLE_SID Incorrectly set?"
		echo "See $logfile for more information."
		return 1
	fi

	return 0
}

#
# Stop Oracle9i (database portion)
#
# usage: stop_db [shutdown-mode]
#
# shutdown-mode is handed to the sqlplus "shutdown" command and defaults
# to "immediate".  Output is kept in @LOGDIR@/$SCRIPT-stop.log.
# Returns 0 on success, 1 if sqlplus failed or logged an ORA- error.
#
stop_db()
{
	declare how_shutdown="${1:-immediate}"
	declare logfile="@LOGDIR@/$SCRIPT-stop.log"
	declare -i rv

	# Setup for Stop ...
	printf 'shutdown %s\nquit\n' "$how_shutdown" |
		sqlplus "/ as sysdba" &> "$logfile"
	rv=$?

	# Dump logfile to /var/log/messages
	logger -f "$logfile"

	if [ $rv -ne 0 ]; then
		echo "ORACLE_HOME Incorrectly set?"
		echo "See $logfile for more information."
		return 1
	fi

	#
	# If we see 'failure' in the log, we're done.
	#
	if grep -q "^ORA-" "$logfile"; then
		echo_failure
		echo
		echo "Possible reason: ORACLE_SID Incorrectly set."
		echo "See $logfile for more information."
		return 1
	fi

	return 0
}

#
# Destroy any remaining processes with refs to $ORACLE_HOME
#
# Only processes whose "ps ax" line mentions $ORACLE_HOME and matches
# ora_*_$ORACLE_SID are killed.  Always returns 0.
#
force_cleanup()
{
	declare pids
	declare pid

	logger -t "$SCRIPT" " Not all Oracle processes exited cleanly, killing"

	# An empty pattern would match every process line; the unquoted
	# original matched nothing in that case, so keep doing nothing.
	[ -n "$ORACLE_HOME" ] || return 0

	# Patch from Shane Bradley to fix 471266
	# -F: the home directory is a literal path, not a regular expression.
	pids=$(ps ax | grep -F -- "$ORACLE_HOME" |
		grep "ora_.*_${ORACLE_SID}" | grep -v grep | awk '{print $1}')

	# $pids is deliberately unquoted: one word per PID.
	for pid in $pids; do
		if kill -9 "$pid"; then
			logger -t "$SCRIPT" "Killed $pid"
		fi
	done

	return 0
}

#
# Wait for oracle processes to exit.  Time out after 90 seconds, then
# kill whatever is left via force_cleanup.  Always returns 0.
#
exit_idle()
{
	declare -i n=0

	# See force_cleanup: with an empty ORACLE_HOME there is nothing we
	# can meaningfully wait for.
	[ -n "$ORACLE_HOME" ] || return 0

	while ps ax | grep -F -- "$ORACLE_HOME" | grep -q -v grep; do
		if [ $n -ge 90 ]; then
			force_cleanup
			return 0
		fi
		sleep 1
		((n++))
	done

	return 0
}

#
# Get database background process status.  Restart it if it failed and
# we have seen the lock file.
#
# usage: get_db_status <subsys_lock>
#
# subsys_lock is 0 when the service is expected to be running (status_oracle
# passes 0 if no LOCKFILE is configured or the lock file exists), non-zero
# otherwise.
#
# Returns 0 if every process in $DB_PROCNAMES is up (possibly after a
# restart), 3 if a process is down and is not expected to be running,
# 1 if it is down and could not be brought back.
#
get_db_status()
{
	declare -i subsys_lock=$1
	declare -i i=0
	declare procname
	declare ora_procname

	# $DB_PROCNAMES is a space-separated list; splitting is intended.
	for procname in $DB_PROCNAMES ; do
		ora_procname="ora_${procname}_${ORACLE_SID}"

		if status "$ora_procname"; then
			# This one's okay; go to the next one.
			continue
		fi

		#
		# We're not supposed to be running, and we are,
		# in fact, not running...
		# XXX only works when monitoring one db process; consider
		# extending in future.
		#
		if [ $subsys_lock -ne 0 ]; then
			return 3
		fi

		# With RESTART_RETRIES=0 the loop body never runs, i stays 0
		# and the failure is reported straight away.
		for (( i=RESTART_RETRIES ; i; i-- )) ; do
			# this db process is down - stop and
			# (re)start all ora_XXXX_$ORACLE_SID processes
			logger -t "$SCRIPT" "Restarting Oracle Database..."

			if ! stop_db immediate; then
				# stop failed - return 1
				return 1
			fi

			if start_db; then
				# ora_XXXX_$ORACLE_SID processes started
				# successfully, so break out of the
				# stop/start 'for' loop
				break
			fi
		done

		if [ $i -eq 0 ]; then
			# stop/start's failed - return 1 (failure)
			return 1
		fi
	done

	return 0
}

#
# Get the status of the Oracle listener process
#
# usage: get_lsnr_status <subsys_lock>
#
# Same subsys_lock convention and return codes as get_db_status:
# 0 running, 3 cleanly down, 1 down and not restartable.
#
get_lsnr_status()
{
	declare -i subsys_lock=$1
	declare -i i=0

	if status "$LSNR_PROCNAME"; then
		return 0	# Listener is running fine
	fi

	#
	# We're not supposed to be running, and we are,
	# in fact, not running.  Return 3
	#
	if [ $subsys_lock -ne 0 ]; then
		return 3
	fi

	#
	# Listener is NOT running (but should be) - try to restart
	#
	# ${ORACLE_LISTENER:+...} passes the listener name only when one is
	# configured, so lsnrctl never receives an empty argument.
	#
	for (( i=RESTART_RETRIES ; i; i-- )) ; do
		action "Restarting Oracle listener:" lsnrctl start \
			${ORACLE_LISTENER:+"$ORACLE_LISTENER"}

		if lsnrctl status ${ORACLE_LISTENER:+"$ORACLE_LISTENER"} \
			&> /dev/null; then
			break	# Listener was (re)started and is running fine
		fi
	done

	if [ $i -eq 0 ]; then
		# stop/start's failed - return 1 (failure)
		return 1
	fi

	if ! status "$LSNR_PROCNAME"; then
		return 1	# Problem restarting the Listener
	fi

	return 0	# Success restarting the Listener
}

#
# usage: get_opmn_proc_status [process-type]
#
# Get the status of a specific OPMN-managed process.
If process-type # is not specified, assume the process-type is the same as the ias-component. # If the lock-file exists (or no lock file is specified), try to restart # the given process-type if it is not running. # get_opmn_proc_status() { declare comp=$1 declare opmntype=$2 declare type_pretty declare _pid _status [ -n "$comp" ] || return 1 if [ -z "$opmntype" ]; then opmntype=$comp else type_pretty=" [$opmntype]" fi for (( i=$RESTART_RETRIES ; i; i-- )) ; do _status=`opmnctl status | grep "^$comp " | grep " $opmntype " | cut -d '|' -f3,4 | sed -e 's/ //g' -e 's/|/ /g'` _pid=`echo $_status | cut -f1 -d' '` _status=`echo $_status | cut -f2 -d' '` if [ "${_status}" == "Alive" ] || [ "${_status}" == "Init" ]; then if [ $i -lt $RESTART_RETRIES ] ; then echo " $comp$type_pretty restarted" fi echo " $comp$type_pretty (pid $_pid) is running..." break else echo " $comp$type_pretty is stopped" # # Try to restart it, but don't worry if we fail. OPMN # is supposed to handle restarting these anyway. # # If it's running and you tell OPMN to "start" it, # you will get an error. # # If it's NOT running and you tell OPMN to "restart" # it, you will also get an error. # opmnctl startproc process-type=$opmntype &> /dev/null fi done if [ $i -eq 0 ]; then # restarts failed - return 1 (failure) return 1 fi return 0 } # # Get the status of the OPMN-managed processes. # get_opmn_status() { declare -i subsys_lock=$1 declare -i ct_errors=0 opmnctl status &> /dev/null if [ $? -eq 2 ]; then # # OPMN not running?? # echo "opmn is stopped" if [ $subsys_lock -eq 0 ]; then # # Don't handle full opmn-restart. XXX # return 1 fi # That's okay, it's not supposed to be! return 3 fi # # Print out the PIDs for everyone. # echo "opmn is running..." 
echo "opmn components:" # # Check the OPMN-managed processes # get_opmn_proc_status OID || ((ct_errors++)) get_opmn_proc_status HTTP_Server || ((ct_errors++)) get_opmn_proc_status OC4J OC4J_SECURITY || ((ct_errors++)) # # One or more OPMN-managed processes failed and could not be # restarted. # if [ $ct_errors -ne 0 ]; then return 1 fi return 0 } # # Helps us keep a running status so we know what our ultimate return # code will be. Returns 1 if the $1 and $2 are not equivalent, otherwise # returns $1. The return code is meant to be the next $1 when this is # called, so, for example: # # update_status 0 <-- returns 0 # update_status $? 0 <-- returns 0 # update_status $? 3 <-- returns 1 (values different - error condition) # update_status $? 1 <-- returns 1 (same, but happen to be error state!) # # update_status 3 # update_status $? 3 <-- returns 3 # # (and so forth...) # update_status() { declare -i old_status=$1 declare -i new_status=$2 if [ -z "$2" ]; then return $old_status fi if [ $old_status -ne $new_status ]; then return 1 fi return $old_status } # # Print an error message to the user and exit. # oops() { echo "Please configure this script ($0) to" echo "match your installation." echo echo " $1 failed validation checks." exit 1 } # # Do some validation on the user-configurable stuff at the beginning of the # script. # validation_checks() { # # If the oracle user doesn't exist, we're done. 
# [ -n "$ORACLE_USER" ] || oops "ORACLE_USER" id -u $ORACLE_USER > /dev/null || oops "ORACLE_USER" id -g $ORACLE_USER > /dev/null || oops "ORACLE_USER" # # If the oracle home isn't a directory, we're done # [ -n "$ORACLE_HOME" ] || oops ORACLE_HOME #[ -d "$ORACLE_HOME" ] || oops ORACLE_HOME # # If the oracle SID is NULL, we're done # [ -n "$ORACLE_SID" ] || oops ORACLE_SID # # If we don't know the type, we're done # if [ "$ORACLE_TYPE" = "base" ]; then # Other names for base ORACLE_TYPE="base" elif [ "$ORACLE_TYPE" = "10g" ] || [ "$ORACLE_TYPE" = "base-em" ]; then ORACLE_TYPE="base-em" elif [ "$ORACLE_TYPE" = "10g-ias" ] || [ "$ORACLE_TYPE" = "ias" ]; then ORACLE_TYPE="ias" else oops ORACLE_TYPE fi # # If the hostname is zero-length, fix it # [ -n "$ORACLE_HOSTNAME" ] || ORACLE_HOSTNAME=`hostname` # # Super user? Automatically change UID and exec as oracle user. # Oracle needs to be run as the Oracle user, not root! # if [ "`id -u`" = "0" ]; then echo "Restarting $0 as $ORACLE_USER." # # Breaks on RHEL5 # exec sudo -u $ORACLE_USER $0 $* # su $ORACLE_USER -c "$0 $*" exit $? fi # # If we're not root and not the Oracle user, we're done. # [ "`id -u`" = "`id -u $ORACLE_USER`" ] || exit 1 [ "`id -g`" = "`id -g $ORACLE_USER`" ] || exit 1 # # Go home. # cd $ORACLE_HOME return 0 } # # Start Oracle9i Application Server Infrastructure # start_oracle() { faction "Starting Oracle Database:" start_db || return 1 action "Starting Oracle Listener:" lsnrctl start $ORACLE_LISTENER || return 1 if [ "$ORACLE_TYPE" = "base-em" ]; then action "Starting iSQL*Plus:" isqlplusctl start || return 1 action "Starting Oracle EM DB Console:" emctl start dbconsole || return 1 elif [ "$ORACLE_TYPE" = "ias" ]; then action "Starting Oracle EM:" emctl start em || return 1 action "Starting iAS Infrastructure:" opmnctl startall || return 1 fi if [ -n "$LOCKFILE" ]; then touch $LOCKFILE fi return 0 } # # Stop Oracle9i Application Server Infrastructure # stop_oracle() { if ! 
[ -e "$ORACLE_HOME/bin/lsnrctl" ]; then echo "Oracle Listener Control is not available" echo " ($ORACLE_HOME not mounted?)" return 0 fi if [ "$ORACLE_TYPE" = "base-em" ]; then action "Stopping Oracle EM DB Console:" emctl stop dbconsole || return 1 action "Stopping iSQL*Plus:" isqlplusctl stop || return 1 elif [ "$ORACLE_TYPE" = "ias" ]; then action "Stopping iAS Infrastructure:" opmnctl stopall || return 1 action "Stopping Oracle EM:" emctl stop em || return 1 fi faction "Stopping Oracle Database:" stop_db immediate if [ $? -ne 0 ]; then faction "Stopping Oracle Database (hard):" stop_db abort || return 1 fi action "Stopping Oracle Listener:" lsnrctl stop $ORACLE_LISTENER faction "Waiting for all Oracle processes to exit:" exit_idle if [ $? -ne 0 ]; then echo "WARNING: Not all Oracle processes exited cleanly" fi if [ -n "$LOCKFILE" ]; then rm -f $LOCKFILE fi return 0 } # # Find and display the status of iAS infrastructure. # # This has three parts: # (1) Oracle database itself # (2) Oracle listener process # (3) OPMN and OPMN-managed processes # # - If all are (cleanly) down, we return 3. In order for this to happen, # $LOCKFILE must not exist. In this case, we try and restart certain parts # of the service - as this may be running in a clustered environment. # # - If some but not all are running (and, if $LOCKFILE exists, we could not # restart the failed portions), we return 1 (ERROR) # # - If all are running, return 0. In the "all-running" case, we recreate # $LOCKFILE if it does not exist. # status_oracle() { declare -i subsys_lock=1 declare -i last # # Check for lock file. Crude and rudimentary, but it works # if [ -z "$LOCKFILE" ] || [ -f $LOCKFILE ]; then subsys_lock=0 fi # Check database status get_db_status $subsys_lock update_status $? # Start last=$? # Check & report listener status get_lsnr_status $subsys_lock update_status $? $last last=$? if [ "$ORACLE_TYPE" = "base-em" ]; then # XXX Add isqlplus status check?! 
emctl status dbconsole 2>&1 | grep "is running" update_status $? $last last=$? elif [ "$ORACLE_TYPE" = "ias" ]; then # Check & report opmn / opmn-managed process status get_opmn_status $subsys_lock update_status $? $last last=$? fi # # No lock file, but everything's running. Put the lock # file back. XXX - this kosher? # if [ $last -eq 0 ] && [ $subsys_lock -ne 0 ]; then touch $LOCKFILE fi return $last } ######################## # Do some real work... # ######################## if [ "$1" = "meta-data" ]; then meta_data exit 0 fi validation_checks $* case $1 in start) start_oracle exit $? ;; stop) stop_oracle exit $? ;; status|monitor) status_oracle exit $? ;; restart) $0 stop || exit $? $0 start || exit $? exit 0 ;; *) echo "usage: $SCRIPT {start|stop|status|restart|meta-data}" exit 1 ;; esac exit 0 diff --git a/rgmanager/src/resources/tomcat-6.sh b/rgmanager/src/resources/tomcat-6.sh index c9513b267..bc9059df2 100644 --- a/rgmanager/src/resources/tomcat-6.sh +++ b/rgmanager/src/resources/tomcat-6.sh @@ -1,240 +1,248 @@ #!/bin/bash # # Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. # Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # export LC_ALL=C export LANG=C export PATH=/bin:/sbin:/usr/bin:/usr/sbin . $(dirname $0)/ocf-shellfuncs . 
$(dirname $0)/utils/config-utils.sh . $(dirname $0)/utils/messages.sh . $(dirname $0)/utils/ra-skelet.sh declare TOMCAT_pid_file="`generate_name_for_pid_file`" declare TOMCAT_conf_dir="`generate_name_for_conf_dir`/conf" declare TOMCAT_gen_config_file="$TOMCAT_conf_dir/server.xml" declare TOMCAT_gen_catalina_base="`generate_name_for_conf_dir`" declare CATALINA_HOME declare CATALINA_BASE declare CATALINA_TMPDIR declare CLASSPATH declare TOMCAT_USER ## verify_all() { clog_service_verify $CLOG_INIT if [ -z "$OCF_RESKEY_name" ]; then clog_service_verify $CLOG_FAILED "Invalid Name Of Service" return $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_service_name" ]; then clog_service_verify $CLOG_FAILED_NOT_CHILD return $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_config_file" ]; then clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_config_file" clog_service_verify $CLOG_FAILED return $OCF_ERR_ARGS fi if [ ! -r "$OCF_RESKEY_config_file" ]; then clog_check_file_exist $CLOG_FAILED_NOT_READABLE $OCF_RESKEY_config_file clog_service_verify $CLOG_FAILED return $OCF_ERR_ARGS fi . "$OCF_RESKEY_config_file" if [ $? -ne 0 ]; then clog_service_verify $CLOG_FAILED "Error In The File \"$OCF_RESKEY_config_file\"" return $OCF_ERR_ARGS fi if [ -z "$CATALINA_HOME" ]; then clog_service_verify $CLOG_FAILED "CATALINA_HOME Not Specified In ${OCF_RESKEY_config_file}" return $OCF_ERR_ARGS; fi if [ ! -d "$CATALINA_HOME" ]; then clog_service_verify $CLOG_FAILED "CATALINA_HOME Does Not Exist" return $OCF_ERR_ARGS; fi if [ -z "$CATALINA_TMPDIR" ]; then clog_service_verify $CLOG_FAILED "CATALINA_TMPDIR Not Specified In ${OCF_RESKEY_config_file}" return $OCF_ERR_ARGS; fi if [ ! 
-d "$CATALINA_TMPDIR" ]; then clog_service_verify $CLOG_FAILED "CATALINA_TMPDIR Does Not Exist" return $OCF_ERR_ARGS; fi if [ -z "$TOMCAT_USER" ]; then clog_service_verify $CLOG_FAILED "TOMCAT_USER Does Not Exist" return $OCF_ERR_ARGS; fi clog_service_verify $CLOG_SUCCEED return 0 } generate_config_file() { declare original_file="$1" declare generated_file="$2" declare ip_addresses="$3" if [ -f "$generated_file" ]; then sha1_verify "$generated_file" if [ $? -ne 0 ]; then clog_check_sha1 $CLOG_FAILED return 0 fi fi clog_generate_config $CLOG_INIT "$original_file" "$generated_file" $(dirname $0)/utils/tomcat-parse-config.pl $ip_addresses < "$original_file" > "$generated_file" sha1_addToFile "$generated_file" clog_generate_config $CLOG_SUCCEED "$original_file" "$generated_file" return 0; } start() { clog_service_start $CLOG_INIT create_pid_directory create_conf_directory "$TOMCAT_conf_dir" check_pid_file "$TOMCAT_pid_file" if [ $? -ne 0 ]; then clog_check_pid $CLOG_FAILED "$TOMCAT_pid_file" clog_service_start $CLOG_FAILED return $OCF_ERR_GENERIC fi clog_looking_for $CLOG_INIT "IP Addresses" get_service_ip_keys "$OCF_RESKEY_service_name" ip_addresses=`build_ip_list` if [ -z "$ip_addresses" ]; then clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Addresses" return $OCF_ERR_GENERIC fi clog_looking_for $CLOG_SUCCEED "IP Addresses" . 
"$OCF_RESKEY_config_file" generate_config_file "$CATALINA_BASE/conf/server.xml" "$TOMCAT_gen_config_file" "$ip_addresses" rm -f "$TOMCAT_gen_catalina_base/conf/tomcat6.conf" ( cat $OCF_RESKEY_config_file | grep -v 'CATALINA_PID=' | grep -v 'CATALINA_BASE='; echo CATALINA_BASE="$TOMCAT_gen_catalina_base"; echo CATALINA_PID="$TOMCAT_pid_file") > "$TOMCAT_gen_catalina_base/conf/tomcat6.conf" ln -s "$CATALINA_BASE"/* "$TOMCAT_gen_catalina_base" &> /dev/null ln -s "$CATALINA_BASE"/conf/* "$TOMCAT_gen_catalina_base"/conf &> /dev/null export TOMCAT_CFG="$TOMCAT_gen_catalina_base/conf/tomcat6.conf" + + tomcat6_options="$tomcat6_options $( + awk '!/^#/ && !/^$/ { ORS=" "; print "export ", $0, ";" }' \ + $TOMCAT_CFG + )" + + eval "$tomcat6_options" + /usr/sbin/tomcat6 start if [ $? -ne 0 ]; then clog_service_start $CLOG_FAILED return $OCF_ERR_GENERIC fi clog_service_start $CLOG_SUCCEED return 0; } stop() { clog_service_stop $CLOG_INIT stop_generic "$TOMCAT_pid_file" "$OCF_RESKEY_shutdown_wait" if [ $? -ne 0 ]; then clog_service_stop $CLOG_FAILED return $OCF_ERR_GENERIC fi if [ -e "$TOMCAT_pid_file" ]; then rm -f "$TOMCAT_pid_file" fi clog_service_stop $CLOG_SUCCEED return 0; } status() { clog_service_status $CLOG_INIT status_check_pid "$TOMCAT_pid_file" if [ $? -ne 0 ]; then clog_service_status $CLOG_FAILED "$TOMCAT_pid_file" return $OCF_ERR_GENERIC fi clog_service_status $CLOG_SUCCEED return 0 } case $1 in meta-data) cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` exit 0 ;; validate-all) verify_all exit $? ;; start) verify_all && start exit $? ;; stop) verify_all && stop exit $? ;; status|monitor) verify_all status exit $? ;; restart) verify_all stop start exit $? 
;; *) echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/rgmanager/src/resources/utils/config-utils.sh b/rgmanager/src/resources/utils/config-utils.sh index 55e60a645..64b184783 100644 --- a/rgmanager/src/resources/utils/config-utils.sh +++ b/rgmanager/src/resources/utils/config-utils.sh @@ -1,283 +1,283 @@ #!/bin/bash # # Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. # Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # declare RA_COMMON_pid_dir=/var/run/cluster declare RA_COMMON_conf_dir=/etc/cluster declare -i FAIL=255 declare -a ip_keys generate_configTemplate() { cat > "$1" << EOT # # "$1" was created from the "$2" # # This template configuration was automatically generated, and will be # automatically regenerated if removed. Once this file has been altered, # automatic re-generation will stop. Remember to copy this file to all # other cluster members after making changes, or your service will not # operate correctly. 
# EOT } generate_configTemplateXML() { cat > "$1" << EOT EOT } sha1_addToFile() { declare sha1line="# rgmanager-sha1 $(sha1sum "$1")" echo $sha1line >> "$1" } sha1_addToFileXML() { declare sha1line="" echo $sha1line >> "$1" } sha1_verify() { declare sha1_new sha1_old declare oldFile=$1 ocf_log debug "Checking: SHA1 checksum of config file $oldFile" sha1_new=`cat "$oldFile" | grep -v "# rgmanager-sha1" | sha1sum | sed 's/^\([a-z0-9]\+\) .*$/\1/'` sha1_old=`tail -n 1 "$oldFile" | sed 's/^\(