diff --git a/heartbeat/Raid1 b/heartbeat/Raid1 index a3ac5a682..1b39e7133 100755 --- a/heartbeat/Raid1 +++ b/heartbeat/Raid1 @@ -1,480 +1,468 @@ #!/bin/sh # # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # Raid1 # Description: Manages a software Raid1 device on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # RAID patches: http://people.redhat.com/mingo/raid-patches/ # Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3 # Sympathetic Ear: mailto:linux-raid@vger.kernel.org # # usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} # # # EXAMPLE config file /etc/raidtab.md0 # This file must exist on both machines! # # raiddev /dev/md0 # raid-level 1 # nr-raid-disks 2 # chunk-size 64k # persistent-superblock 1 # #nr-spare-disks 0 # device /dev/sda1 # raid-disk 0 # device /dev/sdb1 # raid-disk 1 # # EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf) # # DEVICE /dev/sdb1 /dev/sdc1 # ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799 ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} EOT } meta_data() { cat < 1.0 Resource script for RAID1. It manages a software Raid1 device on a shared storage medium. Manages a software RAID1 device on shared storage The RAID configuration file. e.g. /etc/raidtab or /etc/mdadm.conf. RAID config file The block device to use. Alternatively, set to "auto" to manage all devices specified in raidconf. block device The value for the homehost directive; this is an mdadm feature to protect RAIDs against being activated by accident. It is recommended to create RAIDs managed by the cluster with "homehost" set to a special value, so they are not accidentially auto-assembled by nodes not supposed to own them. Homehost for mdadm If processes or kernel threads are using the array, it cannot be stopped. We will try to stop processes, first by sending TERM and then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL. The lsof(8) program is required to get the list of array users. Of course, the kernel threads cannot be stopped this way. If the processes are critical for data integrity, then set this parameter to false. Note that in that case the stop operation will fail and the node will be fenced. force stop processes using the array END } list_conf_arrays() { test -f $RAIDCONF || { ocf_log err "$RAIDCONF gone missing!" exit $OCF_ERR_GENERIC } grep ^ARRAY $RAIDCONF | awk '{print $2}' } forall() { local func=$1 local checkall=$2 local mddev rc=0 for mddev in `list_conf_arrays`; do $func $mddev rc=$(($rc | $?)) [ "$checkall" = all ] && continue [ $rc -ne 0 ] && return $rc done return $rc } do_func() { local func=$1 if [ "$MDDEV" = auto ]; then - forall $func all + forall $func $2 else $func $MDDEV fi } # # START: Start up the RAID device # raid1_start() { local rc raid1_monitor rc=$? if [ $rc -eq $OCF_SUCCESS ]; then # md already online, nothing to do. return $OCF_SUCCESS fi if [ $rc -ne $OCF_NOT_RUNNING ]; then # If the array is in a broken state, this agent doesn't # know how to repair that. ocf_log err "$MDDEV in a broken state; cannot start (rc=$rc)" return $OCF_ERR_GENERIC fi # Insert raid personality module $MODPROBE raid1 if [ $? -ne 0 ] ; then # It is not fatal, chance is that we have raid1 builtin... ocf_log warn "Couldn't insert RAID1 module" fi grep -q "^Personalities.*\[raid1\]" /proc/mdstat 2>/dev/null if [ $? -ne 0 ] ; then ocf_log err "We don't have RAID1 support! Exiting" return $OCF_ERR_GENERIC fi if [ $HAVE_RAIDTOOLS = "true" ]; then # Run raidstart to start up the RAID array $RAIDSTART --configfile $RAIDCONF $MDDEV else # Run mdadm if [ "$MDDEV" = auto ]; then $MDADM --assemble --scan --config=$RAIDCONF $MDADM_HOMEHOST else $MDADM --assemble $MDDEV --config=$RAIDCONF $MDADM_HOMEHOST fi fi raid1_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS else ocf_log err "Couldn't start RAID for $MDDEV" return $OCF_ERR_GENERIC fi } # # STOP: stop the RAID device # mark_readonly() { local mddev=$1 local rc ocf_log info "Attempting to mark array $mddev readonly" $MDADM --readonly $mddev --config=$RAIDCONF rc=$? if [ $rc -ne 0 ]; then ocf_log err "Failed to set $mddev readonly (rc=$rc)" fi return $rc } raid1_stop_one() { ocf_log info "Stopping array $1" $MDADM --stop $1 --config=$RAIDCONF --wait-clean -W } get_users_pids() { local mddev=$1 local outp l ocf_log debug "running lsof to list $mddev users..." outp=`lsof $mddev | tail -n +2` echo "$outp" | awk '{print $2}' | sort -u echo "$outp" | while read l; do ocf_log warn "$l" done } stop_raid_users() { local pids - pids=`do_func get_users_pids | sort -u` + pids=`do_func get_users_pids all | sort -u` if [ -z "$pids" ]; then ocf_log warn "lsof reported no users holding arrays" return 2 else ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids fi } stop_arrays() { if [ $HAVE_RAIDTOOLS = "true" ]; then $RAIDSTOP --configfile $RAIDCONF $MDDEV else - if [ "$MDDEV" = auto ]; then - forall raid1_stop_one all - else - raid1_stop_one $MDDEV - fi + do_func raid1_stop_one all fi } raid1_stop() { local rc # See if the MD device is already cleanly stopped: if [ "$MDDEV" != auto ]; then raid1_monitor if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi fi # Turn off raid if ! stop_arrays; then if ocf_is_true $FORCESTOP; then if have_binary lsof; then stop_raid_users case $? in 2) false;; *) stop_arrays;; esac else ocf_log warn "install lsof(8) to list users holding the disk" false fi else false fi fi rc=$? if [ $rc -ne 0 ]; then ocf_log warn "Couldn't stop RAID for $MDDEV (rc=$rc)" if [ $HAVE_RAIDTOOLS != "true" ]; then - if [ "$MDDEV" = auto ]; then - forall mark_readonly all - else - mark_readonly $MDDEV - fi + do_func mark_readonly all fi return $OCF_ERR_GENERIC fi if [ "$MDDEV" = auto ]; then local mddev for mddev in `list_conf_arrays`; do raid1_monitor_one $mddev rc=$? [ $rc -ne $OCF_NOT_RUNNING ] && break done else raid1_monitor_one $MDDEV rc=$? fi if [ $rc -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi ocf_log err "RAID $MDDEV still active after stop command!" return $OCF_ERR_GENERIC } # # monitor: a less noisy status # raid1_monitor_one() { local mddev=$1 local md=`echo $mddev | sed 's,/dev/,,'` local rc local TRY_READD=0 # check if the md device exists first if [ ! -b $mddev ]; then ocf_log info "$mddev is not a block device" return $OCF_NOT_RUNNING fi if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then ocf_log info "$md not found in /proc/mdstat" return $OCF_NOT_RUNNING fi if [ $HAVE_RAIDTOOLS != "true" ]; then $MDADM --detail --test $mddev >/dev/null 2>&1 ; rc=$? case $rc in 0) ;; 1) ocf_log warn "$mddev has at least one failed device." TRY_READD=1 ;; 2) ocf_log err "$mddev has failed." return $OCF_ERR_GENERIC ;; 4) ocf_log err "mdadm failed on $mddev." return $OCF_ERR_GENERIC ;; *) ocf_log err "mdadm returned an unknown result ($rc)." return $OCF_ERR_GENERIC ;; esac fi if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \ -a $TRY_READD -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then ocf_log info "Attempting recovery sequence to re-add devices on $mddev:" $MDADM $mddev --fail detached $MDADM $mddev --remove failed $MDADM $mddev --re-add missing # TODO: At this stage, there's nothing to actually do # here. Either this worked or it did not. fi if ! dd if=$mddev count=1 bs=512 of=/dev/null \ iflag=direct >/dev/null 2>&1 ; then ocf_log err "$mddev: I/O error on read" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } raid1_monitor() { - if [ "$MDDEV" = auto ]; then - forall raid1_monitor_one - else - raid1_monitor_one $MDDEV - fi + do_func raid1_monitor_one } # # STATUS: is the raid device online or offline? # raid1_status() { # See if the MD device is online local rc raid1_monitor rc=$? if [ $rc -ne $OCF_SUCCESS ]; then echo "stopped" else echo "running" fi return $rc } raid1_validate_all() { return $OCF_SUCCESS } PROC_CLEANUP_TIME=3 if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac RAIDCONF="$OCF_RESKEY_raidconf" MDDEV="$OCF_RESKEY_raiddev" FORCESTOP="${OCF_RESKEY_force_stop:-1}" if [ -z "$RAIDCONF" ] ; then ocf_log err "Please set OCF_RESKEY_raidconf!" exit $OCF_ERR_CONFIGURED fi if [ ! -r "$RAIDCONF" ] ; then ocf_log err "Configuration file [$RAIDCONF] does not exist, or can not be opend!" exit $OCF_ERR_INSTALLED fi if [ -z "$MDDEV" ] ; then ocf_log err "Please set OCF_RESKEY_raiddev to the Raid device you want to control!" exit $OCF_ERR_CONFIGURED fi if ocf_is_true $FORCESTOP && ! have_binary lsof; then ocf_log warn "Please install lsof(8), we may need it when stopping Raid device! Now continuing anyway ..." fi HAVE_RAIDTOOLS=false if have_binary $MDADM >/dev/null 2>&1 ; then if [ -n "$OCF_RESKEY_homehost" ]; then MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}" else MDADM_HOMEHOST="" fi else check_binary $RAIDSTART HAVE_RAIDTOOLS=true fi if [ "$MDDEV" = "auto" -a $HAVE_RAIDTOOLS = true ]; then ocf_log err "autoconf supported only with mdadm!" exit $OCF_ERR_INSTALLED fi # At this stage, # [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM, # otherwise we have raidtools (raidstart and raidstop) # Look for how we are called case "$1" in start) raid1_start ;; stop) raid1_stop ;; status) raid1_status ;; monitor) raid1_monitor ;; validate-all) raid1_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $?