diff --git a/heartbeat/Raid1 b/heartbeat/Raid1 index 56dd2154f..c4ae50e18 100755 --- a/heartbeat/Raid1 +++ b/heartbeat/Raid1 @@ -1,355 +1,418 @@ #!/bin/sh # # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # Raid1 # Description: Manages a software Raid1 device on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # RAID patches: http://people.redhat.com/mingo/raid-patches/ # Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3 # Sympathetic Ear: mailto:linux-raid@vger.kernel.org # # usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} # # # EXAMPLE config file /etc/raidtab.md0 # This file must exist on both machines! # # raiddev /dev/md0 # raid-level 1 # nr-raid-disks 2 # chunk-size 64k # persistent-superblock 1 # #nr-spare-disks 0 # device /dev/sda1 # raid-disk 0 # device /dev/sdb1 # raid-disk 1 # # EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf) # # DEVICE /dev/sdb1 /dev/sdc1 # ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799 ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} EOT } meta_data() { cat < 1.0 Resource script for RAID1. It manages a software Raid1 device on a shared storage medium. Manages a software RAID1 device on shared storage The RAID configuration file. e.g. /etc/raidtab or /etc/mdadm.conf. RAID config file -The block device to use. +The block device to use. Alternatively, set to "auto" to manage +all devices specified in raidconf. block device The value for the homehost directive; this is an mdadm feature to protect RAIDs against being activated by accident. It is recommended to create RAIDs managed by the cluster with "homehost" set to a special value, so they are not accidentially auto-assembled by nodes not supposed to own them. Homehost for mdadm END } +list_conf_arrays() { + test -f $RAIDCONF || { + ocf_log err "$RAIDCONF gone missing!" + exit $OCF_ERR_GENERIC + } + grep ^ARRAY $RAIDCONF | awk '{print $2}' +} +forall() { + local func=$1 + local checkall=$2 + local mddev rc=0 + for mddev in `list_conf_arrays`; do + $func $mddev + rc=$(($rc | $?)) + [ "$checkall" = all ] && continue + [ $rc -ne 0 ] && return $rc + done + return $rc +} + # # START: Start up the RAID device # raid1_start() { raid1_monitor rc=$? if [ $rc -eq $OCF_SUCCESS ]; then # md already online, nothing to do. return $OCF_SUCCESS fi if [ $rc -ne $OCF_NOT_RUNNING ]; then # If the array is in a broken state, this agent doesn't # know how to repair that. ocf_log err "$MDDEV in a broken state; cannot start (rc=$rc)" return $OCF_ERR_GENERIC fi # Insert SCSI module $MODPROBE scsi_hostadapter if [ $? -ne 0 ] ; then ocf_log warn "Couldn't insert SCSI module." fi # Insert raid personality module $MODPROBE raid1 if [ $? -ne 0 ] ; then # It is not fatal, chance is that we have raid1 builtin... ocf_log warn "Couldn't insert RAID1 module" fi grep -q "^Personalities.*\[raid1\]" /proc/mdstat 2>/dev/null if [ $? -ne 0 ] ; then ocf_log err "We don't have RAID1 support! Exiting" return $OCF_ERR_GENERIC fi if [ $HAVE_RAIDTOOLS = "true" ]; then # Run raidstart to start up the RAID array $RAIDSTART --configfile $RAIDCONF $MDDEV else # Run mdadm - $MDADM --assemble $MDDEV --config=$RAIDCONF $MDADM_HOMEHOST + if [ "$MDDEV" = auto ]; then + $MDADM --assemble --scan --config=$RAIDCONF $MDADM_HOMEHOST + else + $MDADM --assemble $MDDEV --config=$RAIDCONF $MDADM_HOMEHOST + fi fi raid1_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS else ocf_log err "Couldn't start RAID for $MDDEV" return $OCF_ERR_GENERIC fi } # # STOP: stop the RAID device # +mark_readonly() { + local mddev=$1 + local rc + ocf_log info "Attempting to mark array $mddev readonly" + $MDADM --readonly $mddev --config=$RAIDCONF + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "Failed to set $mddev readonly (rc=$rc)" + fi + return $rc +} +raid1_stop_one() { + ocf_log info "Stopping array $1" + $MDADM --stop $1 --config=$RAIDCONF --wait-clean -W +} raid1_stop() { + local rc # See if the MD device is already cleanly stopped: - raid1_monitor - if [ $? -eq $OCF_NOT_RUNNING ]; then - return $OCF_SUCCESS + if [ "$MDDEV" != auto ]; then + raid1_monitor + if [ $? -eq $OCF_NOT_RUNNING ]; then + return $OCF_SUCCESS + fi fi # Turn off raid if [ $HAVE_RAIDTOOLS = "true" ]; then $RAIDSTOP --configfile $RAIDCONF $MDDEV else - ocf_log info "Stopping array $MDDEV" - $MDADM --stop $MDDEV --config=$RAIDCONF --wait-clean -W + if [ "$MDDEV" = auto ]; then + forall raid1_stop_one all + else + raid1_stop_one $MDDEV + fi fi rc=$? if [ $rc -ne 0 ]; then ocf_log err "Couldn't stop RAID for $MDDEV (rc=$rc)" if [ $HAVE_RAIDTOOLS != "true" ]; then - ocf_log info "Attempting to mark array $MDDEV readonly" - $MDADM --readonly $MDDEV --config=$RAIDCONF - rc=$? - if [ $rc -ne 0 ]; then - ocf_log err "Failed to set $MDDEV readonly (rc=$rc)" + if [ "$MDDEV" = auto ]; then + forall mark_readonly all + else + mark_readonly $MDDEV fi fi return $OCF_ERR_GENERIC fi - raid1_monitor - if [ $? -eq $OCF_NOT_RUNNING ]; then + if [ "$MDDEV" = auto ]; then + local mddev + for mddev in `list_conf_arrays`; do + raid1_monitor_one $mddev + rc=$? + [ $rc -ne $OCF_NOT_RUNNING ] && break + done + else + raid1_monitor_one $MDDEV + rc=$? + fi + if [ $rc -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi ocf_log err "RAID $MDDEV still active after stop command!" return $OCF_ERR_GENERIC } # # monitor: a less noisy status # -raid1_monitor() { +raid1_monitor_one() { + local mddev=$1 + local md=`echo $mddev | sed 's,/dev/,,'` + local rc TRY_READD=0 # check if the md device exists first - if [ ! -b $MDDEV ]; then - ocf_log info "$MDDEV is not a block device" + if [ ! -b $mddev ]; then + ocf_log info "$mddev is not a block device" return $OCF_NOT_RUNNING fi - if ! grep -e "^$MD[ \t:]" /proc/mdstat >/dev/null ; then - ocf_log info "$MD not found in /proc/mdstat" + if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then + ocf_log info "$md not found in /proc/mdstat" return $OCF_NOT_RUNNING fi if [ $HAVE_RAIDTOOLS != "true" ]; then - $MDADM --detail --test $MDDEV >/dev/null 2>&1 ; rc=$? + $MDADM --detail --test $mddev >/dev/null 2>&1 ; rc=$? case $rc in 0) ;; - 1) ocf_log warn "$MDDEV has at least one failed device." + 1) ocf_log warn "$mddev has at least one failed device." TRY_READD=1 ;; - 2) ocf_log err "$MDDEV has failed." + 2) ocf_log err "$mddev has failed." return $OCF_ERR_GENERIC ;; - 4) ocf_log err "mdadm failed on $MDDEV." + 4) ocf_log err "mdadm failed on $mddev." return $OCF_ERR_GENERIC ;; *) ocf_log err "mdadm returned an unknown result ($rc)." return $OCF_ERR_GENERIC ;; esac fi if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \ -a $TRY_READD -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then - ocf_log info "Attempting recovery sequence to re-add devices on $MDDEV:" - $MDADM $MDDEV --fail detached - $MDADM $MDDEV --remove failed - $MDADM $MDDEV --re-add missing + ocf_log info "Attempting recovery sequence to re-add devices on $mddev:" + $MDADM $mddev --fail detached + $MDADM $mddev --remove failed + $MDADM $mddev --re-add missing # TODO: At this stage, there's nothing to actually do # here. Either this worked or it did not. fi - if ! dd if=$MDDEV count=1 bs=512 of=/dev/null \ + if ! dd if=$mddev count=1 bs=512 of=/dev/null \ iflag=direct >/dev/null 2>&1 ; then - ocf_log err "$MDDEV: I/O error on read" + ocf_log err "$mddev: I/O error on read" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } +raid1_monitor() { + if [ "$MDDEV" = auto ]; then + forall raid1_monitor_one + else + raid1_monitor_one $MDDEV + fi +} + # # STATUS: is the raid device online or offline? # raid1_status() { # See if the MD device is online + local rc raid1_monitor rc=$? if [ $rc -ne $OCF_SUCCESS ]; then echo "stopped" else echo "running" fi return $rc } raid1_validate_all() { return $OCF_SUCCESS } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac RAIDCONF="$OCF_RESKEY_raidconf" MDDEV="$OCF_RESKEY_raiddev" if [ -z "$RAIDCONF" ] ; then ocf_log err "Please set OCF_RESKEY_raidconf!" exit $OCF_ERR_CONFIGURED fi if [ ! -r "$RAIDCONF" ] ; then ocf_log err "Configuration file [$RAIDCONF] does not exist, or can not be opend!" exit $OCF_ERR_INSTALLED fi if [ -z "$MDDEV" ] ; then ocf_log err "Please set OCF_RESKEY_raiddev to the Raid device you want to control!" exit $OCF_ERR_CONFIGURED fi -if [ -e "$MDDEV" -a ! -b "$MDDEV" ] ; then - ocf_log err "$MDDEV is not a block device!" - exit $OCF_ERR_INSTALLED -fi - -# strip off the /dev/ prefix to get the name of the MD device -MD=`echo $MDDEV | sed -e 's/\/dev\///'` - - HAVE_RAIDTOOLS=false if have_binary $MDADM >/dev/null 2>&1 ; then if [ -n "$OCF_RESKEY_homehost" ]; then MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}" else MDADM_HOMEHOST="" fi else check_binary $RAIDSTART HAVE_RAIDTOOLS=true fi +if [ "$MDDEV" = "auto" -a $HAVE_RAIDTOOLS = true ]; then + ocf_log err "autoconf supported only with mdadm!" + exit $OCF_ERR_INSTALLED +fi + # At this stage, # [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM, # otherwise we have raidtools (raidstart and raidstop) # Look for how we are called case "$1" in start) raid1_start ;; stop) raid1_stop ;; status) raid1_status ;; monitor) raid1_monitor ;; validate-all) raid1_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $?