diff --git a/heartbeat/Raid1 b/heartbeat/Raid1
index f92ac31d9..5499279e7 100644
--- a/heartbeat/Raid1
+++ b/heartbeat/Raid1
@@ -1,409 +1,421 @@
 #!/bin/sh
 #
 #
 # License:      GNU General Public License (GPL)
 # Support:      linux-ha@lists.linux-ha.org
 #
 # Raid1
 #      Description: Manages a software Raid1 device on a shared storage medium.
 #      Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
 #      Original Release: 25 Oct 2000
 #
 #      RAID patches:     http://people.redhat.com/mingo/raid-patches/
 #      Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3
 #      Sympathetic Ear:  mailto:linux-raid@vger.kernel.org
 #
 # usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}
 #
 # OCF parameters are as below:
 #      OCF_RESKEY_raidconf
 #              (name of MD configuration file, e.g. /etc/raidtab or /etc/mdadm.conf)
 #      OCF_RESKEY_raiddev
 #              (of the form /dev/md*; the block device to use)
 #
 # In /etc/ha.d/haresources, use a line such as:
 #      nodea 10.0.0.170 Raid1::/etc/raidtab.md0::/dev/md0 Filesystem::/dev/md0::/data1::ext2
 # (for systems with raidtools)
 # or
 #      nodea 10.0.0.170 Raid1::/etc/mdadm.conf::/dev/md0 Filesystem::/dev/md0::/data1::ext2
 # (for systems with mdadm)
 #
 # The "start" arg starts up the raid device.
 # The "stop" arg stops it. NOTE: all filesystems must be unmounted
 #      and no processes should be accessing the device.
 # The "status" arg just prints out whether the device is running or not.
 #
 #
 # DISCLAIMER: Use at your own risk!
 #
 # Besides all of the usual legalese that accompanies free software,
 # I will warn you that I do not yet use this kind of setup (software RAID
 # over shared storage) in production, and I have reservations about doing so.
 #
 # The linux md driver/scsi drivers under Raid 0.90 and kernel version 2.2
 # do not behave well when a drive is in the process of going bad.
 # The kernel slows down, but doesn't completely crash. This is about the
 # worst possible thing that could happen in an unattended HA type
 # environment. (Once the system is rebooted, the software raid stuff works
 # like a champ.)
 # My other reservation has to do with the interaction of RAID recovery with
 # journaling filesystems and other parts of the kernel. Subscribe to
 # linux-raid@vger.kernel.org for other opinions and possible solutions.
 #
 #      -EZA 25 Oct 2000
 #
 # SETUP:
 #
 # You might need to pass the command line parameter raid=noautodetect
 # in an HA environment so that the kernel doesn't automatically start
 # up your raid partitions when you boot the node. This means that it isn't
 # going to work to use RAID for the system disks as well as the shared disks.
 #
 # 0) Partition the disks to use for RAID. Use normal Linux partition
 #    types, not the RAID autodetect type, for your partitions.
 # 1) Create /etc/raidtab.md? on both systems (see example file below),
 #    or for systems with the mdadm tools create /etc/mdadm.conf (see example below).
 # 2) Initialize your raid partition with
 #       /sbin/mkraid --configfile /etc/raidtab.md? /dev/md?
 #    or create a mirror raid with the following command:
 #       mdadm --create /dev/md? -l 1 -n 2 /dev/sdb? /dev/sdb?
 # 3) Format your filesystem:
 #       mke2fs /dev/md0    # for ext2fs... a journaling filesystem would be nice
 # 4) Create the mount point on both systems.
 #    DO NOT add your raid filesystem to /etc/fstab.
 # 5) Copy this script (to /etc/rc.d/init.d if you wish) and edit it to
 #    reflect your desired settings.
 # 6) Modify the heartbeat 'haresources' (for non-crm heartbeat) or 'cib.xml'
 #    (for crm heartbeat) setup file.
 # 7) Unmount the filesystem and stop the raid device with 'raidstop' or 'mdadm -S'.
 # 8) Fire up heartbeat!
 #
 #
 # EXAMPLE config file /etc/raidtab.md0
 # This file must exist on both machines!
 #
 #  raiddev                  /dev/md0
 #  raid-level               1
 #  nr-raid-disks            2
 #  chunk-size               64k
 #  persistent-superblock    1
 #  #nr-spare-disks          0
 #    device                 /dev/sda1
 #    raid-disk              0
 #    device                 /dev/sdb1
 #    raid-disk              1
 #
 # EXAMPLE config file /etc/mdadm.conf (for more info: man mdadm.conf)
 #
 #  DEVICE /dev/sdb1 /dev/sdb2
 #  ARRAY /dev/md0 level=raid1 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799
 #######################################################################
 # Initialization:

 . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs

 #######################################################################

 usage() {
 	cat <<-EOT
 	usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}
 	EOT
 }

 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="Raid1">
 <version>1.0</version>

 <longdesc lang="en">
 Resource script for RAID1. It manages a software Raid1 device on a shared
 storage medium.
 </longdesc>
 <shortdesc lang="en">RAID1 resource agent</shortdesc>

 <parameters>
 <parameter name="raidconf" unique="0" required="1">
 <longdesc lang="en">
 The RAID configuration file, e.g. /etc/raidtab or /etc/mdadm.conf.
 </longdesc>
 <shortdesc lang="en">RAID config file</shortdesc>
 <content type="string" default="" />
 </parameter>

 <parameter name="raiddev" unique="0" required="1">
 <longdesc lang="en">
 The block device to use.
 </longdesc>
 <shortdesc lang="en">block device</shortdesc>
 <content type="string" default="" />
 </parameter>

 <parameter name="homehost" unique="0" required="0">
 <longdesc lang="en">
 The value for the homehost directive; this is an mdadm feature to
 protect RAIDs against being activated by accident. It is recommended to
 create RAIDs managed by the cluster with "homehost" set to a special
 value, so they are not accidentally auto-assembled by nodes not
 supposed to own them.
 </longdesc>
 <shortdesc lang="en">Homehost for mdadm</shortdesc>
 <content type="string" default="" />
 </parameter>
 </parameters>

 <actions>
 <action name="start" timeout="20s" />
 <action name="stop" timeout="20s" />
 <action name="status" depth="0" timeout="20s" interval="10" />
 <action name="monitor" depth="0" timeout="20s" interval="10" />
 <action name="validate-all" timeout="5" />
 <action name="meta-data" timeout="5" />
 </actions>
 </resource-agent>
 END
 }

 #
 # START: Start up the RAID device
 #
 raid1_start() {
 	# See if the md device is already mounted.
 	$MOUNT | grep -e "^$MDDEV\>" >/dev/null
 	if [ $? -ne 1 ] ; then
 		ocf_log err "Device $MDDEV is already mounted!"
 		return $OCF_ERR_GENERIC
 	fi

 	if [ "running" = `raid1_status` ]; then
 		# We are already online, do not bother
 		return $OCF_SUCCESS
 	fi

 	# Insert SCSI module
 	$MODPROBE scsi_hostadapter
 	if [ $? -ne 0 ] ; then
 		ocf_log warn "Couldn't insert SCSI module."
 	fi

 	# Insert raid personality module
 	$MODPROBE raid1
 	if [ $? -ne 0 ] ; then
 		# It is not fatal, chances are that we have raid1 builtin...
 		ocf_log warn "Couldn't insert RAID1 module"
 	fi
 	grep -q "^Personalities.*\[raid1\]" /proc/mdstat 2>/dev/null
 	if [ $? -ne 0 ] ; then
 		ocf_log err "We don't have RAID1 support! Exiting"
 		return $OCF_ERR_GENERIC
 	fi

 	if [ $HAVE_RAIDTOOLS = "true" ]; then
 		# Run raidstart to start up the RAID array
 		$RAIDSTART --configfile $RAIDCONF $MDDEV
 	else
 		# Run mdadm
 		$MDADM --assemble $MDDEV --config=$RAIDCONF $MDADM_HOMEHOST
 	fi

 	if [ "running" = `raid1_status` ]; then
 		return $OCF_SUCCESS
 	else
 		ocf_log err "Couldn't start RAID for $MDDEV"
 		return $OCF_ERR_GENERIC
 	fi
 }

 #
 # STOP: stop the RAID device
 #
 raid1_stop() {
 	# See if the MD device is online
 	if [ "stopped" = `raid1_status` ]; then
 		return $OCF_SUCCESS
 	fi

 	# See if the MD device is mounted
 	$MOUNT | grep -e "^$MDDEV\>" >/dev/null
 	if [ $? -ne 1 ] ; then
 		# Kill all processes open on filesystem
 		$FUSER -m -k $MDDEV

 		# the return from fuser doesn't tell us much
 		#if [ $? -ne 0 ] ; then
 		#	ocf_log "err" "Couldn't kill processes on $MOUNTPOINT"
 		#	return 1;
 		#fi

 		# Unmount the filesystem
 		$UMOUNT $MDDEV
 		$MOUNT | grep -e "^$MDDEV\>" >/dev/null
 		if [ $? -ne 1 ] ; then
 			ocf_log err "filesystem for $MDDEV still mounted"
 			return $OCF_ERR_GENERIC
 		fi
 	fi

 	# Turn off raid
 	if [ $HAVE_RAIDTOOLS = "true" ]; then
 		$RAIDSTOP --configfile $RAIDCONF $MDDEV
 	else
 		$MDADM --stop $MDDEV --config=$RAIDCONF $MDADM_HOMEHOST
 	fi

 	if [ $? -ne 0 ] ; then
 		ocf_log err "Couldn't stop RAID for $MDDEV"
 		return $OCF_ERR_GENERIC
 	fi

 	return $OCF_SUCCESS
 }

 #
-# STATUS: is the raid device online or offline?
+# monitor: a less noisy status
 #
-raid1_status() {
+raid1_monitor() {
+	# check if the md device exists first
+	[ -b $MDDEV ] &&
+		grep -e "^$MD[ \t:]" /proc/mdstat >/dev/null &&
+		return $OCF_SUCCESS
+	return $OCF_NOT_RUNNING
+}
+#
+# STATUS: is the raid device online or offline?
+#
+raid1_status() {
 	# See if the MD device is online
-	grep -e "^$MD[ \t:]" /proc/mdstat >/dev/null
-	if [ $? -ne 0 ] ; then
+	raid1_monitor
+	rc=$?
+	if [ $rc -ne $OCF_SUCCESS ]; then
 		echo "stopped"
-		return $OCF_NOT_RUNNING
 	else
 		echo "running"
-		return $OCF_SUCCESS
 	fi
-
-}
+	return $rc
+}

 raid1_validate_all() {
 	# Utilities used by this script
 	check_binary $MODPROBE
 	check_binary $FSCK
 	check_binary $FUSER
 	check_binary $MOUNT
 	check_binary $UMOUNT

 	if [ $HAVE_RAIDTOOLS = "true" ]; then
 		# $MDDEV should be an md device
 		lsraid -a $MDDEV 2>&1 | grep -q -i "is not an md device"
 		if [ $? -eq 0 ]; then
 			ocf_log err "$MDDEV is not an md device!"
 			exit $OCF_ERR_ARGS
 		fi

 		COMMENT="\(#.*\)"
 		grep -q "^[[:space:]]*raiddev[[:space:]]\+$MDDEV[[:space:]]*$COMMENT\?$" $RAIDCONF 2>/dev/null
 		if [ $? -ne 0 ]; then
 			ocf_log err "Raid device $MDDEV does not appear in $RAIDCONF"
 			exit $OCF_ERR_GENERIC
 		fi
 	else
 		error=`$MDADM --query $MDDEV $MDADM_HOMEHOST 2>&1`
 		if [ $? -ne 0 ]; then
 			ocf_log err "$error"
 			exit $OCF_ERR_GENERIC
 		fi
 		echo $error | grep -q -i "^$MDDEV[ \t:].*is not an md array"
 		if [ $? -eq 0 ]; then
 			ocf_log err "$MDDEV is not an md array!"
 			exit $OCF_ERR_ARGS
 		fi
 	fi

 	return $OCF_SUCCESS
 }

 if
   ( [ $# -ne 1 ] )
 then
 	usage
 	exit $OCF_ERR_ARGS
 fi

 case "$1" in
   meta-data)
 	meta_data
 	exit $OCF_SUCCESS
 	;;
   usage)
 	usage
 	exit $OCF_SUCCESS
 	;;
   *)
 	;;
 esac

 #
 # Check that the necessary environment variables are set
 #
 RAIDCONF=$OCF_RESKEY_raidconf
 MDDEV=$OCF_RESKEY_raiddev

 if [ -z "$RAIDCONF" ] ; then
 	ocf_log err "Please set OCF_RESKEY_raidconf!"
 	exit $OCF_ERR_ARGS
 fi

 if [ ! -r "$RAIDCONF" ] ; then
 	ocf_log err "Configuration file [$RAIDCONF] does not exist, or can not be opened!"
 	exit $OCF_ERR_ARGS
 fi

 if [ -z "$MDDEV" ] ; then
 	ocf_log err "Please set OCF_RESKEY_raiddev to the Raid device you want to control!"
 	exit $OCF_ERR_ARGS
 fi

 if [ ! -b "$MDDEV" ] ; then
 	ocf_log err "$MDDEV is not a block device!"
 	exit $OCF_ERR_ARGS
 fi

 # strip off the /dev/ prefix to get the name of the MD device
 MD=`echo $MDDEV | sed -e 's/\/dev\///'`

 HAVE_RAIDTOOLS=false
 if
 	have_binary $RAIDSTART
 then
 	check_binary $RAIDSTOP
 	HAVE_RAIDTOOLS=true
 else
 	check_binary $MDADM
 	if [ -n "$OCF_RESKEY_homehost" ]; then
 		MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}"
 	else
 		MDADM_HOMEHOST=""
 	fi
 fi

 # At this stage,
 # [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM,
 # otherwise we have raidtools (raidstart and raidstop)

 # Look for how we are called
 case "$1" in
   start)
 	raid1_start
 	;;
   stop)
 	raid1_stop
 	;;
-  status|monitor)
+  status)
 	raid1_status
 	;;
+  monitor)
+	raid1_monitor
+	;;
   validate-all)
 	raid1_validate_all
 	;;
   *)
 	usage
 	exit $OCF_ERR_UNIMPLEMENTED
 	;;
 esac

 exit $?
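
For reference, the new raid1_monitor boils down to roughly the following check; /dev/md0 and md0 here are stand-ins for whatever OCF_RESKEY_raiddev names, not values taken from the patch.

    # "running" means: the device node exists AND a line in /proc/mdstat
    # starts with the md name (mirrors the agent's [ -b ] && grep test)
    if [ -b /dev/md0 ] && grep -q "^md0[ :]" /proc/mdstat; then
        echo "md0 is running"
    else
        echo "md0 is not running"
    fi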
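
To exercise the split status/monitor actions by hand, the agent can be driven with the OCF environment it reads. This is a minimal sketch only: the /usr/lib/ocf install prefix and the config/device names are assumptions, so substitute whatever your packaging and array actually use.

    # assumed locations/names; adjust to your installation
    export OCF_ROOT=/usr/lib/ocf
    export OCF_RESKEY_raidconf=/etc/mdadm.conf
    export OCF_RESKEY_raiddev=/dev/md0

    RA=$OCF_ROOT/resource.d/heartbeat/Raid1

    $RA validate-all   # checks the config file and block device
    $RA start          # assembles (or raidstarts) the array
    $RA monitor        # quiet: exit 0 (OCF_SUCCESS) if running, 7 (OCF_NOT_RUNNING) if not
    $RA status         # legacy behaviour: prints "running" or "stopped"
    $RA stop           # stops the array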
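
The homehost parameter only buys protection if the array itself carries a matching homehost, which is easiest to arrange at creation time. A hedged example, with hypothetical device names and a hypothetical "ha-cluster" value:

    # record a cluster-specific homehost in the array's superblock so that
    # ordinary nodes do not activate it by accident
    mdadm --create /dev/md0 --level=1 --raid-devices=2 \
        --homehost=ha-cluster /dev/sdb1 /dev/sdc1

    # then configure the resource with OCF_RESKEY_homehost=ha-cluster;
    # the agent appends --homehost=ha-cluster to its mdadm calls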