diff --git a/heartbeat/Raid1 b/heartbeat/Raid1
index 5faca8793..4bfafecad 100755
--- a/heartbeat/Raid1
+++ b/heartbeat/Raid1
@@ -1,468 +1,465 @@
 #!/bin/sh
 #
 #
 #	License:	GNU General Public License (GPL)
 #	Support:	linux-ha@lists.linux-ha.org
 #
 # Raid1
 #	Description:	Manages a software Raid1 device on a shared storage medium.
 #	Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
 #	Original Release: 25 Oct 2000
 #	RAID patches: http://people.redhat.com/mingo/raid-patches/
 #	Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3
 #	Sympathetic Ear: mailto:linux-raid@vger.kernel.org
 #
 # usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}
 #
 #
 # EXAMPLE config file /etc/raidtab.md0
 # This file must exist on both machines!
 #
 #  raiddev		    /dev/md0
 #  raid-level		    1
 #  nr-raid-disks	    2
 #  chunk-size		    64k
 #  persistent-superblock    1
 #  #nr-spare-disks	    0
 #    device		    /dev/sda1
 #    raid-disk		    0
 #    device		    /dev/sdb1
 #    raid-disk		    1
 #
 # EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf)
 #
 #  DEVICE /dev/sdb1 /dev/sdc1
 #  ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799
 #######################################################################
 # Initialization:

 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

 #######################################################################

 usage() {
 	cat <<-EOT
 	usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}
 	EOT
 }

 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="Raid1">
 <version>1.0</version>

 <longdesc lang="en">
 Resource script for RAID1. It manages a software Raid1 device on a shared
 storage medium.
 </longdesc>
 <shortdesc lang="en">Manages a software RAID1 device on shared storage</shortdesc>

 <parameters>
 <parameter name="raidconf" unique="0" required="1">
 <longdesc lang="en">
 The RAID configuration file. e.g. /etc/raidtab or /etc/mdadm.conf.
 </longdesc>
 <shortdesc lang="en">RAID config file</shortdesc>
 <content type="string" default="" />
 </parameter>

 <parameter name="raiddev" unique="0" required="1">
 <longdesc lang="en">
-The block device to use. Alternatively, set to "auto" to manage
-all devices specified in raidconf.
+One or more block devices to use, space separated. Alternatively,
+set to "auto" to manage all devices specified in raidconf.
 </longdesc>
 <shortdesc lang="en">block device</shortdesc>
 <content type="string" default="" />
 </parameter>

 <parameter name="homehost" unique="0" required="0">
 <longdesc lang="en">
 The value for the homehost directive; this is an mdadm feature to
 protect RAIDs against being activated by accident. It is recommended to
 create RAIDs managed by the cluster with "homehost" set to a special
 value, so they are not accidentially auto-assembled by nodes not
 supposed to own them.
 </longdesc>
 <shortdesc lang="en">Homehost for mdadm</shortdesc>
 <content type="string" default="" />
 </parameter>

 <parameter name="force_stop" unique="0" required="0">
 <longdesc lang="en">
 If processes or kernel threads are using the array, it cannot be
 stopped. We will try to stop processes, first by sending TERM and
 then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL.
 The lsof(8) program is required to get the list of array users.
 Of course, the kernel threads cannot be stopped this way.
 If the processes are critical for data integrity, then set this
 parameter to false. Note that in that case the stop operation
 will fail and the node will be fenced.
 </longdesc>
 <shortdesc lang="en">force stop processes using the array</shortdesc>
 <content type="boolean" default="true" />
 </parameter>
 </parameters>

 <actions>
 <action name="start" timeout="20s" />
 <action name="stop" timeout="20s" />
 <action name="status" depth="0" timeout="20s" interval="10" />
 <action name="monitor" depth="0" timeout="20s" interval="10" />
 <action name="validate-all" timeout="5" />
 <action name="meta-data" timeout="5" />
 </actions>
 </resource-agent>
 END
 }

 list_conf_arrays() {
 	test -f $RAIDCONF || {
 		ocf_log err "$RAIDCONF gone missing!"
 		exit $OCF_ERR_GENERIC
 	}
 	grep ^ARRAY $RAIDCONF | awk '{print $2}'
 }

 forall() {
 	local func=$1
 	local checkall=$2
 	local mddev rc=0
-	for mddev in `list_conf_arrays`; do
+	for mddev in $RAIDDEVS; do
 		$func $mddev
 		rc=$(($rc | $?))
 		[ "$checkall" = all ] && continue
 		[ $rc -ne 0 ] && return $rc
 	done
 	return $rc
 }

-do_func() {
-	local func=$1
-	if [ "$MDDEV" = auto ]; then
-		forall $func $2
-	else
-		$func $MDDEV
-	fi
-}
-
 are_arrays_stopped() {
 	local rc mddev
-	if [ "$MDDEV" = auto ]; then
-		for mddev in `list_conf_arrays`; do
-			raid1_monitor_one $mddev
-			rc=$?
-			[ $rc -ne $OCF_NOT_RUNNING ] && break
-		done
-	else
-		raid1_monitor_one $MDDEV
+	for mddev in $RAIDDEVS; do
+		raid1_monitor_one $mddev
 		rc=$?
-	fi
+		[ $rc -ne $OCF_NOT_RUNNING ] && break
+	done
 	test $rc -eq $OCF_NOT_RUNNING
 }

+md_assemble() {
+	local mddev=$1
+	$MDADM --assemble $mddev --config=$RAIDCONF $MDADM_HOMEHOST
+}
+
 #
 # START: Start up the RAID device
 #
 raid1_start() {
 	local rc
 	raid1_monitor
 	rc=$?
 	if [ $rc -eq $OCF_SUCCESS ]; then
 		# md already online, nothing to do.
 		return $OCF_SUCCESS
 	fi
 	if [ $rc -ne $OCF_NOT_RUNNING ]; then
 		# If the array is in a broken state, this agent doesn't
 		# know how to repair that.
-		ocf_log err "$MDDEV in a broken state; cannot start (rc=$rc)"
+		ocf_log err "$RAIDDEVS in a broken state; cannot start (rc=$rc)"
 		return $OCF_ERR_GENERIC
 	fi

 	# Insert raid personality module
 	$MODPROBE raid1
 	if [ $? -ne 0 ] ; then
 	    # It is not fatal, chance is that we have raid1 builtin...
 	    ocf_log warn "Couldn't insert RAID1 module"
 	fi
 	grep -q "^Personalities.*\[raid1\]" /proc/mdstat 2>/dev/null
 	if [ $? -ne 0 ] ; then
 	    ocf_log err "We don't have RAID1 support! Exiting"
 	    return $OCF_ERR_GENERIC
 	fi

 	if [ $HAVE_RAIDTOOLS = "true" ]; then
 	    # Run raidstart to start up the RAID array
 	    $RAIDSTART --configfile $RAIDCONF $MDDEV
 	else
-	    # Run mdadm
-	    if [ "$MDDEV" = auto ]; then
-		$MDADM --assemble --scan --config=$RAIDCONF $MDADM_HOMEHOST
-	    else
-		$MDADM --assemble $MDDEV --config=$RAIDCONF $MDADM_HOMEHOST
-	    fi
+	    forall md_assemble all
 	fi

 	raid1_monitor
 	if [ $? -eq $OCF_SUCCESS ]; then
 		return $OCF_SUCCESS
 	else
-		ocf_log err "Couldn't start RAID for $MDDEV"
+		ocf_log err "Couldn't start RAID for $RAIDDEVS"
 		return $OCF_ERR_GENERIC
 	fi
 }

 #
 # STOP: stop the RAID device
 #
 mark_readonly() {
 	local mddev=$1
 	local rc
 	ocf_log info "Attempting to mark array $mddev readonly"
 	$MDADM --readonly $mddev --config=$RAIDCONF
 	rc=$?
 	if [ $rc -ne 0 ]; then
 		ocf_log err "Failed to set $mddev readonly (rc=$rc)"
 	fi
 	return $rc
 }

 raid1_stop_one() {
 	ocf_log info "Stopping array $1"
 	$MDADM --stop $1 --config=$RAIDCONF --wait-clean -W
 }

 get_users_pids() {
 	local mddev=$1
 	local outp l
 	ocf_log debug "running lsof to list $mddev users..."
 	outp=`lsof $mddev | tail -n +2`
 	echo "$outp" | awk '{print $2}' | sort -u
 	echo "$outp" | while read l; do
 		ocf_log warn "$l"
 	done
 }

 stop_raid_users() {
 	local pids
-	pids=`do_func get_users_pids all | sort -u`
+	pids=`forall get_users_pids all | sort -u`
 	if [ -z "$pids" ]; then
 		ocf_log warn "lsof reported no users holding arrays"
 		return 2
 	else
 		ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids
 	fi
 }

 stop_arrays() {
 	if [ $HAVE_RAIDTOOLS = "true" ]; then
 		$RAIDSTOP --configfile $RAIDCONF $MDDEV
 	else
-		do_func raid1_stop_one all
+		forall raid1_stop_one all
 	fi
 }

 raid1_stop() {
 	local rc
 	# See if the MD device is already cleanly stopped:
 	if are_arrays_stopped; then
 		return $OCF_SUCCESS
 	fi

 	# Turn off raid
 	if ! stop_arrays; then
 		if ocf_is_true $FORCESTOP; then
 			if have_binary lsof; then
 				stop_raid_users
 				case $? in
 				2) false;;
 				*) stop_arrays;;
 				esac
 			else
 				ocf_log warn "install lsof(8) to list users holding the disk"
 				false
 			fi
 		else
 			false
 		fi
 	fi
 	rc=$?

 	if [ $rc -ne 0 ]; then
-		ocf_log warn "Couldn't stop RAID for $MDDEV (rc=$rc)"
+		ocf_log warn "Couldn't stop RAID for $RAIDDEVS (rc=$rc)"
 		if [ $HAVE_RAIDTOOLS != "true" ]; then
-			do_func mark_readonly all
+			forall mark_readonly all
 		fi
 		return $OCF_ERR_GENERIC
 	fi

 	if are_arrays_stopped; then
 		return $OCF_SUCCESS
 	fi

-	ocf_log err "RAID $MDDEV still active after stop command!"
+	ocf_log err "RAID $RAIDDEVS still active after stop command!"
 	return $OCF_ERR_GENERIC
 }

 #
 # monitor: a less noisy status
 #
 raid1_monitor_one() {
 	local mddev=$1
 	local md=`echo $mddev | sed 's,/dev/,,'`
 	local rc
 	local TRY_READD=0

 	# check if the md device exists first
 	if [ ! -b $mddev ]; then
 		ocf_log info "$mddev is not a block device"
 		return $OCF_NOT_RUNNING
 	fi
 	if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then
 		ocf_log info "$md not found in /proc/mdstat"
 		return $OCF_NOT_RUNNING
 	fi
 	if [ $HAVE_RAIDTOOLS != "true" ]; then
 		$MDADM --detail --test $mddev >/dev/null 2>&1 ; rc=$?
 		case $rc in
 		0)	;;
 		1)	ocf_log warn "$mddev has at least one failed device."
 			TRY_READD=1
 			;;
 		2)	ocf_log err "$mddev has failed."
 			return $OCF_ERR_GENERIC
 			;;
 		4)	ocf_log err "mdadm failed on $mddev."
 			return $OCF_ERR_GENERIC
 			;;
 		*)	ocf_log err "mdadm returned an unknown result ($rc)."
 			return $OCF_ERR_GENERIC
 			;;
 		esac
 	fi
 	if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \
 		-a $TRY_READD -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then
 		ocf_log info "Attempting recovery sequence to re-add devices on $mddev:"
 		$MDADM $mddev --fail detached
 		$MDADM $mddev --remove failed
 		$MDADM $mddev --re-add missing
 		# TODO: At this stage, there's nothing to actually do
 		# here. Either this worked or it did not.
 	fi

 	if ! dd if=$mddev count=1 bs=512 of=/dev/null \
 	     iflag=direct >/dev/null 2>&1 ; then
 		ocf_log err "$mddev: I/O error on read"
 		return $OCF_ERR_GENERIC
 	fi

 	return $OCF_SUCCESS
 }

 raid1_monitor() {
-	do_func raid1_monitor_one
+	forall raid1_monitor_one
 }

 #
 # STATUS: is the raid device online or offline?
 #
 raid1_status() {
 	# See if the MD device is online
 	local rc
 	raid1_monitor
 	rc=$?
 	if [ $rc -ne $OCF_SUCCESS ]; then
 		echo "stopped"
 	else
 		echo "running"
 	fi
 	return $rc
 }

 raid1_validate_all() {
 	return $OCF_SUCCESS
 }

 PROC_CLEANUP_TIME=3

 if
   ( [ $# -ne 1 ] )
 then
   usage
   exit $OCF_ERR_ARGS
 fi

 case "$1" in
   meta-data)
 	meta_data
 	exit $OCF_SUCCESS
 	;;
   usage)
 	usage
 	exit $OCF_SUCCESS
 	;;
   *)
 	;;
 esac

 RAIDCONF="$OCF_RESKEY_raidconf"
 MDDEV="$OCF_RESKEY_raiddev"
 FORCESTOP="${OCF_RESKEY_force_stop:-1}"

 if [ -z "$RAIDCONF" ] ; then
 	ocf_log err "Please set OCF_RESKEY_raidconf!"
 	exit $OCF_ERR_CONFIGURED
 fi

 if [ ! -r "$RAIDCONF" ] ; then
 	ocf_log err "Configuration file [$RAIDCONF] does not exist, or can not be opend!"
 	exit $OCF_ERR_INSTALLED
 fi

 if [ -z "$MDDEV" ] ; then
 	ocf_log err "Please set OCF_RESKEY_raiddev to the Raid device you want to control!"
 	exit $OCF_ERR_CONFIGURED
 fi

 if ocf_is_true $FORCESTOP && ! have_binary lsof; then
 	ocf_log warn "Please install lsof(8), we may need it when stopping Raid device! Now continuing anyway ..."
 fi

 HAVE_RAIDTOOLS=false
 if have_binary $MDADM >/dev/null 2>&1 ; then
   if [ -n "$OCF_RESKEY_homehost" ]; then
 	MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}"
   else
 	MDADM_HOMEHOST=""
   fi
 else
   check_binary $RAIDSTART
   HAVE_RAIDTOOLS=true
 fi

-if [ "$MDDEV" = "auto" -a $HAVE_RAIDTOOLS = true ]; then
-	ocf_log err "autoconf supported only with mdadm!"
-	exit $OCF_ERR_INSTALLED
+if [ $HAVE_RAIDTOOLS = true ]; then
+	if [ "$MDDEV" = "auto" ]; then
+		ocf_log err "autoconf supported only with mdadm!"
+		exit $OCF_ERR_INSTALLED
+	elif [ `echo $MDDEV|wc -w` -gt 1 ]; then
+		ocf_log err "multiple devices supported only with mdadm!"
+		exit $OCF_ERR_INSTALLED
+	fi
+fi
+
+if [ "$MDDEV" = "auto" ]; then
+	RAIDDEVS=`list_conf_arrays`
+else
+	RAIDDEVS="$MDDEV"
 fi

 # At this stage,
 # [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM,
 # otherwise we have raidtools (raidstart and raidstop)

 # Look for how we are called
 case "$1" in
   start)
 	raid1_start
 	;;
   stop)
 	raid1_stop
 	;;
   status)
 	raid1_status
 	;;
   monitor)
 	raid1_monitor
 	;;
   validate-all)
 	raid1_validate_all
 	;;
   *)
 	usage
 	exit $OCF_ERR_UNIMPLEMENTED
 	;;
 esac
 exit $?
diff --git a/tools/ocft/Raid1 b/tools/ocft/Raid1
index a8e50c5ac..1c239c8a0 100644
--- a/tools/ocft/Raid1
+++ b/tools/ocft/Raid1
@@ -1,132 +1,145 @@
 # Raid1
 #	by dejan@suse.de on
 #	Fri Aug 24 17:01:40 CEST 2012

 CONFIG
 	Agent Raid1
 	AgentRoot /usr/lib/ocf/resource.d/heartbeat
 	InstallPackage mdadm
 	HangTimeout 20

 VARIABLE
 	OCFT_disk0=/var/run/resource-agents/ocft-Raid1-disk0
 	OCFT_disk1=/var/run/resource-agents/ocft-Raid1-disk1
 	OCFT_disk2=/var/run/resource-agents/ocft-Raid1-disk2
 	OCFT_disk3=/var/run/resource-agents/ocft-Raid1-disk3
 	OCFT_raidconf=/var/run/resource-agents/ocft-mdadm.conf
 	OCFT_raiddev=/dev/md8
 	OCFT_raiddev2=/dev/md9
 	OCFT_loop0=/dev/loop6
 	OCFT_loop1=/dev/loop7
 	OCFT_loop2=/dev/loop4
 	OCFT_loop3=/dev/loop5

 SETUP-AGENT
 	losetup $OCFT_loop0 2>/dev/null && exit 1
 	losetup $OCFT_loop1 2>/dev/null && exit 1
 	losetup $OCFT_loop2 2>/dev/null && exit 1
 	losetup $OCFT_loop3 2>/dev/null && exit 1
 	dd if=/dev/zero of=$OCFT_disk0 bs=1 count=0 seek=16M 2>/dev/null
 	dd if=/dev/zero of=$OCFT_disk1 bs=1 count=0 seek=16M 2>/dev/null
 	dd if=/dev/zero of=$OCFT_disk2 bs=1 count=0 seek=16M 2>/dev/null
 	dd if=/dev/zero of=$OCFT_disk3 bs=1 count=0 seek=16M 2>/dev/null
 	losetup $OCFT_loop0 $OCFT_disk0
 	losetup $OCFT_loop1 $OCFT_disk1
 	losetup $OCFT_loop2 $OCFT_disk2
 	losetup $OCFT_loop3 $OCFT_disk3
 	mdadm --create $OCFT_raiddev -l 0 --raid-devices=2 $OCFT_loop0 $OCFT_loop1
 	mdadm --create $OCFT_raiddev2 -l 0 --raid-devices=2 $OCFT_loop2 $OCFT_loop3
 	echo DEVICE $OCFT_loop0 $OCFT_loop1 > $OCFT_raidconf
 	echo DEVICE $OCFT_loop2 $OCFT_loop3 >> $OCFT_raidconf
 	echo ARRAY $OCFT_raiddev devices=$OCFT_loop0,$OCFT_loop1 >> $OCFT_raidconf
 	echo ARRAY $OCFT_raiddev2 devices=$OCFT_loop2,$OCFT_loop3 >> $OCFT_raidconf

 CLEANUP-AGENT
 	mdadm --zero-superblock $OCFT_loop0
 	mdadm --zero-superblock $OCFT_loop1
 	mdadm --zero-superblock $OCFT_loop2
 	mdadm --zero-superblock $OCFT_loop3
 	mdadm --remove $OCFT_raiddev 2>/dev/null
 	mdadm --remove $OCFT_raiddev2 2>/dev/null
 	losetup -d $OCFT_loop0
 	losetup -d $OCFT_loop1
 	losetup -d $OCFT_loop2
 	losetup -d $OCFT_loop3
 	rm $OCFT_disk0 $OCFT_disk1 $OCFT_raidconf
 	rm $OCFT_disk2 $OCFT_disk3

 CASE-BLOCK required_args
 	Env OCF_RESKEY_raidconf=$OCFT_raidconf
 	Env OCF_RESKEY_raiddev=$OCFT_raiddev

-CASE-BLOCK auto_args
-	Env OCF_RESKEY_raidconf=$OCFT_raidconf
-	Env OCF_RESKEY_raiddev=auto
-
 CASE-BLOCK default_status
 	AgentRun stop

 CASE-BLOCK prepare
 	Include required_args
 	Include default_status

 CASE-BLOCK prepare_auto
-	Include auto_args
+	Include required_args
+	Env OCF_RESKEY_raiddev="auto"
+	Include default_status
+
+CASE-BLOCK prepare_multiple
+	Include required_args
+	Env OCF_RESKEY_raiddev="$OCFT_raiddev $OCFT_raiddev2"
 	Include default_status

 CASE "check base env"
 	Include prepare
 	AgentRun start OCF_SUCCESS

 CASE "check base env: invalid 'OCF_RESKEY_raiddev'"
 	Include prepare
 	Env OCF_RESKEY_raiddev=/dev/no_such_device
 	AgentRun start OCF_ERR_GENERIC

 CASE "check base env: unset 'OCF_RESKEY_raiddev'"
 	Include prepare
 	Unenv OCF_RESKEY_raiddev
 	AgentRun start OCF_ERR_CONFIGURED

 CASE "normal start"
 	Include prepare
 	AgentRun start OCF_SUCCESS

 CASE "normal stop"
 	Include prepare
 	AgentRun start
 	AgentRun stop OCF_SUCCESS

 CASE "double start"
 	Include prepare
 	AgentRun start
 	AgentRun start OCF_SUCCESS

 CASE "double stop"
 	Include prepare
 	AgentRun stop OCF_SUCCESS

 CASE "monitor when running"
 	Include prepare
 	AgentRun start
 	AgentRun monitor OCF_SUCCESS

 CASE "monitor when not running"
 	Include prepare
 	AgentRun monitor OCF_NOT_RUNNING

 CASE "normal start (auto)"
 	Include prepare_auto
 	AgentRun start OCF_SUCCESS
 	AgentRun monitor OCF_SUCCESS

 CASE "normal stop (auto)"
 	Include prepare_auto
 	AgentRun start
 	AgentRun stop OCF_SUCCESS
 	AgentRun monitor OCF_NOT_RUNNING

+CASE "normal start (multiple)"
+	Include prepare_multiple
+	AgentRun start OCF_SUCCESS
+	AgentRun monitor OCF_SUCCESS
+
+CASE "normal stop (multiple)"
+	Include prepare_multiple
+	AgentRun start
+	AgentRun stop OCF_SUCCESS
+	AgentRun monitor OCF_NOT_RUNNING
+
 CASE "unimplemented command"
 	Include prepare
 	AgentRun no_cmd OCF_ERR_UNIMPLEMENTED
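
--
For reviewers, a minimal usage sketch of the extended raiddev parameter; the
array names, UUIDs, and the crm resource id below are illustrative examples,
not values taken from this patch:

	# /etc/mdadm.conf listing two cluster-managed arrays (example UUIDs)
	DEVICE /dev/sdb1 /dev/sdc1 /dev/sdd1 /dev/sde1
	ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799
	ARRAY /dev/md1 UUID=f1d2d2f9:24e6b7ae:8b64ec42:30dc43c5

	# crm shell: a single Raid1 primitive now drives both arrays via a
	# space-separated raiddev list; raiddev="auto" would instead manage
	# every ARRAY line found in raidconf.
	primitive p_raid1 ocf:heartbeat:Raid1 \
		params raidconf="/etc/mdadm.conf" raiddev="/dev/md0 /dev/md1" \
		op monitor interval="60s" timeout="60s"

Note that the raidtools fallback (raidstart) rejects both "auto" and
multi-device lists during validation, since only mdadm supports them.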