# Reconstructed from the post-image ("+" side) of a unified diff against
# crm/test/helper.sh.in (index 1c74154f64..c8efdc0172).  The only hunk change
# is that is_running() queries crm_resource instead of crmadmin.
#
# Copyright (C) 2004 Andrew Beekhof
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Shared helpers for the CRM test scripts.  This file is meant to be sourced
# by a test script; it parses the test's command line, defines the helper
# functions below and finally checks that enough test nodes were named.
# The @...@ tokens are presumably substituted when the .in file is
# processed -- TODO confirm against the build.

[[ $0 != *helper.sh ]] || {
    echo >&2 "helper.sh expects to be sourced!"
    exit 1
}
# no more exit below here, except in err(), use return instead;
# gives more flexibility ...

# A test may preset required_nodes before sourcing; default is 0.
: ${required_nodes=0}

test_type=`basename $0`
test_nodes=1

dump_dir=~/ha-tests
HALIB_DIR=@libdir@/heartbeat
HAVAR_DIR=@HA_VARLIBDIR@/heartbeat
HAINIT_DIR=@INITDIR@

INIT_USER=root
CRMD_USER=hacluster
ADMIN_USER=root

test_node_1=""
test_node_2=""
test_node_3=""

#CRM_OPTS="-VVVVVV"
CRM_ERR_SHUTDOWN=0

OUTPUT_STDERR=0
OUTPUT_STDOUT=0
OUTPUT_CMDS=0
OUTPUT_ECHO=1

repeats=1
iteration=0
done=0

ip_rsc_1=192.168.9.161
ip_rsc_2=192.168.9.162

logfile=/var/log/messages
local_version=0
bsc_mode=0
crm_only=0

# Optional site-specific overrides, sourced before the command line is parsed
# so that explicit options still win.
if [ ! -z "$CRM_LOCAL_OPTS" ]; then
    echo "Checking $CRM_LOCAL_OPTS for local options"
    if [ -f "$CRM_LOCAL_OPTS" ]; then
        . "$CRM_LOCAL_OPTS"
    else
        echo "File $CRM_LOCAL_OPTS does not exist"
    fi
else
    echo "Hint: Define CRM_LOCAL_OPTS=/config/file/name.path to automatically set options for your environment"
fi

# Command-line parsing.  Stops at the first empty argument or "--".
while test "$done" = "0"; do
    case "$1" in
        --debug | -x)   set -x; shift;;
        --silent | -s)  OUTPUT_ECHO=0; shift;;
        --stdout | -o)  OUTPUT_STDOUT=1; shift;;
        --stderr | -e)  OUTPUT_STDERR=1; shift;;
        --verbose | -v) OUTPUT_CMDS=1; shift;;
        --crm-only | -C) crm_only=1; shift;;
        --local | -L)   local_version=1; shift;;
        --bsc | -B)     local_version=1; bsc_mode=1; shift;;
        --logfile | -l) logfile=${2?no logfile??} || return 1; shift 2;;
        --dumpdir | -d) dump_dir=${2?no dumpdir??} || return 1; shift 2;;
        --repeat | -r)  repeats=${2?no max repetitions??} || return 1; shift 2;;
        --init | -i)    INIT_USER=${2?no init user??} || return 1; shift 2;;
        --admin | -a)   ADMIN_USER=${2?no admin??} || return 1; shift 2;;
        --crmd | -c)    CRMD_USER=${2?no crmd user??} || return 1; shift 2;;
        --node1 | -1)   test_node_1=${2?no node1??} || return 1; shift 2;;
        --node2 | -2)   test_node_2=${2?no node2??} || return 1; shift 2;;
        # Fixed: the message used to say "no node2??" for --node3.
        --node3 | -3)   test_node_3=${2?no node3??} || return 1; shift 2;;
        --ip1)          ip_rsc_1=${2?no ip1??} || return 1; shift 2;;
        --ip2)          ip_rsc_2=${2?no ip2??} || return 1; shift 2;;
        ""|--)          done=1;;
        *)              echo "unknown option: $1"; return 1;;
    esac
done

# Create the per-run dump directory ($dump_dir/$test_type/<timestamp>),
# export its path as test_dump_dir and remember the current size of $logfile
# in crm_test_log_start.
function test_init() {
    export date_str=`date "+%Y-%b-%d_%H:%M"`
    export test_dump_dir="$dump_dir/$test_type/$date_str"
    mkdir -p "$test_dump_dir"
    echo Test dir: $test_dump_dir
    crm_test_log_start=$(stat -L -c %s $logfile)
}

# remote_cmd USER HOST COMMAND...
# Run COMMAND as USER: via "su -" in local mode, via ssh to HOST otherwise.
# Sets the globals "user" and "host".  Returns the command's exit status.
function remote_cmd() {
    user=$1
    host=$2
    shift
    shift
    if [ $local_version = "1" ]; then
        su - ${user} -c "$*"
    else
        `which ssh` ${user}@${host} "$*"
    fi
    return $?;
}

# start_node NODE
# Start heartbeat on NODE in the background, wait for the startup patterns to
# show up in $logfile, then make sure crmd is running (starting it when the
# "killall -INT crmd" probe returns 1).  Failures go through cts_assert.
# NOTE(review): ${testdir} is not defined in this file, and do_cmd() only
# special-cases "${HALIB_DIR}/crmtest/testutils.pl" -- confirm they match.
function start_node() {
    ha_node="$1"
    srch_node="$ha_node "
    if [ $local_version = "1" ]; then
        # no host-name prefix on the log lines in local mode
        srch_node=""
    fi

    do_cmd echo "wait for HA to start on $ha_node"
    crm_log_pos=$(stat -L -c %s $logfile)
    do_cmd remote_cmd $INIT_USER $ha_node $HALIB_DIR/heartbeat -M "2>&1 >/dev/null" &

    if [ $local_version = "1" ]; then
        do_cmd ${testdir}/testutils.pl -l ${logfile} -p $crm_log_pos --search -a -m 100 \
            -s "${srch_node}cib.*Hostname: ${ha_node}"
        cts_assert "Startup of Heartbeat on ${ha_node} failed."
    else
        do_cmd ${testdir}/testutils.pl -l ${logfile} -p $crm_log_pos --search -a -m 100 \
            -s "${srch_node}ccm(.*) info: Hostname: ${ha_node}" \
            -s "${srch_node}cib(.*) Hostname: ${ha_node}" \
            -s "${srch_node}heartbeat(.*) info: Starting(.*)lrmd" \
            -e "${srch_node}heartbeat(.*)Client(.*) respawning too fast"
        cts_assert "Startup of Heartbeat on ${ha_node} failed."
    fi

    #----
    do_cmd echo "Check if CRMd was started too"
    do_cmd remote_cmd $INIT_USER $ha_node "killall -INT crmd"
    if [ $? = 1 ]; then
        crm_log_pos=$(stat -L -c %s $logfile)
        do_cmd echo "wait for CRMd to start on $ha_node"
        do_cmd remote_cmd $CRMD_USER $ha_node $HALIB_DIR/crmd "$CRM_OPTS" "2>&1 >/dev/null" &
    fi

    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 20 \
        -s "${srch_node}crmd(.*) info:(.*)FSA Hostname: ${ha_node}" \
        -s "${srch_node}crmd(.*)State transition S_STARTING \-> S_PENDING"
    cts_assert "CRMd startup on ${ha_node} failed."
}

# stop_node NODE DOWN_CMD
# Shut down crmd (when crm_only=1) or heartbeat on NODE, wait for the
# S_STOPPING transition in $logfile and finally assert that crmd is gone.
# DOWN_CMD is stored in down_cmd but not used here.
function stop_node() {
    a_node=$1; shift;
    down_cmd=$1; shift;

    do_cmd echo Shutdown $a_node
    crm_log_pos=$(stat -L -c %s $logfile)

    srch_node="$a_node "
    if [ $local_version = "1" ]; then
        srch_node=""
    fi

    if [ $crm_only = 1 ]; then
        do_cmd echo "Shutting down CRMd"
        do_cmd remote_cmd $CRMD_USER $a_node $HALIB_DIR/crmadmin -K $a_node
    else
        do_cmd echo "Shutting down Heartbeat"
        do_cmd remote_cmd $INIT_USER $a_node $HALIB_DIR/heartbeat -k
    fi

    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -t 120 \
        -s "${srch_node} crmd(.*)State transition" \
        -s "${srch_node} crmd(.*)State transition (.*) \-> S_STOPPING" \
        -e "${srch_node} crmd(.*)Timer I_TERMINATE just popped"
    cts_assert "Shutdown of ${a_node} failed."

    # This pattern does not always get printed... most annoying
    #   -s "${srch_node} crmd(.*)\[crmd\] stopped" \
    sleep 5

    do_cmd echo "Check if CRMd _really_ stopped"
    # Fixed: this used $ha_node, a global left behind by start_node(), so the
    # probe could hit a different node than the one being stopped.
    do_cmd remote_cmd $INIT_USER $a_node "killall -INT crmd"
    cts_assert_false "CRMd is still running on ${a_node}."
}

# is_state HOST STATE
# Return 0 (and say so) when "crmadmin -S HOST" output contains STATE,
# 1 otherwise.
function is_state() {
    ret=1
    host=$1
    state=$2
    output=`remote_cmd $ADMIN_USER $host $HALIB_DIR/crmadmin -S $host | grep $state`
    if [ ! -z "$output" ]; then
        echo "$host is in $state"
        return 0
    fi
    echo "$host is NOT in $state"
    return $ret
}

# is_running RSC ASKHOST [HOST]
# Ask ASKHOST where RSC runs ("crm_resource -W").  Without HOST, succeed when
# the answer does not contain "NOT"; with HOST, succeed when it mentions HOST.
function is_running() {
    rsc=$1
    askhost=$2
    host=$3
    # Drop lines that merely echo the command path.
    output=`remote_cmd $ADMIN_USER $askhost $HALIB_DIR/crm_resource -W $rsc | grep -v $HALIB_DIR/crm_resource`
    if [ -z "$host" ]; then
        output=`echo $output | grep -v NOT`
    else
        output=`echo $output | grep $host`
    fi

    if [ -z "$output" ]; then
        if [ -z "$host" ]; then
            echo "$rsc is NOT running"
        else
            echo "$rsc is NOT running on host $host"
        fi
        return 1
    fi

    if [ -z "$host" ]; then
        echo "$rsc is running"
    else
        echo "$rsc is running on host $host"
    fi
    return 0
}

# is_dc HOST
# Return 0 when "crmadmin -S HOST" reports something other than S_PENDING,
# S_NOT_DC, S_ELECTION or S_RECOVERY; otherwise print the status and return 1.
function is_dc() {
    host=$1
    output=`remote_cmd $ADMIN_USER $host $HALIB_DIR/crmadmin -S $host | grep -v S_PENDING | grep -v S_NOT_DC | grep -v S_ELECTION | grep -v S_RECOVERY`
    if [ ! -z "$output" ]; then
        echo "$host is DC"
        return 0
    fi
    echo "$host is NOT DC"
    remote_cmd $ADMIN_USER $host $HALIB_DIR/crmadmin -S $host
    return 1
}

# wait_for_state STATE MAX HOST [TARGET]
# Poll is_state once per second until HOST reaches STATE; give up (return 1)
# after more than MAX attempts.  When HOST is the literal "DC", TARGET is
# asked ("crmadmin -D") for the DC's name first.
function wait_for_state() {
    state=$1
    max=$2
    host=$3
    target=$4

    sleep 1
    if [ "$host" = "DC" ]; then
        host=`remote_cmd $ADMIN_USER $target $HALIB_DIR/crmadmin -D`
        # keep only what follows the first ": "
        host=${host#*: }
    fi

    count=1
    until is_state $host $state; do
        if [ $count -gt $max ]; then
            echo "Attempt to reach $state on $host failed"
            return 1
        fi
        count=$((count + 1))
        sleep 1
    done
    echo "$state reached on $host"
    return 0
}

# make_node HOST UNAME [TYPE]
# Create a node entry in the CIB via "cibadmin -C -o nodes" on HOST.
# TYPE defaults to "member".
# NOTE(review): the XML literal assigned to node_xml is empty in this copy of
# the file (it looks like markup was stripped); restore it from the upstream
# source before relying on this function.
function make_node() {
    host=$1
    uname=$2
    type=$3
    uuid=`uuidgen`
    if [ -z "$type" ]; then
        type="member"
    fi
    node_xml="''";
    remote_cmd $ADMIN_USER $host "$HALIB_DIR/cibadmin -C -o nodes -X $node_xml"
}

# make_node_local HOST UNAME [TYPE]
# Same as make_node but passes -l to cibadmin.
# NOTE(review): node_xml is empty here as well -- see make_node.
function make_node_local() {
    host=$1
    uname=$2
    type=$3
    uuid=`uuidgen`
    if [ -z "$type" ]; then
        type="member"
    fi
    node_xml="''";
    remote_cmd $ADMIN_USER $host "$HALIB_DIR/cibadmin -l -C -o nodes -X $node_xml"
}

# make_incarnation HOST ID CLASS TYPE MAX PRIORITY VERSION STOPFAIL ARGS...
# NOTE(review): most of this function is missing from this copy of the file.
# Only the argument handling and the optional priority attribute survive; the
# XML template and the command that submitted it are gone.  The surviving
# tail fragment reads:   node_xml="$node_xml &1 > /dev/null
# Until it is restored from upstream the function fails loudly instead of
# running a half-built command.
function make_incarnation() {
    host=$1; shift
    id=$1; shift
    class=$1; shift
    type=$1; shift
    max=$1; shift
    priority=$1; shift
    version=$1; shift
    stopfail=$1; shift
    args=$*
    uuid=`uuidgen`
    node_xml="'"
    if [ ! -z $priority -a "x$priority" != "x-" ]; then
        node_xml="$node_xml priority=\\\"${priority}\\\""
    fi
    echo >&2 "make_incarnation: body incomplete in this copy of helper.sh"
    return 1
}

# make_resource HOST ID CLASS TYPE PRIORITY VERSION STOPFAIL ARGS...
# NOTE(review): the XML template and the submitting command are missing from
# this copy of the file; the surviving fragment reads:
#   node_xml="&1 > /dev/null
# The function therefore only parses its arguments and then fails loudly.
function make_resource() {
    host=$1
    id=$2
    class=$3
    type=$4
    priority=$5
    version=$6
    stopfail=$7
    shift 7
    args=$*
    uuid=`uuidgen`
    echo >&2 "make_resource: body incomplete in this copy of helper.sh"
    return 1
}

# make_constraint_adv HOST XML...
# Flatten XML onto one line and create it in the constraints section of the
# CIB on HOST.
function make_constraint_adv() {
    host=$1
    shift
    node_xml=$*
    node_xml=`echo $node_xml | tr '\n' ' '`
    do_cmd remote_cmd ${ADMIN_USER} ${host} "$HALIB_DIR/cibadmin -C -o constraints -VVVV -X \"$node_xml\""
}

# make_constraint HOST RSC RESULT
# NOTE(review): the XML literal is empty in this copy of the file (markup
# appears to have been stripped), so an empty constraint is submitted.
function make_constraint() {
    host=$1
    rsc=$2
    result=$3
    uuid1=`uuidgen`
    uuid2=`uuidgen`
    node_xml=""
    make_constraint_adv $host "$node_xml"
}

# cts_assert MESSAGE...
# Must be called directly after the command under test: asserts that the
# previous exit status ($?) was 0.
function cts_assert() {
    do_cts_assert $? 0 "Assert failed - " $*
}

# cts_assert_false MESSAGE...
# Like cts_assert, but expects the previous exit status to be 1.
function cts_assert_false() {
    do_cts_assert $? 1 "AssertFalse failed - " $*
}

# do_cts_assert RESULT EXPECTED MESSAGE...
# Calls err (which exits) when RESULT differs from EXPECTED, or when the
# status matches but core files are found on any node in $node_list.
function do_cts_assert() {
    result=$1
    expected=$2
    shift
    shift
    do_err=0

    if [ $result -ne $expected ]; then
        do_cmd echo "Expected rc ($expected) != $result"
        do_err=1
    else
        # this is a hack for the fact that stat dumps core with MALLOC_CHECK_=2
        # NOTE(review): $HADIR is not set anywhere in this file -- confirm.
        if [ $local_version = 1 ]; then
            file $HADIR/core* | grep -e "from 'stat'" | rm -f `awk '{ print $1 }' | tr ':' ' '`
        fi
        for a_host in $node_list; do
            remote_cmd ${ADMIN_USER} ${a_host} "ls -al ${HAVAR_DIR}/cores/*/core*" 2>/dev/null
            if [ $? -eq 0 ]; then
                do_cmd echo "Core file(s) found on $a_host"
                do_cmd remote_cmd ${ADMIN_USER} ${a_host} "file ${HAVAR_DIR}/cores/*/core*"
                do_err=1
            fi
        done
    fi

    if [ $do_err = 1 ]; then
        err $*
    fi
}

# err MESSAGE...
# Dump every node in $node_list, report the failure and exit 1.  Outside bsc
# mode the error text and the relevant slice of $logfile are also written to
# $test_dump_dir.  This is the only place that exits the sourcing shell.
function err() {
    do_cmd echo Dumping test nodes to $dump_dir
    node_num=1
    for a_host in $node_list; do
        dump_node $node_num $a_host
        node_num=$((node_num + 1))
    done

    crm_test_log_end=$(stat -L -c %s $logfile)
    do_cmd echo "ERROR: $*"
    do_cmd echo "test $test_type: FAILED"

    if [ $bsc_mode = 0 ]; then
        echo "see ${test_dump_dir}/test.txt for details..."
        echo "ERROR: $*" > $test_dump_dir/error.txt
        $HALIB_DIR/crmtest/testutils.pl --dump -p $crm_test_log_start -ep $crm_test_log_end -l $logfile > $test_dump_dir/cluster.log
    fi
    exit 1
}

# dump_node INDEX NODE
# Collect process list, log excerpt, cores and cib.xml under /tmp/crm on NODE,
# tar them up, copy the tarball into $test_dump_dir and unpack it there.
# Outside bsc mode extract.logs is run on the unpacked cluster-test.log.
function dump_node() {
    a_node_index=$1
    a_node=$2
    dump_file="/tmp/${a_node_index}-${a_node}.tgz"

    remote_cmd $ADMIN_USER ${a_node} "mkdir /tmp/crm"
    remote_cmd $ADMIN_USER ${a_node} "stat -L -c %s $logfile > /tmp/crm/.logend"
    remote_cmd ${ADMIN_USER} ${a_node} "ps axf > /tmp/crm/processes"
    remote_cmd ${ADMIN_USER} ${a_node} "$HALIB_DIR/crmtest/testutils.pl --dump -pf /tmp/crm/.logstart -l $logfile > /tmp/crm/cluster-test.log"
    # `which ssh` ${ADMIN_USER}@${a_node} "$HALIB_DIR/crmtest/testutils.pl --dump -pf /tmp/crm/.logstart -epf /tmp/crm/.logend -l $logfile > /tmp/crm/cluster-test.log"
    remote_cmd ${ADMIN_USER} ${a_node} "ln -s /tmp/crm /tmp/crm-${a_node_index}"
    remote_cmd ${ADMIN_USER} ${a_node} "ln -s ${HAVAR_DIR}/cores /tmp/crm"
    remote_cmd ${ADMIN_USER} ${a_node} "cp ${HAVAR_DIR}/crm/cib.xml /tmp/crm"
    # -h follows the symlinks created above
    remote_cmd ${ADMIN_USER} ${a_node} "tar -zhcvf ${dump_file} /tmp/crm-${a_node_index}/ "
    remote_cmd ${ADMIN_USER} ${a_node} "rm -f /tmp/crm-${a_node_index}"

    cd $test_dump_dir
    if [ $local_version = 1 ]; then
        cp -r ${dump_file} .
    else
        # Fixed: was "scp -r ${ADMIN_USER} ${a_node}:...", which treats the
        # user name as a source file instead of the login on the node.
        scp -r ${ADMIN_USER}@${a_node}:${dump_file} .
    fi
    tar zxvf ${a_node_index}-${a_node}.tgz
    cd -

    remote_cmd ${ADMIN_USER} ${a_node} "rm ${dump_file}"
    if [ $bsc_mode = 0 ]; then
        $HALIB_DIR/crmtest/extract.logs ${test_dump_dir} node-${a_node_index} ${test_dump_dir}/tmp/crm-${a_node_index}/cluster-test.log
    fi
}

# do_cmd COMMAND ARGS...
# Run COMMAND, logging to $test_dump_dir/test.txt (or /tmp/linux-ha.testlog in
# bsc mode) according to the OUTPUT_* switches, and return its exit status.
# "echo" is treated as a progress message rather than a command and always
# returns 0.
# NOTE(review): the "2>&1 >target" forms below duplicate stderr onto the
# original stdout *before* stdout is redirected, so stderr is not captured by
# the target.  Left unchanged because callers may rely on seeing stderr --
# confirm the intent.
function do_cmd() {
    cmdline=$*
    old_OUTPUT_STDOUT=$OUTPUT_STDOUT
    cmd=$1
    # Fixed: rc was never assigned on the "echo" path, so a stale status from
    # an earlier do_cmd call was returned.
    rc=0

    test_log_file=$test_dump_dir/test.txt
    if [ $bsc_mode = 1 ]; then
        test_log_file=/tmp/linux-ha.testlog
    fi

    # Verbose modes: announce what is about to run.
    if [ $OUTPUT_STDERR -eq 1 -o $OUTPUT_STDOUT -eq 1 -o $OUTPUT_CMDS -eq 1 ]; then
        if [ "$cmd" = "remote_cmd" ]; then
            user=$2
            host=$3
            shift 3
            echo `date` ": Running '$*' as ${user}@${host}" | tee -a $test_log_file;
        elif [ "$cmd" = "wait_for_state" ]; then
            host=$4
            if [ -z "$host" ]; then
                host="the DC"
            fi
            echo `date` ": Waiting for state $2 on $host (max $3 attempts)" | tee -a $test_log_file;
        elif [ "$cmd" = "${HALIB_DIR}/crmtest/testutils.pl" ]; then
            echo `date` ": Searching '$cmdline'" | tee -a $test_log_file;
        elif [ "$cmd" = "echo" ]; then
            echo -n ""
            # if [ $OUTPUT_CMDS -eq 1 -a $OUTPUT_STDOUT -eq 0 -a $OUTPUT_STDERR -eq 0 ]; then
            #     echo -n `date` ": "
            #     $cmdline
            # elif [ $OUTPUT_ECHO -eq 1 -a $OUTPUT_CMDS -eq 0 -a $OUTPUT_STDOUT -eq 0 -a $OUTPUT_STDERR -eq 0 ]; then
            #     $cmdline
            # fi
        else
            echo `date` ": Running '$cmdline'" | tee -a $test_log_file;
        fi
    fi

    if [ "$cmd" = "echo" ]; then
        # Progress messages are shown when stdout/stderr capture is off and
        # either command tracing or plain echo output is enabled.
        if [ $OUTPUT_STDOUT -eq 0 ] && [ $OUTPUT_STDERR -eq 0 ] &&
           { [ $OUTPUT_CMDS -eq 1 ] || { [ $OUTPUT_ECHO -eq 1 ] && [ $OUTPUT_CMDS -eq 0 ]; }; }; then
            echo -n `date` ": " | tee -a $test_log_file
            if [ $local_version != 1 ]; then
                echo -n -e "\033[01;32m"
            fi
            $cmdline | tee -a $test_log_file
            if [ $local_version != 1 ]; then
                echo -n -e "\033[00m"
            fi
        fi
    elif [ "$cmd" = "${HALIB_DIR}/crmtest/testutils.pl" ]; then
        # for the logs...
        if [ $OUTPUT_STDERR -eq 0 -a $OUTPUT_STDOUT -eq 0 -a $OUTPUT_CMDS -eq 0 ]; then
            : Searching for $cmdline
            # echo `date` ": Searching '$cmdline'" | tee -a $test_log_file;
        fi
        $cmdline -v 2>&1 >> $test_log_file
        rc=$?
    elif [ $OUTPUT_STDERR -eq 1 -a $OUTPUT_STDOUT -eq 1 ]; then
        $cmdline 2>&1 >> $test_log_file
        rc=$?
    elif [ $OUTPUT_STDERR -eq 0 -a $OUTPUT_STDOUT -eq 0 ]; then
        $cmdline 2>&1 >/dev/null
        rc=$?
    elif [ $OUTPUT_STDOUT -eq 0 ]; then
        $cmdline >/dev/null 2>> $test_log_file
        rc=$?
    elif [ $OUTPUT_STDERR -eq 0 ]; then
        $cmdline 2>/dev/null >> $test_log_file
        rc=$?
    else
        $cmdline 2>&1 >> $test_log_file
        rc=$?
    fi

    OUTPUT_STDOUT=$old_OUTPUT_STDOUT
    # echo Result: $rc >> $test_log_file
    return $rc
}

# crm-cleanup [KEEP]
# Run crm-cleanup-node on every node in $node_list.  Without an argument the
# test is (re)initialised first via test_init.
function crm-cleanup() {
    if [ -z "$1" ]; then
        test_init
    fi
    for a_host in $node_list; do
        crm-cleanup-node $a_host $1
    done
}

# crm-cleanup-node HOST [KEEP]
# Kill all cluster daemons on HOST and stop the test IP resources.  Without a
# second argument also wipe CIB files, cores and /tmp/crm, record the current
# log size in /tmp/crm/.logstart and write a marker into the log.
function crm-cleanup-node() {
    ha_host=$1

    do_cmd echo Cleaning up on $ha_host
    # be *very* sure everything has stopped
    do_cmd remote_cmd $INIT_USER $ha_host "killall -q9 heartbeat ccm lrmd crmd crmadmin cibadmin ha_logd stonithd cib pengine tengine"

    # resources too
    do_cmd remote_cmd $INIT_USER $ha_host "/etc/ha.d/resource.d/IPaddr $ip_rsc_1 stop" 2>&1 > /dev/null
    do_cmd remote_cmd $INIT_USER $ha_host "/etc/ha.d/resource.d/IPaddr $ip_rsc_2 stop" 2>&1 > /dev/null

    if [ -z "$2" ]; then
        # make *sure* theres nothing left over from last time
        do_cmd remote_cmd $INIT_USER $ha_host "rm -f $HAVAR_DIR/crm/cib*.xml"
        remote_cmd ${INIT_USER} ${ha_host} "rm -f /tmp/crm/cores" 2>/dev/null
        remote_cmd ${INIT_USER} ${ha_host} "rm -f ${HAVAR_DIR}/cores/*/*" 2>/dev/null
        do_cmd remote_cmd ${INIT_USER} ${ha_host} "ls -al ${HAVAR_DIR}/cores/"
        remote_cmd ${INIT_USER} ${ha_host} "rm -rf /tmp/crm/* /tmp/crm/.log*" 2>/dev/null
        do_cmd remote_cmd ${INIT_USER} ${ha_host} "ls -al ${HAVAR_DIR}/cores/"

        if [ `dirname $logfile` = "/tmp/crm" ]; then
            # the log file itself was just removed
            do_cmd remote_cmd $INIT_USER $ha_host "/etc/init.d/syslog restart"
        fi

        remote_cmd $ADMIN_USER $ha_host "stat -L -c %s $logfile > /tmp/crm/.logstart"
        do_cmd remote_cmd $ADMIN_USER $ha_host "logger -i -p local7.info __crmtest_${test_type}_log_mark__"
        do_cmd sleep 2
        do_cmd remote_cmd $ADMIN_USER $ha_host "echo 1 > /proc/sys/kernel/core_uses_pid"
        # zero out logs, maybe
    fi
}

# Remember the current size of $logfile in the exported crm_log_pos.
function mark_log() {
    export crm_log_pos=$(stat -L -c %s $logfile)
}

# gres PATTERN REPLACEMENT FILE...
# Replace the first match of PATTERN per line with REPLACEMENT in every FILE
# that contains PATTERN, keeping the original as .gres.<name> next to it.
# Files that are themselves .gres.* backups are skipped.
function gres() {
    if [ $# -lt "3" ]
    then
        echo Usage: gres pattern replacement files
        return 1
    fi
    pattern=$1
    replacement=$2
    shift; shift;
    filelist=$*

    # Ctrl-A as sed delimiter so "/" may appear in pattern and replacement.
    A="`echo | tr '\012' '\001' `"

    for file in $filelist; do
        fname=`basename $file`
        dname=`dirname $file`
        is_backup=${fname##.gres.}
        if [ "$is_backup" = "$fname" ]; then
            grep -q "$pattern" "$file"
            isthere=$?
            if [ $isthere = 0 ]; then
                # echo Processing $file...
                cp "$file" "$dname/.gres.$fname" && sed -e "s$A$pattern$A$replacement$A" "$dname/.gres.$fname" > "$file"
            fi
        # else
        #     echo "Ignoring backup of $is_backup in $dname"
        fi
    done
}

# successfully sourced
if [ $local_version = 1 ]; then
    test_node_1=`uname -n`
    test_node_2=""
    test_node_3=""
    echo "Set test_node_1=\"$test_node_1\""
    echo "Unset test_node_2 and test_node_3"
fi

node_list="$test_node_1 $test_node_2 $test_node_3"
node_count=0
for a_host in $node_list; do
    node_count=$((node_count + 1))
done

if [ $node_count -lt $required_nodes ]; then
    do_cmd echo "$required_nodes required for this test. You specified only $node_list"
    do_cmd echo "test: FAILED"
    # Fixed: "return false" is not a valid (numeric) argument to return.
    return 1
fi
true