diff --git a/tools/hb_report.in b/tools/hb_report.in index 223cc67a97..bf4146f52d 100755 --- a/tools/hb_report.in +++ b/tools/hb_report.in @@ -1,599 +1,600 @@ #!/bin/sh # Copyright (C) 2007 Dejan Muhamedagic # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # . @sysconfdir@/ha.d/shellfuncs . $HA_NOARCHBIN/utillib.sh PROG=`basename $0` LOGD_CF=`findlogdcf @sysconfdir@ $HA_DIR` export LOGD_CF : ${SSH_OPTS="-T -o Batchmode=yes"} LOG_PATTERNS="CRIT: ERROR:" # # the instance where user runs hb_report is the master # the others are slaves # if [ x"$1" = x__slave ]; then SLAVE=1 fi # # if this is the master, allow ha.cf and logd.cf in the current dir # (because often the master is the log host) # if [ "$SLAVE" = "" ]; then [ -f ha.cf ] && HA_CF=ha.cf [ -f logd.cf ] && LOGD_CF=logd.cf fi usage() { cat< $DESTDIR/.env" done } start_remote_collectors() { for node in `getnodes`; do [ "$node" = "$WE" ] && continue ssh $SSH_OPTS $SSH_USER@$node "$HA_NOARCHBIN/hb_report __slave $DESTDIR" | (cd $DESTDIR && tar xf -) & SLAVEPIDS="$SLAVEPIDS $!" done } # # does ssh work? # findsshuser() { for n in `getnodes`; do [ "$node" = "$WE" ] && continue trysshusers $n $TRY_SSH && break done } checkssh() { for n in `getnodes`; do [ "$node" = "$WE" ] && continue checksshuser $n $SSH_USER || return 1 done return 0 } # # the usual stuff # getbacktraces() { flist=`find_files $HA_VARLIB/cores $1 $2` [ "$flist" ] && getbt $flist > $3 } getpeinputs() { n=`basename $3` flist=$( if [ -f $3/ha-log ]; then grep " $n peng.*PEngine Input stored" $3/ha-log | awk '{print $NF}' else find_files $HA_VARLIB/pengine $1 $2 fi | sed "s,$HA_VARLIB/,,g" ) [ "$flist" ] && (cd $HA_VARLIB && tar cf - $flist) | (cd $3 && tar xf -) } touch_DC_if_dc() { dc=`crmadmin -D 2>/dev/null | awk '{print $NF}'` if [ "$WE" = "$dc" ]; then touch $1/DC fi } # # some basic system info and stats # sys_info() { echo "Heartbeat version: `hb_ver`" crm_info echo "Platform: `uname`" echo "Kernel release: `uname -r`" echo "Architecture: `arch`" [ `uname` = Linux ] && echo "Distribution: `distro`" } sys_stats() { set -x uptime ps axf ps auxw top -b -n 1 netstat -i set +x } # # replace sensitive info with '****' # sanitize() { for f in $1/ha.cf $1/cib.xml $1/pengine/*; do [ -f "$f" ] && sanitize_one $f done } # # remove duplicates if files are same, make links instead # consolidate() { for n in `getnodes`; do if [ -f $1/$2 ]; then rm $1/$n/$2 else mv $1/$n/$2 $1 fi ln -s ../$2 $1/$n done } # # some basic analysis of the report # checkcrmvfy() { for n in `getnodes`; do if [ -s $1/$n/crm_verify.txt ]; then echo "WARN: crm_verify reported warnings at $n:" cat $1/$n/crm_verify.txt fi done } checkbacktraces() { for n in `getnodes`; do [ -s $1/$n/backtraces.txt ] && { echo "WARN: coredumps found at $n:" egrep 'Core was generated|Program terminated' \ $1/$n/backtraces.txt | sed 's/^/ /' } done } checklogs() { logs=`find $1 -name ha-log` [ "$logs" ] || return pattfile=`maketempfile` || fatal "cannot create temporary files" for p in $LOG_PATTERNS; do echo "$p" done > $pattfile echo "" echo "Log patterns:" for n in `getnodes`; do cat $logs | grep -f $pattfile done rm -f $pattfile } # # check if files have same content in the cluster # cibdiff() { crm_diff -c -n $1 -o $2 } txtdiff() { diff $1 $2 } diffcheck() { case `basename $1` in ccm_tool.txt) txtdiff $1 $2;; # worddiff? cib.xml) cibdiff $1 $2;; ha.cf) txtdiff $1 $2;; # confdiff? crm_mon.txt|sysinfo.txt) txtdiff $1 $2;; esac } analyze_one() { rc=0 node0="" for n in `getnodes`; do if [ "$node0" ]; then diffcheck $1/$node0/$2 $1/$n/$2 rc=$((rc+$?)) else node0=$n fi done return $rc } analyze() { flist="ccm_tool.txt cib.xml crm_mon.txt ha.cf sysinfo.txt" for f in $flist; do perl -e "printf \"Diff $f... \"" ls $1/*/$f >/dev/null 2>&1 || continue if analyze_one $1 $f; then echo "OK" consolidate $1 $f else echo "varies" fi done checkcrmvfy $1 checkbacktraces $1 checklogs $1 } # # description template, editing, and other notes # mktemplate() { cat<=100{exit 1}' || cat < $DESTDIR/$WE/ha-log else cat > $DESTDIR/ha-log # we are log server, probably fi else warning "could not find the log file on $WE" fi # # part 6: get all other info (config, stats, etc) # if [ "$THIS_IS_NODE" ]; then getconfig $DESTDIR/$WE getpeinputs $FROM_TIME $TO_TIME $DESTDIR/$WE getbacktraces $FROM_TIME $TO_TIME $DESTDIR/$WE/backtraces.txt touch_DC_if_dc $DESTDIR/$WE sanitize $DESTDIR/$WE sys_info > $DESTDIR/$WE/sysinfo.txt sys_stats > $DESTDIR/$WE/sysstats.txt 2>&1 fi # # part 7: endgame: # slaves tar their results to stdout, the master waits # for them, analyses results, asks the user to edit the # problem description template, and prints final notes # if [ "$SLAVE" ]; then (cd $DESTDIR && tar cf - $WE) else wait $SLAVEPIDS analyze $DESTDIR > $DESTDIR/analysis.txt mktemplate > $DESTDIR/description.txt [ "$NO_DESCRIPTION" ] || { echo press enter to edit the problem description... read junk edittemplate $DESTDIR/description.txt } cd $DESTDIR/.. tar czf $DESTDIR.tar.gz $DESTDIR/ finalword checksize fi [ "$REMOVE_DEST" ] && rm -r $DESTDIR diff --git a/tools/utillib.sh b/tools/utillib.sh index de5b7b7de5..05e259120a 100644 --- a/tools/utillib.sh +++ b/tools/utillib.sh @@ -1,384 +1,384 @@ # Copyright (C) 2007 Dejan Muhamedagic # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # # ha.cf/logd.cf parsing # getcfvar() { [ -f $HA_CF ] || return sed 's/#.*//' < $HA_CF | grep -w "^$1" | sed 's/^[^[:space:]]*[[:space:]]*//' } iscfvarset() { test "`getcfvar \"$1\"`" } iscfvartrue() { getcfvar "$1" | egrep -qsi "^(true|y|yes|on|1)" } getnodes() { getcfvar node } # # ssh # checksshuser() { ssh -o Batchmode=yes $2@$1 true 2>/dev/null } trysshusers() { n=$1 shift 1 for u; do if checksshuser $n $u; then echo $u break fi done } # # logging # syslogmsg() { severity=$1 shift 1 logtag="" [ "$HA_LOGTAG" ] && logtag="-t $HA_LOGTAG" logger -p ${HA_LOGFACILITY:-"daemon"}.$severity $logtag $* } # # find log destination # uselogd() { iscfvartrue use_logd && return 0 # if use_logd true iscfvarset logfacility || iscfvarset logfile || iscfvarset debugfile || return 0 # or none of the log options set false } findlogdcf() { for f in \ - `which strings > /dev/null && + `which strings > /dev/null 2>&1 && strings $HA_BIN/ha_logd | grep 'logd\.cf'` \ `for d; do echo $d/logd.cf $d/ha_logd.cf; done` do if [ -f "$f" ]; then echo $f return 0 fi done return 1 } getlogvars() { savecf=$HA_CF if uselogd; then [ -f "$LOGD_CF" ] || fatal "could not find logd.cf or ha_logd.cf" HA_CF=$LOGD_CF fi HA_LOGFACILITY=`getcfvar logfacility` HA_LOGFILE=`getcfvar logfile` HA_DEBUGFILE=`getcfvar debugfile` HA_SYSLOGMSGFMT="" iscfvartrue syslogmsgfmt && HA_SYSLOGMSGFMT=1 HA_CF=$savecf } findmsg() { # this is tricky, we try a few directories syslogdir="/var/log /var/logs /var/syslog /var/adm /var/log/ha /var/log/cluster" favourites="ha-*" mark=$1 log="" for d in $syslogdir; do [ -d $d ] || continue log=`fgrep -l "$mark" $d/$favourites` && break log=`fgrep -l "$mark" $d/*` && break done 2>/dev/null echo $log } # # print a segment of a log file # str2time() { perl -e "\$time='$*';" -e ' eval "use Date::Parse"; if (!$@) { print str2time($time); } else { eval "use Date::Manip"; if (!$@) { print UnixDate(ParseDateString($time), "%s"); } } ' } getstamp() { if [ "$HA_SYSLOGMSGFMT" -o "$HA_LOGFACILITY" ]; then awk '{print $1,$2,$3}' else awk '{print $2}' | sed 's/_/ /' fi } linetime() { l=`tail -n +$2 $1 | head -1 | getstamp` str2time "$l" } findln_by_time() { logf=$1 tm=$2 first=1 last=`wc -l < $logf` while [ $first -le $last ]; do mid=$(((last+first)/2)) tmid=`linetime $logf $mid` if [ -z "$tmid" ]; then warning "cannot extract time: $logf:$mid" return fi if [ $tmid -gt $tm ]; then last=$((mid-1)) elif [ $tmid -lt $tm ]; then first=$((mid+1)) else break fi done echo $mid } dumplog() { logf=$1 from_time=$2 to_time=$3 from_line=`findln_by_time $logf $from_time` if [ -z "$from_line" ]; then warning "couldn't find line for time $from_time; corrupt log file?" return fi tail -n +$from_line $logf | if [ "$to_time" != 0 ]; then to_line=`findln_by_time $logf $to_time` if [ -z "$to_line" ]; then warning "couldn't find line for time $to_time; corrupt log file?" return fi head -$((to_line-from_line+1)) else cat fi } # # find files newer than a and older than b # touchfile() { t=`maketempfile` && perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' && echo $t } find_files() { dir=$1 from_time=$2 to_time=$3 from_stamp=`touchfile $from_time` findexp="-newer $from_stamp" if [ "$to_time" -a "$to_time" -gt 0 ]; then to_stamp=`touchfile $to_time` findexp="$findexp ! -newer $to_stamp" fi find $dir -type f $findexp rm -f $from_stamp $to_stamp } # # coredumps # findbinary() { - random_binary=`which cat` # suppose we are lucky + random_binary=`which cat 2>/dev/null` # suppose we are lucky binary=`gdb $random_binary $1 < /dev/null 2>/dev/null | grep 'Core was generated' | awk '{print $5}' | sed "s/^.//;s/[.']*$//"` [ x = x"$binary" ] && return fullpath=`which $binary 2>/dev/null` if [ x = x"$fullpath" ]; then [ -x $HA_BIN/$binary ] && echo $HA_BIN/$binary else echo $fullpath fi } getbt() { - which gdb > /dev/null || { + which gdb > /dev/null 2>&1 || { warning "please install gdb to get backtraces" return } for corefile; do absbinpath=`findbinary $corefile` [ x = x"$absbinpath" ] && return 1 echo "====================== start backtrace ======================" ls -l $corefile gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \ $absbinpath $corefile 2>/dev/null echo "======================= end backtrace =======================" done } # # heartbeat configuration/status # iscrmrunning() { crmadmin -D >/dev/null 2>&1 } dumpstate() { crm_mon -1 | grep -v '^Last upd' > $1/crm_mon.txt cibadmin -Ql > $1/cib.xml ccm_tool -p > $1/ccm_tool.txt 2>&1 } getconfig() { cp -p $HA_CF $1/ [ -f $LOGD_CF ] && cp -p $LOGD_CF $1/ if iscrmrunning; then dumpstate $1 else cp -p $HA_VARLIB/crm/cib.xml $1/ 2>/dev/null fi [ -f "$1/cib.xml" ] && crm_verify -V -x $1/cib.xml >$1/crm_verify.txt 2>&1 } # # remove values of sensitive attributes # # this is not proper xml parsing, but it will work under the # circumstances sanitize_xml_attrs() { sed $( for patt in $SANITIZE; do echo "-e /name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/" done ) } sanitize_hacf() { awk ' $1=="stonith_host"{ for( i=5; i<=NF; i++ ) $i="****"; } {print} ' } sanitize_one() { file=$1 compress="" echo $file | grep -qs 'gz$' && compress=gzip echo $file | grep -qs 'bz2$' && compress=bzip2 if [ "$compress" ]; then decompress="$compress -dc" else compress=cat decompress=cat fi tmp=`maketempfile` && ref=`maketempfile` || fatal "cannot create temporary files" touch -r $file $ref # save the mtime if [ "`basename $file`" = ha.cf ]; then sanitize_hacf else $decompress | sanitize_xml_attrs | $compress fi < $file > $tmp mv $tmp $file touch -r $ref $file rm -f $ref } # # keep the user posted # fatal() { echo "ERROR: $*" >&2 exit 1 } warning() { echo "WARN: $*" >&2 } info() { echo "INFO: $*" >&2 } pickfirst() { for x; do which $x >/dev/null 2>&1 && { echo $x return 0 } done return 1 } # # run a command everywhere # forall() { c="$*" for n in `getnodes`; do if [ "$n" = "`uname -n`" ]; then $c else if [ "$SSH_USER" ]; then echo $c | ssh $SSH_OPTS $SSH_USER@$n fi fi done } # # get some system info # distro() { - which lsb_release >/dev/null && { + which lsb_release >/dev/null 2>&1 && { lsb_release -d return } relf=`ls /etc/debian_version 2>/dev/null` || relf=`ls /etc/slackware-version 2>/dev/null` || relf=`ls -d /etc/*-release 2>/dev/null` && { for f in $relf; do test -f $f && { echo "`ls $f` `cat $f`" return } done } warning "no lsb_release no /etc/*-release no /etc/debian_version" } hb_ver() { - which dpkg > /dev/null && { + which dpkg > /dev/null 2>&1 && { dpkg-query -f '${Version}' -W heartbeat 2>/dev/null || dpkg-query -f '${Version}' -W heartbeat-2 return } - which rpm > /dev/null && { + which rpm > /dev/null 2>&1 && { rpm -q --qf '%{version}' heartbeat return } # more packagers? } crm_info() { $HA_BIN/crmd version 2>&1 }