# NOTE(review): This file is a unified diff of tools/hb_report.in (Heartbeat's
# "hb_report" cluster report collector, POSIX sh) whose newlines have been
# collapsed onto a few very long physical lines. The collapse was lossy: the
# usage() here-doc body and the option-parsing / environment-collection section
# are missing between "cat<" and "> $DESTDIR/.env" below, and the patch's
# leading " "/"+"/"-" line markers were stripped. Because content is absent, a
# reformatted reconstruction cannot be trusted; the text is kept byte-identical
# and only annotated. Regenerate this patch from the upstream repository.
#
# Visible in this span (hedged where the mangling obscures detail):
#  - git diff + @@ -1,598 +1,599 @@ hunk header: the patch adds one line.
#  - GPLv2 license header; sources @sysconfdir@/ha.d/shellfuncs and
#    $HA_NOARCHBIN/utillib.sh for shared helpers (getnodes, findlogdcf, ...).
#  - Globals: PROG, LOGD_CF (via findlogdcf), SSH_OPTS default
#    ("-T -o Batchmode=yes"), LOG_PATTERNS ("CRIT: ERROR:").
#  - Master/slave role detection: "$1" == __slave marks this invocation as a
#    per-node slave collector; the master additionally honors ./ha.cf and
#    ./logd.cf overrides (the master is often the log host).
#  - start_remote_collectors(): for every node except ourselves ($WE), runs
#    "hb_report __slave $DESTDIR" over ssh, untars the slave's stdout stream
#    into $DESTDIR, and appends each background pipeline's pid to SLAVEPIDS
#    for the wait in the endgame section.
diff --git a/tools/hb_report.in b/tools/hb_report.in index dd4307d462..223cc67a97 100755 --- a/tools/hb_report.in +++ b/tools/hb_report.in @@ -1,598 +1,599 @@ #!/bin/sh # Copyright (C) 2007 Dejan Muhamedagic # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # . @sysconfdir@/ha.d/shellfuncs . $HA_NOARCHBIN/utillib.sh PROG=`basename $0` LOGD_CF=`findlogdcf @sysconfdir@ $HA_DIR` export LOGD_CF : ${SSH_OPTS="-T -o Batchmode=yes"} LOG_PATTERNS="CRIT: ERROR:" # # the instance where user runs hb_report is the master # the others are slaves # if [ x"$1" = x__slave ]; then SLAVE=1 fi # # if this is the master, allow ha.cf and logd.cf in the current dir # (because often the master is the log host) # if [ "$SLAVE" = "" ]; then [ -f ha.cf ] && HA_CF=ha.cf [ -f logd.cf ] && LOGD_CF=logd.cf fi usage() { cat< $DESTDIR/.env" done } start_remote_collectors() { for node in `getnodes`; do [ "$node" = "$WE" ] && continue ssh $SSH_OPTS $SSH_USER@$node "$HA_NOARCHBIN/hb_report __slave $DESTDIR" | (cd $DESTDIR && tar xf -) & SLAVEPIDS="$SLAVEPIDS $!" done } # # does ssh work? 
# NOTE(review): newline-collapsed continuation of the hb_report.in diff; text
# is kept byte-identical (a '#' inserted mid-line would comment out the rest of
# the collapsed line, so annotation is prepended here instead). The leading
# lone "#" closes a comment banner split across the previous physical line,
# and the trailing fragment 'echo "Log' is completed on the next one.
#
# Functions visible in this span (hedged — confirm against upstream):
#  - findsshuser(): probes TRY_SSH user candidates per node via trysshusers,
#    stopping at the first that works. (Looks buggy upstream: the loop
#    variable is $n but the skip test reads $node — predates this patch.)
#  - checkssh(): verifies passwordless ssh as $SSH_USER to every other node.
#  - getbacktraces(): gdb backtraces (via getbt) from cores under
#    $HA_VARLIB/cores within the [$1,$2] time window, written to file $3.
#  - getpeinputs(): PEngine input files referenced in $3/ha-log (or found by
#    time window), copied into $3 via a tar pipe to preserve paths.
#  - touch_DC_if_dc(): drops a "DC" marker file if this node is the current
#    Designated Coordinator per crmadmin -D.
#  - sys_info() / sys_stats(): basic platform info and process/uptime stats;
#    "+ netstat -i" is the single line this patch ADDS (note the surviving
#    diff "+" marker — it is not shell).
#  - sanitize(): masks sensitive values in ha.cf/cib.xml/pengine files.
#  - consolidate(): de-duplicates identical per-node files into one copy plus
#    per-node symlinks.
#  - checkcrmvfy() / checkbacktraces(): surface crm_verify warnings and
#    coredump evidence per node in the analysis.
#  - checklogs() (continues on next physical line): greps all collected
#    ha-log files for LOG_PATTERNS via a temp pattern file.
# findsshuser() { for n in `getnodes`; do [ "$node" = "$WE" ] && continue trysshusers $n $TRY_SSH && break done } checkssh() { for n in `getnodes`; do [ "$node" = "$WE" ] && continue checksshuser $n $SSH_USER || return 1 done return 0 } # # the usual stuff # getbacktraces() { flist=`find_files $HA_VARLIB/cores $1 $2` [ "$flist" ] && getbt $flist > $3 } getpeinputs() { n=`basename $3` flist=$( if [ -f $3/ha-log ]; then grep " $n peng.*PEngine Input stored" $3/ha-log | awk '{print $NF}' else find_files $HA_VARLIB/pengine $1 $2 fi | sed "s,$HA_VARLIB/,,g" ) [ "$flist" ] && (cd $HA_VARLIB && tar cf - $flist) | (cd $3 && tar xf -) } touch_DC_if_dc() { dc=`crmadmin -D 2>/dev/null | awk '{print $NF}'` if [ "$WE" = "$dc" ]; then touch $1/DC fi } # # some basic system info and stats # sys_info() { echo "Heartbeat version: `hb_ver`" crm_info echo "Platform: `uname`" echo "Kernel release: `uname -r`" echo "Architecture: `arch`" [ `uname` = Linux ] && echo "Distribution: `distro`" } sys_stats() { set -x uptime ps axf ps auxw top -b -n 1 + netstat -i set +x } # # replace sensitive info with '****' # sanitize() { for f in $1/ha.cf $1/cib.xml $1/pengine/*; do [ -f "$f" ] && sanitize_one $f done } # # remove duplicates if files are same, make links instead # consolidate() { for n in `getnodes`; do if [ -f $1/$2 ]; then rm $1/$n/$2 else mv $1/$n/$2 $1 fi ln -s ../$2 $1/$n done } # # some basic analysis of the report # checkcrmvfy() { for n in `getnodes`; do if [ -s $1/$n/crm_verify.txt ]; then echo "WARN: crm_verify reported warnings at $n:" cat $1/$n/crm_verify.txt fi done } checkbacktraces() { for n in `getnodes`; do [ -s $1/$n/backtraces.txt ] && { echo "WARN: coredumps found at $n:" egrep 'Core was generated|Program terminated' \ $1/$n/backtraces.txt | sed 's/^/ /' } done } checklogs() { logs=`find $1 -name ha-log` [ "$logs" ] || return pattfile=`maketempfile` || fatal "cannot create temporary files" for p in $LOG_PATTERNS; do echo "$p" done > $pattfile echo "" echo "Log 
# NOTE(review): newline-collapsed tail of the hb_report.in diff; kept
# byte-identical and only annotated (prepended, since an inline '#' would
# comment out the rest of the collapsed line). The opening 'patterns:"'
# fragment completes an echo started on the previous physical line. This span
# is also lossy: the mktemplate() here-doc body and the main script's parts
# 1-5 (argument parsing, DESTDIR setup, log search) were swallowed between
# "cat<" and "=100{exit 1}" — regenerate from upstream before trusting.
#
# Visible in this span (hedged where content is missing):
#  - checklogs() tail: grep each collected ha-log against the pattern file,
#    then remove the temp file.
#  - cibdiff()/txtdiff()/diffcheck(): per-file comparison dispatch — cib.xml
#    via crm_diff, everything else via plain diff.
#  - analyze_one(): pairwise-compares one file across all nodes against the
#    first node's copy, accumulating diff exit statuses in rc.
#  - analyze(): for each well-known file, reports "OK" (and consolidates to a
#    single copy) or "varies"; then runs the crm_verify/backtrace/log checks.
#  - mktemplate(): problem-description template (here-doc body LOST here).
#  - main sequence parts 5-7: capture ha-log for the window (or read it from
#    the log server via cat), gather per-node config/PE inputs/backtraces/
#    sysinfo when running on a cluster node, then either stream results back
#    (slave: tar to stdout) or, as master, wait for SLAVEPIDS, write
#    analysis.txt, let the user edit description.txt, and tar.gz the report;
#    finally remove $DESTDIR when REMOVE_DEST is set.
patterns:" for n in `getnodes`; do cat $logs | grep -f $pattfile done rm -f $pattfile } # # check if files have same content in the cluster # cibdiff() { crm_diff -c -n $1 -o $2 } txtdiff() { diff $1 $2 } diffcheck() { case `basename $1` in ccm_tool.txt) txtdiff $1 $2;; # worddiff? cib.xml) cibdiff $1 $2;; ha.cf) txtdiff $1 $2;; # confdiff? crm_mon.txt|sysinfo.txt) txtdiff $1 $2;; esac } analyze_one() { rc=0 node0="" for n in `getnodes`; do if [ "$node0" ]; then diffcheck $1/$node0/$2 $1/$n/$2 rc=$((rc+$?)) else node0=$n fi done return $rc } analyze() { flist="ccm_tool.txt cib.xml crm_mon.txt ha.cf sysinfo.txt" for f in $flist; do perl -e "printf \"Diff $f... \"" ls $1/*/$f >/dev/null 2>&1 || continue if analyze_one $1 $f; then echo "OK" consolidate $1 $f else echo "varies" fi done checkcrmvfy $1 checkbacktraces $1 checklogs $1 } # # description template, editing, and other notes # mktemplate() { cat<=100{exit 1}' || cat < $DESTDIR/$WE/ha-log else cat > $DESTDIR/ha-log # we are log server, probably fi else warning "could not find the log file on $WE" fi # # part 6: get all other info (config, stats, etc) # if [ "$THIS_IS_NODE" ]; then getconfig $DESTDIR/$WE getpeinputs $FROM_TIME $TO_TIME $DESTDIR/$WE getbacktraces $FROM_TIME $TO_TIME $DESTDIR/$WE/backtraces.txt touch_DC_if_dc $DESTDIR/$WE sanitize $DESTDIR/$WE sys_info > $DESTDIR/$WE/sysinfo.txt sys_stats > $DESTDIR/$WE/sysstats.txt 2>&1 fi # # part 7: endgame: # slaves tar their results to stdout, the master waits # for them, analyses results, asks the user to edit the # problem description template, and prints final notes # if [ "$SLAVE" ]; then (cd $DESTDIR && tar cf - $WE) else wait $SLAVEPIDS analyze $DESTDIR > $DESTDIR/analysis.txt mktemplate > $DESTDIR/description.txt [ "$NO_DESCRIPTION" ] || { echo press enter to edit the problem description... read junk edittemplate $DESTDIR/description.txt } cd $DESTDIR/.. tar czf $DESTDIR.tar.gz $DESTDIR/ finalword checksize fi [ "$REMOVE_DEST" ] && rm -r $DESTDIR