diff --git a/tools/crm_report.in b/tools/crm_report.in index 15df3ec7ba..e67d1bb92b 100755 --- a/tools/crm_report.in +++ b/tools/crm_report.in @@ -1,429 +1,430 @@ #!/bin/sh # Copyright (C) 2010 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Note the quotes around `$TEMP': they are essential! TEMP=`getopt \ -o hv?xl:f:t:n:T:Lpc:dSACHu:MVs \ --long help,cts:,cts-log:,node:,nodes:,from:,to:logfile:,as-directory,single-node,cluster:,user:,version,features \ -n 'crm_report' -- "$@"` eval set -- "$TEMP" times="" tests="" nodes="" compress=1 cluster="any" ssh_user="root" search_logs=1 report_data=`dirname $0` extra_logs="" sanitize_patterns="" log_patterns="CRIT: ERROR:" usage() { cat< "$l_base/$HALOG_F" fi cat<$l_base/.env LABEL="$label" REPORT_HOME="$r_base" REPORT_MASTER="$host" LOG_START=$start LOG_END=$end REMOVE=1 SANITIZE="$sanitize_patterns" CLUSTER=$cluster LOG_PATTERNS="$log_patterns" EXTRA_LOGS="$extra_logs" SEARCH_LOGS=$search_logs verbose=$verbose EOF for node in $nodes; do if [ `uname -n` = $node ]; then cat $l_base/.env $report_data/report.common $report_data/report.collector > $r_base/collector bash $r_base/collector else cat $l_base/.env $report_data/report.common $report_data/report.collector \ | ssh -l $ssh_user -T $node -- "mkdir -p $r_base; cat > $r_base/collector; bash $r_base/collector" | (cd $l_base && tar xf -) fi done analyze $l_base > $l_base/$ANALYSIS_F if [ -f $l_base/$HALOG_F ]; then node_events $l_base/$HALOG_F > $l_base/$EVENTS_F fi for node in $nodes; do cat $l_base/$node/$ANALYSIS_F >> $l_base/$ANALYSIS_F if [ -s $l_base/$node/$EVENTS_F ]; then cat $l_base/$node/$EVENTS_F >> $l_base/$EVENTS_F elif [ -s $l_base/$HALOG_F ]; then awk "\$4==\"$nodes\"" $l_base/$EVENTS_F >> $l_base/$n/$EVENTS_F fi done log " " if [ $compress = 1 ]; then fname=`shrink $l_base` rm -rf $l_base log "Collected results are available in $fname" log " " log "Please create a bug entry at" log " http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker" log "Include a description of your problem and attach this tarball" log " " log "Thank you for taking time to create this report." else log "Collected results are available in $l_base" fi log " " } # # check if files have same content in the cluster # cibdiff() { d1=`dirname $1` d2=`dirname $2` if [ -f $d1/RUNNING -a -f $d2/RUNNING ] || [ -f $d1/STOPPED -a -f $d2/STOPPED ]; then if which crm_diff > /dev/null 2>&1; then crm_diff -c -n $1 -o $2 else info "crm_diff(8) not found, cannot diff CIBs" fi else echo "can't compare cibs from running and stopped systems" fi } diffcheck() { [ -f "$1" ] || { echo "$1 does not exist" return 1 } [ -f "$2" ] || { echo "$2 does not exist" return 1 } case `basename $1` in $CIB_F) cibdiff $1 $2;; $B_CONF) diff -u $1 $2;; # confdiff? *) diff -u $1 $2;; esac } # # remove duplicates if files are same, make links instead # consolidate() { for n in $NODES; do if [ -f $1/$2 ]; then rm $1/$n/$2 else mv $1/$n/$2 $1 fi ln -s ../$2 $1/$n done } analyze_one() { rc=0 node0="" for n in $NODES; do if [ "$node0" ]; then diffcheck $1/$node0/$2 $1/$n/$2 rc=$(($rc+$?)) else node0=$n fi done return $rc } analyze() { flist="$HOSTCACHE $MEMBERSHIP_F $CIB_F $CRM_MON_F $B_CONF logd.cf $SYSINFO_F" for f in $flist; do printf "Diff $f... " ls $1/*/$f >/dev/null 2>&1 || { echo "no $1/*/$f :/" continue } if analyze_one $1 $f; then echo "OK" [ "$f" != $CIB_F ] && consolidate $1 $f else echo "" fi done } do_cts() { test_sets=`echo $tests | tr ',' ' '` for test_set in $test_sets; do start_time=0 start_test=`echo $test_set | tr '-' ' ' | awk '{print $1}'` end_time=0 end_test=`echo $test_set | tr '-' ' ' | awk '{print $2}'` if [ x$end_test = x ]; then msg="Extracting test $start_test" label="CTS-`date +"%a-%d-%b-%Y"`-$start_test" end_test=`expr $start_test + 1` else msg="Extracting set $start_test to $end_test" label="CTS-`date +"%a-%d-%b-%Y"`-$start_test-$end_test" end_test=`expr $end_test + 1` fi if [ $start_test = 0 ]; then start_pat="BEGINNING [0-9].* TESTS" else start_pat="Running test.*\[ *$start_test\]" fi if [ x$ctslog = x ]; then ctslog=`findmsg 1 "$start_pat"` fi if [ x$ctslog = x ]; then fatal "No CTS control file detected" fi + log "Using CTS control file: $ctslog" line=`grep -n "$start_pat" $ctslog | tail -1 | sed 's/:.*//'` if [ ! -z "$line" ]; then start_time=`linetime $ctslog $line` fi line=`grep -n "Running test.*\[ *$end_test\]" $ctslog | tail -1 | sed 's/:.*//'` if [ ! -z "$line" ]; then end_time=`linetime $ctslog $line` fi if [ -z "$nodes" ]; then - debug "Using CTS control file: $ctslog" nodes=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '` + log "Calculated node list: $nodes" fi if [ $end_time -lt $start_time ]; then debug "Test didn't complete, grabbing everything up to now" end_time=`date +%s` fi if [ $start_time != 0 ];then log "$msg (`time2str $start_time` to `time2str $end_time`)" collect_data $label $start_time $end_time $ctslog else fatal "$msg failed: not found" fi done } getnodes() { if [ -z $1 ]; then cluster=`get_cluster_type` else cluster=$1 fi cluster_cf=`find_cluster_cf $cluster` # 1. Live if ps -ef | egrep -qs [c]rmd then debug "Querying CRM for nodes" cibadmin -Ql -o nodes | awk ' /type="normal"/ { for( i=1; i<=NF; i++ ) if( $i~/^uname=/ ) { sub("uname=.","",$i); sub("\".*","",$i); print $i; next; } } ' # 2. hostcache elif [ -f $HA_STATE_DIR/hostcache ]; then debug "Reading nodes from $HA_STATE_DIR/hostcache" awk '{print $1}' $HA_STATE_DIR/hostcache # 3. ha.cf elif [ "x$cluster" = "xheartbeat" ]; then debug "Reading nodes from $cluster_cf" getcfvar $cluster node $cluster_cf else # Look in the logs... logfile=`findmsg 1 "crm_update_peer"` debug "Reading nodes from $logfile" if [ ! -z "$logfile" ]; then grep crm_update_peer: $logfile | sed s/.*crm_update_peer// | sed s/://g | awk '{print $2}' | grep -v "(null)" | sort -u | tr '\n' ' ' fi fi } if [ "x$tests" != "x" ]; then do_cts elif [ "x$start_time" != "x" ]; then masterlog="" if [ -z "$sanitize_patterns" ]; then log "WARNING: The tarball produced by this program may contain" log " sensitive information such as passwords." log "" log "We will attempt to remove such information if you use the" log "-p option. For example: -p \"pass.*\" -p \"user.*\"" log "" log "However, doing this may reduce the ability for the recipients" log "to diagnose issues and generally provide assistance." log "" log "IT IS YOUR RESPONSIBILITY TO PROTECT SENSITIVE DATA FROM EXPOSURE" log "" fi if [ -z "$nodes" ]; then nodes=`getnodes $cluster` log "Calculated node list: $nodes" fi if [ -z "$nodes" ]; then fatal "Cannot determine node list, please specify manually with --nodes" fi if echo $nodes | grep -qs $host then debug "We are a cluster node" else debug "We are a log master" masterlog=`findmsg 1 "crmd\\|CTS"` fi if [ -z $end_time ]; then end_time=`perl -e 'print time()'` fi label="pcmk-`date +"%a-%d-%b-%Y"`" log "Collecting data from $nodes (`time2str $start_time` to `time2str $end_time`)" collect_data $label $start_time $end_time $masterlog else fatal "Not sure what to do, no tests or time ranges to extract" fi diff --git a/tools/report.collector b/tools/report.collector index 930047e433..39c59f4d75 100644 --- a/tools/report.collector +++ b/tools/report.collector @@ -1,654 +1,670 @@ # Copyright (C) 2007 Dejan Muhamedagic # Almost everything as part of hb_report # Copyright (C) 2010 Andrew Beekhof # Cleanups, refactoring, extensions # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # if echo $REPORT_HOME | grep -qs '^/' then debug "Using full path to working directory: $REPORT_HOME" else REPORT_HOME="$HOME/$REPORT_HOME" debug "Canonicalizing working directory path: $REPORT_HOME" fi findlogdcf() { for f in \ `test -x $CRM_DAEMON_DIR/ha_logd && which strings > /dev/null 2>&1 && strings $CRM_DAEMON_DIR/ha_logd | grep 'logd\.cf'` \ `for d; do echo $d/logd.cf $d/ha_logd.cf; done` do if [ -f "$f" ]; then echo $f debug "Located logd.cf at: $f" return 0 fi done debug "Could not determine logd.cf location" return 1 } # # find files newer than a and older than b # isnumber() { echo "$*" | grep -qs '^[0-9][0-9]*$' } touchfile() { t=`mktemp` && perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' && echo $t } find_files_clean() { [ -z "$from_stamp" ] || rm -f "$from_stamp" [ -z "$to_stamp" ] || rm -f "$to_stamp" from_stamp="" to_stamp="" } find_files() { dirs=$1 from_time=$2 to_time=$3 isnumber "$from_time" && [ "$from_time" -gt 0 ] || { warning "sorry, can't find files in [ $1 ] based on time if you don't supply time" return } trap find_files_clean 0 if ! from_stamp=`touchfile $from_time`; then warning "sorry, can't create temporary file for find_files" return fi findexp="-newer $from_stamp" if isnumber "$to_time" && [ "$to_time" -gt 0 ]; then if ! to_stamp=`touchfile $to_time`; then warning "sorry, can't create temporary file for find_files" find_files_clean return fi findexp="$findexp ! -newer $to_stamp" fi find $dirs -type f $findexp find_files_clean trap "" 0 } # # check permissions of files/dirs # pl_checkperms() { perl -e ' # check permissions and ownership # uid and gid are numeric # everything must match exactly # no error checking! (file should exist, etc) ($filename, $perms, $in_uid, $in_gid) = @ARGV; ($mode,$uid,$gid) = (stat($filename))[2,4,5]; $p=sprintf("%04o", $mode & 07777); $p ne $perms and exit(1); $uid ne $in_uid and exit(1); $gid ne $in_gid and exit(1); ' $* } num_id() { getent $1 $2 | awk -F: '{print $3}' } chk_id() { [ "$2" ] && return 0 echo "$1: id not found" return 1 } check_perms() { while read type f p uid gid; do [ -$type $f ] || { echo "$f wrong type or doesn't exist" continue } n_uid=`num_id passwd $uid` chk_id "$uid" "$n_uid" || continue n_gid=`num_id group $gid` chk_id "$gid" "$n_gid" || continue pl_checkperms $f $p $n_uid $n_gid || { echo "wrong permissions or ownership for $f:" ls -ld $f } done } # # coredumps # findbinary() { random_binary=`which cat 2>/dev/null` # suppose we are lucky binary=`gdb $random_binary $1 < /dev/null 2>/dev/null | grep 'Core was generated' | awk '{print $5}' | sed "s/^.//;s/[.':]*$//"` if [ x = x"$binary" ]; then debug "Could not detect the program name for core $1 from the gdb output; will try with file(1)" binary=$(file $1 | awk '/from/{ for( i=1; i<=NF; i++ ) if( $i == "from" ) { print $(i+1) break } }') binary=`echo $binary | tr -d "'"` binary=$(echo $binary | tr -d '`') if [ "$binary" ]; then binary=`which $binary 2>/dev/null` fi fi if [ x = x"$binary" ]; then warning "Could not find the program path for core $1" return fi fullpath=`which $binary 2>/dev/null` if [ x = x"$fullpath" ]; then if [ -x $CRM_DAEMON_DIR/$binary ]; then echo $CRM_DAEMON_DIR/$binary debug "Found the program at $CRM_DAEMON_DIR/$binary for core $1" else warning "Could not find the program path for core $1" fi else echo $fullpath debug "Found the program at $fullpath for core $1" fi } getbt() { which gdb > /dev/null 2>&1 || { warning "Please install gdb to get backtraces" return } for corefile; do absbinpath=`findbinary $corefile` [ x = x"$absbinpath" ] && continue echo "====================== start backtrace ======================" ls -l $corefile gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \ $absbinpath $corefile 2>/dev/null echo "======================= end backtrace =======================" done } getconfig() { target=$1; shift; for cf in $*; do if [ -e "$cf" ]; then cp -a "$cf" $target/ fi done crm_uuid -r > $target/$HB_UUID_F 2>&1 if ps -ef | egrep -qs [c]rmd then crm_mon -1 2>&1 | grep -v '^Last upd' > $target/$CRM_MON_F cibadmin -Ql 2>/dev/null > $target/${CIB_F}.live crm_node -p > $target/$MEMBERSHIP_F 2>&1 echo "$host" > $target/RUNNING else echo "$host" > $target/STOPPED fi if [ -f "$target/$CIB_F" ]; then crm_verify -V -x $target/$CIB_F >$target/$CRM_VERIFY_F 2>&1 CIB_file=$target/$CIB_F crm configure show >$target/$CIB_TXT_F 2>&1 fi } # # remove values of sensitive attributes # # this is not proper xml parsing, but it will work under the # circumstances sanitize_xml_attrs() { sed $( for patt in $SANITIZE; do echo "-e /name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/" done ) } sanitize_hacf() { awk ' $1=="stonith_host"{ for( i=5; i<=NF; i++ ) $i="****"; } {print} ' } sanitize_one_clean() { [ -z "$tmp" ] || rm -f "$tmp" tmp="" [ -z "$ref" ] || rm -f "$ref" ref="" } sanitize() { file=$1 compress="" if [ -z $SANITIZE ]; then return fi echo $file | grep -qs 'gz$' && compress=gzip echo $file | grep -qs 'bz2$' && compress=bzip2 if [ "$compress" ]; then decompress="$compress -dc" else compress=cat decompress=cat fi trap sanitize_one_clean 0 tmp=`mktemp` ref=`mktemp` if [ -z "$tmp" -o -z "$ref" ]; then sanitize_one_clean fatal "cannot create temporary files" fi touch -r $file $ref # save the mtime if [ "`basename $file`" = ha.cf ]; then sanitize_hacf else $decompress | sanitize_xml_attrs | $compress fi < $file > $tmp mv $tmp $file # note: cleaning $tmp up is still needed even after it's renamed # because its temp directory is still there. touch -r $ref $file sanitize_one_clean trap "" 0 } pickfirst() { for x; do which $x >/dev/null 2>&1 && { echo $x return 0 } done return 1 } # # get some system info # distro() { if which lsb_release >/dev/null 2>&1 then lsb_release -d debug "Using lsb_release for distribution info" return fi relf=`ls /etc/debian_version 2>/dev/null` || relf=`ls /etc/slackware-version 2>/dev/null` || relf=`ls -d /etc/*-release 2>/dev/null` && { for f in $relf; do test -f $f && { echo "`ls $f` `cat $f`" debug "Found `echo $relf | tr '\n' ' '` distribution release file(s)" return } done } warning "No lsb_release, no /etc/*-release, no /etc/debian_version: no distro information" } pkg_ver() { if which dpkg >/dev/null 2>&1 ; then pkg_mgr="deb" elif which rpm >/dev/null 2>&1 ; then pkg_mgr="rpm" elif which pkg_info >/dev/null 2>&1 ; then pkg_mgr="pkg_info" elif which pkginfo >/dev/null 2>&1 ; then pkg_mgr="pkginfo" else warning "Unknown package manager" return fi debug "The package manager is: $pkg_mgr" echo "The package manager is: $pkg_mgr" # for Linux .deb based systems case $pkg_mgr in deb) dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W for pkg in $*; do if dpkg-query -W $pkg 2>/dev/null ; then debug "Verifying installation of: $pkg" echo "Verifying installation of: $pkg" debsums -s $pkg 2>/dev/null fi done ;; rpm) rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' for pkg in $*; do if rpm -q $pkg >/dev/null 2>&1 ; then debug "Verifying installation of: $pkg" echo "Verifying installation of: $pkg" rpm --verify $pkg fi done ;; pkg_info) pkg_info ;; pkginfo) pkginfo | awk '{print $3}' # format? ;; esac } getbacktraces() { debug "Looking for backtraces: $*" flist=$( for f in `find_files "$CRM_CORE_DIRS" $1 $2`; do bf=`basename $f` test `expr match $bf core` -gt 0 && echo $f done) if [ "$flist" ]; then log "Found core files: `echo $flist | tr '\n' ' '`" getbt "$flist" fi } getpeinputs() { flist=$( find_files $PE_STATE_DIR $1 $2 | sed "s,`dirname $PE_STATE_DIR`/,,g" ) if [ "$flist" ]; then (cd `dirname $PE_STATE_DIR` && tar cf - $flist) | (cd $3 && tar xf -) debug "found `echo $flist | wc -w` pengine input files in $PE_STATE_DIR" fi } # # some basic system info and stats # sys_info() { cluster=$1; shift echo "Platform: `uname`" echo "Kernel release: `uname -r`" echo "Architecture: `uname -m`" if [ `uname` = Linux ]; then echo "Distribution: `distro`" fi cibadmin --version 2>&1 cibadmin -! 2>&1 case $1 in openais) : echo "openais version: how?" ;; + cman) + cman_tool -V + /usr/sbin/corosync -v 2>&1 + ;; corosync) /usr/sbin/corosync -v 2>&1 ;; heartbeat) heartbeat version: `$CRM_DAEMON_DIR/heartbeat -V` 2>&1 ;; esac # Cluster glue version hash (if available) stonith -V # Resource agents version hash echo "resource-agents: `grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs`" pkg_ver $* } sys_stats() { set -x uname -n uptime ps axf ps auxw top -b -n 1 ifconfig -a ip addr list netstat -i arp -an test -d /proc && { cat /proc/cpuinfo } lsscsi lspci mount df set +x } dlm_dump() { if which dlm_tool >/dev/null 2>&1 ; then echo NOTICE - Lockspace overview: dlm_tool ls dlm_tool ls | grep name | while read X N ; do echo NOTICE - Lockspace $N: dlm_tool lockdump $N done echo NOTICE - Lockspace history: dlm_tool dump fi } iscfvarset() { test "`getcfvar $1 $2`" } iscfvartrue() { getcfvar $1 $2 $3 | egrep -qsi "^(true|y|yes|on|1)" } uselogd() { cf_file=$2 case $1 in heartbeat) iscfvartrue $1 use_logd $cf_file && return 0 # if use_logd true iscfvarset $1 logfacility $cf_file || iscfvarset $1 logfile $cf_file || iscfvarset $1 debugfile $cf_file || return 0 # or none of the log options set false ;; *) iscfvartrue $1 use_logd $cf_file ;; esac } get_logfile() { cf_type=$1 cf_file="$2" cf_logd="$3" facility_var="logfacility" if [ -f "$cf_logd" ]; then if uselogd; then cf_file="$cf_logd" cf_type="logd" fi fi debug "Reading $cf_type log settings" case $cf_type in - openais|corosync) + cman|openais|corosync) debug "Reading log settings from $cf_file" if iscfvartrue $cf_type to_syslog $cf_file; then facility_var=syslog_facility elif iscfvartrue $cf_type to_file $cf_file; then logfile=`getcfvar $cf_type logfile $cf_file syslog_facility` fi ;; heartbeat|logd) debug "Reading log settings from $cf_file" if iscfvartrue $cf_type debug $cf_file then logfile=`getcfvar $cf_type debugfile $cf_file` else logfile=`getcfvar $cf_type logfile $cf_file` fi ;; *) debug "Unknown cluster type: $cf_type" echo "/var/log/messages" ;; esac if [ "x$logfile" != "x" -a -f "$logfile" ]; then echo $logfile else facility=`getcfvar $cf_type $facility_var $cf_file` [ "" = "$facility" ] && facility="daemon" if [ "none" = "$facility" ]; then fatal "No logging is configured" fi msg="Mark:pcmk:`perl -e 'print time()'`" logger -p $facility.info $msg >/dev/null 2>&1 findmsg 1 "$msg" fi } essential_files() { cat< $SYSINFO_F essential_files $cluster | check_perms > $PERMISSIONS_F 2>&1 getconfig "$REPORT_HOME/$host" "$cluster_cf" "$logd_cf" "$HA_STATE_DIR/crm/$CIB_F" "$HA_STATE_DIR/hostcache" "/etc/drbd.conf" "/etc/drbd.d" getpeinputs $LOG_START $LOG_END $REPORT_HOME/$host getbacktraces $LOG_START $LOG_END > $REPORT_HOME/$host/$BT_F dc=`crm_mon -1 2>/dev/null | awk '/Current DC/ {print $3}'` if [ "$host" = "$dc" ]; then echo "$host" > DC fi dlm_dump > $DLM_DUMP_F 2>&1 sys_stats > $SYSSTATS_F 2>&1 debug "Sanitizing files" # # replace sensitive info with '****' # -for f in `basename $cluster_cf` $CIB_F $CIB_TXT_F $CIB_F.live pengine/*; do +cf="" +if [ ! -z "$cluster_cf" ]; then + cf=`basename $cluster_cf` +fi +for f in $cf $CIB_F $CIB_TXT_F $CIB_F.live pengine/*; do if [ -f "$f" ]; then sanitize $f fi done # Grab logs #debug "Gathering logs: $logfile $EXTRA_LOGS" trap '[ -z "$pattfile" ] || rm -f "$pattfile"' 0 pattfile=`mktemp` || fatal "cannot create temporary files" for p in $LOG_PATTERNS; do echo "$p" done > $pattfile for l in $logfile $EXTRA_LOGS; do b=`basename $l` if [ ! -f "$l" ]; then # Not a file continue elif [ -f "$b" ]; then # We already have it continue fi dumplogset "$l" $LOG_START $LOG_END > "$b" echo "Log patterns $host:" > $ANALYSIS_F cat $b | grep -f $pattfile >> $ANALYSIS_F done rm -f $pattfile trap "" 0 # Purge files containing no information for f in `ls -1`; do if [ -d "$f" ]; then continue elif [ ! -s "$f" ]; then debug "Removing empty file: $f" rm -f $f fi done # Parse for events for l in $logfile $EXTRA_LOGS; do node_events `basename $logfile` > $EVENTS_F # Link the first logfile to a standard name if it doesn't yet exist if [ ! -e $HALOG_F ]; then ln -s `basename $l` $HALOG_F fi done if [ "$REPORT_MASTER" != "$host" ]; then debug "Streaming report back to $REPORT_MASTER" (cd $REPORT_HOME && tar cf - $host) if [ "$REMOVE" = "1" ]; then cd rm -rf $REPORT_HOME fi fi diff --git a/tools/report.common b/tools/report.common index 991c165110..1f90620591 100644 --- a/tools/report.common +++ b/tools/report.common @@ -1,633 +1,649 @@ # Copyright (C) 2007 Dejan Muhamedagic # Almost everything as part of hb_report # Copyright (C) 2010 Andrew Beekhof # Cleanups, refactoring, extensions # # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # host=`uname -n` shorthost=`echo $host | sed s:\\\\..*::` if [ -z $verbose ]; then verbose=0 fi # Target Files EVENTS_F=events.txt ANALYSIS_F=analysis.txt DESCRIPTION_F=description.txt HALOG_F=cluster-log.txt BT_F=backtraces.txt SYSINFO_F=sysinfo.txt SYSSTATS_F=sysstats.txt DLM_DUMP_F=dlm_dump.txt CRM_MON_F=crm_mon.txt MEMBERSHIP_F=members.txt HB_UUID_F=hb_uuid.txt HOSTCACHE=hostcache CRM_VERIFY_F=crm_verify.txt PERMISSIONS_F=permissions.txt CIB_F=cib.xml CIB_TXT_F=cib.txt EVENT_PATTERNS=" state do_state_transition membership pcmk_peer_update.*(lost|memb): quorum crmd.*crm_update_quorum|crmd.*ais.disp.*quorum.(lost|ac?quir) pause Process.pause.detected resources lrmd.*rsc:(start|stop) stonith te_fence_node|stonith-ng.*log_oper.*report|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=) start_stop Starting.heartbeat|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete " PACKAGES="pacemaker pacemaker-libs libpacemaker3 pacemaker-pygui pacemaker-pymgmt pymgmt-client openais libopenais2 libopenais3 corosync libcorosync4 resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord heartbeat heartbeat-common heartbeat-resources libheartbeat2 ocfs2-tools ocfs2-tools-o2cb ocfs2console ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen lvm2 lvm2-clvm cmirrord libdlm libdlm2 libdlm3 hawk ruby lighttpd kernel-default kernel-pae kernel-xen glibc " # # keep the user posted # log() { printf "%-10s $*\n" "$shorthost:" 1>&2 } debug() { if [ $verbose -gt 0 ]; then log "Debug: $*" fi } info() { log "$*" } warning() { log "WARN: $*" } fatal() { log "ERROR: $*" exit 1 } detect_host() { local_state_dir=/var if [ -d $local_state_dir/run/crm ]; then CRM_STATE_DIR=$local_state_dir/run/crm else for d in `find / -type d -name run`; do if [ -d $d/crm ]; then CRM_STATE_DIR=$d/crm local_state_dir=`dirname $d` fi done fi if [ ! -d $CRM_STATE_DIR ]; then fatal "Non-standard Pacemaker installation: State directory not found" fi debug "Machine state directory: $local_state_dir" debug "State files located in: $CRM_STATE_DIR" if [ -d $local_state_dir/lib/pengine ]; then PE_STATE_DIR=$local_state_dir/lib/pengine else for d in `find / -type d -name pengine`; do PE_STATE_DIR=$d break done fi if [ -z $PE_STATE_DIR ]; then fatal "Non-standard Pacemaker installation: Policy Engine directory not found" fi debug "PE files located in: $PE_STATE_DIR" HA_STATE_DIR=$local_state_dir/lib/heartbeat if [ ! -d $HA_STATE_DIR ]; then # TODO: Go looking fatal "Non-standard Heartbeat installation: Heartbeat state directory not found" fi debug "Heartbeat state files located in: $HA_STATE_DIR" CRM_CORE_DIRS="" for d in $HA_STATE_DIR/cores $local_state_dir/lib/corosync $local_state_dir/lib/openais; do if [ -d $d ]; then CRM_CORE_DIRS="$CRM_CORE_DIRS $d" fi done debug "Core files located under: $CRM_CORE_DIRS" for d in /usr/lib/heartbeat /usr/lib64/heartbeat; do if [ -f $d/crmd ]; then CRM_DAEMON_DIR=$d break fi done if [ ! -d $CRM_DAEMON_DIR ]; then for d in `find / -type d -name heartbeat`; do if [ -f $d/crmd ]; then CRM_DAEMON_DIR=$d break fi done fi if [ ! -d $CRM_DAEMON_DIR ]; then for f in `find / -type f -name crmd`; do if [ -f $f ]; then CRM_DAEMON_DIR=`basename $f` fi done fi if [ ! -d $CRM_DAEMON_DIR ]; then fatal "Non-standard Pacemaker installation: daemons not found" fi debug "Pacemaker daemons located under: $CRM_DAEMON_DIR" } time2str() { perl -e "use POSIX; print strftime('%x %X',localtime($1));" } get_time() { perl -e "\$time='$*';" -e ' eval "use Date::Parse"; if (!$@) { print str2time($time); } else { eval "use Date::Manip"; if (!$@) { print UnixDate(ParseDateString($time), "%s"); } } ' } get_time_() { warning "No time format specified for: $*" } get_time_syslog() { awk '{print $1,$2,$3}' } get_time_legacy() { awk '{print $2}' | sed 's/_/ /' } get_time_format() { t=0 l="" func="" trycnt=10 while [ $trycnt -gt 0 ] && read l; do t=$(get_time `echo $l | get_time_syslog`) if [ "$t" ]; then func="syslog" break fi t=$(get_time `echo $l | get_time_legacy`) if [ "$t" ]; then func="legacy" break fi trycnt=$(($trycnt-1)) done #debug "Logfile uses the $func time format" echo $func } linetime() { format=`get_time_format < $1` l=`tail -n +$2 $1 | grep ":[0-5][0-9]:" | head -1 | get_time_$format` get_time "$l" } # Find pattern in a logfile somewhere # Return $max ordered results by age (newest first) findmsg() { max=$1 pattern=$2 logfiles="" syslogdirs="/var/log /var/logs /var/syslog /var/adm /var/log/ha /var/log/cluster" for d in $syslogdirs; do if [ -d $d ]; then logfiles=`grep -l -e "$pattern" $d/*` && break fi done 2>/dev/null if [ "x$logfiles" != "x" ]; then list=`ls -t $logfiles | head -n $max | tr '\n' ' '` echo $list debug "Pattern \'$pattern\' found in: [ $list ]" else debug "Pattern \'$pattern\' not found anywhere" fi } node_events() { Epatt=`echo "$EVENT_PATTERNS" | while read title p; do [ -n "$p" ] && echo -n "|$p"; done | sed 's/.//' ` grep -E "$Epatt" $1 } pickfirst() { for x; do which $x >/dev/null 2>&1 && { echo $x return 0 } done return 1 } shrink() { src=$* target=$1.tar tar_options=cf variant=`pickfirst bzip2 gzip false` case $variant in bz*) tar_options="jcf" target="$target.bz2" ;; gz*) tar_options="zcf" target="$target.gz" ;; *) warning "Could not find a compression program, the resulting tarball may be huge" ;; esac tar $tar_options $target $src >/dev/null 2>&1 echo $target } findln_by_time() { local logf=$1 local tm=$2 local first=1 local last=`wc -l < $logf` while [ $first -le $last ]; do mid=$((($last+$first)/2)) trycnt=10 while [ $trycnt -gt 0 ]; do tmid=`linetime $logf $mid` [ "$tmid" ] && break warning "cannot extract time: $logf:$mid; will try the next one" trycnt=$(($trycnt-1)) # shift the whole first-last segment first=$(($first-1)) last=$(($last-1)) mid=$((($last+$first)/2)) done if [ -z "$tmid" ]; then warning "giving up on log..." return fi if [ $tmid -gt $tm ]; then last=$(($mid-1)) elif [ $tmid -lt $tm ]; then first=$(($mid+1)) else break fi done echo $mid } dumplog() { local logf=$1 local from_line=$2 local to_line=$3 [ "$from_line" ] || return tail -n +$from_line $logf | if [ "$to_line" ]; then head -$(($to_line-$from_line+1)) else cat fi } # # find log/set of logs which are interesting for us # # # find log slices # find_decompressor() { if echo $1 | grep -qs 'bz2$'; then echo "bzip2 -dc" elif echo $1 | grep -qs 'gz$'; then echo "gzip -dc" else echo "cat" fi } # # check if the log contains a piece of our segment # is_our_log() { local logf=$1 local from_time=$2 local to_time=$3 local cat=`find_decompressor $logf` local format=`$cat $logf | get_time_format` local first_time=$(get_time "`$cat $logf | head -1 | get_time_$format`") local last_time=$(get_time "`$cat $logf | tail -1 | get_time_$format`") if [ x = "x$first_time" -o x = "x$last_time" ]; then return 0 # skip (empty log?) fi if [ $from_time -gt $last_time ]; then # we shouldn't get here anyway if the logs are in order return 2 # we're past good logs; exit fi if [ $from_time -ge $first_time ]; then return 3 # this is the last good log fi # have to go further back if [ x = "x$to_time" -o $to_time -ge $first_time ]; then return 1 # include this log else return 0 # don't include this log fi } # # go through archived logs (timewise backwards) and see if there # are lines belonging to us # (we rely on untouched log files, i.e. that modify time # hasn't been changed) # arch_logs() { local logf=$1 local from_time=$2 local to_time=$3 # look for files such as: ha-log-20090308 or # ha-log-20090308.gz (.bz2) or ha-log.0, etc ls -t $logf $logf*[0-9z] 2>/dev/null | while read next_log; do is_our_log $next_log $from_time $to_time case $? in 0) ;; # noop, continue 1) echo $next_log # include log and continue debug "Found log $next_log" ;; 2) break;; # don't go through older logs! 3) echo $next_log # include log and continue debug "Found log $next_log" break ;; # don't go through older logs! esac done } # # print part of the log # drop_tmp_file() { [ -z "$tmp" ] || rm -f "$tmp" } print_logseg() { local logf=$1 local from_time=$2 local to_time=$3 # uncompress to a temp file (if necessary) local cat=`find_decompressor $logf` if [ "$cat" != "cat" ]; then tmp=`mktemp` $cat $logf > $tmp trap drop_tmp_file 0 sourcef=$tmp else sourcef=$logf tmp="" fi if [ "$from_time" = 0 ]; then FROM_LINE=1 else FROM_LINE=`findln_by_time $sourcef $from_time` fi if [ -z "$FROM_LINE" ]; then warning "couldn't find line for time $from_time; corrupt log file?" return fi TO_LINE="" if [ "$to_time" != 0 ]; then TO_LINE=`findln_by_time $sourcef $to_time` if [ -z "$TO_LINE" ]; then warning "couldn't find line for time $to_time; corrupt log file?" return fi - fi - if [ $FROM_LINE -lt $TO_LINE ]; then - dumplog $sourcef $FROM_LINE $TO_LINE - log "Including segment [$FROM_LINE-$TO_LINE] from $logf" + if [ $FROM_LINE -lt $TO_LINE ]; then + dumplog $sourcef $FROM_LINE $TO_LINE + log "Including segment [$FROM_LINE-$TO_LINE] from $logf" + else + log "Segment from $logf finished before it started, line: $FROM_LINE to $TO_LINE" + fi else - log "Segment from $logf finished before it started, line: $FROM_LINE to $TO_LINE" + dumplog $sourcef $FROM_LINE $TO_LINE + log "Including all logs after line $FROM_LINE from $logf" fi drop_tmp_file trap "" 0 } # # find log/set of logs which are interesting for us # dumplogset() { local logf=$1 local from_time=$2 local to_time=$3 local logf_set=`arch_logs $logf $from_time $to_time` if [ x = "x$logf_set" ]; then return fi local num_logs=`echo "$logf_set" | wc -l` local oldest=`echo $logf_set | awk '{print $NF}'` local newest=`echo $logf_set | awk '{print $1}'` local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'` # the first logfile: from $from_time to $to_time (or end) # logfiles in the middle: all # the last logfile: from beginning to $to_time (or end) case $num_logs in 1) print_logseg $newest $from_time $to_time;; *) print_logseg $oldest $from_time 0 for f in $mid_logfiles; do `find_decompressor $f` $f debug "including complete $f logfile" done print_logseg $newest 0 $to_time ;; esac } # cut out a stanza getstanza() { awk -v name="$1" ' !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start if ($1 == name) in_stanza = 1 } in_stanza { print } in_stanza && NF==1 && $1 == "}" { exit } ' } # supply stanza in $1 and variable name in $2 # (stanza is optional) getcfvar() { cf_type=$1; shift; cf_var=$1; shift; cf_file=$* [ -f "$cf_file" ] || return case $cf_type in + cman) + grep $cf_var $cf_file | sed s/.*$cf_var=\"// | sed s/\".*// + ;; corosync|openais) sed 's/#.*//' < $cf_file | if [ $# -eq 2 ]; then getstanza "$cf_var" shift 1 else cat fi | awk -v varname="$cf_var" ' NF==2 && match($1,varname":$")==1 { print $2; exit; } ' ;; heartbeat) sed 's/#.*//' < $cf_file | grep -w "^$cf_var" | sed 's/^[^[:space:]]*[[:space:]]*//' ;; logd) sed 's/#.*//' < $cf_file | grep -w "^$cf_var" | sed 's/^[^[:space:]]*[[:space:]]*//' ;; esac } # # figure out the cluster type, depending on the process list # and existence of configuration files # get_cluster_type() { if ps -ef | egrep -qs '[c]orosync' then - stack="corosync" + quorum=`corosync-objctl -a | grep quorum.provider | sed s/.*=//` + if [ x"$quorum" = x"quorum_cman" ]; then + stack="cman" + else + stack="corosync" + fi + elif ps -ef | egrep -qs '[a]isexec' then stack="openais" elif ps -ef | grep -v -e grep -e "eartbeat/[clasp]" | egrep -qs '[h]eartbeat' then stack="heartbeat" # Now we're guessing... + elif [ -f /etc/cluster/cluster.conf ]; then + stack="cman" + # TODO: Technically these could be anywhere :-/ elif [ -f /etc/corosync/corosync.conf ]; then stack="corosync" elif [ -f /etc/ais/openais.conf ]; then stack="openais" else stack="heartbeat" fi debug "Detected the '$stack' cluster stack" echo $stack } find_cluster_cf() { case $1 in + cman) echo "/etc/cluster/cluster.conf";; corosync) best_size=0 best_file="" # TODO: Technically these could be anywhere :-/ for cf in /etc/ais/openais.conf /etc/corosync/corosync.conf; do if [ -f $cf ]; then size=`wc -l $cf | awk '{print $1}'` if [ $size -gt $best_size ]; then best_size=$size best_file=$cf fi fi done echo "$best_file" ;; openais) # TODO: Technically it could be anywhere :-/ cf="/etc/ais/openais.conf" if [ -f $cf ]; then echo "$cf" fi ;; heartbeat) cf="/etc/ha.d/ha.cf" if [ -f $cf ]; then echo "$cf" fi ;; *) warning "Unknown cluster type: $1" ;; esac } # # check for the major prereq for a) parameter parsing and b) # parsing logs # t=`get_time "12:00"` if [ "$t" = "" ]; then fatal "please install the perl Date::Parse module" fi detect_host