Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4512013
report.common.in
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
20 KB
Referenced Files
None
Subscribers
None
report.common.in
View Options
# Copyright (C) 2007 Dejan Muhamedagic <dmuhamedagic@suse.de>
# Almost everything as part of hb_report
# Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
# Cleanups, refactoring, extensions
#
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Full node name as reported by uname, and the same name cut at the first
# dot. The short form prefixes every log message and names the per-node
# directory that record() looks for under $REPORT_HOME.
host=$(uname -n)
shorthost=$(echo "$host" | sed 's:\..*::')

# Default to non-verbose output unless the caller already chose a level.
if [ -z "$verbose" ]; then
    verbose=0
fi
# Target Files
# Base names only, no directory part. Presumably these are the per-node
# output files of a report; the code that writes them is not visible in
# this part of the file -- TODO confirm against the callers.
EVENTS_F=events.txt
ANALYSIS_F=analysis.txt
DESCRIPTION_F=description.txt
HALOG_F=cluster-log.txt
BT_F=backtraces.txt
SYSINFO_F=sysinfo.txt
SYSSTATS_F=sysstats.txt
DLM_DUMP_F=dlm_dump.txt
CRM_MON_F=crm_mon.txt
MEMBERSHIP_F=members.txt
HB_UUID_F=hb_uuid.txt
HOSTCACHE=hostcache
CRM_VERIFY_F=crm_verify.txt
PERMISSIONS_F=permissions.txt
CIB_F=cib.xml
CIB_TXT_F=cib.txt
# One event class per line: "<title> <extended regex>".
# node_events() drops the titles and ORs all regexes together for grep -E,
# so every pattern must be a valid ERE and must not contain whitespace.
EVENT_PATTERNS="
state do_state_transition
membership pcmk_peer_update.*(lost|memb):
quorum crmd.*crm_update_quorum|crmd.*ais.disp.*quorum.(lost|ac?quir)
pause Process.pause.detected
resources lrmd.*rsc:(start|stop)
stonith te_fence_node|stonith-ng.*log_oper.*report|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=)
start_stop shutdown.decision|Starting.heartbeat|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete
"
# superset of all packages of interest on all distros
# (the package manager will be used to validate the installation
# of any of these packages that are installed)
# NOTE(review): nothing in the visible part of this file reads PACKAGES;
# the consumer is presumably in a script that sources this one.
PACKAGES="pacemaker pacemaker-libs pacemaker-cluster-libs libpacemaker3
pacemaker-remote pacemaker-pygui pacemaker-pymgmt pymgmt-client
openais libopenais2 libopenais3 corosync libcorosync4
resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord
heartbeat heartbeat-common heartbeat-resources libheartbeat2
ocfs2-tools ocfs2-tools-o2cb ocfs2console
ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace
drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace
drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen
lvm2 lvm2-clvm cmirrord
libdlm libdlm2 libdlm3
hawk ruby lighttpd
kernel-default kernel-pae kernel-xen
glibc
"
# Potential locations of system log files
# Each entry is a shell glob: findmsg() expands the list unquoted
# (ls -1td $SYSLOGS) and silently ignores entries that match nothing.
SYSLOGS="
/var/log/*
/var/logs/*
/var/syslog/*
/var/adm/*
/var/log/ha/*
/var/log/cluster/*
"
#
# keep the user posted
#
# record <message...>
#
# Append a message, prefixed with the short host name, to the report log.
# Destination, in order of preference:
#   $REPORT_HOME/$shorthost/report.out  (if that directory exists)
#   $l_base/report.summary              (if that directory exists)
#   /dev/null
# Sets the global $rec to the destination that was chosen.
record() {
    if [ -n "$REPORT_HOME" ] && [ -d "${REPORT_HOME}/$shorthost" ]; then
        rec="${REPORT_HOME}/$shorthost/report.out"
    elif [ -n "${l_base}" ] && [ -d "${l_base}" ]; then
        rec="${l_base}/report.summary"
    else
        rec="/dev/null"
    fi

    # The message is passed as data, never as the format string, so a '%'
    # or backslash in it is written literally. The file redirection comes
    # first so that stderr follows stdout into the log instead of leaking
    # into whatever is capturing the caller's stdout.
    printf "%-10s %s\n" "$shorthost:" "$*" >> "${rec}" 2>&1
}
# log <message...>
#
# Print a message, prefixed with the short host name, on stderr and also
# hand it to record() for the report log.
log() {
    # Message goes through %s so that '%' and backslashes are printed
    # verbatim instead of being parsed as printf directives.
    printf "%-10s %s\n" "$shorthost:" "$*" 1>&2
    record "$*"
}
# debug <message...>
#
# Debug messages are always recorded; they are additionally shown on
# stderr (via log) only when $verbose is greater than zero.
debug() {
    local sink=record
    [ $verbose -gt 0 ] && sink=log
    $sink "Debug: $*"
}
# info <message...>: log an informational message unchanged.
info() {
    local msg="$*"
    log "$msg"
}
# warning <message...>: log a message tagged with "WARN: ".
warning() {
    local msg="WARN: $*"
    log "$msg"
}
# fatal <message...>: log a message tagged with "ERROR: " and terminate
# the current shell with exit status 1.
fatal() {
    local msg="ERROR: $*"
    log "$msg"
    exit 1
}
# is_running <name>
#
# Succeed if the process list (ps -ef) contains <name>.
# The first character of the name is wrapped in a bracket expression
# ("corosync" -> "[c]orosync") so that the grep process itself, whose
# command line contains the bracketed text, never matches.
is_running() {
    local pattern

    # An empty name would turn into an empty regex that matches everything.
    [ -n "$1" ] || return 1

    pattern=$(echo "$1" | sed -e 's/^\(.\)/[\1]/')

    # The pattern must stay quoted: unquoted, "[c]orosync" is a glob and a
    # file called "corosync" in the current directory would replace it,
    # defeating the self-match protection.
    ps -ef | grep -E -qs -- "$pattern"
}
# has_remoted
#
# Succeed if a pacemaker_remoted binary is available, either through
# $PATH or as an executable in the configured sbin directory.
has_remoted() {
    # TODO: the binary might be elsewhere
    # 'command -v' is a shell builtin, unlike 'which', which is not
    # installed on every minimal system.
    if command -v pacemaker_remoted >/dev/null 2>&1; then
        return 0
    fi
    [ -x "@sbindir@/pacemaker_remoted" ]
}
# found_dir <description> <dirname>
#
# Report a successful directory lookup: the directory goes to stdout (for
# callers that capture it), the human-readable notice goes through info().
found_dir() {
    local what=$1
    local where=$2

    echo "$where"
    info "Pacemaker $what found in: $where"
}
# detect_daemon_dir
#
# Print (via found_dir) the directory holding the Pacemaker daemons.
# Well-known install locations are tried first; failing that, the file
# system is searched down to $maxdepth levels. If nothing is found the
# function aborts through fatal(), unless this looks like a Pacemaker
# Remote node, which does not need the daemons.
detect_daemon_dir() {
    info "Searching for where Pacemaker daemons live... this may take a while"

    for d in \
        {/usr,/usr/local,/opt/local,@exec_prefix@}/{libexec,lib64,lib}/{pacemaker,heartbeat}
    do
        # pacemaker and pacemaker-cts packages can install to daemon directory,
        # so check for a file from each
        if [ -e "$d/pengine" ] || [ -e "$d/lrmd_test" ]; then
            found_dir "daemons" "$d"
            return
        fi
    done

    # The two -name tests are grouped so that '-type f' applies to both;
    # without the parentheses '-o' binds looser than the implicit AND and
    # a directory called lrmd_test would be accepted as well.
    for f in $(find / -maxdepth $maxdepth -type f \( -name pengine -o -name lrmd_test \)); do
        d=$(dirname "$f")
        found_dir "daemons" "$d"
        return
    done

    # Pacemaker Remote nodes don't need to install daemons
    if has_remoted; then
        info "Not found (this appears to be a Pacemaker Remote node)"
    else
        fatal "Pacemaker daemons not found (nonstandard installation?)"
    fi
}
# detect_cib_dir
#
# Print (via found_dir) the directory that contains cib.xml.
# The two standard locations under $local_state_dir/lib are checked first;
# otherwise the file system is searched down to $maxdepth levels. A miss
# only produces a warning, and not even that on Pacemaker Remote nodes,
# which have no CIB.
detect_cib_dir() {
    for d in "$local_state_dir"/lib/{pacemaker/cib,heartbeat/crm}; do
        # A real file test: the previous form, [ "-f $d/cib.xml" ], tested
        # a non-empty string and therefore always picked the first
        # candidate, whether or not it existed.
        if [ -f "$d/cib.xml" ]; then
            found_dir "config files" "$d"
            return
        fi
    done

    info "Searching for where Pacemaker keeps config information... this may take a while"
    # TODO: What about false positives where someone copied the CIB?
    for f in $(find / -maxdepth $maxdepth -type f -name cib.xml); do
        d=$(dirname "$f")
        found_dir "config files" "$d"
        return
    done

    # Pacemaker Remote nodes don't need a CIB
    if has_remoted; then
        info "Not found (this appears to be a Pacemaker Remote node)"
    else
        warning "Pacemaker config not found (nonstandard installation?)"
    fi
}
# detect_state_dir
#
# Print the root of Pacemaker's state directory tree.
# With $CRM_CONFIG_DIR known, the new layout
# ($local_state_dir/lib/pacemaker/(cib,pengine,blackbox,cores)) is assumed
# and the parent of the CIB directory is printed. Pacemaker Remote nodes
# might not have a CRM_CONFIG_DIR, so the standard location is used when it
# exists. Prints nothing if neither applies.
detect_state_dir() {
    local pcmk_lib="$local_state_dir/lib/pacemaker"

    if [ -n "$CRM_CONFIG_DIR" ]; then
        dirname "$CRM_CONFIG_DIR"
        return
    fi

    if [ -d "$pcmk_lib" ]; then
        echo $pcmk_lib
    fi
}
# detect_pe_dir <config_root>
#
# Print (via found_dir) the directory with the policy engine inputs:
# <config_root>/pengine if it exists, otherwise the first directory named
# "pengine" found within $maxdepth levels of /. A miss is fatal except on
# what appears to be a Pacemaker Remote node.
detect_pe_dir() {
    config_root="$1"
    d="$config_root/pengine"

    if [ ! -d "$d" ]; then
        info "Searching for where Pacemaker keeps Policy Engine inputs... this may take a while"
        for d in $(find / -maxdepth $maxdepth -type d -name pengine); do
            found_dir "policy engine inputs" "$d"
            return
        done

        if has_remoted; then
            info "Not found (this appears to be a Pacemaker Remote node)"
        else
            fatal "Pacemaker policy engine inputs not found (nonstandard installation?)"
        fi
        return
    fi

    found_dir "policy engine inputs" "$d"
}
# detect_host
#
# Work out where this host keeps Pacemaker/Heartbeat data and publish the
# result in globals: local_state_dir, CRM_STATE_DIR, CRM_DAEMON_DIR,
# CRM_CONFIG_DIR, config_root, BLACKBOX_DIR, PE_STATE_DIR, HA_STATE_DIR and
# CRM_CORE_DIRS. The statement order matters: later lookups read the
# globals set by earlier ones.
detect_host() {
# Start from the configure-time state directory.
local_state_dir=@localstatedir@
if [ -d $local_state_dir/run ]; then
CRM_STATE_DIR=$local_state_dir/run/crm
else
# No run/ directory there: take the first directory named "run" that find
# reports and treat its parent as the state directory.
info "Searching for where Pacemaker keeps runtime data... this may take a while"
for d in `find / -maxdepth $maxdepth -type d -name run`; do
local_state_dir=`dirname $d`
CRM_STATE_DIR=$d/crm
break
done
info "Found: $CRM_STATE_DIR"
fi
debug "Machine runtime directory: $local_state_dir"
debug "Pacemaker runtime data located in: $CRM_STATE_DIR"
# Each detect_* helper runs in a command substitution, so only what it
# prints on stdout is kept; variables it assigns are lost.
CRM_DAEMON_DIR=$(detect_daemon_dir)
CRM_CONFIG_DIR=$(detect_cib_dir)
# detect_state_dir reads CRM_CONFIG_DIR, so it must run after detect_cib_dir.
config_root=$(detect_state_dir)
# Older versions had none
BLACKBOX_DIR=$config_root/blackbox
debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR"
PE_STATE_DIR=$(detect_pe_dir "$config_root")
HA_STATE_DIR=$local_state_dir/lib/heartbeat
debug "Assuming Heartbeat state files, if any, are located in: $HA_STATE_DIR"
# Collect the candidate core-dump directories that actually exist,
# as a space-separated list.
CRM_CORE_DIRS=""
for d in $config_root/cores $HA_STATE_DIR/cores $local_state_dir/lib/corosync $local_state_dir/lib/openais; do
if [ -d $d ]; then
CRM_CORE_DIRS="$CRM_CORE_DIRS $d"
fi
done
debug "Core files located under: $CRM_CORE_DIRS"
}
# time2str <epoch-seconds>
#
# Print the given Unix time as local date and time in the locale's
# preferred format (strftime %x %X). The argument is pasted into the perl
# expression as-is.
time2str() {
    perl -MPOSIX -e "print strftime('%x %X',localtime($1));"
}
# get_time <time string...>
#
# Convert a human-readable time stamp to seconds since the epoch and print
# it. Prints nothing when the string contains no ':' or cannot be parsed,
# or when neither Date::Parse nor Date::Manip is installed.
get_time() {
    # The time string is handed to perl as an argument rather than pasted
    # into the program text: log lines are untrusted input and a quote,
    # '$' or '@' in them would otherwise break (or inject into) the script.
    # '--' stops perl from treating a leading '-' as one of its switches.
    perl -e '
    $time = defined($ARGV[0]) ? $ARGV[0] : "";
    $unix_tm = 0;
    eval "use Date::Parse";
    if (index($time, ":") < 0) {
        # no time-of-day part: not a time stamp we understand
    } elsif (!$@) {
        $unix_tm = str2time($time);
    } else {
        eval "use Date::Manip";
        if (!$@) {
            $unix_tm = UnixDate(ParseDateString($time), "%s");
        }
    }
    if ($unix_tm != "") {
        print int($unix_tm);
    } else {
        print "";
    }
    ' -- "$*"
}
# get_time_ <args...>
#
# Fallback reached when "get_time_$format" is invoked with an empty
# format: only emits a warning.
get_time_() {
    local msg="Unknown time format used by: $*"
    warning "$msg"
}
# get_time_syslog: filter; prints the first three whitespace-separated
# fields of each input line, joined by single spaces ("Mon DD HH:MM:SS").
get_time_syslog() {
    awk '{ print $1 " " $2 " " $3 }'
}
# get_time_legacy: filter; prints the second field of each input line with
# its first underscore turned into a space ("date_time" -> "date time").
get_time_legacy() {
    awk '{ sub(/_/, " ", $2); print $2 }'
}
# get_time_iso8601: filter; prints the first whitespace-separated field of
# each input line.
get_time_iso8601() {
    awk '{ stamp = $1; print stamp }'
}
# get_time_format_for_string <log line...>
#
# Print the name of the first time format (syslog, iso8601, legacy - tried
# in that order) whose extractor yields something get_time can convert.
# Prints nothing if none fits. Sets the globals $l and $t.
get_time_format_for_string() {
    local fmt

    l="$*"
    for fmt in syslog iso8601 legacy; do
        t=$(get_time `echo $l | get_time_$fmt`)
        if [ "x$t" != x ]; then
            echo $fmt
            return
        fi
    done
}
# get_time_format
#
# Read log lines from stdin and print the time format of the first line
# that get_time_format_for_string recognises. At most ten lines are
# examined; prints an empty line if none of them is recognised.
get_time_format() {
    t=0
    l=""
    func=""
    trycnt=10

    while [ $trycnt -gt 0 ]; do
        read l || break
        func=$(get_time_format_for_string $l)
        [ "x$func" = x ] || break
        trycnt=$(($trycnt-1))
    done

    #debug "Logfile uses the $func time format"
    echo $func
}
# get_first_time <format>
#
# Read log lines from stdin and print the epoch time of the first line
# whose time stamp (extracted with get_time_<format>) can be converted.
# Prints nothing if no line qualifies.
get_first_time() {
    l=""
    format=$1

    while read l; do
        t=$(echo $l | get_time_$format)
        ts=$(get_time $t)
        [ -n "$ts" ] || continue
        echo "$ts"
        return
    done
}
# get_last_time <format>
#
# Read log lines from stdin and print the epoch time of the last line
# whose time stamp can be converted. If no line qualifies, the current
# time is printed instead.
get_last_time() {
    l=""
    best=$(date +%s) # Now
    format=$1

    while read l; do
        t=$(echo $l | get_time_$format)
        ts=$(get_time $t)
        if [ -n "$ts" ]; then
            best=$ts
        fi
    done

    echo $best
}
# linetime <logfile> <line number>
#
# Print the epoch time of the first line at or after <line number> in
# <logfile> that looks like it carries a time stamp (contains ":MM:").
# Sets the globals $l, $format and $t.
linetime() {
    # File name and log line are quoted so that names containing spaces
    # work and glob characters in a log message are not expanded against
    # the current directory.
    l=$(tail -n "+$2" "$1" | grep -a ":[0-5][0-9]:" | head -n 1)
    format=$(get_time_format_for_string "$l")
    t=$(printf '%s\n' "$l" | get_time_$format)
    get_time "$t"
}
#
# findmsg <max> <pattern>
#
# Print the names of up to <max> system logs that contain <pattern>,
# ordered by most recently modified. Candidates come from the $SYSLOGS
# globs; compressed logs are searched through their decompressor.
# Prints nothing if fewer than <max> logs match.
#
findmsg() {
    max=$1
    pattern="$2"
    found=0

    # List all potential system logs ordered by most recently modified.
    candidates=$(ls -1td $SYSLOGS 2>/dev/null)
    if [ -z "$candidates" ]; then
        debug "No system logs found to search for pattern '$pattern'"
        return
    fi

    # Portable way to handle files with spaces in their names.
    SAVE_IFS=$IFS
    IFS="
"

    # Check each log file for matches.
    logfiles=""
    for f in $candidates; do
        local cat=$(find_decompressor "$f")

        # The decompressor is a command plus options (e.g. "gzip -dc") and
        # has to be split on blanks. With IFS reduced to a newline it would
        # be looked up as one word and never run, so it is expanded in a
        # subshell that has the default IFS back.
        (unset IFS; $cat "$f" 2>/dev/null) | grep -q -e "$pattern"
        if [ $? -eq 0 ]; then

            # Add this file to the list of hits
            # (using newline as separator to handle spaces in names).
            if [ -z "$logfiles" ]; then
                logfiles="$f"
            else
                logfiles="$logfiles
$f"
            fi

            # If we have enough hits, print them and return.
            found=$(($found+1))
            if [ $found -ge $max ]; then
                debug "Pattern '$pattern' found in: [ $logfiles ]"
                IFS=$SAVE_IFS
                echo "$logfiles"
                return
            fi
        fi
    done 2>/dev/null

    IFS=$SAVE_IFS
    debug "Pattern '$pattern' not found in any system logs"
}
node_events() {
if [ -e $1 ]; then
Epatt=`echo "$EVENT_PATTERNS" |
while read title p; do [ -n "$p" ] && echo -n "|$p"; done |
sed 's/.//'
`
grep -E "$Epatt" $1
fi
}
# pickfirst <command>...
#
# Print the first of the given command names that is available in this
# shell and return 0; return 1 (printing nothing) if none is.
pickfirst() {
    for x; do
        # 'command -v' is a builtin; 'which' is an external program that
        # is not installed everywhere.
        if command -v "$x" >/dev/null 2>&1; then
            echo "$x"
            return 0
        fi
    done
    return 1
}
# shrink <path>
#
# Pack <path> into <path>.tar[.bz2|.gz|.xz], using the first compressor
# available (bzip2, gzip, xz), and print the name of the tarball.
# The archive stores only the last path component, not the leading
# directories. Aborts through fatal() if the tarball already exists.
# Sets the globals olddir, dir, base, target, tar_options and variant.
shrink() {
    olddir=$PWD
    dir=$(dirname "$1")
    base=$(basename "$1")

    target=$1.tar
    tar_options="cf"

    variant=$(pickfirst bzip2 gzip xz false)
    case $variant in
        bz*)
            tar_options="jcf"
            target="$target.bz2"
            ;;
        gz*)
            tar_options="zcf"
            target="$target.gz"
            ;;
        xz*)
            tar_options="Jcf"
            target="$target.xz"
            ;;
        *)
            warning "Could not find a compression program, the resulting tarball may be huge"
            ;;
    esac

    if [ -e "$target" ]; then
        fatal "Destination $target already exists, specify an alternate name with --dest"
    fi

    # Run tar from the parent directory inside a subshell: the caller's
    # working directory is never changed, and tar is not started at all
    # if the directory cannot be entered.
    (cd "$dir" >/dev/null 2>&1 && tar $tar_options "$target" "$base" >/dev/null 2>&1) ||
        warning "Could not create $target"

    printf '%s\n' "$target"
}
# findln_by_time <logfile> <epoch time>
#
# Binary-search <logfile> for <epoch time> and print the number of the
# line the search stopped at: a line carrying exactly that time, otherwise
# the last line probed. Prints nothing if the file is too large to process
# or if no time can be extracted from ten consecutive probes.
# Sets the globals mid, trycnt and tmid.
findln_by_time() {
    local logf=$1
    local tm=$2
    local first=1

    # Some logs can be massive (over 1,500,000,000 lines have been seen in the wild)
    # Even just 'wc -l' on these files can take 10+ minutes
    # The size is taken from the log file itself; a bare 'ls -lh' would
    # list the current directory and judge the log by unrelated files.
    local fileSize=$(ls -lh "$logf" 2>/dev/null | awk '{ print $5 }' | grep -ie '[GTP]')
    if [ -n "$fileSize" ]; then
        warning "$logf is ${fileSize} in size and could take many hours to process. Skipping."
        return
    fi

    local last=$(wc -l < "$logf")
    while [ $first -le $last ]; do
        mid=$((($last+$first)/2))
        trycnt=10
        while [ $trycnt -gt 0 ]; do
            tmid=$(linetime "$logf" $mid)
            [ "$tmid" ] && break
            warning "cannot extract time: $logf:$mid; will try the next one"
            trycnt=$(($trycnt-1))
            # shift the whole first-last segment
            first=$(($first-1))
            last=$(($last-1))
            mid=$((($last+$first)/2))
        done
        if [ -z "$tmid" ]; then
            warning "giving up on log..."
            return
        fi
        if [ $tmid -gt $tm ]; then
            last=$(($mid-1))
        elif [ $tmid -lt $tm ]; then
            first=$(($mid+1))
        else
            break
        fi
    done
    echo $mid
}
# dumplog <logfile> <from_line> [to_line]
#
# Print lines <from_line>..<to_line> (inclusive) of <logfile>, or through
# to the end of the file when <to_line> is empty or omitted. Does nothing
# when <from_line> is empty.
dumplog() {
    local logf=$1
    local from_line=$2
    local to_line=$3

    [ "$from_line" ] ||
        return

    tail -n "+$from_line" "$logf" |
        if [ "$to_line" ]; then
            # 'head -n N': the bare 'head -N' form is obsolete.
            head -n $(($to_line-$from_line+1))
        else
            cat
        fi
}
#
# find_decompressor <filename>
#
# Print the command (with options) that writes the contents of <filename>
# to stdout, chosen by the file name's ending: bzip2, gzip or xz for
# compressed files, plain cat for everything else.
#
find_decompressor() {
    local tool

    case $1 in
        *bz2) tool="bzip2 -dc" ;;
        *gz)  tool="gzip -dc" ;;
        *xz)  tool="xz -dc" ;;
        *)    tool="cat" ;;
    esac
    echo "$tool"
}
#
# is_our_log <logfile> <from_time> <to_time>
#
# check if the log contains a piece of our segment
#
# The answer is the return status:
#   0  skip this log (unreadable dates, or entirely after <to_time>)
#   1  include this log and keep looking at older ones
#   2  the log ends before <from_time>: stop, older logs are useless
#   3  the log covers <from_time>: include it and stop
# An empty <to_time> means there is no upper bound.
#
is_our_log() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    local cat=$(find_decompressor "$logf")
    local format=$($cat "$logf" | get_time_format)
    local first_time=$($cat "$logf" | head -n 10 | get_first_time $format)
    local last_time=$($cat "$logf" | tail -n 10 | get_last_time $format)

    if [ -z "$first_time" ] || [ -z "$last_time" ]; then
        warning "Skipping bad logfile '$1': Could not determine log dates"
        return 0 # skip (empty log?)
    fi
    if [ "$from_time" -gt "$last_time" ]; then
        # we shouldn't get here anyway if the logs are in order
        return 2 # we're past good logs; exit
    fi
    if [ "$from_time" -ge "$first_time" ]; then
        return 3 # this is the last good log
    fi
    # have to go further back
    # The two conditions are separate tests: combined with -o and an
    # unquoted empty to_time, the expression is malformed and was treated
    # as false, so open-ended requests wrongly dropped the log.
    if [ -z "$to_time" ] || [ "$to_time" -ge "$first_time" ]; then
        return 1 # include this log
    else
        return 0 # don't include this log
    fi
}
#
# arch_logs <logfile> <from_time> <to_time>
#
# go through archived logs (timewise backwards) and see if there
# are lines belonging to us
# (we rely on untouched log files, i.e. that modify time
# hasn't been changed)
#
# Prints the name of every log that is_our_log wants included, newest
# first, and stops as soon as is_our_log says older logs cannot matter.
#
arch_logs() {
    local logf=$1
    local from_time=$2
    local to_time=$3
    local verdict

    # look for files such as: ha-log-20090308 or
    # ha-log-20090308.gz (.bz2) or ha-log.0, etc
    ls -t $logf $logf*[0-9z] 2>/dev/null |
    while read next_log; do
        is_our_log $next_log $from_time $to_time
        verdict=$?
        case $verdict in
            2)  # don't go through older logs!
                break
                ;;
            1|3)
                # include log; verdict 3 also ends the search
                echo $next_log
                debug "Found log $next_log"
                if [ $verdict -eq 3 ]; then
                    break
                fi
                ;;
        esac
    done
}
#
# print part of the log
#
# drop_tmp_file: remove the temporary file named by the global $tmp, if
# one is set. Also installed as an EXIT trap by print_logseg.
drop_tmp_file() {
    if [ -n "$tmp" ]; then
        rm -f "$tmp"
    fi
}
# print_logseg <logfile> <from_time> <to_time>
#
# Print the part of <logfile> that lies between the two epoch times.
# A <from_time> of 0 means "from the first line", a <to_time> of 0 means
# "to the end". Compressed logs are unpacked to a temporary file first.
# Sets the globals tmp, sourcef, FROM_LINE and TO_LINE, and replaces the
# shell's EXIT trap.
print_logseg() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    # uncompress to a temp file (if necessary)
    local cat=$(find_decompressor "$logf")
    if [ "$cat" != "cat" ]; then
        tmp=$(mktemp) || {
            warning "cannot create a temporary file to uncompress $logf"
            return
        }
        # Arm the cleanup before the (possibly long) decompression starts.
        trap drop_tmp_file 0
        $cat "$logf" > "$tmp"
        sourcef=$tmp
    else
        sourcef=$logf
        tmp=""
    fi

    if [ "$from_time" = 0 ]; then
        FROM_LINE=1
    else
        FROM_LINE=$(findln_by_time "$sourcef" "$from_time")
    fi
    if [ -z "$FROM_LINE" ]; then
        warning "couldn't find line for time $from_time; corrupt log file?"
        # Clean up here as well: the next call overwrites $tmp, after
        # which the EXIT trap could no longer find this file.
        drop_tmp_file
        trap "" 0
        return
    fi

    TO_LINE=""
    if [ "$to_time" != 0 ]; then
        TO_LINE=$(findln_by_time "$sourcef" "$to_time")
        if [ -z "$TO_LINE" ]; then
            warning "couldn't find line for time $to_time; corrupt log file?"
            drop_tmp_file
            trap "" 0
            return
        fi
        if [ $FROM_LINE -lt $TO_LINE ]; then
            dumplog "$sourcef" "$FROM_LINE" "$TO_LINE"
            log "Including segment [$FROM_LINE-$TO_LINE] from $logf"
        else
            debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf"
        fi
    else
        dumplog "$sourcef" "$FROM_LINE" "$TO_LINE"
        log "Including all logs after line $FROM_LINE from $logf"
    fi

    drop_tmp_file
    trap "" 0
}
#
# dumplogset <logfile> <from_time> <to_time>
#
# find log/set of logs which are interesting for us
#
# Prints the requested time span from <logfile> and its archived
# predecessors, in chronological order. Prints nothing when arch_logs
# finds no relevant log.
#
dumplogset() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    local logf_set=$(arch_logs $logf $from_time $to_time)
    [ x != "x$logf_set" ] || return 0

    # arch_logs lists newest first, one name per line
    local num_logs=$(echo "$logf_set" | wc -l)
    local oldest=$(echo $logf_set | awk '{print $NF}')
    local newest=$(echo $logf_set | awk '{print $1}')
    local mid_logfiles=$(echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}')

    # the first logfile: from $from_time to $to_time (or end)
    # logfiles in the middle: all
    # the last logfile: from beginning to $to_time (or end)
    if [ "$num_logs" = 1 ]; then
        print_logseg $newest $from_time $to_time
    else
        print_logseg $oldest $from_time 0
        for f in $mid_logfiles; do
            $(find_decompressor $f) $f
            debug "including complete $f logfile"
        done
        print_logseg $newest 0 $to_time
    fi
}
# cut out a stanza
#
# getstanza <name>
#
# Filter: copy from stdin the first block that opens with "<name> {" and
# runs to the next line consisting of a lone "}". Nothing is printed if no
# such block exists.
getstanza() {
    awk -v name="$1" '
        !in_stanza {
            # stanza start: exactly two fields, a lowercase word and "{"
            if (NF == 2 && $0 ~ /^[a-z][a-z]*[[:space:]]*{/ && $1 == name)
                in_stanza = 1
        }
        in_stanza {
            print
            if (NF == 1 && $1 == "}")
                exit
        }
    '
}
# getcfvar <cluster type> <variable> <config file>
#
# Print the value of <variable> as found in <config file>, using the
# syntax of the given cluster type (cman, corosync, openais, heartbeat,
# logd). Prints nothing if the file does not exist or the type is unknown.
# Sets the globals cf_type, cf_var and cf_file.
#
# NOTE(review): the original header here read "supply stanza in $1 and
# variable name in $2 (stanza is optional)", which does not match the
# argument handling below -- confirm the intended calling convention.
getcfvar() {
cf_type=$1; shift;
cf_var=$1; shift;
# everything after the first two arguments is joined into the file name
cf_file=$*
[ -f "$cf_file" ] || return
case $cf_type in
cman)
# keep what stands between var=" and the next double quote
grep $cf_var $cf_file | sed s/.*$cf_var=\"// | sed s/\".*//
;;
corosync|openais)
# Comments are stripped; when two arguments remain after the shifts the
# input is first narrowed to the stanza named by $cf_var. The awk stage
# prints the second field of the first two-field line whose first field
# is "<variable>:".
# NOTE(review): with two arguments left, cf_file holds both joined by a
# space, so the -f test above presumably fails before this branch is
# reached -- verify whether the stanza path is ever taken.
sed 's/#.*//' < $cf_file |
if [ $# -eq 2 ]; then
getstanza "$cf_var"
shift 1
else
cat
fi |
awk -v varname="$cf_var" '
NF==2 && match($1,varname":$")==1 { print $2; exit; }
'
;;
heartbeat)
# strip comments, keep lines starting with the variable as a whole word,
# then drop that first word and the whitespace after it
sed 's/#.*//' < $cf_file |
grep -w "^$cf_var" |
sed 's/^[^[:space:]]*[[:space:]]*//'
;;
logd)
# same extraction as for heartbeat
sed 's/#.*//' < $cf_file |
grep -w "^$cf_var" |
sed 's/^[^[:space:]]*[[:space:]]*//'
;;
esac
}
# pickfirst <command>...
#
# Print the first of the given command names that is available in this
# shell and return 0; return 1 (printing nothing) if none is.
# NOTE(review): this repeats a definition made earlier in the file; being
# the later one, it is the definition in effect once the file is loaded.
pickfirst() {
    for x; do
        # 'command -v' is a builtin; 'which' is an external program that
        # is not installed everywhere.
        if command -v "$x" >/dev/null 2>&1; then
            echo "$x"
            return 0
        fi
    done
    return 1
}
#
# get_cluster_type
#
# figure out the cluster type, depending on the process list
# and existence of configuration files
#
# Prints one of: cman, corosync, openais, heartbeat, any.
# Running processes take precedence; configuration files are only
# consulted when no cluster process is found. Sets the globals tool,
# quorum and stack.
#
get_cluster_type() {
    if is_running corosync; then
        tool=$(pickfirst corosync-objctl corosync-cmapctl)
        # Start from a clean slate so that a value left over from an
        # earlier call cannot decide the result when no tool is found.
        quorum=""
        case $tool in
            *objctl)
                quorum=$($tool -a | grep quorum.provider | sed 's/.*=[[:space:]]*//')
                ;;
            *cmapctl)
                quorum=$($tool | grep quorum.provider | sed 's/.*=[[:space:]]*//')
                ;;
        esac
        if [ x"$quorum" = x"quorum_cman" ]; then
            stack="cman"
        else
            stack="corosync"
        fi

    elif is_running aisexec; then
        stack="openais"

    # Skip the grep itself and heartbeat's helper binaries
    # (.../heartbeat/<c|l|a|s|p>...); only the main daemon counts.
    elif ps -ef | grep -v -e grep -e "eartbeat/[clasp]" | grep -E -qs '[h]eartbeat'; then
        stack="heartbeat"

    # Now we're guessing...
    elif [ -f /etc/cluster/cluster.conf ]; then
        stack="cman"

    # TODO: Technically these could be anywhere :-/
    elif [ -f /etc/corosync/corosync.conf ]; then
        stack="corosync"

    elif [ -f /etc/ais/openais.conf ]; then
        stack="openais"

    elif [ -f /etc/ha.d/ha.cf ]; then
        stack="heartbeat"

    else
        # We still don't know. This might be a Pacemaker Remote node,
        # or the configuration might be in a nonstandard location.
        stack="any"
    fi

    debug "Detected the '$stack' cluster stack"
    echo "$stack"
}
# find_cluster_cf <cluster type>
#
# Print the path of the configuration file for the given cluster type.
# Nothing is printed when the file cannot be located or the type is "any";
# an unknown type produces a warning. Only the cman path is printed without
# checking that the file exists.
find_cluster_cf() {
    case $1 in
        cman)
            echo "/etc/cluster/cluster.conf"
            ;;
        corosync)
            # Of the standard candidates, the one with the most lines wins.
            best_size=0
            best_file=""

            # TODO: Technically these could be anywhere :-/
            for cf in /etc/ais/openais.conf /etc/corosync/corosync.conf; do
                [ -f $cf ] || continue
                size=$(wc -l $cf | awk '{print $1}')
                if [ $size -gt $best_size ]; then
                    best_size=$size
                    best_file=$cf
                fi
            done

            if [ -z "$best_file" ]; then
                debug "Looking for corosync configuration file. This may take a while..."
                # first hit of the file system search is good enough
                for f in $(find / -maxdepth $maxdepth -type f -name corosync.conf); do
                    best_file=$f
                    break
                done
            fi

            debug "Located corosync config file: $best_file"
            echo "$best_file"
            ;;
        openais)
            # TODO: Technically it could be anywhere :-/
            cf="/etc/ais/openais.conf"
            if [ -f $cf ]; then
                echo "$cf"
            fi
            ;;
        heartbeat)
            cf="/etc/ha.d/ha.cf"
            if [ -f $cf ]; then
                echo "$cf"
            fi
            ;;
        any)
            # Cluster type is undetermined. Don't complain, because this
            # might be a Pacemaker Remote node.
            ;;
        *)
            warning "Unknown cluster type: $1"
            ;;
    esac
}
#
# check for the major prereq for a) parameter parsing and b)
# parsing logs
#
# A trivially valid time is converted once while this file is loaded; an
# empty answer means get_time could not parse it, and loading is aborted.
#
t=$(get_time "12:00")
[ "$t" != "" ] ||
    fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)"
# vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80:
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jun 25, 3:29 AM (1 d, 1 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1952042
Default Alt Text
report.common.in (20 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment