Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4512753
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
19 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/tools/report.collector b/tools/report.collector
index 5910c51d48..df7e58da03 100644
--- a/tools/report.collector
+++ b/tools/report.collector
@@ -1,778 +1,784 @@
# Copyright (C) 2007 Dejan Muhamedagic <dmuhamedagic@suse.de>
# Almost everything as part of hb_report
# Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
# Cleanups, refactoring, extensions
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Make REPORT_HOME absolute: a value that does not start with "/" is taken
# to be relative to $HOME.  The value is matched with a quoted case word so
# that it is never word-split or glob-expanded before being tested.
case "$REPORT_HOME" in
    /*)
        debug "Using full path to working directory: $REPORT_HOME"
        ;;
    *)
        REPORT_HOME="$HOME/$REPORT_HOME"
        debug "Canonicalizing working directory path: $REPORT_HOME"
        ;;
esac

detect_host
# Locate the logd configuration file.
# Candidates, in order: any "logd.cf" strings embedded in
# $CRM_DAEMON_DIR/ha_logd (only when that binary is executable and
# strings(1) is available), then logd.cf and ha_logd.cf inside every
# directory passed as an argument.
# Prints the first candidate that is a regular file and returns 0;
# returns 1 when no candidate exists.
findlogdcf() {
    candidates=""
    if test -x $CRM_DAEMON_DIR/ha_logd && which strings > /dev/null 2>&1; then
        candidates=$(strings $CRM_DAEMON_DIR/ha_logd | grep 'logd\.cf')
    fi
    for d in "$@"; do
        candidates="$candidates $d/logd.cf $d/ha_logd.cf"
    done

    for f in $candidates; do
        [ -f "$f" ] || continue
        echo $f
        debug "Located logd.cf at: $f"
        return 0
    done

    debug "Could not determine logd.cf location"
    return 1
}
#
# find files newer than a and older than b
#
# Succeed when the arguments, joined by spaces, form a non-empty string
# made up of decimal digits only.
# The whole string is matched (not line by line), so an embedded newline
# cannot let a value such as "12<newline>abc" pass, and no external
# process is needed.
isnumber() {
    case "$*" in
        '' | *[!0-9]*) return 1 ;;
        *) return 0 ;;
    esac
}
touchfile() {
t=`mktemp` &&
perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' &&
echo $t
}
# Remove the timestamp reference files recorded in $from_stamp and
# $to_stamp (when set) and clear both variables.
find_files_clean() {
    if [ -n "$from_stamp" ]; then
        rm -f "$from_stamp"
    fi
    if [ -n "$to_stamp" ]; then
        rm -f "$to_stamp"
    fi
    from_stamp=""
    to_stamp=""
}
# List regular files modified within a time window.
#   $1 - whitespace-separated list of directories (missing ones are skipped)
#   $2 - start of the window, seconds since the epoch (required, > 0)
#   $3 - end of the window, seconds since the epoch (optional)
# Prints the files newer than $2 and, when $3 is a positive number, not
# newer than $3.  Prints nothing when no directory exists or when $2 is
# not usable.  Temporary reference files are removed before returning.
find_files() {
    from_time=$2
    to_time=$3

    # Keep only the arguments that are existing directories
    dirs=
    for d in $1; do
        if [ -d $d ]; then
            dirs="$dirs $d"
        fi
    done
    if [ -z "$dirs" ]; then
        return
    fi

    if ! isnumber "$from_time" || ! [ "$from_time" -gt 0 ]; then
        warning "sorry, can't find files in [ $1 ] based on time if you don't supply time"
        return
    fi

    # Make sure the reference files go away even if the shell exits early
    trap find_files_clean 0
    from_stamp=$(touchfile $from_time) || {
        warning "sorry, can't create temporary file for find_files"
        return
    }
    findexp="-newer $from_stamp"

    if isnumber "$to_time" && [ "$to_time" -gt 0 ]; then
        to_stamp=$(touchfile $to_time) || {
            warning "sorry, can't create temporary file for find_files"
            find_files_clean
            return
        }
        findexp="$findexp ! -newer $to_stamp"
    fi

    find $dirs -type f $findexp
    find_files_clean
    trap "" 0
}
#
# check permissions of files/dirs
#
pl_checkperms() {
perl -e '
# check permissions and ownership
# uid and gid are numeric
# everything must match exactly
# no error checking! (file should exist, etc)
($filename, $perms, $in_uid, $in_gid) = @ARGV;
($mode,$uid,$gid) = (stat($filename))[2,4,5];
$p=sprintf("%04o", $mode & 07777);
$p ne $perms and exit(1);
$uid ne $in_uid and exit(1);
$gid ne $in_gid and exit(1);
' $*
}
# Print the third colon-separated field of the getent(1) entry for key $2
# in database $1 (the numeric id for the passwd and group databases).
# Prints nothing when getent returns no entry.
num_id() {
    getent $1 $2 |
        awk 'BEGIN { FS = ":" } { print $3 }'
}
# Verify that a name was resolved to an id.
#   $1 - the user or group name that was looked up
#   $2 - the lookup result
# Returns 0 when $2 is non-empty; otherwise prints "<name>: id not found"
# and returns 1.
chk_id() {
    if [ -z "$2" ]; then
        echo "$1: id not found"
        return 1
    fi
    return 0
}
# Check a list of filesystem objects read from stdin.
# Each input line has the form:  <type> <path> <mode> <user> <group>
# where <type> is a test(1) letter such as d or f.  A line is reported on
# stdout when the path has the wrong type or does not exist, when the user
# or group cannot be resolved, or when mode/ownership differ.
check_perms() {
    while read -r type f p uid gid; do
        # Ignore blank lines instead of reporting a bogus empty entry
        [ -n "$type" ] || continue
        # The path is quoted so glob characters in it are taken literally
        [ "-$type" "$f" ] || {
            echo "$f wrong type or doesn't exist"
            continue
        }
        n_uid=$(num_id passwd "$uid")
        chk_id "$uid" "$n_uid" || continue
        n_gid=$(num_id group "$gid")
        chk_id "$gid" "$n_gid" || continue
        pl_checkperms "$f" "$p" "$n_uid" "$n_gid" || {
            echo "wrong permissions or ownership for $f:"
            ls -ld "$f"
        }
    done
}
#
# coredumps
#
# Work out which program produced the core file $1 and print its path.
# First gdb is asked: it is started on an arbitrary executable together
# with the core, and the program name is taken from the fifth word of its
# "Core was generated" line (first character and trailing . ' : removed).
# If that yields nothing, the word following "from" in the file(1) output
# is used instead (quote characters removed) and resolved with which(1).
# The resulting name is looked up in PATH and then in $CRM_DAEMON_DIR.
# Prints the path on stdout when found; otherwise only a warning is issued.
findbinary() {
random_binary=`which cat 2>/dev/null` # suppose we are lucky
binary=`gdb $random_binary $1 < /dev/null 2>/dev/null |
grep 'Core was generated' | awk '{print $5}' |
sed "s/^.//;s/[.':]*$//"`
if [ x = x"$binary" ]; then
debug "Could not detect the program name for core $1 from the gdb output; will try with file(1)"
binary=$(file $1 | awk '/from/{
for( i=1; i<=NF; i++ )
if( $i == "from" ) {
print $(i+1)
break
}
}')
# Strip the single quotes and backticks surrounding the name
binary=`echo $binary | tr -d "'"`
binary=$(echo $binary | tr -d '`')
if [ "$binary" ]; then
binary=`which $binary 2>/dev/null`
fi
fi
if [ x = x"$binary" ]; then
warning "Could not find the program path for core $1"
return
fi
# Prefer a match in PATH, then fall back to the daemon directory
fullpath=`which $binary 2>/dev/null`
if [ x = x"$fullpath" ]; then
if [ -x $CRM_DAEMON_DIR/$binary ]; then
echo $CRM_DAEMON_DIR/$binary
debug "Found the program at $CRM_DAEMON_DIR/$binary for core $1"
else
warning "Could not find the program path for core $1"
fi
else
echo $fullpath
debug "Found the program at $fullpath for core $1"
fi
}
# Print gdb backtraces for every core file given as an argument.
# For each core whose program can be identified by findbinary, a summary
# backtrace and a detailed ("full") backtrace are written to stdout between
# marker lines.  Cores whose program cannot be found are skipped.
# When BT_OPTS is set it replaces the gdb command used for both passes.
getbt() {
    which gdb > /dev/null 2>&1 || {
        warning "Please install gdb to get backtraces"
        return
    }
    for corefile; do
        absbinpath=$(findbinary "$corefile")
        [ x = x"$absbinpath" ] && continue
        echo "====================== start backtrace ======================"
        ls -l "$corefile"
        # Summary first...
        # The gdb command is quoted so that it always reaches gdb as the
        # single argument of -ex, whether it comes from BT_OPTS or not.
        gdb -batch -n -quiet -ex "${BT_OPTS:-thread apply all bt}" -ex quit \
            "$absbinpath" "$corefile" 2>/dev/null
        echo "====================== start detail ======================"
        # Now the unreadable details...
        gdb -batch -n -quiet -ex "${BT_OPTS:-thread apply all bt full}" -ex quit \
            "$absbinpath" "$corefile" 2>/dev/null
        echo "======================= end backtrace ======================="
    done
}
# Collect cluster configuration and live state into a directory.
#   $1    - cluster type (cman, corosync, openais, heartbeat, ...)
#   $2    - target directory
#   $3... - configuration files or directories to copy when they exist
# Also records the node UUID and, when a crmd process is running, the
# crm_mon output, the live CIB and the membership, marking the node as
# RUNNING (otherwise STOPPED).  A copied CIB is verified and rendered as
# text.
getconfig() {
    cluster=$1; shift
    target=$1; shift

    # "$@" keeps each path as one word even if it contains whitespace
    for cf in "$@"; do
        if [ -e "$cf" ]; then
            cp -a "$cf" "$target/"
        fi
    done

    crm_uuid -r > "$target/$HB_UUID_F" 2>&1

    # The pattern is quoted so the shell cannot glob-expand [c]rmd
    if ps -ef | grep -E -qs '[c]rmd'; then
        crm_mon -1 2>&1 | grep -v '^Last upd' > "$target/$CRM_MON_F"
        cibadmin -Ql 2>/dev/null > "$target/${CIB_F}.live"
        case $cluster in
            cman)             crm_node -p --cman > "$target/$MEMBERSHIP_F" 2>&1 ;;
            corosync|openais) crm_node -p --openais > "$target/$MEMBERSHIP_F" 2>&1 ;;
            heartbeat)        crm_node -p --heartbeat > "$target/$MEMBERSHIP_F" 2>&1 ;;
            *)                crm_node -p > "$target/$MEMBERSHIP_F" 2>&1 ;;
        esac
        echo "$host" > "$target/RUNNING"
    else
        echo "$host" > "$target/STOPPED"
    fi

    if [ -f "$target/$CIB_F" ]; then
        crm_verify -V -x "$target/$CIB_F" > "$target/$CRM_VERIFY_F" 2>&1
        CIB_file="$target/$CIB_F" crm configure show > "$target/$CIB_TXT_F" 2>&1
    fi
}
#
# remove values of sensitive attributes
#
# this is not proper xml parsing, but it will work under the
# circumstances
# Filter stdin to stdout, replacing the value="..." of every XML element
# whose name="..." matches one of the patterns in $SANITIZE with "****".
# With an empty $SANITIZE the input is passed through unchanged.
# The body runs in a subshell so that disabling globbing and rewriting the
# positional parameters cannot affect the caller.
sanitize_xml_attrs() (
    # Patterns such as "passw.*" must reach sed as written, not be expanded
    # against files in the current directory.
    set -f
    set --
    for patt in $SANITIZE; do
        set -- "$@" -e "/name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/"
    done
    if [ $# -eq 0 ]; then
        cat
    else
        sed "$@"
    fi
)
# Filter stdin to stdout, masking the fifth and all later fields of every
# line whose first field is "stonith_host" with "****".
# All other lines are printed as they are.
sanitize_hacf() {
    awk '
        $1 == "stonith_host" {
            i = 5
            while (i <= NF) {
                $i = "****"
                i++
            }
        }
        { print }
    '
}
# Remove the temporary files recorded in $tmp and $ref (when set) and
# clear both variables.
sanitize_one_clean() {
    if [ -n "$tmp" ]; then
        rm -f "$tmp"
    fi
    tmp=""
    if [ -n "$ref" ]; then
        rm -f "$ref"
    fi
    ref=""
}
# Mask sensitive values in file $1, in place, keeping its modification time.
# Does nothing when $SANITIZE is empty.  Files whose name ends in gz or bz2
# are decompressed, filtered and recompressed with gzip/bzip2; a file named
# ha.cf is filtered with sanitize_hacf, everything else with
# sanitize_xml_attrs.
# NOTE(review): the file is replaced by a file created with mktemp, so its
# mode and ownership presumably become those of the temporary file - confirm.
sanitize() {
file=$1
compress=""
if [ -z "$SANITIZE" ]; then
return
fi
# Pick the (de)compression commands from the file name suffix
echo $file | grep -qs 'gz$' && compress=gzip
echo $file | grep -qs 'bz2$' && compress=bzip2
if [ "$compress" ]; then
decompress="$compress -dc"
else
compress=cat
decompress=cat
fi
# Remove the temporary files if the shell exits before we are done
trap sanitize_one_clean 0
tmp=`mktemp`
ref=`mktemp`
if [ -z "$tmp" -o -z "$ref" ]; then
sanitize_one_clean
fatal "cannot create temporary files"
fi
touch -r $file $ref # save the mtime
# Both branches read $file on stdin and write the result to $tmp
if [ "`basename $file`" = ha.cf ]; then
sanitize_hacf
else
$decompress | sanitize_xml_attrs | $compress
fi < $file > $tmp
mv $tmp $file
# note: cleaning $tmp up is still needed even after it's renamed
# because its temp directory is still there.
# Restore the modification time saved in $ref
touch -r $ref $file
sanitize_one_clean
trap "" 0
}
#
# get some system info
#
# Print a one-line description of the distribution.
# Uses "lsb_release -d" when lsb_release is available.  Otherwise the first
# of /etc/debian_version, /etc/slackware-version and /etc/*-release that ls
# can list is used, and the name and contents of the first regular file
# among them are printed.  A warning is issued when nothing is found.
distro() {
if
which lsb_release >/dev/null 2>&1
then
lsb_release -d
debug "Using lsb_release for distribution info"
return
fi
# || and && have equal precedence and associate left to right, so the
# block below runs as soon as any one of the three ls calls succeeds.
relf=`ls /etc/debian_version 2>/dev/null` ||
relf=`ls /etc/slackware-version 2>/dev/null` ||
relf=`ls -d /etc/*-release 2>/dev/null` && {
for f in $relf; do
test -f $f && {
echo "`ls $f` `cat $f`"
debug "Found `echo $relf | tr '\n' ' '` distribution release file(s)"
return
}
done
}
warning "No lsb_release, no /etc/*-release, no /etc/debian_version: no distro information"
}
# Print the installed packages and verify the ones named as arguments.
# The package manager is the first of dpkg, rpm, pkg_info and pkginfo found
# with which(1); its name is stored in $pkg_mgr and printed.  For deb and
# rpm systems every argument that is an installed package is additionally
# verified (debsums / rpm --verify).  Issues a warning and prints nothing
# when no known package manager is found.
pkg_ver() {
    found_mgr=""
    for probe in dpkg:deb rpm:rpm pkg_info:pkg_info pkginfo:pkginfo; do
        # Each entry is <command to look for>:<name of the manager>
        if which ${probe%%:*} >/dev/null 2>&1 ; then
            found_mgr=${probe#*:}
            break
        fi
    done
    if [ -z "$found_mgr" ]; then
        warning "Unknown package manager"
        return
    fi
    pkg_mgr=$found_mgr

    debug "The package manager is: $pkg_mgr"
    echo "The package manager is: $pkg_mgr"

    if [ "$pkg_mgr" = deb ]; then
        # for Linux .deb based systems
        dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W | sort
        for pkg in $*; do
            if dpkg-query -W $pkg 2>/dev/null ; then
                debug "Verifying installation of: $pkg"
                echo "Verifying installation of: $pkg"
                debsums -s $pkg 2>/dev/null
            fi
        done
    elif [ "$pkg_mgr" = rpm ]; then
        rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' | sort
        for pkg in $*; do
            if rpm -q $pkg >/dev/null 2>&1 ; then
                debug "Verifying installation of: $pkg"
                echo "Verifying installation of: $pkg"
                rpm --verify $pkg 2>&1
            fi
        done
    elif [ "$pkg_mgr" = pkg_info ]; then
        pkg_info
    elif [ "$pkg_mgr" = pkginfo ]; then
        pkginfo | awk '{print $3}' # format?
    fi
}
# Find core files modified between $1 and $2 (seconds since the epoch) in
# $CRM_CORE_DIRS, archive a copy of them via shrink, and print their
# backtraces on stdout.  Only files whose name starts with "core" count.
getbacktraces() {
    debug "Looking for backtraces: $*"
    flist=$(
        for f in $(find_files "$CRM_CORE_DIRS" $1 $2); do
            # Portable prefix test (no GNU-only "expr match")
            case $(basename $f) in
                core*) echo $f ;;
            esac
        done)
    if [ "$flist" ]; then
        for core in $flist; do
            log "Found core file: `ls -al $core`"
        done
        # Make a copy of them in case we need more data later
        # Luckily they compress well
        # (POSIX redirection: "&>" would background mkdir under plain sh)
        mkdir cores > /dev/null 2>&1
        cp -a $flist cores/
        shrink cores
        rm -rf cores
        # Now get as much as we can from them automagically
        for f in $flist; do
            getbt $f
        done
    fi
}
# Copy the policy engine input files modified between $1 and $2 (seconds
# since the epoch) from $PE_STATE_DIR into directory $3, keeping the last
# component of $PE_STATE_DIR as their parent directory.
getpeinputs() {
    pe_parent=$(dirname $PE_STATE_DIR)
    # Paths are made relative to the parent of $PE_STATE_DIR for tar
    flist=$(find_files $PE_STATE_DIR $1 $2 | sed "s,$pe_parent/,,g")
    if [ -z "$flist" ]; then
        return
    fi
    (cd $pe_parent && tar cf - $flist) | (cd $3 && tar xf -)
    debug "found `echo $flist | wc -w` pengine input files in $PE_STATE_DIR"
}
# Decode the blackbox files modified between $1 and $2 (seconds since the
# epoch) found in $BLACKBOX_DIR.  The qb-blackbox output for each file,
# including its error output, is written to $3/<file name>.blackbox.
getblackboxes() {
    flist=$(find_files $BLACKBOX_DIR $1 $2)
    for bb in $flist; do
        bb_short=$(basename "$bb")
        # POSIX redirection: under plain sh "&>" would run qb-blackbox in
        # the background and leave the output file empty.
        qb-blackbox "$bb" > "$3/${bb_short}.blackbox" 2>&1
        info "Extracting contents of blackbox: $bb_short"
    done
}
#
# some basic system info and stats
#
# Print basic platform and cluster software version information.
#   $1    - cluster type (openais, cman, corosync, heartbeat)
#   $2... - package names handed on to pkg_ver
sys_info() {
    cluster=$1; shift
    echo "Platform: `uname`"
    echo "Kernel release: `uname -r`"
    echo "Architecture: `uname -m`"
    if [ "$(uname)" = Linux ]; then
        echo "Distribution: `distro`"
    fi
    cibadmin --version 2>&1
    cibadmin -! 2>&1
    # Select on the saved cluster type: after the shift above, $1 is the
    # first package name, not the cluster type.
    case $cluster in
        openais)
            : echo "openais version: how?"
            ;;
        cman)
            cman_tool -V
            /usr/sbin/corosync -v 2>&1
            ;;
        corosync)
            /usr/sbin/corosync -v 2>&1
            ;;
        heartbeat)
            echo "heartbeat version: $("$CRM_DAEMON_DIR/heartbeat" -V 2>&1)"
            ;;
    esac
    # Cluster glue version hash (if available)
    stonith -V 2>/dev/null
    # Resource agents version hash
    echo "resource-agents: `grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs`"
    pkg_ver "$@"
}
# Print a snapshot of the system state: host name, uptime, process lists,
# network interfaces and addresses, ARP table, CPU information (when /proc
# exists), SCSI and PCI devices, mounts and disk usage.
# Shell tracing is switched on for the duration, so the shell also writes
# each command to stderr before running it.
sys_stats() {
set -x
uname -n
uptime
ps axf
ps auxw
top -b -n 1
ifconfig -a
ip addr list
netstat -i
arp -an
test -d /proc && {
cat /proc/cpuinfo
}
lsscsi
lspci
mount
df
set +x
}
# Print DLM lockspace information: overview, history, status, config and a
# lock dump for every lockspace listed by "dlm_tool ls".
# Prints nothing when dlm_tool is not installed or no dlm_controld process
# is running.
dlm_dump() {
    which dlm_tool >/dev/null 2>&1 || return 0
    # grep -E rather than egrep: current GNU grep prints an obsolescence
    # warning for egrep on stderr, which would end up in the report.
    ps -ef | grep -E -qs '[d]lm_controld' || return 0

    echo "--- Lockspace overview:"
    dlm_tool ls -n
    echo "---Lockspace history:"
    dlm_tool dump
    echo "---Lockspace status:"
    dlm_tool status
    dlm_tool status -v
    echo "---Lockspace config:"
    dlm_tool dump_config
    dlm_tool log_plock
    # Lines look like "name <lockspace>"; X swallows the leading keyword
    dlm_tool ls | grep name |
    while read -r X N ; do
        echo "--- Lockspace $N:"
        dlm_tool lockdump "$N"
        dlm_tool lockdebug -svw "$N"
    done
}
# Succeed when getcfvar prints a non-empty value for the given arguments.
# Every argument is forwarded, so the configuration file that callers pass
# as the third argument is no longer dropped.
iscfvarset() {
    test -n "$(getcfvar "$@")"
}
# Succeed when the value getcfvar prints for the given arguments starts
# with true, y, yes, on or 1 (case-insensitive).
# grep -E is used instead of egrep, which prints an obsolescence warning on
# stderr with current GNU grep.
iscfvartrue() {
    getcfvar "$@" | grep -E -qsi "^(true|y|yes|on|1)"
}
# Decide whether logging goes through logd.
#   $1 - cluster type
#   $2 - cluster configuration file (also stored in $cf_file)
# For heartbeat the answer is yes when use_logd is true, or when none of
# logfacility, logfile and debugfile is set.  For every other cluster type
# the answer is simply whether use_logd is true.
uselogd() {
    cf_file=$2

    if [ "$1" != heartbeat ]; then
        iscfvartrue $1 use_logd $cf_file
        return
    fi

    # if use_logd true
    if iscfvartrue $1 use_logd $cf_file; then
        return 0
    fi
    # any explicit log option means logd is not used
    for opt in logfacility logfile debugfile; do
        if iscfvarset $1 $opt $cf_file; then
            return 1
        fi
    done
    # or none of the log options set
    return 0
}
# Print the log files that may hold cluster messages, one per line.
#   $1 - cluster type (cman, openais, corosync, heartbeat)
#   $2 - cluster configuration file
#   $3 - logd configuration file (may be empty)
# The configured log file is printed when it exists.  A marker message is
# then sent through syslog and findmsg is used to locate the files that
# contain it, as well as files with typical Pacemaker messages.
get_logfiles() {
    cf_type=$1
    cf_file="$2"
    cf_logd="$3"
    facility_var="logfacility"

    # uselogd reads the cluster type and configuration file from its
    # arguments, so they have to be passed on.
    if [ -f "$cf_logd" ] && uselogd $cf_type $cf_file; then
        cf_file="$cf_logd"
        cf_type="logd"
    fi

    debug "Reading $cf_type log settings"
    case $cf_type in
        cman|openais|corosync)
            debug "Reading log settings from $cf_file"
            if iscfvartrue $cf_type to_syslog $cf_file; then
                facility_var=syslog_facility
            fi
            if iscfvartrue $cf_type to_logfile $cf_file; then
                logfile=$(getcfvar $cf_type logfile $cf_file)
            fi
            ;;
        heartbeat|logd)
            debug "Reading log settings from $cf_file"
            if iscfvartrue $cf_type debug $cf_file; then
                logfile=$(getcfvar $cf_type debugfile $cf_file)
            else
                logfile=$(getcfvar $cf_type logfile $cf_file)
            fi
            ;;
        *)
            debug "Unknown cluster type: $cf_type"
            echo "/var/log/messages"
            ;;
    esac

    if [ -n "$logfile" ] && [ -f "$logfile" ]; then
        echo "$logfile"
    fi

    # Use the configured syslog facility, falling back to "daemon"
    if [ -z "$facility" ]; then
        facility=$(getcfvar $cf_type $facility_var $cf_file)
    fi
    facility=${facility:-daemon}

    # Always include system logs (if we can find them)
    msg="Mark:pcmk:`perl -e 'print time()'`"
    logger -p "$facility.info" "$msg" >/dev/null 2>&1
    sleep 2 # Give syslog time to catch up in case its busy
    findmsg 1 "$msg"

    # Initial pacemakerd logs and tracing might also go to a file (other than the syslog log file)
    findmsg 3 "Starting Pacemaker"

    # Make sure we get something from the Policy Engine
    findmsg 3 "Calculated Transition"
}
# Print the list of directories whose permissions must be checked, one per
# line in the form:  d <path> <mode> <user> <group>
# For the heartbeat cluster type ($1) the ccm state directory is added.
essential_files() {
    printf '%s\n' \
        "d $HA_STATE_DIR 0755 root root" \
        "d $PE_STATE_DIR 0750 hacluster haclient" \
        "d $CRM_CONFIG_DIR 0750 hacluster haclient" \
        "d $CRM_STATE_DIR 0750 hacluster haclient"
    if [ "$1" = heartbeat ]; then
        printf '%s\n' "d $HA_STATE_DIR/ccm 0750 hacluster haclient"
    fi
}
# Prepare the per-node working directory and work out the cluster type and
# configuration file locations.
debug "Initializing $REPORT_TARGET subdir"
if [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then
    # On a remote node never reuse an existing directory
    if [ -e "$REPORT_HOME/$REPORT_TARGET" ]; then
        warning "Directory $REPORT_HOME/$REPORT_TARGET already exists, using /tmp/$$/$REPORT_TARGET instead"
        REPORT_HOME=/tmp/$$
    fi
fi

mkdir -p "$REPORT_HOME/$REPORT_TARGET"
# Everything below writes relative to the current directory, so give up
# rather than scatter files somewhere else.
cd "$REPORT_HOME/$REPORT_TARGET" ||
    fatal "Cannot change to working directory $REPORT_HOME/$REPORT_TARGET"

case $CLUSTER in
    any) cluster=$(get_cluster_type) ;;
    *)   cluster=$CLUSTER ;;
esac

logd_cf=$(findlogdcf)
cluster_cf=$(find_cluster_cf $cluster)
if [ -z "$cluster_cf" ]; then
    warning "Could not determine the location of your cluster configuration"
fi
# Diff hunk: decide which log files to collect.
# After the change, SEARCH_LOGS=1 takes the files from get_logfiles and
# aborts when none are found; otherwise $extra_logs is used, aborting when
# that is empty as well.
# NOTE(review): the two fatal messages look swapped relative to their
# branches ("Logfile discovery disabled" is printed when discovery ran but
# found nothing) - confirm.
# NOTE(review): this hunk reads $extra_logs while the rest of the script
# reads $EXTRA_LOGS - confirm which name the caller sets.
if [ $SEARCH_LOGS = 1 ]; then
logfiles=`get_logfiles $cluster "$cluster_cf" "$logd_cf" | sort -u`
-fi
+ if [ -z "$logfiles" ]; then
+ fatal "Logfile discovery disabled, try specifying --logfile /some/path"
+ fi
-if [ -z "$logfiles" ]; then
+elif [ -z "$extra_logs" ]; then
fatal "Could not determine the location of your cluster logs, try specifying --logfile /some/path"
+
+else
+ logfiles="$extra_logs"
fi
+
# Collect system information, configuration, policy engine inputs,
# backtraces, blackboxes and cluster stack state into the current
# directory.
debug "Config: $cluster $cluster_cf $logd_cf $logfiles"

sys_info $cluster $PACKAGES > $SYSINFO_F
essential_files $cluster | check_perms > $PERMISSIONS_F 2>&1
getconfig $cluster "$REPORT_HOME/$REPORT_TARGET" "$cluster_cf" "$logd_cf" "$CRM_CONFIG_DIR/$CIB_F" "$HA_STATE_DIR/hostcache" "/etc/drbd.conf" "/etc/drbd.d" "/etc/booth"

getpeinputs $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET
getbacktraces $LOG_START $LOG_END > $REPORT_HOME/$REPORT_TARGET/$BT_F
getblackboxes $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET

case $cluster in
    cman|corosync)
        # grep -E instead of the obsolescent egrep
        if ps -ef | grep -E -qs '[c]orosync'; then
            # POSIX redirection: under plain sh "&>" would background the
            # command and leave the file empty
            corosync-blackbox > corosync-blackbox-live.txt 2>&1
        fi
        # corosync-fplay > corosync-blackbox.txt

        # Dump the corosync object database with whichever tool exists
        tool=$(pickfirst corosync-objctl corosync-cmapctl)
        case $tool in
            *objctl)  $tool -a > corosync.dump 2>/dev/null ;;
            *cmapctl) $tool > corosync.dump 2>/dev/null ;;
        esac
        corosync-quorumtool -s -i > corosync.quorum 2>&1
        ;;
esac

# Leave a marker file if this node is the current DC
dc=$(crm_mon -1 2>/dev/null | awk '/Current DC/ {print $3}')
if [ "$REPORT_TARGET" = "$dc" ]; then
    echo "$REPORT_TARGET" > DC
fi

dlm_dump > $DLM_DUMP_F 2>&1
sys_stats > $SYSSTATS_F 2>&1
debug "Sanitizing files: $SANITIZE"
#
# Mask sensitive values ('****') in the copied cluster configuration, the
# CIB files and the policy engine inputs.
#
cf=""
[ -z "$cluster_cf" ] || cf=$(basename $cluster_cf)
for f in $cf $CIB_F $CIB_TXT_F $CIB_F.live pengine/*; do
    # Skip names that did not materialise as regular files
    [ -f "$f" ] || continue
    sanitize $f
done
# Grab logs
# Each log file is cut down to the requested time window and stored under
# its base name; lines matching $LOG_PATTERNS are collected in $ANALYSIS_F.
start=$(date -d @${LOG_START} +"%F %T")
end=$(date -d @${LOG_END} +"%F %T")
debug "Gathering logs from $start to $end: $logfiles $EXTRA_LOGS"

trap '[ -z "$pattfile" ] || rm -f "$pattfile"' 0
pattfile=$(mktemp) || fatal "cannot create temporary files"
for p in $LOG_PATTERNS; do
    echo "$p"
done > "$pattfile"

# Write the header once and append below, so that the matches from one log
# file are not discarded when the next one is processed.
echo "Log patterns $REPORT_TARGET:" > $ANALYSIS_F

for l in $logfiles $EXTRA_LOGS; do
    b=$(basename $l)
    if [ ! -f "$l" ]; then
        # Not a file
        continue
    elif [ -f "$b" ]; then
        # We already have it
        continue
    fi
    dumplogset "$l" $LOG_START $LOG_END > "$b"
    grep -f "$pattfile" "$b" >> $ANALYSIS_F
done

if which journalctl > /dev/null 2>&1; then
    log "Including segment [$LOG_START-$LOG_END] from journald"
    journalctl --since "$start" --until "$end" > journal.log
    grep -f "$pattfile" journal.log >> $ANALYSIS_F
fi

rm -f "$pattfile"
trap "" 0
# Purge files containing no information
# A glob is used instead of parsing ls output, so names containing
# whitespace are handled as single entries.
for f in *; do
    # An unmatched glob leaves a literal "*" behind; dangling symlinks are
    # still considered, as they were when the names came from ls.
    [ -e "$f" ] || [ -L "$f" ] || continue
    if [ -d "$f" ]; then
        continue
    elif [ ! -s "$f" ]; then
        case $f in
            *core*)
                log "Detected empty core file: $f"
                ;;
            *)
                debug "Removing empty file: `ls -al "$f"`"
                rm -f "$f"
                ;;
        esac
    fi
done
# Parse for events
# Events from every log file are appended to $EVENTS_F; the file is removed
# first so that earlier contents do not leak into this run and so that one
# log file no longer overwrites the events of the previous one.
rm -f "$EVENTS_F"
for l in $logfiles $EXTRA_LOGS; do
    f=$(basename $l)
    node_events "$f" >> $EVENTS_F

    # Link the first logfile to a standard name if it doesn't yet exist
    if [ -e "$f" -a ! -e "$HALOG_F" ]; then
        ln -s "$f" "$HALOG_F"
    fi
done
# Hand the result over: on the node that started the collection the files
# simply stay in place; on any other node the report directory is streamed
# to stdout as a tar archive and, when REMOVE is 1, the working directory
# is deleted afterwards.
if [ -e "$REPORT_HOME/.env" ]; then
    debug "Localhost: $REPORT_MASTER $REPORT_TARGET"

elif [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then
    debug "Streaming report back to $REPORT_MASTER"
    (cd "$REPORT_HOME" && tar cf - "$REPORT_TARGET")
    if [ "$REMOVE" = "1" ]; then
        # Leave the directory before deleting it
        cd
        # ${VAR:?} aborts instead of running rm -rf on an empty path
        rm -rf -- "${REPORT_HOME:?}"
    fi
fi
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Wed, Jun 25, 6:47 AM (11 h, 8 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1952475
Default Alt Text
(19 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment