diff --git a/cts/benchmark/clubench.in b/cts/benchmark/clubench.in
index 6adbe46cb0..e65b60d62c 100644
--- a/cts/benchmark/clubench.in
+++ b/cts/benchmark/clubench.in
@@ -1,190 +1,200 @@
#!/bin/sh
#
+# Copyright 2010-2021 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.

SSHOPTS="-l root -o PasswordAuthentication=no -o ConnectTimeout=5"

msg() {
    echo "$@" >&2
}

usage() {
    echo "usage: $0 <dir>"
    echo "  dir: working directory (with the control file)"
    exit 0
}

[ $# -eq 0 ] && usage
WORKDIR=$1
test -d "$WORKDIR" || usage

CTSCTRL=~/.cts
CTRL=$WORKDIR/control
CSV=$WORKDIR/bench.csv
STATS=$WORKDIR/bench.stats

test -f $CTRL && . $CTRL

@datadir@/@PACKAGE@/tests/cts/cluster_test 500 || {
    msg "cluster_test failed"
    exit 1
}

test -f $CTSCTRL || {
    msg no CTS control file $CTSCTRL
    exit 1
}
. $CTSCTRL

: ${CTS_logfacility:=local7}
: ${CTS_stack:=corosync}
: ${CTS_logfile:="@CRM_LOG_DIR@/ha-log-bench"}
: ${CTS_adv:="--schema pacemaker-1.2 --clobber-cib -r"}
: ${RUNS:=3}
: ${CTSTESTS:="--benchmark"}
: ${CTSDIR:="@datadir@/@PACKAGE@/tests/cts"}
+: ${CTS_node_list:=""}
+: ${CTS_boot:=""}
+: ${CTS_stonith:=""}
+: ${CTS_stonith_args:=""}

-[ "$CTS_node_list" ] || {
+[ -n "$CTS_node_list" ] || {
    msg no node list specified
    exit 1
}

case "$CTS_stack" in
    corosync) CRM_REPORT_OPTS="--corosync";;
    *) msg "$CTS_stack: cluster stack not recognized"; exit 1;;
esac

CTSOPTS="--stack $CTS_stack --at-boot $CTS_boot $CTS_adv"
CTSOPTS="$CTSOPTS --facility $CTS_logfacility --logfile $CTS_logfile"

if [ "x$CTS_stonith" != "x" ]; then
    CTSOPTS="$CTSOPTS --stonith-type $CTS_stonith"
    [ "x$CTS_stonith_args" != "x" ] && CTSOPTS="$CTSOPTS --stonith-params \"$CTS_stonith_args\""
else
    CTSOPTS="$CTSOPTS --stonith 0"
fi

CTSOPTS="$CTSOPTS $CTSTESTS"

fibonacci() {
    F_LIMIT=$1
    F_N=2
    F_N_PREV=1
    while [ $F_N -le $F_LIMIT ]; do
        echo $F_N
        F_N_TMP=$F_N
        F_N=$((F_N+F_N_PREV))
        F_N_PREV=$F_N_TMP
    done
    [ $F_N_PREV -ne $F_LIMIT ] && echo $F_LIMIT
}
[ "$SERIES" ] || SERIES=$(fibonacci "$(echo $CTS_node_list | wc -w)")

get_nodes() {
    GN_C_NODES=$(echo $CTS_node_list | awk -v n="$1" '
        { for( i=1; i<=NF; i++ ) node[cnt++]=$i }
        END{for( i=0; i "$RC_ODIR/ctsrun.out" 2>&1 &
    ctspid=$!
    tail -f "$RC_ODIR/ctsrun.out" &
    tailpid=$!
    wait $ctspid
    kill $tailpid >/dev/null 2>&1
}

bench_re='CTS:.*runtime:'

diginfo() {
    DI_CTS_DIR="$1"
    DI_S="$2"
    filter="$3"
    (
    cd "$DI_CTS_DIR" || return
    for r in [0-9]*.tar.bz2; do
        tar xjf $r
        DI_D=$(basename "$r" .tar.bz2)
        for DI_V in $(grep "$bench_re" "$DI_D/ha-log.txt" | eval "$filter"); do
            DI_S="$DI_S,$DI_V"
        done
        rm -r "$DI_D"
    done
    echo $DI_S
    )
}

printheader() {
    diginfo $1 "" "awk '{print \$(NF-2)}'"
}

printstats() {
    diginfo $1 "$clusize" "awk '{print \$(NF)}'"
}

printmedians() {
    PM_F="$1"
    PM_S="$clusize"
    PM_MIDDLE=$((RUNS/2 + 1))
    set $(head -1 "$PM_F" | sed 's/,/ /g')
    PM_COLS=$#
    for PM_I in $(seq 2 $PM_COLS); do
        PM_V=$(awk -v i=$PM_I -F, '{print $i}' < $PM_F | sort -n | head -$PM_MIDDLE | tail -1)
        PM_S="$PM_S,$PM_V"
    done
    echo $PM_S
}

rm -f $CSV

tmpf=`mktemp`
test -f "$tmpf" || {
    msg "can't create temporary file"
    exit 1
}
trap "rm -f $tmpf" 0

for clusize in $SERIES; do
    nodes=`get_nodes $clusize`
    outdir=$WORKDIR/$clusize
    rm -rf $outdir
    mkdir -p $outdir
    rm -f $tmpf
    node_cleanup
    for i in `seq $RUNS`; do
        true > $CTS_logfile
        mkdir -p $outdir/$i
        runcts $outdir/$i
        mkreports $outdir/$i
        printstats $outdir/$i >> $tmpf
    done
    [ -f "$CSV" ] || printheader $outdir/1 > $CSV
    printmedians $tmpf >> $CSV
    cat $tmpf >> $STATS
    msg "Statistics for $clusize-node cluster saved"
done
msg "Tests done for series $SERIES, output in $CSV and $STATS"
diff --git a/cts/lxc_autogen.sh.in b/cts/lxc_autogen.sh.in
index 87d01124be..b758cde1b0 100644
--- a/cts/lxc_autogen.sh.in
+++ b/cts/lxc_autogen.sh.in
@@ -1,541 +1,545 @@
#!@BASH_PATH@
#
# Copyright 2013-2021 the Pacemaker project contributors
#
# The version control history for this file may have further details.
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
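Aside (not part of the patch): clubench.in sizes its benchmark series with the fibonacci() helper shown above. It emits the Fibonacci numbers 2, 3, 5, 8, ... up to the number of nodes found in $CTS_node_list (sourced from the ~/.cts control file) and finishes with the full node count, so larger clusters are sampled at a handful of sizes rather than every size. The sketch below is a self-contained illustration only; the ten-node list is invented.

    #!/bin/sh
    # Illustration of the cluster-size series used by clubench.in.
    # The node list here is a made-up example, not a real configuration.
    CTS_node_list="n01 n02 n03 n04 n05 n06 n07 n08 n09 n10"

    fibonacci() {
        F_LIMIT=$1
        F_N=2
        F_N_PREV=1
        while [ $F_N -le $F_LIMIT ]; do
            echo $F_N                    # benchmark a cluster of this many nodes
            F_N_TMP=$F_N
            F_N=$((F_N+F_N_PREV))
            F_N_PREV=$F_N_TMP
        done
        # finish with the full cluster size unless it was just emitted
        [ $F_N_PREV -ne $F_LIMIT ] && echo $F_LIMIT
    }

    # With the ten-node list above this prints: 2 3 5 8 10 (one per line).
    fibonacci "$(echo $CTS_node_list | wc -w)"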
#

containers="2"
download=0
share_configs=0
# different than default libvirt network in case this is run nested in a KVM instance
addr="192.168.123.1"
restore=0
restore_pcmk=0
restore_all=0
generate=0
key_gen=0
cib=0
anywhere=0
add_clone=0
verify=0
working_dir="@CRM_CONFIG_CTS@/lxc"
run_dirs="/run /var/run /usr/var/run"

# must be on one line b/c used inside quotes
SSH_RSYNC_OPTS="-o UserKnownHostsFile=/dev/null -o BatchMode=yes -o StrictHostKeyChecking=no"

function helptext() {
    echo "lxc_autogen.sh - generate libvirt LXC containers for testing purposes"
    echo ""
    echo "Usage: lxc-autogen [options]"
    echo ""
    echo "Options:"
    echo "-g, --generate         Generate libvirt LXC environment in directory this script is run from"
    echo "-k, --key-gen          Generate Pacemaker Remote key only"
    echo "-r, --restore-libvirt  Restore the default network and libvirt config to before this script ran"
    echo "-p, --restore-cib      Remove CIB entries this script generated"
    echo "-R, --restore-all      Restore both libvirt and CIB, and clean working directory"
    echo "                       (libvirt xml files are not removed, so resource can be stopped properly)"
    echo ""
    echo "-A, --allow-anywhere   Allow the containers to live anywhere in the cluster"
    echo "-a, --add-cib          Add CIB entries to create a guest node for each LXC instance"
    echo "-C, --add-clone        Add promotable clone resource shared between LXC guest nodes"
    echo "-d, --download-agent   Download and install latest VirtualDomain agent"
    echo "-s, --share-configs    Synchronize on all known cluster nodes"
    echo "-c, --containers       Specify number of containers to generate (default $containers; used with -g)"
    echo "-n, --network          Network to override libvirt default (example: -n 192.168.123.1; used with -g)"
    echo "-v, --verify           Verify environment is capable of running LXC"
    echo ""
    exit "$1"
}

while true ; do
    case "$1" in
        --help|-h|-\?) helptext 0;;
        -c|--containers) containers="$2"; shift; shift;;
        -d|--download-agent) download=1; shift;;
        -s|--share-configs) share_configs=1; shift;;
        -n|--network) addr="$2"; shift; shift;;
        -r|--restore-libvirt) restore=1; shift;;
        -p|--restore-cib) restore_pcmk=1; shift;;
        -R|--restore-all)
            restore_all=1
            restore=1
            restore_pcmk=1
            shift;;
        -g|--generate) generate=1; key_gen=1; shift;;
        -k|--key-gen) key_gen=1; shift;;
        -a|--add-cib) cib=1; shift;;
        -A|--allow-anywhere) anywhere=1; shift;;
        -C|--add-clone) add_clone=1; shift;;
        -m|--add-master)
            echo "-m/--add-master is deprecated (use -C/--add-clone instead)"
            echo
            add_clone=1
            shift
            ;;
        -v|--verify) verify=1; shift;;
        "") break;;
        *) helptext 1;;
    esac
done

if [ $verify -eq 1 ]; then
    # verify virsh tool is available and that
    # we can connect to lxc driver.
    virsh -c lxc:/// list --all > /dev/null 2>&1
    if [ $? -ne 0 ]; then
        echo "libvirt LXC driver must be installed (could not connect 'virsh -c lxc:///')"
        # yum install -y libvirt-daemon-driver-lxc libvirt-daemon-lxc libvirt-login-shell
        exit 1
    fi

    SELINUX=$(getenforce)
    if [ "$SELINUX" != "Enforcing" ] && [ "$SELINUX" != "Permissive" ]; then
        echo "SELINUX must be set to permissive or enforcing mode"
        exit 1
    fi

    ps ax | grep "[l]ibvirtd"
    if [ $? -ne 0 ]; then
        echo "libvirtd must be running"
        exit 1
    fi

    which rsync > /dev/null 2>&1
    if [ $? -ne 0 ]; then
        echo "rsync must be installed"
    fi

    which pacemaker-remoted > /dev/null 2>&1
    if [ $? -ne 0 ]; then
        echo "pacemaker-remoted must be installed"
    fi
fi

#strip last digits off addr
addr="$(echo "$addr" | awk -F.
'{print $1"."$2"."$3}')" node_exec() { ssh -o StrictHostKeyChecking=no \ -o ConnectTimeout=30 \ -o BatchMode=yes \ -l root -T "$@" } this_node() { crm_node -n } other_nodes() { crm_node -l | awk "\$2 != \"$(this_node)\" {print \$2}" } make_directory() { # argument must be full path DIR="$1" mkdir -p "$DIR" if [ $share_configs -eq 1 ]; then for node in $(other_nodes); do node_exec "$node" mkdir -p "$DIR" done fi } sync_file() { TARGET="$1" if [ $share_configs -eq 1 ]; then for node in $(other_nodes); do rsync -ave "ssh $SSH_RSYNC_OPTS" "$TARGET" "${node}:${TARGET}" done fi } download_agent() { wget https://raw.github.com/ClusterLabs/resource-agents/master/heartbeat/VirtualDomain chmod 755 VirtualDomain mv -f VirtualDomain /usr/lib/ocf/resource.d/heartbeat/VirtualDomain sync_file /usr/lib/ocf/resource.d/heartbeat/VirtualDomain } set_network() { rm -f cur_network.xml cat << END >> cur_network.xml default 41ebdb84-7134-1111-a136-91f0f1119225 END sync_file "${working_dir}"/cur_network.xml } distribute_configs() { for node in $(other_nodes); do rsync -ave "ssh $SSH_RSYNC_OPTS" "${working_dir}"/lxc*.xml "${node}:${working_dir}" rsync -ave "ssh $SSH_RSYNC_OPTS" "${working_dir}"/lxc*-filesystem "${node}:${working_dir}" done } start_network() { NODE="$1" node_exec "$NODE" <<-EOF cd "$working_dir" virsh net-info default >/dev/null 2>&1 if [ \$? -eq 0 ]; then if [ ! -f restore_default.xml ]; then virsh net-dumpxml default > restore_default.xml fi virsh net-destroy default virsh net-undefine default fi virsh net-define cur_network.xml virsh net-start default virsh net-autostart default EOF } start_network_all() { start_network "$(this_node)" if [ $share_configs -eq 1 ]; then for node in $(other_nodes); do start_network "$node" done fi } add_hosts_entry() { IP="$1" HNAME="$2" echo "$IP $HNAME" >>/etc/hosts if [ $share_configs -eq 1 ]; then for node in $(other_nodes); do node_exec "$node" "echo $IP $HNAME >>/etc/hosts" done fi } generate_key() { if [ ! -e /etc/pacemaker/authkey ]; then make_directory /etc/pacemaker dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1 sync_file /etc/pacemaker/authkey fi } generate() { set_network # Generate libvirt domains in xml for (( c=1; c <= containers; c++ )) do # Clean any previous definition rm -rf "lxc$c.xml" "lxc$c-filesystem" # Create a basic filesystem with run directories for dir in $run_dirs; do mkdir -p "lxc$c-filesystem/$dir" done # Create libvirt definition suffix=$((10 + c)) prefix="$(echo "$addr" | awk -F. '{print $1"."$2}')" subnet="$(echo "$addr" | awk -F. '{print $3}')" while [ $suffix -gt 255 ]; do subnet=$((subnet + 1)) suffix=$((subnet - 255)) done cip="$prefix.$subnet.$suffix" cat << END >> lxc$c.xml lxc$c 200704 exe $working_dir/lxc$c-filesystem/launch-helper END for dir in $run_dirs; do cat << END >> lxc$c.xml END done cat << END >> lxc$c.xml END # Create CIB definition rm -f "container$c.cib" cat << END >> "container$c.cib" END # Create container init rm -f "lxc$c-filesystem/launch-helper" cat << END >> "lxc$c-filesystem/launch-helper" #!@BASH_PATH@ ip -f inet addr add "$cip/24" dev eth0 ip link set eth0 up ip route add default via "$addr.1" hostname "lxc$c" df > "$working_dir/lxc$c-filesystem/disk_usage.txt" export PCMK_debugfile="@CRM_LOG_DIR@/pacemaker_remote_lxc$c.log" /usr/sbin/pacemaker-remoted END chmod 711 "lxc$c-filesystem/launch-helper" add_hosts_entry "$cip" "lxc$c" done # Create CIB fragment for a promotable clone resource cat << END > lxc-clone.cib END } +container_names() { + find . 
-maxdepth 1 -name "lxc*.xml" -exec basename -s .xml "{}" ";" +} + apply_cib_clone() { cibadmin -Q > cur.cib export CIB_file=cur.cib cibadmin -o resources -Mc -x lxc-clone.cib - for tmp in $(find . -maxdepth 1 -name "lxc*.xml" | sed -e 's/\.xml//g'); do + for tmp in $(container_names); do echo "" > tmp_constraint cibadmin -o constraints -Mc -x tmp_constraint done # Make sure the version changes even if the content doesn't cibadmin -B unset CIB_file cibadmin --replace -o configuration --xml-file cur.cib rm -f cur.cib } apply_cib_entries() { cibadmin -Q > cur.cib export CIB_file=cur.cib for tmp in container*.cib; do cibadmin -o resources -Mc -x "$tmp" remote_node="$(grep remote-node "${tmp}" | sed -n -e 's/^.*value=\"\(.*\)\".*/\1/p')" if [ $anywhere -eq 0 ]; then crm_resource -M -r "${tmp//\.cib/}" -H "$(this_node)" fi echo "" > tmp_constraint # Ignore any failure; this constraint is just to help with CTS when the # connectivity resources (which fail the guest nodes) are in use. cibadmin -o constraints -Mc -x tmp_constraint > /dev/null 2>&1 for rsc in $(crm_resource -l | grep rsc_ ); do echo "" > tmp_constraint cibadmin -o constraints -Mc -x tmp_constraint > /dev/null 2>&1 done rm -f tmp_constraint done # Make sure the version changes even if the content doesn't cibadmin -B unset CIB_file cibadmin --replace -o configuration --xml-file cur.cib rm -f cur.cib } restore_cib() { cibadmin -Q > cur.cib export CIB_file=cur.cib - for tmp in $(find . -maxdepth 1 -name "lxc*.xml" | sed -e 's/\.xml//g'); do + for tmp in $(container_names); do echo "" > tmp_constraint cibadmin -o constraints -D -x tmp_constraint echo "" > tmp_constraint cibadmin -o constraints -D -x tmp_constraint for rsc in $(crm_resource -l | grep rsc_ ); do echo "" > tmp_constraint cibadmin -o constraints -D -x tmp_constraint done rm -f tmp_constraint done cibadmin -o resources -D -x lxc-clone.cib for tmp in container*.cib; do tmp="${tmp//\.cib/}" crm_resource -U -r "$tmp" -H "$(this_node)" crm_resource -D -r "$tmp" -t primitive done # Make sure the version changes even if the content doesn't cibadmin -B unset CIB_file cibadmin --replace -o configuration --xml-file cur.cib rm -f cur.cib # Allow the cluster to stabilize before continuing crm_resource --wait # Purge nodes from caches and CIB status section - for tmp in $(find . -maxdepth 1 -name "lxc*.xml" | sed -e 's/\.xml//g'); do + for tmp in $(container_names); do crm_node --force --remove "$tmp" done } restore_network() { NODE="$1" node_exec "$NODE" <<-EOF cd "$working_dir" for tmp in \$(ls lxc*.xml | sed -e 's/\.xml//g'); do virsh -c lxc:/// destroy "\$tmp" >/dev/null 2>&1 virsh -c lxc:/// undefine "\$tmp" >/dev/null 2>&1 sed -i.bak "/...\....\....\..* \${tmp}/d" /etc/hosts done virsh net-destroy default >/dev/null 2>&1 virsh net-undefine default >/dev/null 2>&1 if [ -f restore_default.xml ]; then virsh net-define restore_default.xml virsh net-start default rm restore_default.xml fi EOF echo "Containers destroyed and default network restored on $NODE" } restore_libvirt() { restore_network "$(this_node)" if [ $share_configs -eq 1 ]; then for node in $(other_nodes); do restore_network "$node" done fi } restore_files() { find . -maxdepth 1 -not -name "lxc*.xml" -a -not -name . 
-exec rm -rf "{}" ";" if [ $share_configs -eq 1 ]; then for node in $(other_nodes); do node_exec "$node" rm -rf \ "$working_dir"/lxc*-filesystem \ "$working_dir"/cur_network.xml done fi } make_directory "$working_dir" cd "$working_dir" || exit 1 # Generate files as requested if [ $download -eq 1 ]; then download_agent fi if [ $key_gen -eq 1 ]; then generate_key fi if [ $generate -eq 1 ]; then generate fi if [ $share_configs -eq 1 ]; then distribute_configs fi if [ $generate -eq 1 ]; then start_network_all fi # Update cluster as requested if [ $cib -eq 1 ]; then apply_cib_entries fi if [ $add_clone -eq 1 ]; then apply_cib_clone fi # Restore original state as requested if [ $restore_pcmk -eq 1 ]; then restore_cib fi if [ $restore -eq 1 ]; then restore_libvirt fi if [ $restore_all -eq 1 ]; then restore_files fi # vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/cts/scheduler/summary/bug-1572-1.summary b/cts/scheduler/summary/bug-1572-1.summary index 6abedea530..c572db21d5 100644 --- a/cts/scheduler/summary/bug-1572-1.summary +++ b/cts/scheduler/summary/bug-1572-1.summary @@ -1,85 +1,85 @@ Current cluster status: * Node List: * Online: [ arc-dknightlx arc-tkincaidlx.wsicorp.com ] * Full List of Resources: * Clone Set: ms_drbd_7788 [rsc_drbd_7788] (promotable): * Promoted: [ arc-tkincaidlx.wsicorp.com ] * Unpromoted: [ arc-dknightlx ] * Resource Group: grp_pgsql_mirror: * fs_mirror (ocf:heartbeat:Filesystem): Started arc-tkincaidlx.wsicorp.com * pgsql_5555 (ocf:heartbeat:pgsql): Started arc-tkincaidlx.wsicorp.com * IPaddr_147_81_84_133 (ocf:heartbeat:IPaddr): Started arc-tkincaidlx.wsicorp.com Transition Summary: - * Stop rsc_drbd_7788:0 ( Unpromoted arc-dknightlx ) due to node availability + * Stop rsc_drbd_7788:0 ( Unpromoted arc-dknightlx ) due to node availability * Restart rsc_drbd_7788:1 ( Promoted arc-tkincaidlx.wsicorp.com ) due to resource definition change * Restart fs_mirror ( arc-tkincaidlx.wsicorp.com ) due to required ms_drbd_7788 notified * Restart pgsql_5555 ( arc-tkincaidlx.wsicorp.com ) due to required fs_mirror start * Restart IPaddr_147_81_84_133 ( arc-tkincaidlx.wsicorp.com ) due to required pgsql_5555 start Executing Cluster Transition: * Pseudo action: ms_drbd_7788_pre_notify_demote_0 * Pseudo action: grp_pgsql_mirror_stop_0 * Resource action: IPaddr_147_81_84_133 stop on arc-tkincaidlx.wsicorp.com * Resource action: rsc_drbd_7788:0 notify on arc-dknightlx * Resource action: rsc_drbd_7788:1 notify on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_confirmed-pre_notify_demote_0 * Resource action: pgsql_5555 stop on arc-tkincaidlx.wsicorp.com * Resource action: fs_mirror stop on arc-tkincaidlx.wsicorp.com * Pseudo action: grp_pgsql_mirror_stopped_0 * Pseudo action: ms_drbd_7788_demote_0 * Resource action: rsc_drbd_7788:1 demote on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_demoted_0 * Pseudo action: ms_drbd_7788_post_notify_demoted_0 * Resource action: rsc_drbd_7788:0 notify on arc-dknightlx * Resource action: rsc_drbd_7788:1 notify on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_confirmed-post_notify_demoted_0 * Pseudo action: ms_drbd_7788_pre_notify_stop_0 * Resource action: rsc_drbd_7788:0 notify on arc-dknightlx * Resource action: rsc_drbd_7788:1 notify on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_confirmed-pre_notify_stop_0 * Pseudo action: ms_drbd_7788_stop_0 * Resource action: rsc_drbd_7788:0 stop on arc-dknightlx * Resource action: rsc_drbd_7788:1 stop on arc-tkincaidlx.wsicorp.com * 
Pseudo action: ms_drbd_7788_stopped_0 * Cluster action: do_shutdown on arc-dknightlx * Pseudo action: ms_drbd_7788_post_notify_stopped_0 * Pseudo action: ms_drbd_7788_confirmed-post_notify_stopped_0 * Pseudo action: ms_drbd_7788_pre_notify_start_0 * Pseudo action: ms_drbd_7788_confirmed-pre_notify_start_0 * Pseudo action: ms_drbd_7788_start_0 * Resource action: rsc_drbd_7788:1 start on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_running_0 * Pseudo action: ms_drbd_7788_post_notify_running_0 * Resource action: rsc_drbd_7788:1 notify on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_confirmed-post_notify_running_0 * Pseudo action: ms_drbd_7788_pre_notify_promote_0 * Resource action: rsc_drbd_7788:1 notify on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_confirmed-pre_notify_promote_0 * Pseudo action: ms_drbd_7788_promote_0 * Resource action: rsc_drbd_7788:1 promote on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_promoted_0 * Pseudo action: ms_drbd_7788_post_notify_promoted_0 * Resource action: rsc_drbd_7788:1 notify on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_confirmed-post_notify_promoted_0 * Pseudo action: grp_pgsql_mirror_start_0 * Resource action: fs_mirror start on arc-tkincaidlx.wsicorp.com * Resource action: pgsql_5555 start on arc-tkincaidlx.wsicorp.com * Resource action: pgsql_5555 monitor=30000 on arc-tkincaidlx.wsicorp.com * Resource action: IPaddr_147_81_84_133 start on arc-tkincaidlx.wsicorp.com * Resource action: IPaddr_147_81_84_133 monitor=25000 on arc-tkincaidlx.wsicorp.com * Pseudo action: grp_pgsql_mirror_running_0 Revised Cluster Status: * Node List: * Online: [ arc-dknightlx arc-tkincaidlx.wsicorp.com ] * Full List of Resources: * Clone Set: ms_drbd_7788 [rsc_drbd_7788] (promotable): * Promoted: [ arc-tkincaidlx.wsicorp.com ] * Stopped: [ arc-dknightlx ] * Resource Group: grp_pgsql_mirror: * fs_mirror (ocf:heartbeat:Filesystem): Started arc-tkincaidlx.wsicorp.com * pgsql_5555 (ocf:heartbeat:pgsql): Started arc-tkincaidlx.wsicorp.com * IPaddr_147_81_84_133 (ocf:heartbeat:IPaddr): Started arc-tkincaidlx.wsicorp.com diff --git a/cts/scheduler/summary/bug-1572-2.summary b/cts/scheduler/summary/bug-1572-2.summary index 7d4921dc36..012ca78dd6 100644 --- a/cts/scheduler/summary/bug-1572-2.summary +++ b/cts/scheduler/summary/bug-1572-2.summary @@ -1,61 +1,61 @@ Current cluster status: * Node List: * Online: [ arc-dknightlx arc-tkincaidlx.wsicorp.com ] * Full List of Resources: * Clone Set: ms_drbd_7788 [rsc_drbd_7788] (promotable): * Promoted: [ arc-tkincaidlx.wsicorp.com ] * Unpromoted: [ arc-dknightlx ] * Resource Group: grp_pgsql_mirror: * fs_mirror (ocf:heartbeat:Filesystem): Started arc-tkincaidlx.wsicorp.com * pgsql_5555 (ocf:heartbeat:pgsql): Started arc-tkincaidlx.wsicorp.com * IPaddr_147_81_84_133 (ocf:heartbeat:IPaddr): Started arc-tkincaidlx.wsicorp.com Transition Summary: - * Stop rsc_drbd_7788:0 ( Unpromoted arc-dknightlx ) due to node availability + * Stop rsc_drbd_7788:0 ( Unpromoted arc-dknightlx ) due to node availability * Demote rsc_drbd_7788:1 ( Promoted -> Unpromoted arc-tkincaidlx.wsicorp.com ) * Stop fs_mirror ( arc-tkincaidlx.wsicorp.com ) due to node availability * Stop pgsql_5555 ( arc-tkincaidlx.wsicorp.com ) due to node availability * Stop IPaddr_147_81_84_133 ( arc-tkincaidlx.wsicorp.com ) due to node availability Executing Cluster Transition: * Pseudo action: ms_drbd_7788_pre_notify_demote_0 * Pseudo action: grp_pgsql_mirror_stop_0 * Resource action: IPaddr_147_81_84_133 stop on 
arc-tkincaidlx.wsicorp.com * Resource action: rsc_drbd_7788:0 notify on arc-dknightlx * Resource action: rsc_drbd_7788:1 notify on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_confirmed-pre_notify_demote_0 * Resource action: pgsql_5555 stop on arc-tkincaidlx.wsicorp.com * Resource action: fs_mirror stop on arc-tkincaidlx.wsicorp.com * Pseudo action: grp_pgsql_mirror_stopped_0 * Pseudo action: ms_drbd_7788_demote_0 * Resource action: rsc_drbd_7788:1 demote on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_demoted_0 * Pseudo action: ms_drbd_7788_post_notify_demoted_0 * Resource action: rsc_drbd_7788:0 notify on arc-dknightlx * Resource action: rsc_drbd_7788:1 notify on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_confirmed-post_notify_demoted_0 * Pseudo action: ms_drbd_7788_pre_notify_stop_0 * Resource action: rsc_drbd_7788:0 notify on arc-dknightlx * Resource action: rsc_drbd_7788:1 notify on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_confirmed-pre_notify_stop_0 * Pseudo action: ms_drbd_7788_stop_0 * Resource action: rsc_drbd_7788:0 stop on arc-dknightlx * Pseudo action: ms_drbd_7788_stopped_0 * Cluster action: do_shutdown on arc-dknightlx * Pseudo action: ms_drbd_7788_post_notify_stopped_0 * Resource action: rsc_drbd_7788:1 notify on arc-tkincaidlx.wsicorp.com * Pseudo action: ms_drbd_7788_confirmed-post_notify_stopped_0 Revised Cluster Status: * Node List: * Online: [ arc-dknightlx arc-tkincaidlx.wsicorp.com ] * Full List of Resources: * Clone Set: ms_drbd_7788 [rsc_drbd_7788] (promotable): * Unpromoted: [ arc-tkincaidlx.wsicorp.com ] * Stopped: [ arc-dknightlx ] * Resource Group: grp_pgsql_mirror: * fs_mirror (ocf:heartbeat:Filesystem): Stopped * pgsql_5555 (ocf:heartbeat:pgsql): Stopped * IPaddr_147_81_84_133 (ocf:heartbeat:IPaddr): Stopped diff --git a/cts/scheduler/summary/bug-5059.summary b/cts/scheduler/summary/bug-5059.summary index a33a2f60a2..c555d1dfb5 100644 --- a/cts/scheduler/summary/bug-5059.summary +++ b/cts/scheduler/summary/bug-5059.summary @@ -1,77 +1,77 @@ Current cluster status: * Node List: * Node gluster03.h: standby * Online: [ gluster01.h gluster02.h ] * OFFLINE: [ gluster04.h ] * Full List of Resources: * Clone Set: ms_stateful [g_stateful] (promotable): * Resource Group: g_stateful:0: * p_stateful1 (ocf:pacemaker:Stateful): Unpromoted gluster01.h * p_stateful2 (ocf:pacemaker:Stateful): Stopped * Resource Group: g_stateful:1: * p_stateful1 (ocf:pacemaker:Stateful): Unpromoted gluster02.h * p_stateful2 (ocf:pacemaker:Stateful): Stopped * Stopped: [ gluster03.h gluster04.h ] * Clone Set: c_dummy [p_dummy1]: * Started: [ gluster01.h gluster02.h ] Transition Summary: - * Promote p_stateful1:0 ( Unpromoted -> Promoted gluster01.h ) - * Promote p_stateful2:0 ( Stopped -> Promoted gluster01.h ) + * Promote p_stateful1:0 ( Unpromoted -> Promoted gluster01.h ) + * Promote p_stateful2:0 ( Stopped -> Promoted gluster01.h ) * Start p_stateful2:1 ( gluster02.h ) Executing Cluster Transition: * Pseudo action: ms_stateful_pre_notify_start_0 * Resource action: iptest delete on gluster02.h * Resource action: ipsrc2 delete on gluster02.h * Resource action: p_stateful1:0 notify on gluster01.h * Resource action: p_stateful1:1 notify on gluster02.h * Pseudo action: ms_stateful_confirmed-pre_notify_start_0 * Pseudo action: ms_stateful_start_0 * Pseudo action: g_stateful:0_start_0 * Resource action: p_stateful2:0 start on gluster01.h * Pseudo action: g_stateful:1_start_0 * Resource action: p_stateful2:1 start on gluster02.h * Pseudo 
action: g_stateful:0_running_0 * Pseudo action: g_stateful:1_running_0 * Pseudo action: ms_stateful_running_0 * Pseudo action: ms_stateful_post_notify_running_0 * Resource action: p_stateful1:0 notify on gluster01.h * Resource action: p_stateful2:0 notify on gluster01.h * Resource action: p_stateful1:1 notify on gluster02.h * Resource action: p_stateful2:1 notify on gluster02.h * Pseudo action: ms_stateful_confirmed-post_notify_running_0 * Pseudo action: ms_stateful_pre_notify_promote_0 * Resource action: p_stateful1:0 notify on gluster01.h * Resource action: p_stateful2:0 notify on gluster01.h * Resource action: p_stateful1:1 notify on gluster02.h * Resource action: p_stateful2:1 notify on gluster02.h * Pseudo action: ms_stateful_confirmed-pre_notify_promote_0 * Pseudo action: ms_stateful_promote_0 * Pseudo action: g_stateful:0_promote_0 * Resource action: p_stateful1:0 promote on gluster01.h * Resource action: p_stateful2:0 promote on gluster01.h * Pseudo action: g_stateful:0_promoted_0 * Pseudo action: ms_stateful_promoted_0 * Pseudo action: ms_stateful_post_notify_promoted_0 * Resource action: p_stateful1:0 notify on gluster01.h * Resource action: p_stateful2:0 notify on gluster01.h * Resource action: p_stateful1:1 notify on gluster02.h * Resource action: p_stateful2:1 notify on gluster02.h * Pseudo action: ms_stateful_confirmed-post_notify_promoted_0 * Resource action: p_stateful1:1 monitor=10000 on gluster02.h * Resource action: p_stateful2:1 monitor=10000 on gluster02.h Revised Cluster Status: * Node List: * Node gluster03.h: standby * Online: [ gluster01.h gluster02.h ] * OFFLINE: [ gluster04.h ] * Full List of Resources: * Clone Set: ms_stateful [g_stateful] (promotable): * Promoted: [ gluster01.h ] * Unpromoted: [ gluster02.h ] * Clone Set: c_dummy [p_dummy1]: * Started: [ gluster01.h gluster02.h ] diff --git a/cts/scheduler/summary/bug-cl-5212.summary b/cts/scheduler/summary/bug-cl-5212.summary index 48cb54bedc..e7a6e26833 100644 --- a/cts/scheduler/summary/bug-cl-5212.summary +++ b/cts/scheduler/summary/bug-cl-5212.summary @@ -1,69 +1,69 @@ Current cluster status: * Node List: * Node srv01: UNCLEAN (offline) * Node srv02: UNCLEAN (offline) * Online: [ srv03 ] * Full List of Resources: * Resource Group: grpStonith1: * prmStonith1-1 (stonith:external/ssh): Started srv02 (UNCLEAN) * Resource Group: grpStonith2: * prmStonith2-1 (stonith:external/ssh): Started srv01 (UNCLEAN) * Resource Group: grpStonith3: * prmStonith3-1 (stonith:external/ssh): Started srv01 (UNCLEAN) * Clone Set: msPostgresql [pgsql] (promotable): * pgsql (ocf:pacemaker:Stateful): Unpromoted srv02 (UNCLEAN) * pgsql (ocf:pacemaker:Stateful): Promoted srv01 (UNCLEAN) * Unpromoted: [ srv03 ] * Clone Set: clnPingd [prmPingd]: * prmPingd (ocf:pacemaker:ping): Started srv02 (UNCLEAN) * prmPingd (ocf:pacemaker:ping): Started srv01 (UNCLEAN) * Started: [ srv03 ] Transition Summary: * Stop prmStonith1-1 ( srv02 ) blocked * Stop prmStonith2-1 ( srv01 ) blocked * Stop prmStonith3-1 ( srv01 ) due to node availability (blocked) - * Stop pgsql:0 ( Unpromoted srv02 ) due to node availability (blocked) - * Stop pgsql:1 ( Promoted srv01 ) due to node availability (blocked) + * Stop pgsql:0 ( Unpromoted srv02 ) due to node availability (blocked) + * Stop pgsql:1 ( Promoted srv01 ) due to node availability (blocked) * Stop prmPingd:0 ( srv02 ) due to node availability (blocked) * Stop prmPingd:1 ( srv01 ) due to node availability (blocked) Executing Cluster Transition: * Pseudo action: grpStonith1_stop_0 * Pseudo action: 
grpStonith1_start_0 * Pseudo action: grpStonith2_stop_0 * Pseudo action: grpStonith2_start_0 * Pseudo action: grpStonith3_stop_0 * Pseudo action: msPostgresql_pre_notify_stop_0 * Pseudo action: clnPingd_stop_0 * Resource action: pgsql notify on srv03 * Pseudo action: msPostgresql_confirmed-pre_notify_stop_0 * Pseudo action: msPostgresql_stop_0 * Pseudo action: clnPingd_stopped_0 * Pseudo action: msPostgresql_stopped_0 * Pseudo action: msPostgresql_post_notify_stopped_0 * Resource action: pgsql notify on srv03 * Pseudo action: msPostgresql_confirmed-post_notify_stopped_0 Revised Cluster Status: * Node List: * Node srv01: UNCLEAN (offline) * Node srv02: UNCLEAN (offline) * Online: [ srv03 ] * Full List of Resources: * Resource Group: grpStonith1: * prmStonith1-1 (stonith:external/ssh): Started srv02 (UNCLEAN) * Resource Group: grpStonith2: * prmStonith2-1 (stonith:external/ssh): Started srv01 (UNCLEAN) * Resource Group: grpStonith3: * prmStonith3-1 (stonith:external/ssh): Started srv01 (UNCLEAN) * Clone Set: msPostgresql [pgsql] (promotable): * pgsql (ocf:pacemaker:Stateful): Unpromoted srv02 (UNCLEAN) * pgsql (ocf:pacemaker:Stateful): Promoted srv01 (UNCLEAN) * Unpromoted: [ srv03 ] * Clone Set: clnPingd [prmPingd]: * prmPingd (ocf:pacemaker:ping): Started srv02 (UNCLEAN) * prmPingd (ocf:pacemaker:ping): Started srv01 (UNCLEAN) * Started: [ srv03 ] diff --git a/cts/scheduler/summary/bug-cl-5247.summary b/cts/scheduler/summary/bug-cl-5247.summary index 056e526490..67ad0c3ded 100644 --- a/cts/scheduler/summary/bug-cl-5247.summary +++ b/cts/scheduler/summary/bug-cl-5247.summary @@ -1,87 +1,87 @@ Using the original execution date of: 2015-08-12 02:53:40Z Current cluster status: * Node List: * Online: [ bl460g8n3 bl460g8n4 ] * GuestOnline: [ pgsr01@bl460g8n3 ] * Full List of Resources: * prmDB1 (ocf:heartbeat:VirtualDomain): Started bl460g8n3 * prmDB2 (ocf:heartbeat:VirtualDomain): FAILED bl460g8n4 * Resource Group: grpStonith1: * prmStonith1-2 (stonith:external/ipmi): Started bl460g8n4 * Resource Group: grpStonith2: * prmStonith2-2 (stonith:external/ipmi): Started bl460g8n3 * Resource Group: master-group: * vip-master (ocf:heartbeat:Dummy): FAILED pgsr02 * vip-rep (ocf:heartbeat:Dummy): FAILED pgsr02 * Clone Set: msPostgresql [pgsql] (promotable): * Promoted: [ pgsr01 ] * Stopped: [ bl460g8n3 bl460g8n4 ] Transition Summary: * Fence (off) pgsr02 (resource: prmDB2) 'guest is unclean' * Stop prmDB2 ( bl460g8n4 ) due to node availability * Recover vip-master ( pgsr02 -> pgsr01 ) * Recover vip-rep ( pgsr02 -> pgsr01 ) - * Stop pgsql:0 ( Promoted pgsr02 ) due to node availability + * Stop pgsql:0 ( Promoted pgsr02 ) due to node availability * Stop pgsr02 ( bl460g8n4 ) due to node availability Executing Cluster Transition: * Resource action: vip-master monitor on pgsr01 * Resource action: vip-rep monitor on pgsr01 * Pseudo action: msPostgresql_pre_notify_demote_0 * Resource action: pgsr01 monitor on bl460g8n4 * Resource action: pgsr02 stop on bl460g8n4 * Resource action: pgsr02 monitor on bl460g8n3 * Resource action: prmDB2 stop on bl460g8n4 * Resource action: pgsql notify on pgsr01 * Pseudo action: msPostgresql_confirmed-pre_notify_demote_0 * Pseudo action: msPostgresql_demote_0 * Pseudo action: stonith-pgsr02-off on pgsr02 * Pseudo action: pgsql_post_notify_stop_0 * Pseudo action: pgsql_demote_0 * Pseudo action: msPostgresql_demoted_0 * Pseudo action: msPostgresql_post_notify_demoted_0 * Resource action: pgsql notify on pgsr01 * Pseudo action: msPostgresql_confirmed-post_notify_demoted_0 * Pseudo 
action: msPostgresql_pre_notify_stop_0 * Pseudo action: master-group_stop_0 * Pseudo action: vip-rep_stop_0 * Resource action: pgsql notify on pgsr01 * Pseudo action: msPostgresql_confirmed-pre_notify_stop_0 * Pseudo action: msPostgresql_stop_0 * Pseudo action: vip-master_stop_0 * Pseudo action: pgsql_stop_0 * Pseudo action: msPostgresql_stopped_0 * Pseudo action: master-group_stopped_0 * Pseudo action: master-group_start_0 * Resource action: vip-master start on pgsr01 * Resource action: vip-rep start on pgsr01 * Pseudo action: msPostgresql_post_notify_stopped_0 * Pseudo action: master-group_running_0 * Resource action: vip-master monitor=10000 on pgsr01 * Resource action: vip-rep monitor=10000 on pgsr01 * Resource action: pgsql notify on pgsr01 * Pseudo action: msPostgresql_confirmed-post_notify_stopped_0 * Pseudo action: pgsql_notified_0 * Resource action: pgsql monitor=9000 on pgsr01 Using the original execution date of: 2015-08-12 02:53:40Z Revised Cluster Status: * Node List: * Online: [ bl460g8n3 bl460g8n4 ] * GuestOnline: [ pgsr01@bl460g8n3 ] * Full List of Resources: * prmDB1 (ocf:heartbeat:VirtualDomain): Started bl460g8n3 * prmDB2 (ocf:heartbeat:VirtualDomain): FAILED * Resource Group: grpStonith1: * prmStonith1-2 (stonith:external/ipmi): Started bl460g8n4 * Resource Group: grpStonith2: * prmStonith2-2 (stonith:external/ipmi): Started bl460g8n3 * Resource Group: master-group: * vip-master (ocf:heartbeat:Dummy): FAILED [ pgsr01 pgsr02 ] * vip-rep (ocf:heartbeat:Dummy): FAILED [ pgsr01 pgsr02 ] * Clone Set: msPostgresql [pgsql] (promotable): * Promoted: [ pgsr01 ] * Stopped: [ bl460g8n3 bl460g8n4 ] diff --git a/cts/scheduler/summary/bug-lf-2606.summary b/cts/scheduler/summary/bug-lf-2606.summary index e0b7ebf0e6..004788e80b 100644 --- a/cts/scheduler/summary/bug-lf-2606.summary +++ b/cts/scheduler/summary/bug-lf-2606.summary @@ -1,46 +1,46 @@ 1 of 5 resource instances DISABLED and 0 BLOCKED from further action due to failure Current cluster status: * Node List: * Node node2: UNCLEAN (online) * Online: [ node1 ] * Full List of Resources: * rsc_stonith (stonith:null): Started node1 * rsc1 (ocf:pacemaker:Dummy): FAILED node2 (disabled) * rsc2 (ocf:pacemaker:Dummy): Started node2 * Clone Set: ms3 [rsc3] (promotable): * Promoted: [ node2 ] * Unpromoted: [ node1 ] Transition Summary: * Fence (reboot) node2 'rsc1 failed there' * Stop rsc1 ( node2 ) due to node availability * Move rsc2 ( node2 -> node1 ) - * Stop rsc3:1 ( Promoted node2 ) due to node availability + * Stop rsc3:1 ( Promoted node2 ) due to node availability Executing Cluster Transition: * Pseudo action: ms3_demote_0 * Fencing node2 (reboot) * Pseudo action: rsc1_stop_0 * Pseudo action: rsc2_stop_0 * Pseudo action: rsc3:1_demote_0 * Pseudo action: ms3_demoted_0 * Pseudo action: ms3_stop_0 * Resource action: rsc2 start on node1 * Pseudo action: rsc3:1_stop_0 * Pseudo action: ms3_stopped_0 * Resource action: rsc2 monitor=10000 on node1 Revised Cluster Status: * Node List: * Online: [ node1 ] * OFFLINE: [ node2 ] * Full List of Resources: * rsc_stonith (stonith:null): Started node1 * rsc1 (ocf:pacemaker:Dummy): Stopped (disabled) * rsc2 (ocf:pacemaker:Dummy): Started node1 * Clone Set: ms3 [rsc3] (promotable): * Unpromoted: [ node1 ] * Stopped: [ node2 ] diff --git a/cts/scheduler/summary/bug-pm-12.summary b/cts/scheduler/summary/bug-pm-12.summary index 7b811d1a02..c4f3adb908 100644 --- a/cts/scheduler/summary/bug-pm-12.summary +++ b/cts/scheduler/summary/bug-pm-12.summary @@ -1,57 +1,57 @@ Current cluster status: * Node List: * 
Online: [ node-a node-b ] * Full List of Resources: * Clone Set: ms-sf [group] (promotable) (unique): * Resource Group: group:0: * stateful-1:0 (ocf:heartbeat:Stateful): Unpromoted node-b * stateful-2:0 (ocf:heartbeat:Stateful): Unpromoted node-b * Resource Group: group:1: * stateful-1:1 (ocf:heartbeat:Stateful): Promoted node-a * stateful-2:1 (ocf:heartbeat:Stateful): Promoted node-a Transition Summary: - * Restart stateful-2:0 ( Unpromoted node-b ) due to resource definition change - * Restart stateful-2:1 ( Promoted node-a ) due to resource definition change + * Restart stateful-2:0 ( Unpromoted node-b ) due to resource definition change + * Restart stateful-2:1 ( Promoted node-a ) due to resource definition change Executing Cluster Transition: * Pseudo action: ms-sf_demote_0 * Pseudo action: group:1_demote_0 * Resource action: stateful-2:1 demote on node-a * Pseudo action: group:1_demoted_0 * Pseudo action: ms-sf_demoted_0 * Pseudo action: ms-sf_stop_0 * Pseudo action: group:0_stop_0 * Resource action: stateful-2:0 stop on node-b * Pseudo action: group:1_stop_0 * Resource action: stateful-2:1 stop on node-a * Pseudo action: group:0_stopped_0 * Pseudo action: group:1_stopped_0 * Pseudo action: ms-sf_stopped_0 * Pseudo action: ms-sf_start_0 * Pseudo action: group:0_start_0 * Resource action: stateful-2:0 start on node-b * Pseudo action: group:1_start_0 * Resource action: stateful-2:1 start on node-a * Pseudo action: group:0_running_0 * Pseudo action: group:1_running_0 * Pseudo action: ms-sf_running_0 * Pseudo action: ms-sf_promote_0 * Pseudo action: group:1_promote_0 * Resource action: stateful-2:1 promote on node-a * Pseudo action: group:1_promoted_0 * Pseudo action: ms-sf_promoted_0 Revised Cluster Status: * Node List: * Online: [ node-a node-b ] * Full List of Resources: * Clone Set: ms-sf [group] (promotable) (unique): * Resource Group: group:0: * stateful-1:0 (ocf:heartbeat:Stateful): Unpromoted node-b * stateful-2:0 (ocf:heartbeat:Stateful): Unpromoted node-b * Resource Group: group:1: * stateful-1:1 (ocf:heartbeat:Stateful): Promoted node-a * stateful-2:1 (ocf:heartbeat:Stateful): Promoted node-a diff --git a/cts/scheduler/summary/bundle-order-fencing.summary b/cts/scheduler/summary/bundle-order-fencing.summary index 387c05532a..8cb40718db 100644 --- a/cts/scheduler/summary/bundle-order-fencing.summary +++ b/cts/scheduler/summary/bundle-order-fencing.summary @@ -1,220 +1,220 @@ Using the original execution date of: 2017-09-12 10:51:59Z Current cluster status: * Node List: * Node controller-0: UNCLEAN (offline) * Online: [ controller-1 controller-2 ] * GuestOnline: [ galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-1@controller-1 rabbitmq-bundle-2@controller-2 redis-bundle-1@controller-1 redis-bundle-2@controller-2 ] * Full List of Resources: * Container bundle set: rabbitmq-bundle [192.168.24.1:8787/rhosp12/openstack-rabbitmq-docker:pcmklatest]: * rabbitmq-bundle-0 (ocf:heartbeat:rabbitmq-cluster): FAILED controller-0 (UNCLEAN) * rabbitmq-bundle-1 (ocf:heartbeat:rabbitmq-cluster): Started controller-1 * rabbitmq-bundle-2 (ocf:heartbeat:rabbitmq-cluster): Started controller-2 * Container bundle set: galera-bundle [192.168.24.1:8787/rhosp12/openstack-mariadb-docker:pcmklatest]: * galera-bundle-0 (ocf:heartbeat:galera): FAILED Promoted controller-0 (UNCLEAN) * galera-bundle-1 (ocf:heartbeat:galera): Promoted controller-1 * galera-bundle-2 (ocf:heartbeat:galera): Promoted controller-2 * Container bundle set: redis-bundle 
[192.168.24.1:8787/rhosp12/openstack-redis-docker:pcmklatest]: * redis-bundle-0 (ocf:heartbeat:redis): FAILED Promoted controller-0 (UNCLEAN) * redis-bundle-1 (ocf:heartbeat:redis): Unpromoted controller-1 * redis-bundle-2 (ocf:heartbeat:redis): Unpromoted controller-2 * ip-192.168.24.7 (ocf:heartbeat:IPaddr2): Started controller-0 (UNCLEAN) * ip-10.0.0.109 (ocf:heartbeat:IPaddr2): Started controller-0 (UNCLEAN) * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.1.19 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.3.19 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-0 (UNCLEAN) * Container bundle set: haproxy-bundle [192.168.24.1:8787/rhosp12/openstack-haproxy-docker:pcmklatest]: * haproxy-bundle-docker-0 (ocf:heartbeat:docker): Started controller-0 (UNCLEAN) * haproxy-bundle-docker-1 (ocf:heartbeat:docker): Started controller-2 * haproxy-bundle-docker-2 (ocf:heartbeat:docker): Started controller-1 * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-2 * stonith-fence_ipmilan-525400efba5c (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-5254003e8e97 (stonith:fence_ipmilan): Started controller-0 (UNCLEAN) * stonith-fence_ipmilan-5254000dcb3f (stonith:fence_ipmilan): Started controller-0 (UNCLEAN) Transition Summary: * Fence (off) redis-bundle-0 (resource: redis-bundle-docker-0) 'guest is unclean' * Fence (off) rabbitmq-bundle-0 (resource: rabbitmq-bundle-docker-0) 'guest is unclean' * Fence (off) galera-bundle-0 (resource: galera-bundle-docker-0) 'guest is unclean' * Fence (reboot) controller-0 'peer is no longer part of the cluster' * Stop rabbitmq-bundle-docker-0 ( controller-0 ) due to node availability * Stop rabbitmq-bundle-0 ( controller-0 ) due to unrunnable rabbitmq-bundle-docker-0 start * Stop rabbitmq:0 ( rabbitmq-bundle-0 ) due to unrunnable rabbitmq-bundle-docker-0 start * Stop galera-bundle-docker-0 ( controller-0 ) due to node availability * Stop galera-bundle-0 ( controller-0 ) due to unrunnable galera-bundle-docker-0 start - * Stop galera:0 ( Promoted galera-bundle-0 ) due to unrunnable galera-bundle-docker-0 start + * Stop galera:0 ( Promoted galera-bundle-0 ) due to unrunnable galera-bundle-docker-0 start * Stop redis-bundle-docker-0 ( controller-0 ) due to node availability * Stop redis-bundle-0 ( controller-0 ) due to unrunnable redis-bundle-docker-0 start - * Stop redis:0 ( Promoted redis-bundle-0 ) due to unrunnable redis-bundle-docker-0 start + * Stop redis:0 ( Promoted redis-bundle-0 ) due to unrunnable redis-bundle-docker-0 start * Promote redis:1 ( Unpromoted -> Promoted redis-bundle-1 ) * Move ip-192.168.24.7 ( controller-0 -> controller-2 ) * Move ip-10.0.0.109 ( controller-0 -> controller-1 ) * Move ip-172.17.4.11 ( controller-0 -> controller-1 ) * Stop haproxy-bundle-docker-0 ( controller-0 ) due to node availability * Move stonith-fence_ipmilan-5254003e8e97 ( controller-0 -> controller-1 ) * Move stonith-fence_ipmilan-5254000dcb3f ( controller-0 -> controller-2 ) Executing Cluster Transition: * Pseudo action: rabbitmq-bundle-clone_pre_notify_stop_0 * Pseudo action: rabbitmq-bundle-0_stop_0 * Resource action: rabbitmq-bundle-0 monitor on controller-2 * Resource action: rabbitmq-bundle-0 monitor on controller-1 * Resource action: rabbitmq-bundle-1 monitor on controller-2 * Resource action: rabbitmq-bundle-2 monitor on controller-1 * Pseudo action: galera-bundle-0_stop_0 * Resource action: galera-bundle-0 monitor on 
controller-2 * Resource action: galera-bundle-0 monitor on controller-1 * Resource action: galera-bundle-1 monitor on controller-2 * Resource action: galera-bundle-2 monitor on controller-1 * Resource action: redis cancel=45000 on redis-bundle-1 * Resource action: redis cancel=60000 on redis-bundle-1 * Pseudo action: redis-bundle-master_pre_notify_demote_0 * Pseudo action: redis-bundle-0_stop_0 * Resource action: redis-bundle-0 monitor on controller-2 * Resource action: redis-bundle-0 monitor on controller-1 * Resource action: redis-bundle-1 monitor on controller-2 * Resource action: redis-bundle-2 monitor on controller-1 * Pseudo action: stonith-fence_ipmilan-5254003e8e97_stop_0 * Pseudo action: stonith-fence_ipmilan-5254000dcb3f_stop_0 * Pseudo action: haproxy-bundle_stop_0 * Pseudo action: redis-bundle_demote_0 * Pseudo action: galera-bundle_demote_0 * Pseudo action: rabbitmq-bundle_stop_0 * Pseudo action: rabbitmq-bundle_start_0 * Fencing controller-0 (reboot) * Resource action: rabbitmq notify on rabbitmq-bundle-1 * Resource action: rabbitmq notify on rabbitmq-bundle-2 * Pseudo action: rabbitmq-bundle-clone_confirmed-pre_notify_stop_0 * Pseudo action: rabbitmq-bundle-docker-0_stop_0 * Pseudo action: galera-bundle-master_demote_0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-pre_notify_demote_0 * Pseudo action: redis-bundle-master_demote_0 * Pseudo action: haproxy-bundle-docker-0_stop_0 * Resource action: stonith-fence_ipmilan-5254003e8e97 start on controller-1 * Resource action: stonith-fence_ipmilan-5254000dcb3f start on controller-2 * Pseudo action: stonith-redis-bundle-0-off on redis-bundle-0 * Pseudo action: stonith-rabbitmq-bundle-0-off on rabbitmq-bundle-0 * Pseudo action: stonith-galera-bundle-0-off on galera-bundle-0 * Pseudo action: haproxy-bundle_stopped_0 * Pseudo action: rabbitmq_post_notify_stop_0 * Pseudo action: rabbitmq-bundle-clone_stop_0 * Pseudo action: galera_demote_0 * Pseudo action: galera-bundle-master_demoted_0 * Pseudo action: redis_post_notify_stop_0 * Pseudo action: redis_demote_0 * Pseudo action: redis-bundle-master_demoted_0 * Pseudo action: ip-192.168.24.7_stop_0 * Pseudo action: ip-10.0.0.109_stop_0 * Pseudo action: ip-172.17.4.11_stop_0 * Resource action: stonith-fence_ipmilan-5254003e8e97 monitor=60000 on controller-1 * Resource action: stonith-fence_ipmilan-5254000dcb3f monitor=60000 on controller-2 * Pseudo action: galera-bundle_demoted_0 * Pseudo action: galera-bundle_stop_0 * Pseudo action: rabbitmq_stop_0 * Pseudo action: rabbitmq-bundle-clone_stopped_0 * Pseudo action: galera-bundle-master_stop_0 * Pseudo action: galera-bundle-docker-0_stop_0 * Pseudo action: redis-bundle-master_post_notify_demoted_0 * Resource action: ip-192.168.24.7 start on controller-2 * Resource action: ip-10.0.0.109 start on controller-1 * Resource action: ip-172.17.4.11 start on controller-1 * Pseudo action: rabbitmq-bundle-clone_post_notify_stopped_0 * Pseudo action: galera_stop_0 * Pseudo action: galera-bundle-master_stopped_0 * Pseudo action: galera-bundle-master_start_0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-post_notify_demoted_0 * Pseudo action: redis-bundle-master_pre_notify_stop_0 * Resource action: ip-192.168.24.7 monitor=10000 on controller-2 * Resource action: ip-10.0.0.109 monitor=10000 on controller-1 * Resource action: ip-172.17.4.11 monitor=10000 on 
controller-1 * Pseudo action: redis-bundle_demoted_0 * Pseudo action: redis-bundle_stop_0 * Pseudo action: galera-bundle_stopped_0 * Resource action: rabbitmq notify on rabbitmq-bundle-1 * Resource action: rabbitmq notify on rabbitmq-bundle-2 * Pseudo action: rabbitmq-bundle-clone_confirmed-post_notify_stopped_0 * Pseudo action: rabbitmq-bundle-clone_pre_notify_start_0 * Pseudo action: galera-bundle-master_running_0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-pre_notify_stop_0 * Pseudo action: redis-bundle-master_stop_0 * Pseudo action: redis-bundle-docker-0_stop_0 * Pseudo action: galera-bundle_running_0 * Pseudo action: rabbitmq-bundle_stopped_0 * Pseudo action: rabbitmq_notified_0 * Pseudo action: rabbitmq-bundle-clone_confirmed-pre_notify_start_0 * Pseudo action: rabbitmq-bundle-clone_start_0 * Pseudo action: redis_stop_0 * Pseudo action: redis-bundle-master_stopped_0 * Pseudo action: rabbitmq-bundle-clone_running_0 * Pseudo action: redis-bundle-master_post_notify_stopped_0 * Pseudo action: rabbitmq-bundle-clone_post_notify_running_0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-post_notify_stopped_0 * Pseudo action: redis-bundle-master_pre_notify_start_0 * Pseudo action: redis-bundle_stopped_0 * Pseudo action: rabbitmq-bundle-clone_confirmed-post_notify_running_0 * Pseudo action: redis_notified_0 * Pseudo action: redis-bundle-master_confirmed-pre_notify_start_0 * Pseudo action: redis-bundle-master_start_0 * Pseudo action: rabbitmq-bundle_running_0 * Pseudo action: redis-bundle-master_running_0 * Pseudo action: redis-bundle-master_post_notify_running_0 * Pseudo action: redis-bundle-master_confirmed-post_notify_running_0 * Pseudo action: redis-bundle_running_0 * Pseudo action: redis-bundle-master_pre_notify_promote_0 * Pseudo action: redis-bundle_promote_0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-pre_notify_promote_0 * Pseudo action: redis-bundle-master_promote_0 * Resource action: redis promote on redis-bundle-1 * Pseudo action: redis-bundle-master_promoted_0 * Pseudo action: redis-bundle-master_post_notify_promoted_0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-post_notify_promoted_0 * Pseudo action: redis-bundle_promoted_0 * Resource action: redis monitor=20000 on redis-bundle-1 Using the original execution date of: 2017-09-12 10:51:59Z Revised Cluster Status: * Node List: * Online: [ controller-1 controller-2 ] * OFFLINE: [ controller-0 ] * GuestOnline: [ galera-bundle-1@controller-1 galera-bundle-2@controller-2 rabbitmq-bundle-1@controller-1 rabbitmq-bundle-2@controller-2 redis-bundle-1@controller-1 redis-bundle-2@controller-2 ] * Full List of Resources: * Container bundle set: rabbitmq-bundle [192.168.24.1:8787/rhosp12/openstack-rabbitmq-docker:pcmklatest]: * rabbitmq-bundle-0 (ocf:heartbeat:rabbitmq-cluster): FAILED * rabbitmq-bundle-1 (ocf:heartbeat:rabbitmq-cluster): Started controller-1 * rabbitmq-bundle-2 (ocf:heartbeat:rabbitmq-cluster): Started controller-2 * Container bundle set: galera-bundle [192.168.24.1:8787/rhosp12/openstack-mariadb-docker:pcmklatest]: * galera-bundle-0 (ocf:heartbeat:galera): FAILED Promoted * galera-bundle-1 (ocf:heartbeat:galera): Promoted 
controller-1 * galera-bundle-2 (ocf:heartbeat:galera): Promoted controller-2 * Container bundle set: redis-bundle [192.168.24.1:8787/rhosp12/openstack-redis-docker:pcmklatest]: * redis-bundle-0 (ocf:heartbeat:redis): FAILED Promoted * redis-bundle-1 (ocf:heartbeat:redis): Promoted controller-1 * redis-bundle-2 (ocf:heartbeat:redis): Unpromoted controller-2 * ip-192.168.24.7 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-10.0.0.109 (ocf:heartbeat:IPaddr2): Started controller-1 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.1.19 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.3.19 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-1 * Container bundle set: haproxy-bundle [192.168.24.1:8787/rhosp12/openstack-haproxy-docker:pcmklatest]: * haproxy-bundle-docker-0 (ocf:heartbeat:docker): Stopped * haproxy-bundle-docker-1 (ocf:heartbeat:docker): Started controller-2 * haproxy-bundle-docker-2 (ocf:heartbeat:docker): Started controller-1 * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-2 * stonith-fence_ipmilan-525400efba5c (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-5254003e8e97 (stonith:fence_ipmilan): Started controller-1 * stonith-fence_ipmilan-5254000dcb3f (stonith:fence_ipmilan): Started controller-2 diff --git a/cts/scheduler/summary/bundle-order-stop-on-remote.summary b/cts/scheduler/summary/bundle-order-stop-on-remote.summary index bf94ce3c72..8cd17eef61 100644 --- a/cts/scheduler/summary/bundle-order-stop-on-remote.summary +++ b/cts/scheduler/summary/bundle-order-stop-on-remote.summary @@ -1,224 +1,224 @@ Current cluster status: * Node List: * RemoteNode database-0: UNCLEAN (offline) * RemoteNode database-2: UNCLEAN (offline) * Online: [ controller-0 controller-1 controller-2 ] * RemoteOnline: [ database-1 messaging-0 messaging-1 messaging-2 ] * GuestOnline: [ galera-bundle-1@controller-2 rabbitmq-bundle-0@controller-2 rabbitmq-bundle-1@controller-2 rabbitmq-bundle-2@controller-2 redis-bundle-0@controller-0 redis-bundle-2@controller-2 ] * Full List of Resources: * database-0 (ocf:pacemaker:remote): Stopped * database-1 (ocf:pacemaker:remote): Started controller-2 * database-2 (ocf:pacemaker:remote): Stopped * messaging-0 (ocf:pacemaker:remote): Started controller-2 * messaging-1 (ocf:pacemaker:remote): Started controller-2 * messaging-2 (ocf:pacemaker:remote): Started controller-2 * Container bundle set: rabbitmq-bundle [192.168.24.1:8787/rhosp12/openstack-rabbitmq-docker:pcmklatest]: * rabbitmq-bundle-0 (ocf:heartbeat:rabbitmq-cluster): Started messaging-0 * rabbitmq-bundle-1 (ocf:heartbeat:rabbitmq-cluster): Started messaging-1 * rabbitmq-bundle-2 (ocf:heartbeat:rabbitmq-cluster): Started messaging-2 * Container bundle set: galera-bundle [192.168.24.1:8787/rhosp12/openstack-mariadb-docker:pcmklatest]: * galera-bundle-0 (ocf:heartbeat:galera): FAILED Promoted database-0 (UNCLEAN) * galera-bundle-1 (ocf:heartbeat:galera): Promoted database-1 * galera-bundle-2 (ocf:heartbeat:galera): FAILED Promoted database-2 (UNCLEAN) * Container bundle set: redis-bundle [192.168.24.1:8787/rhosp12/openstack-redis-docker:pcmklatest]: * redis-bundle-0 (ocf:heartbeat:redis): Unpromoted controller-0 * redis-bundle-1 (ocf:heartbeat:redis): Stopped * redis-bundle-2 (ocf:heartbeat:redis): Unpromoted controller-2 * ip-192.168.24.11 (ocf:heartbeat:IPaddr2): Stopped * ip-10.0.0.104 (ocf:heartbeat:IPaddr2): Stopped * ip-172.17.1.19 (ocf:heartbeat:IPaddr2): 
Started controller-2 * ip-172.17.1.11 (ocf:heartbeat:IPaddr2): Stopped * ip-172.17.3.13 (ocf:heartbeat:IPaddr2): Stopped * ip-172.17.4.19 (ocf:heartbeat:IPaddr2): Started controller-2 * Container bundle set: haproxy-bundle [192.168.24.1:8787/rhosp12/openstack-haproxy-docker:pcmklatest]: * haproxy-bundle-docker-0 (ocf:heartbeat:docker): Started controller-0 * haproxy-bundle-docker-1 (ocf:heartbeat:docker): Stopped * haproxy-bundle-docker-2 (ocf:heartbeat:docker): Started controller-2 * openstack-cinder-volume (systemd:openstack-cinder-volume): Stopped * stonith-fence_ipmilan-525400244e09 (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-525400cdec10 (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-525400c709f7 (stonith:fence_ipmilan): Stopped * stonith-fence_ipmilan-525400a7f9e0 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400a25787 (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-5254005ea387 (stonith:fence_ipmilan): Stopped * stonith-fence_ipmilan-525400542c06 (stonith:fence_ipmilan): Stopped * stonith-fence_ipmilan-525400aac413 (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-525400498d34 (stonith:fence_ipmilan): Stopped Transition Summary: * Fence (reboot) galera-bundle-2 (resource: galera-bundle-docker-2) 'guest is unclean' * Fence (reboot) galera-bundle-0 (resource: galera-bundle-docker-0) 'guest is unclean' * Start database-0 ( controller-0 ) * Start database-2 ( controller-1 ) * Recover galera-bundle-docker-0 ( database-0 ) * Start galera-bundle-0 ( controller-0 ) - * Recover galera:0 ( Promoted galera-bundle-0 ) + * Recover galera:0 ( Promoted galera-bundle-0 ) * Recover galera-bundle-docker-2 ( database-2 ) * Start galera-bundle-2 ( controller-1 ) - * Recover galera:2 ( Promoted galera-bundle-2 ) + * Recover galera:2 ( Promoted galera-bundle-2 ) * Promote redis:0 ( Unpromoted -> Promoted redis-bundle-0 ) * Start redis-bundle-docker-1 ( controller-1 ) * Start redis-bundle-1 ( controller-1 ) * Start redis:1 ( redis-bundle-1 ) * Start ip-192.168.24.11 ( controller-0 ) * Start ip-10.0.0.104 ( controller-1 ) * Start ip-172.17.1.11 ( controller-0 ) * Start ip-172.17.3.13 ( controller-1 ) * Start haproxy-bundle-docker-1 ( controller-1 ) * Start openstack-cinder-volume ( controller-0 ) * Start stonith-fence_ipmilan-525400c709f7 ( controller-1 ) * Start stonith-fence_ipmilan-5254005ea387 ( controller-1 ) * Start stonith-fence_ipmilan-525400542c06 ( controller-0 ) * Start stonith-fence_ipmilan-525400498d34 ( controller-1 ) Executing Cluster Transition: * Resource action: database-0 start on controller-0 * Resource action: database-2 start on controller-1 * Pseudo action: redis-bundle-master_pre_notify_start_0 * Resource action: stonith-fence_ipmilan-525400c709f7 start on controller-1 * Resource action: stonith-fence_ipmilan-5254005ea387 start on controller-1 * Resource action: stonith-fence_ipmilan-525400542c06 start on controller-0 * Resource action: stonith-fence_ipmilan-525400498d34 start on controller-1 * Pseudo action: redis-bundle_start_0 * Pseudo action: galera-bundle_demote_0 * Resource action: database-0 monitor=20000 on controller-0 * Resource action: database-2 monitor=20000 on controller-1 * Pseudo action: galera-bundle-master_demote_0 * Resource action: redis notify on redis-bundle-0 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-pre_notify_start_0 * Pseudo action: redis-bundle-master_start_0 * Resource action: 
stonith-fence_ipmilan-525400c709f7 monitor=60000 on controller-1 * Resource action: stonith-fence_ipmilan-5254005ea387 monitor=60000 on controller-1 * Resource action: stonith-fence_ipmilan-525400542c06 monitor=60000 on controller-0 * Resource action: stonith-fence_ipmilan-525400498d34 monitor=60000 on controller-1 * Pseudo action: galera_demote_0 * Pseudo action: galera_demote_0 * Pseudo action: galera-bundle-master_demoted_0 * Pseudo action: galera-bundle_demoted_0 * Pseudo action: galera-bundle_stop_0 * Resource action: galera-bundle-docker-0 stop on database-0 * Resource action: galera-bundle-docker-2 stop on database-2 * Pseudo action: stonith-galera-bundle-2-reboot on galera-bundle-2 * Pseudo action: stonith-galera-bundle-0-reboot on galera-bundle-0 * Pseudo action: galera-bundle-master_stop_0 * Resource action: redis-bundle-docker-1 start on controller-1 * Resource action: redis-bundle-1 monitor on controller-1 * Resource action: ip-192.168.24.11 start on controller-0 * Resource action: ip-10.0.0.104 start on controller-1 * Resource action: ip-172.17.1.11 start on controller-0 * Resource action: ip-172.17.3.13 start on controller-1 * Resource action: openstack-cinder-volume start on controller-0 * Pseudo action: haproxy-bundle_start_0 * Pseudo action: galera_stop_0 * Resource action: redis-bundle-docker-1 monitor=60000 on controller-1 * Resource action: redis-bundle-1 start on controller-1 * Resource action: ip-192.168.24.11 monitor=10000 on controller-0 * Resource action: ip-10.0.0.104 monitor=10000 on controller-1 * Resource action: ip-172.17.1.11 monitor=10000 on controller-0 * Resource action: ip-172.17.3.13 monitor=10000 on controller-1 * Resource action: haproxy-bundle-docker-1 start on controller-1 * Resource action: openstack-cinder-volume monitor=60000 on controller-0 * Pseudo action: haproxy-bundle_running_0 * Pseudo action: galera_stop_0 * Pseudo action: galera-bundle-master_stopped_0 * Resource action: redis start on redis-bundle-1 * Pseudo action: redis-bundle-master_running_0 * Resource action: redis-bundle-1 monitor=30000 on controller-1 * Resource action: haproxy-bundle-docker-1 monitor=60000 on controller-1 * Pseudo action: galera-bundle_stopped_0 * Pseudo action: galera-bundle_start_0 * Pseudo action: galera-bundle-master_start_0 * Resource action: galera-bundle-docker-0 start on database-0 * Resource action: galera-bundle-0 monitor on controller-1 * Resource action: galera-bundle-docker-2 start on database-2 * Resource action: galera-bundle-2 monitor on controller-1 * Pseudo action: redis-bundle-master_post_notify_running_0 * Resource action: galera-bundle-docker-0 monitor=60000 on database-0 * Resource action: galera-bundle-0 start on controller-0 * Resource action: galera-bundle-docker-2 monitor=60000 on database-2 * Resource action: galera-bundle-2 start on controller-1 * Resource action: redis notify on redis-bundle-0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-post_notify_running_0 * Pseudo action: redis-bundle_running_0 * Resource action: galera start on galera-bundle-0 * Resource action: galera start on galera-bundle-2 * Pseudo action: galera-bundle-master_running_0 * Resource action: galera-bundle-0 monitor=30000 on controller-0 * Resource action: galera-bundle-2 monitor=30000 on controller-1 * Pseudo action: redis-bundle-master_pre_notify_promote_0 * Pseudo action: redis-bundle_promote_0 * Pseudo action: galera-bundle_running_0 * Resource action: redis 
notify on redis-bundle-0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-pre_notify_promote_0 * Pseudo action: redis-bundle-master_promote_0 * Pseudo action: galera-bundle_promote_0 * Pseudo action: galera-bundle-master_promote_0 * Resource action: redis promote on redis-bundle-0 * Pseudo action: redis-bundle-master_promoted_0 * Resource action: galera promote on galera-bundle-0 * Resource action: galera promote on galera-bundle-2 * Pseudo action: galera-bundle-master_promoted_0 * Pseudo action: redis-bundle-master_post_notify_promoted_0 * Pseudo action: galera-bundle_promoted_0 * Resource action: galera monitor=10000 on galera-bundle-0 * Resource action: galera monitor=10000 on galera-bundle-2 * Resource action: redis notify on redis-bundle-0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-post_notify_promoted_0 * Pseudo action: redis-bundle_promoted_0 * Resource action: redis monitor=20000 on redis-bundle-0 * Resource action: redis monitor=60000 on redis-bundle-1 * Resource action: redis monitor=45000 on redis-bundle-1 Revised Cluster Status: * Node List: * Online: [ controller-0 controller-1 controller-2 ] * RemoteOnline: [ database-0 database-1 database-2 messaging-0 messaging-1 messaging-2 ] * GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-2 galera-bundle-2@controller-1 rabbitmq-bundle-0@controller-2 rabbitmq-bundle-1@controller-2 rabbitmq-bundle-2@controller-2 redis-bundle-0@controller-0 redis-bundle-1@controller-1 redis-bundle-2@controller-2 ] * Full List of Resources: * database-0 (ocf:pacemaker:remote): Started controller-0 * database-1 (ocf:pacemaker:remote): Started controller-2 * database-2 (ocf:pacemaker:remote): Started controller-1 * messaging-0 (ocf:pacemaker:remote): Started controller-2 * messaging-1 (ocf:pacemaker:remote): Started controller-2 * messaging-2 (ocf:pacemaker:remote): Started controller-2 * Container bundle set: rabbitmq-bundle [192.168.24.1:8787/rhosp12/openstack-rabbitmq-docker:pcmklatest]: * rabbitmq-bundle-0 (ocf:heartbeat:rabbitmq-cluster): Started messaging-0 * rabbitmq-bundle-1 (ocf:heartbeat:rabbitmq-cluster): Started messaging-1 * rabbitmq-bundle-2 (ocf:heartbeat:rabbitmq-cluster): Started messaging-2 * Container bundle set: galera-bundle [192.168.24.1:8787/rhosp12/openstack-mariadb-docker:pcmklatest]: * galera-bundle-0 (ocf:heartbeat:galera): Promoted database-0 * galera-bundle-1 (ocf:heartbeat:galera): Promoted database-1 * galera-bundle-2 (ocf:heartbeat:galera): Promoted database-2 * Container bundle set: redis-bundle [192.168.24.1:8787/rhosp12/openstack-redis-docker:pcmklatest]: * redis-bundle-0 (ocf:heartbeat:redis): Promoted controller-0 * redis-bundle-1 (ocf:heartbeat:redis): Unpromoted controller-1 * redis-bundle-2 (ocf:heartbeat:redis): Unpromoted controller-2 * ip-192.168.24.11 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.104 (ocf:heartbeat:IPaddr2): Started controller-1 * ip-172.17.1.19 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.1.11 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.3.13 (ocf:heartbeat:IPaddr2): Started controller-1 * ip-172.17.4.19 (ocf:heartbeat:IPaddr2): Started controller-2 * Container bundle set: haproxy-bundle [192.168.24.1:8787/rhosp12/openstack-haproxy-docker:pcmklatest]: * haproxy-bundle-docker-0 (ocf:heartbeat:docker): Started controller-0 * 
haproxy-bundle-docker-1 (ocf:heartbeat:docker): Started controller-1 * haproxy-bundle-docker-2 (ocf:heartbeat:docker): Started controller-2 * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * stonith-fence_ipmilan-525400244e09 (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-525400cdec10 (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-525400c709f7 (stonith:fence_ipmilan): Started controller-1 * stonith-fence_ipmilan-525400a7f9e0 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400a25787 (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-5254005ea387 (stonith:fence_ipmilan): Started controller-1 * stonith-fence_ipmilan-525400542c06 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400aac413 (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-525400498d34 (stonith:fence_ipmilan): Started controller-1 diff --git a/cts/scheduler/summary/colocation-influence.summary b/cts/scheduler/summary/colocation-influence.summary index 3ea8b3f545..7fa4fcf0c2 100644 --- a/cts/scheduler/summary/colocation-influence.summary +++ b/cts/scheduler/summary/colocation-influence.summary @@ -1,170 +1,170 @@ Current cluster status: * Node List: * Online: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] * GuestOnline: [ bundle10-0@rhel7-2 bundle10-1@rhel7-3 bundle11-0@rhel7-1 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started rhel7-1 * rsc1a (ocf:pacemaker:Dummy): Started rhel7-2 * rsc1b (ocf:pacemaker:Dummy): Started rhel7-2 * rsc2a (ocf:pacemaker:Dummy): Started rhel7-4 * rsc2b (ocf:pacemaker:Dummy): Started rhel7-4 * rsc3a (ocf:pacemaker:Dummy): Stopped * rsc3b (ocf:pacemaker:Dummy): Stopped * rsc4a (ocf:pacemaker:Dummy): Started rhel7-3 * rsc4b (ocf:pacemaker:Dummy): Started rhel7-3 * rsc5a (ocf:pacemaker:Dummy): Started rhel7-1 * Resource Group: group5a: * rsc5a1 (ocf:pacemaker:Dummy): Started rhel7-1 * rsc5a2 (ocf:pacemaker:Dummy): Started rhel7-1 * Resource Group: group6a: * rsc6a1 (ocf:pacemaker:Dummy): Started rhel7-2 * rsc6a2 (ocf:pacemaker:Dummy): Started rhel7-2 * rsc6a (ocf:pacemaker:Dummy): Started rhel7-2 * Resource Group: group7a: * rsc7a1 (ocf:pacemaker:Dummy): Started rhel7-3 * rsc7a2 (ocf:pacemaker:Dummy): Started rhel7-3 * Clone Set: rsc8a-clone [rsc8a]: * Started: [ rhel7-1 rhel7-3 rhel7-4 ] * Clone Set: rsc8b-clone [rsc8b]: * Started: [ rhel7-1 rhel7-3 rhel7-4 ] * rsc9a (ocf:pacemaker:Dummy): Started rhel7-4 * rsc9b (ocf:pacemaker:Dummy): Started rhel7-4 * rsc9c (ocf:pacemaker:Dummy): Started rhel7-4 * rsc10a (ocf:pacemaker:Dummy): Started rhel7-2 * rsc11a (ocf:pacemaker:Dummy): Started rhel7-1 * rsc12a (ocf:pacemaker:Dummy): Started rhel7-1 * rsc12b (ocf:pacemaker:Dummy): Started rhel7-1 * rsc12c (ocf:pacemaker:Dummy): Started rhel7-1 * Container bundle set: bundle10 [pcmktest:http]: * bundle10-0 (192.168.122.131) (ocf:heartbeat:apache): Started rhel7-2 * bundle10-1 (192.168.122.132) (ocf:heartbeat:apache): Started rhel7-3 * Container bundle set: bundle11 [pcmktest:http]: * bundle11-0 (192.168.122.134) (ocf:pacemaker:Dummy): Started rhel7-1 * bundle11-1 (192.168.122.135) (ocf:pacemaker:Dummy): Stopped * rsc13a (ocf:pacemaker:Dummy): Started rhel7-3 * Clone Set: rsc13b-clone [rsc13b] (promotable): * Promoted: [ rhel7-3 ] * Unpromoted: [ rhel7-1 rhel7-2 rhel7-4 ] * Stopped: [ rhel7-5 ] * rsc14b (ocf:pacemaker:Dummy): Started rhel7-4 * Clone Set: rsc14a-clone [rsc14a] (promotable): * Promoted: [ rhel7-4 ] * Unpromoted: [ rhel7-1 
rhel7-2 rhel7-3 ] * Stopped: [ rhel7-5 ] Transition Summary: * Move rsc1a ( rhel7-2 -> rhel7-3 ) * Move rsc1b ( rhel7-2 -> rhel7-3 ) * Stop rsc2a ( rhel7-4 ) due to node availability * Start rsc3a ( rhel7-2 ) * Start rsc3b ( rhel7-2 ) * Stop rsc4a ( rhel7-3 ) due to node availability * Stop rsc5a ( rhel7-1 ) due to node availability * Stop rsc6a1 ( rhel7-2 ) due to node availability * Stop rsc6a2 ( rhel7-2 ) due to node availability * Stop rsc7a2 ( rhel7-3 ) due to node availability * Stop rsc8a:1 ( rhel7-4 ) due to node availability * Stop rsc9c ( rhel7-4 ) due to node availability * Move rsc10a ( rhel7-2 -> rhel7-3 ) * Stop rsc12b ( rhel7-1 ) due to node availability * Start bundle11-1 ( rhel7-5 ) due to unrunnable bundle11-docker-1 start (blocked) * Start bundle11a:1 ( bundle11-1 ) due to unrunnable bundle11-docker-1 start (blocked) * Stop rsc13a ( rhel7-3 ) due to node availability - * Stop rsc14a:1 ( Promoted rhel7-4 ) due to node availability + * Stop rsc14a:1 ( Promoted rhel7-4 ) due to node availability Executing Cluster Transition: * Resource action: rsc1a stop on rhel7-2 * Resource action: rsc1b stop on rhel7-2 * Resource action: rsc2a stop on rhel7-4 * Resource action: rsc3a start on rhel7-2 * Resource action: rsc3b start on rhel7-2 * Resource action: rsc4a stop on rhel7-3 * Resource action: rsc5a stop on rhel7-1 * Pseudo action: group6a_stop_0 * Resource action: rsc6a2 stop on rhel7-2 * Pseudo action: group7a_stop_0 * Resource action: rsc7a2 stop on rhel7-3 * Pseudo action: rsc8a-clone_stop_0 * Resource action: rsc9c stop on rhel7-4 * Resource action: rsc10a stop on rhel7-2 * Resource action: rsc12b stop on rhel7-1 * Resource action: rsc13a stop on rhel7-3 * Pseudo action: rsc14a-clone_demote_0 * Pseudo action: bundle11_start_0 * Resource action: rsc1a start on rhel7-3 * Resource action: rsc1b start on rhel7-3 * Resource action: rsc3a monitor=10000 on rhel7-2 * Resource action: rsc3b monitor=10000 on rhel7-2 * Resource action: rsc6a1 stop on rhel7-2 * Pseudo action: group7a_stopped_0 * Resource action: rsc8a stop on rhel7-4 * Pseudo action: rsc8a-clone_stopped_0 * Resource action: rsc10a start on rhel7-3 * Pseudo action: bundle11-clone_start_0 * Resource action: rsc14a demote on rhel7-4 * Pseudo action: rsc14a-clone_demoted_0 * Pseudo action: rsc14a-clone_stop_0 * Resource action: rsc1a monitor=10000 on rhel7-3 * Resource action: rsc1b monitor=10000 on rhel7-3 * Pseudo action: group6a_stopped_0 * Resource action: rsc10a monitor=10000 on rhel7-3 * Pseudo action: bundle11-clone_running_0 * Resource action: rsc14a stop on rhel7-4 * Pseudo action: rsc14a-clone_stopped_0 * Pseudo action: bundle11_running_0 Revised Cluster Status: * Node List: * Online: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] * GuestOnline: [ bundle10-0@rhel7-2 bundle10-1@rhel7-3 bundle11-0@rhel7-1 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started rhel7-1 * rsc1a (ocf:pacemaker:Dummy): Started rhel7-3 * rsc1b (ocf:pacemaker:Dummy): Started rhel7-3 * rsc2a (ocf:pacemaker:Dummy): Stopped * rsc2b (ocf:pacemaker:Dummy): Started rhel7-4 * rsc3a (ocf:pacemaker:Dummy): Started rhel7-2 * rsc3b (ocf:pacemaker:Dummy): Started rhel7-2 * rsc4a (ocf:pacemaker:Dummy): Stopped * rsc4b (ocf:pacemaker:Dummy): Started rhel7-3 * rsc5a (ocf:pacemaker:Dummy): Stopped * Resource Group: group5a: * rsc5a1 (ocf:pacemaker:Dummy): Started rhel7-1 * rsc5a2 (ocf:pacemaker:Dummy): Started rhel7-1 * Resource Group: group6a: * rsc6a1 (ocf:pacemaker:Dummy): Stopped * rsc6a2 (ocf:pacemaker:Dummy): Stopped * rsc6a 
(ocf:pacemaker:Dummy): Started rhel7-2 * Resource Group: group7a: * rsc7a1 (ocf:pacemaker:Dummy): Started rhel7-3 * rsc7a2 (ocf:pacemaker:Dummy): Stopped * Clone Set: rsc8a-clone [rsc8a]: * Started: [ rhel7-1 rhel7-3 ] * Stopped: [ rhel7-2 rhel7-4 rhel7-5 ] * Clone Set: rsc8b-clone [rsc8b]: * Started: [ rhel7-1 rhel7-3 rhel7-4 ] * rsc9a (ocf:pacemaker:Dummy): Started rhel7-4 * rsc9b (ocf:pacemaker:Dummy): Started rhel7-4 * rsc9c (ocf:pacemaker:Dummy): Stopped * rsc10a (ocf:pacemaker:Dummy): Started rhel7-3 * rsc11a (ocf:pacemaker:Dummy): Started rhel7-1 * rsc12a (ocf:pacemaker:Dummy): Started rhel7-1 * rsc12b (ocf:pacemaker:Dummy): Stopped * rsc12c (ocf:pacemaker:Dummy): Started rhel7-1 * Container bundle set: bundle10 [pcmktest:http]: * bundle10-0 (192.168.122.131) (ocf:heartbeat:apache): Started rhel7-2 * bundle10-1 (192.168.122.132) (ocf:heartbeat:apache): Started rhel7-3 * Container bundle set: bundle11 [pcmktest:http]: * bundle11-0 (192.168.122.134) (ocf:pacemaker:Dummy): Started rhel7-1 * bundle11-1 (192.168.122.135) (ocf:pacemaker:Dummy): Stopped * rsc13a (ocf:pacemaker:Dummy): Stopped * Clone Set: rsc13b-clone [rsc13b] (promotable): * Promoted: [ rhel7-3 ] * Unpromoted: [ rhel7-1 rhel7-2 rhel7-4 ] * Stopped: [ rhel7-5 ] * rsc14b (ocf:pacemaker:Dummy): Started rhel7-4 * Clone Set: rsc14a-clone [rsc14a] (promotable): * Unpromoted: [ rhel7-1 rhel7-2 rhel7-3 ] * Stopped: [ rhel7-4 rhel7-5 ] diff --git a/cts/scheduler/summary/dc-fence-ordering.summary b/cts/scheduler/summary/dc-fence-ordering.summary index ac46031f07..305ebd5c19 100644 --- a/cts/scheduler/summary/dc-fence-ordering.summary +++ b/cts/scheduler/summary/dc-fence-ordering.summary @@ -1,82 +1,82 @@ Using the original execution date of: 2018-11-28 18:37:16Z Current cluster status: * Node List: * Node rhel7-1: UNCLEAN (online) * Online: [ rhel7-2 rhel7-4 rhel7-5 ] * OFFLINE: [ rhel7-3 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Stopped * FencingPass (stonith:fence_dummy): Stopped * FencingFail (stonith:fence_dummy): Stopped * rsc_rhel7-1 (ocf:heartbeat:IPaddr2): Stopped * rsc_rhel7-2 (ocf:heartbeat:IPaddr2): Stopped * rsc_rhel7-3 (ocf:heartbeat:IPaddr2): Stopped * rsc_rhel7-4 (ocf:heartbeat:IPaddr2): Stopped * rsc_rhel7-5 (ocf:heartbeat:IPaddr2): Stopped * migrator (ocf:pacemaker:Dummy): Stopped * Clone Set: Connectivity [ping-1]: * Stopped: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] * Clone Set: promotable-1 [stateful-1] (promotable): * Promoted: [ rhel7-1 ] * Unpromoted: [ rhel7-2 rhel7-4 rhel7-5 ] * Stopped: [ rhel7-3 ] * Resource Group: group-1: * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel7-1 * petulant (service:pacemaker-cts-dummyd@10): FAILED rhel7-1 * r192.168.122.208 (ocf:heartbeat:IPaddr2): Stopped * lsb-dummy (lsb:LSBDummy): Stopped Transition Summary: * Fence (reboot) rhel7-1 'petulant failed there' - * Stop stateful-1:0 ( Unpromoted rhel7-5 ) due to node availability - * Stop stateful-1:1 ( Promoted rhel7-1 ) due to node availability - * Stop stateful-1:2 ( Unpromoted rhel7-2 ) due to node availability - * Stop stateful-1:3 ( Unpromoted rhel7-4 ) due to node availability + * Stop stateful-1:0 ( Unpromoted rhel7-5 ) due to node availability + * Stop stateful-1:1 ( Promoted rhel7-1 ) due to node availability + * Stop stateful-1:2 ( Unpromoted rhel7-2 ) due to node availability + * Stop stateful-1:3 ( Unpromoted rhel7-4 ) due to node availability * Stop r192.168.122.207 ( rhel7-1 ) due to node availability * Stop petulant ( rhel7-1 ) due to node availability Executing Cluster Transition: * 
Fencing rhel7-1 (reboot) * Pseudo action: group-1_stop_0 * Pseudo action: petulant_stop_0 * Pseudo action: r192.168.122.207_stop_0 * Pseudo action: group-1_stopped_0 * Pseudo action: promotable-1_demote_0 * Pseudo action: stateful-1_demote_0 * Pseudo action: promotable-1_demoted_0 * Pseudo action: promotable-1_stop_0 * Resource action: stateful-1 stop on rhel7-5 * Pseudo action: stateful-1_stop_0 * Resource action: stateful-1 stop on rhel7-2 * Resource action: stateful-1 stop on rhel7-4 * Pseudo action: promotable-1_stopped_0 * Cluster action: do_shutdown on rhel7-5 * Cluster action: do_shutdown on rhel7-4 * Cluster action: do_shutdown on rhel7-2 Using the original execution date of: 2018-11-28 18:37:16Z Revised Cluster Status: * Node List: * Online: [ rhel7-2 rhel7-4 rhel7-5 ] * OFFLINE: [ rhel7-1 rhel7-3 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Stopped * FencingPass (stonith:fence_dummy): Stopped * FencingFail (stonith:fence_dummy): Stopped * rsc_rhel7-1 (ocf:heartbeat:IPaddr2): Stopped * rsc_rhel7-2 (ocf:heartbeat:IPaddr2): Stopped * rsc_rhel7-3 (ocf:heartbeat:IPaddr2): Stopped * rsc_rhel7-4 (ocf:heartbeat:IPaddr2): Stopped * rsc_rhel7-5 (ocf:heartbeat:IPaddr2): Stopped * migrator (ocf:pacemaker:Dummy): Stopped * Clone Set: Connectivity [ping-1]: * Stopped: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] * Clone Set: promotable-1 [stateful-1] (promotable): * Stopped: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] * Resource Group: group-1: * r192.168.122.207 (ocf:heartbeat:IPaddr2): Stopped * petulant (service:pacemaker-cts-dummyd@10): Stopped * r192.168.122.208 (ocf:heartbeat:IPaddr2): Stopped * lsb-dummy (lsb:LSBDummy): Stopped diff --git a/cts/scheduler/summary/guest-node-host-dies.summary b/cts/scheduler/summary/guest-node-host-dies.summary index b0286b2846..f4509b9029 100644 --- a/cts/scheduler/summary/guest-node-host-dies.summary +++ b/cts/scheduler/summary/guest-node-host-dies.summary @@ -1,82 +1,82 @@ Current cluster status: * Node List: * Node rhel7-1: UNCLEAN (offline) * Online: [ rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started rhel7-4 * rsc_rhel7-1 (ocf:heartbeat:IPaddr2): Started rhel7-1 (UNCLEAN) * container1 (ocf:heartbeat:VirtualDomain): FAILED rhel7-1 (UNCLEAN) * container2 (ocf:heartbeat:VirtualDomain): FAILED rhel7-1 (UNCLEAN) * Clone Set: lxc-ms-master [lxc-ms] (promotable): * Stopped: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] Transition Summary: * Fence (reboot) lxc2 (resource: container2) 'guest is unclean' * Fence (reboot) lxc1 (resource: container1) 'guest is unclean' * Fence (reboot) rhel7-1 'rsc_rhel7-1 is thought to be active there' * Restart Fencing ( rhel7-4 ) due to resource definition change * Move rsc_rhel7-1 ( rhel7-1 -> rhel7-5 ) * Recover container1 ( rhel7-1 -> rhel7-2 ) * Recover container2 ( rhel7-1 -> rhel7-3 ) - * Recover lxc-ms:0 ( Promoted lxc1 ) - * Recover lxc-ms:1 ( Unpromoted lxc2 ) + * Recover lxc-ms:0 ( Promoted lxc1 ) + * Recover lxc-ms:1 ( Unpromoted lxc2 ) * Move lxc1 ( rhel7-1 -> rhel7-2 ) * Move lxc2 ( rhel7-1 -> rhel7-3 ) Executing Cluster Transition: * Resource action: Fencing stop on rhel7-4 * Pseudo action: lxc-ms-master_demote_0 * Pseudo action: lxc1_stop_0 * Resource action: lxc1 monitor on rhel7-5 * Resource action: lxc1 monitor on rhel7-4 * Resource action: lxc1 monitor on rhel7-3 * Pseudo action: lxc2_stop_0 * Resource action: lxc2 monitor on rhel7-5 * Resource action: lxc2 monitor on rhel7-4 * Resource action: lxc2 monitor on rhel7-2 * Fencing rhel7-1 (reboot) * 
Pseudo action: rsc_rhel7-1_stop_0 * Pseudo action: container1_stop_0 * Pseudo action: container2_stop_0 * Pseudo action: stonith-lxc2-reboot on lxc2 * Pseudo action: stonith-lxc1-reboot on lxc1 * Resource action: Fencing start on rhel7-4 * Resource action: Fencing monitor=120000 on rhel7-4 * Resource action: rsc_rhel7-1 start on rhel7-5 * Resource action: container1 start on rhel7-2 * Resource action: container2 start on rhel7-3 * Pseudo action: lxc-ms_demote_0 * Pseudo action: lxc-ms-master_demoted_0 * Pseudo action: lxc-ms-master_stop_0 * Resource action: lxc1 start on rhel7-2 * Resource action: lxc2 start on rhel7-3 * Resource action: rsc_rhel7-1 monitor=5000 on rhel7-5 * Pseudo action: lxc-ms_stop_0 * Pseudo action: lxc-ms_stop_0 * Pseudo action: lxc-ms-master_stopped_0 * Pseudo action: lxc-ms-master_start_0 * Resource action: lxc1 monitor=30000 on rhel7-2 * Resource action: lxc2 monitor=30000 on rhel7-3 * Resource action: lxc-ms start on lxc1 * Resource action: lxc-ms start on lxc2 * Pseudo action: lxc-ms-master_running_0 * Resource action: lxc-ms monitor=10000 on lxc2 * Pseudo action: lxc-ms-master_promote_0 * Resource action: lxc-ms promote on lxc1 * Pseudo action: lxc-ms-master_promoted_0 Revised Cluster Status: * Node List: * Online: [ rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] * OFFLINE: [ rhel7-1 ] * GuestOnline: [ lxc1@rhel7-2 lxc2@rhel7-3 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started rhel7-4 * rsc_rhel7-1 (ocf:heartbeat:IPaddr2): Started rhel7-5 * container1 (ocf:heartbeat:VirtualDomain): Started rhel7-2 * container2 (ocf:heartbeat:VirtualDomain): Started rhel7-3 * Clone Set: lxc-ms-master [lxc-ms] (promotable): * Promoted: [ lxc1 ] * Unpromoted: [ lxc2 ] diff --git a/cts/scheduler/summary/migrate-fencing.summary b/cts/scheduler/summary/migrate-fencing.summary index fd4fffa1d3..955bb0f434 100644 --- a/cts/scheduler/summary/migrate-fencing.summary +++ b/cts/scheduler/summary/migrate-fencing.summary @@ -1,108 +1,108 @@ Current cluster status: * Node List: * Node pcmk-4: UNCLEAN (online) * Online: [ pcmk-1 pcmk-2 pcmk-3 ] * Full List of Resources: * Clone Set: Fencing [FencingChild]: * Started: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] * Resource Group: group-1: * r192.168.101.181 (ocf:heartbeat:IPaddr): Started pcmk-4 * r192.168.101.182 (ocf:heartbeat:IPaddr): Started pcmk-4 * r192.168.101.183 (ocf:heartbeat:IPaddr): Started pcmk-4 * rsc_pcmk-1 (ocf:heartbeat:IPaddr): Started pcmk-1 * rsc_pcmk-2 (ocf:heartbeat:IPaddr): Started pcmk-2 * rsc_pcmk-3 (ocf:heartbeat:IPaddr): Started pcmk-3 * rsc_pcmk-4 (ocf:heartbeat:IPaddr): Started pcmk-4 * lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-4 * migrator (ocf:pacemaker:Dummy): Started pcmk-1 * Clone Set: Connectivity [ping-1]: * Started: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] * Clone Set: master-1 [stateful-1] (promotable): * Promoted: [ pcmk-4 ] * Unpromoted: [ pcmk-1 pcmk-2 pcmk-3 ] Transition Summary: * Fence (reboot) pcmk-4 'termination was requested' * Stop FencingChild:0 ( pcmk-4 ) due to node availability * Move r192.168.101.181 ( pcmk-4 -> pcmk-1 ) * Move r192.168.101.182 ( pcmk-4 -> pcmk-1 ) * Move r192.168.101.183 ( pcmk-4 -> pcmk-1 ) * Move rsc_pcmk-4 ( pcmk-4 -> pcmk-2 ) * Move lsb-dummy ( pcmk-4 -> pcmk-1 ) * Migrate migrator ( pcmk-1 -> pcmk-3 ) * Stop ping-1:0 ( pcmk-4 ) due to node availability - * Stop stateful-1:0 ( Promoted pcmk-4 ) due to node availability + * Stop stateful-1:0 ( Promoted pcmk-4 ) due to node availability * Promote stateful-1:1 ( Unpromoted -> Promoted pcmk-1 ) Executing Cluster 
Transition: * Pseudo action: Fencing_stop_0 * Resource action: stateful-1:3 monitor=15000 on pcmk-3 * Resource action: stateful-1:2 monitor=15000 on pcmk-2 * Fencing pcmk-4 (reboot) * Pseudo action: FencingChild:0_stop_0 * Pseudo action: Fencing_stopped_0 * Pseudo action: rsc_pcmk-4_stop_0 * Pseudo action: lsb-dummy_stop_0 * Resource action: migrator migrate_to on pcmk-1 * Pseudo action: Connectivity_stop_0 * Pseudo action: group-1_stop_0 * Pseudo action: r192.168.101.183_stop_0 * Resource action: rsc_pcmk-4 start on pcmk-2 * Resource action: migrator migrate_from on pcmk-3 * Resource action: migrator stop on pcmk-1 * Pseudo action: ping-1:0_stop_0 * Pseudo action: Connectivity_stopped_0 * Pseudo action: r192.168.101.182_stop_0 * Resource action: rsc_pcmk-4 monitor=5000 on pcmk-2 * Pseudo action: migrator_start_0 * Pseudo action: r192.168.101.181_stop_0 * Resource action: migrator monitor=10000 on pcmk-3 * Pseudo action: group-1_stopped_0 * Pseudo action: master-1_demote_0 * Pseudo action: stateful-1:0_demote_0 * Pseudo action: master-1_demoted_0 * Pseudo action: master-1_stop_0 * Pseudo action: stateful-1:0_stop_0 * Pseudo action: master-1_stopped_0 * Pseudo action: master-1_promote_0 * Resource action: stateful-1:1 promote on pcmk-1 * Pseudo action: master-1_promoted_0 * Pseudo action: group-1_start_0 * Resource action: r192.168.101.181 start on pcmk-1 * Resource action: r192.168.101.182 start on pcmk-1 * Resource action: r192.168.101.183 start on pcmk-1 * Resource action: stateful-1:1 monitor=16000 on pcmk-1 * Pseudo action: group-1_running_0 * Resource action: r192.168.101.181 monitor=5000 on pcmk-1 * Resource action: r192.168.101.182 monitor=5000 on pcmk-1 * Resource action: r192.168.101.183 monitor=5000 on pcmk-1 * Resource action: lsb-dummy start on pcmk-1 * Resource action: lsb-dummy monitor=5000 on pcmk-1 Revised Cluster Status: * Node List: * Online: [ pcmk-1 pcmk-2 pcmk-3 ] * OFFLINE: [ pcmk-4 ] * Full List of Resources: * Clone Set: Fencing [FencingChild]: * Started: [ pcmk-1 pcmk-2 pcmk-3 ] * Stopped: [ pcmk-4 ] * Resource Group: group-1: * r192.168.101.181 (ocf:heartbeat:IPaddr): Started pcmk-1 * r192.168.101.182 (ocf:heartbeat:IPaddr): Started pcmk-1 * r192.168.101.183 (ocf:heartbeat:IPaddr): Started pcmk-1 * rsc_pcmk-1 (ocf:heartbeat:IPaddr): Started pcmk-1 * rsc_pcmk-2 (ocf:heartbeat:IPaddr): Started pcmk-2 * rsc_pcmk-3 (ocf:heartbeat:IPaddr): Started pcmk-3 * rsc_pcmk-4 (ocf:heartbeat:IPaddr): Started pcmk-2 * lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-1 * migrator (ocf:pacemaker:Dummy): Started pcmk-3 * Clone Set: Connectivity [ping-1]: * Started: [ pcmk-1 pcmk-2 pcmk-3 ] * Stopped: [ pcmk-4 ] * Clone Set: master-1 [stateful-1] (promotable): * Promoted: [ pcmk-1 ] * Unpromoted: [ pcmk-2 pcmk-3 ] * Stopped: [ pcmk-4 ] diff --git a/cts/scheduler/summary/migrate-shutdown.summary b/cts/scheduler/summary/migrate-shutdown.summary index 551a41a175..1da9db21e8 100644 --- a/cts/scheduler/summary/migrate-shutdown.summary +++ b/cts/scheduler/summary/migrate-shutdown.summary @@ -1,92 +1,92 @@ Current cluster status: * Node List: * Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started pcmk-1 * Resource Group: group-1: * r192.168.122.105 (ocf:heartbeat:IPaddr): Started pcmk-2 * r192.168.122.106 (ocf:heartbeat:IPaddr): Started pcmk-2 * r192.168.122.107 (ocf:heartbeat:IPaddr): Started pcmk-2 * rsc_pcmk-1 (ocf:heartbeat:IPaddr): Started pcmk-1 * rsc_pcmk-2 (ocf:heartbeat:IPaddr): Started pcmk-2 * rsc_pcmk-3 
(ocf:heartbeat:IPaddr): Stopped * rsc_pcmk-4 (ocf:heartbeat:IPaddr): Started pcmk-4 * lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-2 * migrator (ocf:pacemaker:Dummy): Started pcmk-1 * Clone Set: Connectivity [ping-1]: * Started: [ pcmk-1 pcmk-2 pcmk-4 ] * Stopped: [ pcmk-3 ] * Clone Set: master-1 [stateful-1] (promotable): * Promoted: [ pcmk-2 ] * Unpromoted: [ pcmk-1 pcmk-4 ] * Stopped: [ pcmk-3 ] Transition Summary: * Stop Fencing ( pcmk-1 ) due to node availability * Stop r192.168.122.105 ( pcmk-2 ) due to node availability * Stop r192.168.122.106 ( pcmk-2 ) due to node availability * Stop r192.168.122.107 ( pcmk-2 ) due to node availability * Stop rsc_pcmk-1 ( pcmk-1 ) due to node availability * Stop rsc_pcmk-2 ( pcmk-2 ) due to node availability * Stop rsc_pcmk-4 ( pcmk-4 ) due to node availability * Stop lsb-dummy ( pcmk-2 ) due to node availability * Stop migrator ( pcmk-1 ) due to node availability * Stop ping-1:0 ( pcmk-1 ) due to node availability * Stop ping-1:1 ( pcmk-2 ) due to node availability * Stop ping-1:2 ( pcmk-4 ) due to node availability - * Stop stateful-1:0 ( Unpromoted pcmk-1 ) due to node availability - * Stop stateful-1:1 ( Promoted pcmk-2 ) due to node availability - * Stop stateful-1:2 ( Unpromoted pcmk-4 ) due to node availability + * Stop stateful-1:0 ( Unpromoted pcmk-1 ) due to node availability + * Stop stateful-1:1 ( Promoted pcmk-2 ) due to node availability + * Stop stateful-1:2 ( Unpromoted pcmk-4 ) due to node availability Executing Cluster Transition: * Resource action: Fencing stop on pcmk-1 * Resource action: rsc_pcmk-1 stop on pcmk-1 * Resource action: rsc_pcmk-2 stop on pcmk-2 * Resource action: rsc_pcmk-4 stop on pcmk-4 * Resource action: lsb-dummy stop on pcmk-2 * Resource action: migrator stop on pcmk-1 * Resource action: migrator stop on pcmk-3 * Pseudo action: Connectivity_stop_0 * Cluster action: do_shutdown on pcmk-3 * Pseudo action: group-1_stop_0 * Resource action: r192.168.122.107 stop on pcmk-2 * Resource action: ping-1:0 stop on pcmk-1 * Resource action: ping-1:1 stop on pcmk-2 * Resource action: ping-1:3 stop on pcmk-4 * Pseudo action: Connectivity_stopped_0 * Resource action: r192.168.122.106 stop on pcmk-2 * Resource action: r192.168.122.105 stop on pcmk-2 * Pseudo action: group-1_stopped_0 * Pseudo action: master-1_demote_0 * Resource action: stateful-1:0 demote on pcmk-2 * Pseudo action: master-1_demoted_0 * Pseudo action: master-1_stop_0 * Resource action: stateful-1:2 stop on pcmk-1 * Resource action: stateful-1:0 stop on pcmk-2 * Resource action: stateful-1:3 stop on pcmk-4 * Pseudo action: master-1_stopped_0 * Cluster action: do_shutdown on pcmk-4 * Cluster action: do_shutdown on pcmk-2 * Cluster action: do_shutdown on pcmk-1 Revised Cluster Status: * Node List: * Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Stopped * Resource Group: group-1: * r192.168.122.105 (ocf:heartbeat:IPaddr): Stopped * r192.168.122.106 (ocf:heartbeat:IPaddr): Stopped * r192.168.122.107 (ocf:heartbeat:IPaddr): Stopped * rsc_pcmk-1 (ocf:heartbeat:IPaddr): Stopped * rsc_pcmk-2 (ocf:heartbeat:IPaddr): Stopped * rsc_pcmk-3 (ocf:heartbeat:IPaddr): Stopped * rsc_pcmk-4 (ocf:heartbeat:IPaddr): Stopped * lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Stopped * migrator (ocf:pacemaker:Dummy): Stopped * Clone Set: Connectivity [ping-1]: * Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] * Clone Set: master-1 [stateful-1] (promotable): * Stopped: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] diff 
--git a/cts/scheduler/summary/no-promote-on-unrunnable-guest.summary b/cts/scheduler/summary/no-promote-on-unrunnable-guest.summary index 532f731235..8eb68a4cb9 100644 --- a/cts/scheduler/summary/no-promote-on-unrunnable-guest.summary +++ b/cts/scheduler/summary/no-promote-on-unrunnable-guest.summary @@ -1,103 +1,103 @@ Using the original execution date of: 2020-05-14 10:49:31Z Current cluster status: * Node List: * Online: [ controller-0 controller-1 controller-2 ] * GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 ovn-dbs-bundle-0@controller-0 ovn-dbs-bundle-1@controller-1 ovn-dbs-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 rabbitmq-bundle-2@controller-2 redis-bundle-0@controller-0 redis-bundle-1@controller-1 redis-bundle-2@controller-2 ] * Full List of Resources: * Container bundle set: galera-bundle [cluster.common.tag/rhosp16-openstack-mariadb:pcmklatest]: * galera-bundle-0 (ocf:heartbeat:galera): Promoted controller-0 * galera-bundle-1 (ocf:heartbeat:galera): Promoted controller-1 * galera-bundle-2 (ocf:heartbeat:galera): Promoted controller-2 * Container bundle set: rabbitmq-bundle [cluster.common.tag/rhosp16-openstack-rabbitmq:pcmklatest]: * rabbitmq-bundle-0 (ocf:heartbeat:rabbitmq-cluster): Started controller-0 * rabbitmq-bundle-1 (ocf:heartbeat:rabbitmq-cluster): Started controller-1 * rabbitmq-bundle-2 (ocf:heartbeat:rabbitmq-cluster): Started controller-2 * Container bundle set: redis-bundle [cluster.common.tag/rhosp16-openstack-redis:pcmklatest]: * redis-bundle-0 (ocf:heartbeat:redis): Promoted controller-0 * redis-bundle-1 (ocf:heartbeat:redis): Unpromoted controller-1 * redis-bundle-2 (ocf:heartbeat:redis): Unpromoted controller-2 * Container bundle set: ovn-dbs-bundle [cluster.common.tag/rhosp16-openstack-ovn-northd:pcmklatest]: * ovn-dbs-bundle-0 (ocf:ovn:ovndb-servers): Unpromoted controller-0 * ovn-dbs-bundle-1 (ocf:ovn:ovndb-servers): Unpromoted controller-1 * ovn-dbs-bundle-2 (ocf:ovn:ovndb-servers): Unpromoted controller-2 * stonith-fence_ipmilan-5254005e097a (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400afe30e (stonith:fence_ipmilan): Started controller-2 * stonith-fence_ipmilan-525400985679 (stonith:fence_ipmilan): Started controller-1 * Container bundle: openstack-cinder-volume [cluster.common.tag/rhosp16-openstack-cinder-volume:pcmklatest]: * openstack-cinder-volume-podman-0 (ocf:heartbeat:podman): Started controller-0 Transition Summary: * Stop ovn-dbs-bundle-podman-0 ( controller-0 ) due to node availability * Stop ovn-dbs-bundle-0 ( controller-0 ) due to unrunnable ovn-dbs-bundle-podman-0 start - * Stop ovndb_servers:0 ( Unpromoted ovn-dbs-bundle-0 ) due to unrunnable ovn-dbs-bundle-podman-0 start + * Stop ovndb_servers:0 ( Unpromoted ovn-dbs-bundle-0 ) due to unrunnable ovn-dbs-bundle-podman-0 start * Promote ovndb_servers:1 ( Unpromoted -> Promoted ovn-dbs-bundle-1 ) Executing Cluster Transition: * Resource action: ovndb_servers cancel=30000 on ovn-dbs-bundle-1 * Pseudo action: ovn-dbs-bundle-master_pre_notify_stop_0 * Pseudo action: ovn-dbs-bundle_stop_0 * Resource action: ovndb_servers notify on ovn-dbs-bundle-0 * Resource action: ovndb_servers notify on ovn-dbs-bundle-1 * Resource action: ovndb_servers notify on ovn-dbs-bundle-2 * Pseudo action: ovn-dbs-bundle-master_confirmed-pre_notify_stop_0 * Pseudo action: ovn-dbs-bundle-master_stop_0 * Resource action: ovndb_servers stop on ovn-dbs-bundle-0 * Pseudo action: 
ovn-dbs-bundle-master_stopped_0 * Resource action: ovn-dbs-bundle-0 stop on controller-0 * Pseudo action: ovn-dbs-bundle-master_post_notify_stopped_0 * Resource action: ovn-dbs-bundle-podman-0 stop on controller-0 * Resource action: ovndb_servers notify on ovn-dbs-bundle-1 * Resource action: ovndb_servers notify on ovn-dbs-bundle-2 * Pseudo action: ovn-dbs-bundle-master_confirmed-post_notify_stopped_0 * Pseudo action: ovn-dbs-bundle-master_pre_notify_start_0 * Pseudo action: ovn-dbs-bundle_stopped_0 * Pseudo action: ovn-dbs-bundle-master_confirmed-pre_notify_start_0 * Pseudo action: ovn-dbs-bundle-master_start_0 * Pseudo action: ovn-dbs-bundle-master_running_0 * Pseudo action: ovn-dbs-bundle-master_post_notify_running_0 * Pseudo action: ovn-dbs-bundle-master_confirmed-post_notify_running_0 * Pseudo action: ovn-dbs-bundle_running_0 * Pseudo action: ovn-dbs-bundle-master_pre_notify_promote_0 * Pseudo action: ovn-dbs-bundle_promote_0 * Resource action: ovndb_servers notify on ovn-dbs-bundle-1 * Resource action: ovndb_servers notify on ovn-dbs-bundle-2 * Pseudo action: ovn-dbs-bundle-master_confirmed-pre_notify_promote_0 * Pseudo action: ovn-dbs-bundle-master_promote_0 * Resource action: ovndb_servers promote on ovn-dbs-bundle-1 * Pseudo action: ovn-dbs-bundle-master_promoted_0 * Pseudo action: ovn-dbs-bundle-master_post_notify_promoted_0 * Resource action: ovndb_servers notify on ovn-dbs-bundle-1 * Resource action: ovndb_servers notify on ovn-dbs-bundle-2 * Pseudo action: ovn-dbs-bundle-master_confirmed-post_notify_promoted_0 * Pseudo action: ovn-dbs-bundle_promoted_0 * Resource action: ovndb_servers monitor=10000 on ovn-dbs-bundle-1 Using the original execution date of: 2020-05-14 10:49:31Z Revised Cluster Status: * Node List: * Online: [ controller-0 controller-1 controller-2 ] * GuestOnline: [ galera-bundle-0@controller-0 galera-bundle-1@controller-1 galera-bundle-2@controller-2 ovn-dbs-bundle-1@controller-1 ovn-dbs-bundle-2@controller-2 rabbitmq-bundle-0@controller-0 rabbitmq-bundle-1@controller-1 rabbitmq-bundle-2@controller-2 redis-bundle-0@controller-0 redis-bundle-1@controller-1 redis-bundle-2@controller-2 ] * Full List of Resources: * Container bundle set: galera-bundle [cluster.common.tag/rhosp16-openstack-mariadb:pcmklatest]: * galera-bundle-0 (ocf:heartbeat:galera): Promoted controller-0 * galera-bundle-1 (ocf:heartbeat:galera): Promoted controller-1 * galera-bundle-2 (ocf:heartbeat:galera): Promoted controller-2 * Container bundle set: rabbitmq-bundle [cluster.common.tag/rhosp16-openstack-rabbitmq:pcmklatest]: * rabbitmq-bundle-0 (ocf:heartbeat:rabbitmq-cluster): Started controller-0 * rabbitmq-bundle-1 (ocf:heartbeat:rabbitmq-cluster): Started controller-1 * rabbitmq-bundle-2 (ocf:heartbeat:rabbitmq-cluster): Started controller-2 * Container bundle set: redis-bundle [cluster.common.tag/rhosp16-openstack-redis:pcmklatest]: * redis-bundle-0 (ocf:heartbeat:redis): Promoted controller-0 * redis-bundle-1 (ocf:heartbeat:redis): Unpromoted controller-1 * redis-bundle-2 (ocf:heartbeat:redis): Unpromoted controller-2 * Container bundle set: ovn-dbs-bundle [cluster.common.tag/rhosp16-openstack-ovn-northd:pcmklatest]: * ovn-dbs-bundle-0 (ocf:ovn:ovndb-servers): Stopped * ovn-dbs-bundle-1 (ocf:ovn:ovndb-servers): Promoted controller-1 * ovn-dbs-bundle-2 (ocf:ovn:ovndb-servers): Unpromoted controller-2 * stonith-fence_ipmilan-5254005e097a (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400afe30e (stonith:fence_ipmilan): Started controller-2 * 
stonith-fence_ipmilan-525400985679 (stonith:fence_ipmilan): Started controller-1 * Container bundle: openstack-cinder-volume [cluster.common.tag/rhosp16-openstack-cinder-volume:pcmklatest]: * openstack-cinder-volume-podman-0 (ocf:heartbeat:podman): Started controller-0 diff --git a/cts/scheduler/summary/novell-239082.summary b/cts/scheduler/summary/novell-239082.summary index 431b6ddc63..01af7656e9 100644 --- a/cts/scheduler/summary/novell-239082.summary +++ b/cts/scheduler/summary/novell-239082.summary @@ -1,59 +1,59 @@ Current cluster status: * Node List: * Online: [ xen-1 xen-2 ] * Full List of Resources: * fs_1 (ocf:heartbeat:Filesystem): Started xen-1 * Clone Set: ms-drbd0 [drbd0] (promotable): * Promoted: [ xen-1 ] * Unpromoted: [ xen-2 ] Transition Summary: * Move fs_1 ( xen-1 -> xen-2 ) * Promote drbd0:0 ( Unpromoted -> Promoted xen-2 ) - * Stop drbd0:1 ( Promoted xen-1 ) due to node availability + * Stop drbd0:1 ( Promoted xen-1 ) due to node availability Executing Cluster Transition: * Resource action: fs_1 stop on xen-1 * Pseudo action: ms-drbd0_pre_notify_demote_0 * Resource action: drbd0:0 notify on xen-2 * Resource action: drbd0:1 notify on xen-1 * Pseudo action: ms-drbd0_confirmed-pre_notify_demote_0 * Pseudo action: ms-drbd0_demote_0 * Resource action: drbd0:1 demote on xen-1 * Pseudo action: ms-drbd0_demoted_0 * Pseudo action: ms-drbd0_post_notify_demoted_0 * Resource action: drbd0:0 notify on xen-2 * Resource action: drbd0:1 notify on xen-1 * Pseudo action: ms-drbd0_confirmed-post_notify_demoted_0 * Pseudo action: ms-drbd0_pre_notify_stop_0 * Resource action: drbd0:0 notify on xen-2 * Resource action: drbd0:1 notify on xen-1 * Pseudo action: ms-drbd0_confirmed-pre_notify_stop_0 * Pseudo action: ms-drbd0_stop_0 * Resource action: drbd0:1 stop on xen-1 * Pseudo action: ms-drbd0_stopped_0 * Cluster action: do_shutdown on xen-1 * Pseudo action: ms-drbd0_post_notify_stopped_0 * Resource action: drbd0:0 notify on xen-2 * Pseudo action: ms-drbd0_confirmed-post_notify_stopped_0 * Pseudo action: ms-drbd0_pre_notify_promote_0 * Resource action: drbd0:0 notify on xen-2 * Pseudo action: ms-drbd0_confirmed-pre_notify_promote_0 * Pseudo action: ms-drbd0_promote_0 * Resource action: drbd0:0 promote on xen-2 * Pseudo action: ms-drbd0_promoted_0 * Pseudo action: ms-drbd0_post_notify_promoted_0 * Resource action: drbd0:0 notify on xen-2 * Pseudo action: ms-drbd0_confirmed-post_notify_promoted_0 * Resource action: fs_1 start on xen-2 Revised Cluster Status: * Node List: * Online: [ xen-1 xen-2 ] * Full List of Resources: * fs_1 (ocf:heartbeat:Filesystem): Started xen-2 * Clone Set: ms-drbd0 [drbd0] (promotable): * Promoted: [ xen-2 ] * Stopped: [ xen-1 ] diff --git a/cts/scheduler/summary/on_fail_demote4.summary b/cts/scheduler/summary/on_fail_demote4.summary index 781f5488bb..b7b1388e58 100644 --- a/cts/scheduler/summary/on_fail_demote4.summary +++ b/cts/scheduler/summary/on_fail_demote4.summary @@ -1,189 +1,189 @@ Using the original execution date of: 2020-06-16 19:23:21Z Current cluster status: * Node List: * RemoteNode remote-rhel7-2: UNCLEAN (offline) * Node rhel7-4: UNCLEAN (offline) * Online: [ rhel7-1 rhel7-3 rhel7-5 ] * GuestOnline: [ lxc1@rhel7-3 stateful-bundle-1@rhel7-1 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started rhel7-4 (UNCLEAN) * Clone Set: rsc1-clone [rsc1] (promotable): * rsc1 (ocf:pacemaker:Stateful): Promoted rhel7-4 (UNCLEAN) * rsc1 (ocf:pacemaker:Stateful): Unpromoted remote-rhel7-2 (UNCLEAN) * Unpromoted: [ lxc1 rhel7-1 rhel7-3 rhel7-5 ] * Clone 
Set: rsc2-master [rsc2] (promotable): * rsc2 (ocf:pacemaker:Stateful): Unpromoted rhel7-4 (UNCLEAN) * rsc2 (ocf:pacemaker:Stateful): Promoted remote-rhel7-2 (UNCLEAN) * Unpromoted: [ lxc1 rhel7-1 rhel7-3 rhel7-5 ] * remote-rhel7-2 (ocf:pacemaker:remote): FAILED rhel7-1 * container1 (ocf:heartbeat:VirtualDomain): Started rhel7-3 * container2 (ocf:heartbeat:VirtualDomain): FAILED rhel7-3 * Clone Set: lxc-ms-master [lxc-ms] (promotable): * Unpromoted: [ lxc1 ] * Stopped: [ remote-rhel7-2 rhel7-1 rhel7-3 rhel7-4 rhel7-5 ] * Container bundle set: stateful-bundle [pcmktest:http]: * stateful-bundle-0 (192.168.122.131) (ocf:pacemaker:Stateful): FAILED Promoted rhel7-5 * stateful-bundle-1 (192.168.122.132) (ocf:pacemaker:Stateful): Unpromoted rhel7-1 * stateful-bundle-2 (192.168.122.133) (ocf:pacemaker:Stateful): FAILED rhel7-4 (UNCLEAN) Transition Summary: * Fence (reboot) stateful-bundle-2 (resource: stateful-bundle-docker-2) 'guest is unclean' * Fence (reboot) stateful-bundle-0 (resource: stateful-bundle-docker-0) 'guest is unclean' * Fence (reboot) lxc2 (resource: container2) 'guest is unclean' * Fence (reboot) remote-rhel7-2 'remote connection is unrecoverable' * Fence (reboot) rhel7-4 'peer is no longer part of the cluster' * Move Fencing ( rhel7-4 -> rhel7-5 ) - * Stop rsc1:0 ( Promoted rhel7-4 ) due to node availability - * Promote rsc1:1 ( Unpromoted -> Promoted rhel7-3 ) - * Stop rsc1:4 ( Unpromoted remote-rhel7-2 ) due to node availability - * Recover rsc1:5 ( Unpromoted lxc2 ) - * Stop rsc2:0 ( Unpromoted rhel7-4 ) due to node availability - * Promote rsc2:1 ( Unpromoted -> Promoted rhel7-3 ) - * Stop rsc2:4 ( Promoted remote-rhel7-2 ) due to node availability - * Recover rsc2:5 ( Unpromoted lxc2 ) + * Stop rsc1:0 ( Promoted rhel7-4 ) due to node availability + * Promote rsc1:1 ( Unpromoted -> Promoted rhel7-3 ) + * Stop rsc1:4 ( Unpromoted remote-rhel7-2 ) due to node availability + * Recover rsc1:5 ( Unpromoted lxc2 ) + * Stop rsc2:0 ( Unpromoted rhel7-4 ) due to node availability + * Promote rsc2:1 ( Unpromoted -> Promoted rhel7-3 ) + * Stop rsc2:4 ( Promoted remote-rhel7-2 ) due to node availability + * Recover rsc2:5 ( Unpromoted lxc2 ) * Recover remote-rhel7-2 ( rhel7-1 ) * Recover container2 ( rhel7-3 ) - * Recover lxc-ms:0 ( Promoted lxc2 ) + * Recover lxc-ms:0 ( Promoted lxc2 ) * Recover stateful-bundle-docker-0 ( rhel7-5 ) * Restart stateful-bundle-0 ( rhel7-5 ) due to required stateful-bundle-docker-0 start - * Recover bundled:0 ( Promoted stateful-bundle-0 ) + * Recover bundled:0 ( Promoted stateful-bundle-0 ) * Move stateful-bundle-ip-192.168.122.133 ( rhel7-4 -> rhel7-3 ) * Recover stateful-bundle-docker-2 ( rhel7-4 -> rhel7-3 ) * Move stateful-bundle-2 ( rhel7-4 -> rhel7-3 ) - * Recover bundled:2 ( Unpromoted stateful-bundle-2 ) + * Recover bundled:2 ( Unpromoted stateful-bundle-2 ) * Restart lxc2 ( rhel7-3 ) due to required container2 start Executing Cluster Transition: * Pseudo action: Fencing_stop_0 * Resource action: rsc1 cancel=11000 on rhel7-3 * Pseudo action: rsc1-clone_demote_0 * Resource action: rsc2 cancel=11000 on rhel7-3 * Pseudo action: rsc2-master_demote_0 * Pseudo action: lxc-ms-master_demote_0 * Resource action: stateful-bundle-0 stop on rhel7-5 * Pseudo action: stateful-bundle-2_stop_0 * Resource action: lxc2 stop on rhel7-3 * Pseudo action: stateful-bundle_demote_0 * Fencing remote-rhel7-2 (reboot) * Fencing rhel7-4 (reboot) * Pseudo action: rsc1_demote_0 * Pseudo action: rsc1-clone_demoted_0 * Pseudo action: rsc2_demote_0 * Pseudo action: 
rsc2-master_demoted_0 * Resource action: container2 stop on rhel7-3 * Pseudo action: stateful-bundle-master_demote_0 * Pseudo action: stonith-stateful-bundle-2-reboot on stateful-bundle-2 * Pseudo action: stonith-lxc2-reboot on lxc2 * Resource action: Fencing start on rhel7-5 * Pseudo action: rsc1-clone_stop_0 * Pseudo action: rsc2-master_stop_0 * Pseudo action: lxc-ms_demote_0 * Pseudo action: lxc-ms-master_demoted_0 * Pseudo action: lxc-ms-master_stop_0 * Pseudo action: bundled_demote_0 * Pseudo action: stateful-bundle-master_demoted_0 * Pseudo action: stateful-bundle_demoted_0 * Pseudo action: stateful-bundle_stop_0 * Resource action: Fencing monitor=120000 on rhel7-5 * Pseudo action: rsc1_stop_0 * Pseudo action: rsc1_stop_0 * Pseudo action: rsc1_stop_0 * Pseudo action: rsc1-clone_stopped_0 * Pseudo action: rsc1-clone_start_0 * Pseudo action: rsc2_stop_0 * Pseudo action: rsc2_stop_0 * Pseudo action: rsc2_stop_0 * Pseudo action: rsc2-master_stopped_0 * Pseudo action: rsc2-master_start_0 * Resource action: remote-rhel7-2 stop on rhel7-1 * Pseudo action: lxc-ms_stop_0 * Pseudo action: lxc-ms-master_stopped_0 * Pseudo action: lxc-ms-master_start_0 * Resource action: stateful-bundle-docker-0 stop on rhel7-5 * Pseudo action: stateful-bundle-docker-2_stop_0 * Pseudo action: stonith-stateful-bundle-0-reboot on stateful-bundle-0 * Resource action: remote-rhel7-2 start on rhel7-1 * Resource action: remote-rhel7-2 monitor=60000 on rhel7-1 * Resource action: container2 start on rhel7-3 * Resource action: container2 monitor=20000 on rhel7-3 * Pseudo action: stateful-bundle-master_stop_0 * Pseudo action: stateful-bundle-ip-192.168.122.133_stop_0 * Resource action: lxc2 start on rhel7-3 * Resource action: lxc2 monitor=30000 on rhel7-3 * Resource action: rsc1 start on lxc2 * Pseudo action: rsc1-clone_running_0 * Resource action: rsc2 start on lxc2 * Pseudo action: rsc2-master_running_0 * Resource action: lxc-ms start on lxc2 * Pseudo action: lxc-ms-master_running_0 * Pseudo action: bundled_stop_0 * Resource action: stateful-bundle-ip-192.168.122.133 start on rhel7-3 * Resource action: rsc1 monitor=11000 on lxc2 * Pseudo action: rsc1-clone_promote_0 * Resource action: rsc2 monitor=11000 on lxc2 * Pseudo action: rsc2-master_promote_0 * Pseudo action: lxc-ms-master_promote_0 * Pseudo action: bundled_stop_0 * Pseudo action: stateful-bundle-master_stopped_0 * Resource action: stateful-bundle-ip-192.168.122.133 monitor=60000 on rhel7-3 * Pseudo action: stateful-bundle_stopped_0 * Pseudo action: stateful-bundle_start_0 * Resource action: rsc1 promote on rhel7-3 * Pseudo action: rsc1-clone_promoted_0 * Resource action: rsc2 promote on rhel7-3 * Pseudo action: rsc2-master_promoted_0 * Resource action: lxc-ms promote on lxc2 * Pseudo action: lxc-ms-master_promoted_0 * Pseudo action: stateful-bundle-master_start_0 * Resource action: stateful-bundle-docker-0 start on rhel7-5 * Resource action: stateful-bundle-docker-0 monitor=60000 on rhel7-5 * Resource action: stateful-bundle-0 start on rhel7-5 * Resource action: stateful-bundle-0 monitor=30000 on rhel7-5 * Resource action: stateful-bundle-docker-2 start on rhel7-3 * Resource action: stateful-bundle-2 start on rhel7-3 * Resource action: rsc1 monitor=10000 on rhel7-3 * Resource action: rsc2 monitor=10000 on rhel7-3 * Resource action: lxc-ms monitor=10000 on lxc2 * Resource action: bundled start on stateful-bundle-0 * Resource action: bundled start on stateful-bundle-2 * Pseudo action: stateful-bundle-master_running_0 * Resource action: stateful-bundle-docker-2 
monitor=60000 on rhel7-3 * Resource action: stateful-bundle-2 monitor=30000 on rhel7-3 * Pseudo action: stateful-bundle_running_0 * Resource action: bundled monitor=11000 on stateful-bundle-2 * Pseudo action: stateful-bundle_promote_0 * Pseudo action: stateful-bundle-master_promote_0 * Resource action: bundled promote on stateful-bundle-0 * Pseudo action: stateful-bundle-master_promoted_0 * Pseudo action: stateful-bundle_promoted_0 * Resource action: bundled monitor=10000 on stateful-bundle-0 Using the original execution date of: 2020-06-16 19:23:21Z Revised Cluster Status: * Node List: * Online: [ rhel7-1 rhel7-3 rhel7-5 ] * OFFLINE: [ rhel7-4 ] * RemoteOnline: [ remote-rhel7-2 ] * GuestOnline: [ lxc1@rhel7-3 lxc2@rhel7-3 stateful-bundle-0@rhel7-5 stateful-bundle-1@rhel7-1 stateful-bundle-2@rhel7-3 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started rhel7-5 * Clone Set: rsc1-clone [rsc1] (promotable): * Promoted: [ rhel7-3 ] * Unpromoted: [ lxc1 lxc2 rhel7-1 rhel7-5 ] * Stopped: [ remote-rhel7-2 rhel7-4 ] * Clone Set: rsc2-master [rsc2] (promotable): * Promoted: [ rhel7-3 ] * Unpromoted: [ lxc1 lxc2 rhel7-1 rhel7-5 ] * Stopped: [ remote-rhel7-2 rhel7-4 ] * remote-rhel7-2 (ocf:pacemaker:remote): Started rhel7-1 * container1 (ocf:heartbeat:VirtualDomain): Started rhel7-3 * container2 (ocf:heartbeat:VirtualDomain): Started rhel7-3 * Clone Set: lxc-ms-master [lxc-ms] (promotable): * Promoted: [ lxc2 ] * Unpromoted: [ lxc1 ] * Container bundle set: stateful-bundle [pcmktest:http]: * stateful-bundle-0 (192.168.122.131) (ocf:pacemaker:Stateful): Promoted rhel7-5 * stateful-bundle-1 (192.168.122.132) (ocf:pacemaker:Stateful): Unpromoted rhel7-1 * stateful-bundle-2 (192.168.122.133) (ocf:pacemaker:Stateful): Unpromoted rhel7-3 diff --git a/cts/scheduler/summary/probe-2.summary b/cts/scheduler/summary/probe-2.summary index f2c60821ab..3523891d30 100644 --- a/cts/scheduler/summary/probe-2.summary +++ b/cts/scheduler/summary/probe-2.summary @@ -1,163 +1,163 @@ Current cluster status: * Node List: * Node wc02: standby (with active resources) * Online: [ wc01 ] * Full List of Resources: * Resource Group: group_www_data: * fs_www_data (ocf:heartbeat:Filesystem): Started wc01 * nfs-kernel-server (lsb:nfs-kernel-server): Started wc01 * intip_nfs (ocf:heartbeat:IPaddr2): Started wc01 * Clone Set: ms_drbd_mysql [drbd_mysql] (promotable): * Promoted: [ wc02 ] * Unpromoted: [ wc01 ] * Resource Group: group_mysql: * fs_mysql (ocf:heartbeat:Filesystem): Started wc02 * intip_sql (ocf:heartbeat:IPaddr2): Started wc02 * mysql-server (ocf:heartbeat:mysql): Started wc02 * Clone Set: ms_drbd_www [drbd_www] (promotable): * Promoted: [ wc01 ] * Unpromoted: [ wc02 ] * Clone Set: clone_nfs-common [group_nfs-common]: * Started: [ wc01 wc02 ] * Clone Set: clone_mysql-proxy [group_mysql-proxy]: * Started: [ wc01 wc02 ] * Clone Set: clone_webservice [group_webservice]: * Started: [ wc01 wc02 ] * Resource Group: group_ftpd: * extip_ftp (ocf:heartbeat:IPaddr2): Started wc01 * pure-ftpd (ocf:heartbeat:Pure-FTPd): Started wc01 * Clone Set: DoFencing [stonith_rackpdu] (unique): * stonith_rackpdu:0 (stonith:external/rackpdu): Started wc01 * stonith_rackpdu:1 (stonith:external/rackpdu): Started wc02 Transition Summary: * Promote drbd_mysql:0 ( Unpromoted -> Promoted wc01 ) - * Stop drbd_mysql:1 ( Promoted wc02 ) due to node availability + * Stop drbd_mysql:1 ( Promoted wc02 ) due to node availability * Move fs_mysql ( wc02 -> wc01 ) * Move intip_sql ( wc02 -> wc01 ) * Move mysql-server ( wc02 -> wc01 ) - * Stop 
drbd_www:1 ( Unpromoted wc02 ) due to node availability + * Stop drbd_www:1 ( Unpromoted wc02 ) due to node availability * Stop nfs-common:1 ( wc02 ) due to node availability * Stop mysql-proxy:1 ( wc02 ) due to node availability * Stop fs_www:1 ( wc02 ) due to node availability * Stop apache2:1 ( wc02 ) due to node availability * Restart stonith_rackpdu:0 ( wc01 ) * Stop stonith_rackpdu:1 ( wc02 ) due to node availability Executing Cluster Transition: * Resource action: drbd_mysql:0 cancel=10000 on wc01 * Pseudo action: ms_drbd_mysql_pre_notify_demote_0 * Pseudo action: group_mysql_stop_0 * Resource action: mysql-server stop on wc02 * Pseudo action: ms_drbd_www_pre_notify_stop_0 * Pseudo action: clone_mysql-proxy_stop_0 * Pseudo action: clone_webservice_stop_0 * Pseudo action: DoFencing_stop_0 * Resource action: drbd_mysql:0 notify on wc01 * Resource action: drbd_mysql:1 notify on wc02 * Pseudo action: ms_drbd_mysql_confirmed-pre_notify_demote_0 * Resource action: intip_sql stop on wc02 * Resource action: drbd_www:0 notify on wc01 * Resource action: drbd_www:1 notify on wc02 * Pseudo action: ms_drbd_www_confirmed-pre_notify_stop_0 * Pseudo action: ms_drbd_www_stop_0 * Pseudo action: group_mysql-proxy:1_stop_0 * Resource action: mysql-proxy:1 stop on wc02 * Pseudo action: group_webservice:1_stop_0 * Resource action: apache2:1 stop on wc02 * Resource action: stonith_rackpdu:0 stop on wc01 * Resource action: stonith_rackpdu:1 stop on wc02 * Pseudo action: DoFencing_stopped_0 * Pseudo action: DoFencing_start_0 * Resource action: fs_mysql stop on wc02 * Resource action: drbd_www:1 stop on wc02 * Pseudo action: ms_drbd_www_stopped_0 * Pseudo action: group_mysql-proxy:1_stopped_0 * Pseudo action: clone_mysql-proxy_stopped_0 * Resource action: fs_www:1 stop on wc02 * Resource action: stonith_rackpdu:0 start on wc01 * Pseudo action: DoFencing_running_0 * Pseudo action: group_mysql_stopped_0 * Pseudo action: ms_drbd_www_post_notify_stopped_0 * Pseudo action: group_webservice:1_stopped_0 * Pseudo action: clone_webservice_stopped_0 * Resource action: stonith_rackpdu:0 monitor=5000 on wc01 * Pseudo action: ms_drbd_mysql_demote_0 * Resource action: drbd_www:0 notify on wc01 * Pseudo action: ms_drbd_www_confirmed-post_notify_stopped_0 * Pseudo action: clone_nfs-common_stop_0 * Resource action: drbd_mysql:1 demote on wc02 * Pseudo action: ms_drbd_mysql_demoted_0 * Pseudo action: group_nfs-common:1_stop_0 * Resource action: nfs-common:1 stop on wc02 * Pseudo action: ms_drbd_mysql_post_notify_demoted_0 * Pseudo action: group_nfs-common:1_stopped_0 * Pseudo action: clone_nfs-common_stopped_0 * Resource action: drbd_mysql:0 notify on wc01 * Resource action: drbd_mysql:1 notify on wc02 * Pseudo action: ms_drbd_mysql_confirmed-post_notify_demoted_0 * Pseudo action: ms_drbd_mysql_pre_notify_stop_0 * Resource action: drbd_mysql:0 notify on wc01 * Resource action: drbd_mysql:1 notify on wc02 * Pseudo action: ms_drbd_mysql_confirmed-pre_notify_stop_0 * Pseudo action: ms_drbd_mysql_stop_0 * Resource action: drbd_mysql:1 stop on wc02 * Pseudo action: ms_drbd_mysql_stopped_0 * Pseudo action: ms_drbd_mysql_post_notify_stopped_0 * Resource action: drbd_mysql:0 notify on wc01 * Pseudo action: ms_drbd_mysql_confirmed-post_notify_stopped_0 * Pseudo action: ms_drbd_mysql_pre_notify_promote_0 * Resource action: drbd_mysql:0 notify on wc01 * Pseudo action: ms_drbd_mysql_confirmed-pre_notify_promote_0 * Pseudo action: ms_drbd_mysql_promote_0 * Resource action: drbd_mysql:0 promote on wc01 * Pseudo action: 
ms_drbd_mysql_promoted_0 * Pseudo action: ms_drbd_mysql_post_notify_promoted_0 * Resource action: drbd_mysql:0 notify on wc01 * Pseudo action: ms_drbd_mysql_confirmed-post_notify_promoted_0 * Pseudo action: group_mysql_start_0 * Resource action: fs_mysql start on wc01 * Resource action: intip_sql start on wc01 * Resource action: mysql-server start on wc01 * Resource action: drbd_mysql:0 monitor=5000 on wc01 * Pseudo action: group_mysql_running_0 * Resource action: fs_mysql monitor=30000 on wc01 * Resource action: intip_sql monitor=30000 on wc01 * Resource action: mysql-server monitor=30000 on wc01 Revised Cluster Status: * Node List: * Node wc02: standby * Online: [ wc01 ] * Full List of Resources: * Resource Group: group_www_data: * fs_www_data (ocf:heartbeat:Filesystem): Started wc01 * nfs-kernel-server (lsb:nfs-kernel-server): Started wc01 * intip_nfs (ocf:heartbeat:IPaddr2): Started wc01 * Clone Set: ms_drbd_mysql [drbd_mysql] (promotable): * Promoted: [ wc01 ] * Stopped: [ wc02 ] * Resource Group: group_mysql: * fs_mysql (ocf:heartbeat:Filesystem): Started wc01 * intip_sql (ocf:heartbeat:IPaddr2): Started wc01 * mysql-server (ocf:heartbeat:mysql): Started wc01 * Clone Set: ms_drbd_www [drbd_www] (promotable): * Promoted: [ wc01 ] * Stopped: [ wc02 ] * Clone Set: clone_nfs-common [group_nfs-common]: * Started: [ wc01 ] * Stopped: [ wc02 ] * Clone Set: clone_mysql-proxy [group_mysql-proxy]: * Started: [ wc01 ] * Stopped: [ wc02 ] * Clone Set: clone_webservice [group_webservice]: * Started: [ wc01 ] * Stopped: [ wc02 ] * Resource Group: group_ftpd: * extip_ftp (ocf:heartbeat:IPaddr2): Started wc01 * pure-ftpd (ocf:heartbeat:Pure-FTPd): Started wc01 * Clone Set: DoFencing [stonith_rackpdu] (unique): * stonith_rackpdu:0 (stonith:external/rackpdu): Started wc01 * stonith_rackpdu:1 (stonith:external/rackpdu): Stopped diff --git a/cts/scheduler/summary/promoted-7.summary b/cts/scheduler/summary/promoted-7.summary index 4fc3a85e9a..0602f95895 100644 --- a/cts/scheduler/summary/promoted-7.summary +++ b/cts/scheduler/summary/promoted-7.summary @@ -1,121 +1,121 @@ Current cluster status: * Node List: * Node c001n01: UNCLEAN (offline) * Online: [ c001n02 c001n03 c001n08 ] * Full List of Resources: * DcIPaddr (ocf:heartbeat:IPaddr): Started c001n01 (UNCLEAN) * Resource Group: group-1: * ocf_192.168.100.181 (ocf:heartbeat:IPaddr): Started c001n03 * heartbeat_192.168.100.182 (ocf:heartbeat:IPaddr): Started c001n03 * ocf_192.168.100.183 (ocf:heartbeat:IPaddr): Started c001n03 * lsb_dummy (lsb:/usr/lib/heartbeat/cts/LSBDummy): Started c001n02 * rsc_c001n01 (ocf:heartbeat:IPaddr): Started c001n01 (UNCLEAN) * rsc_c001n08 (ocf:heartbeat:IPaddr): Started c001n08 * rsc_c001n02 (ocf:heartbeat:IPaddr): Started c001n02 * rsc_c001n03 (ocf:heartbeat:IPaddr): Started c001n03 * Clone Set: DoFencing [child_DoFencing] (unique): * child_DoFencing:0 (stonith:ssh): Started c001n01 (UNCLEAN) * child_DoFencing:1 (stonith:ssh): Started c001n03 * child_DoFencing:2 (stonith:ssh): Started c001n02 * child_DoFencing:3 (stonith:ssh): Started c001n08 * Clone Set: master_rsc_1 [ocf_msdummy] (promotable) (unique): * ocf_msdummy:0 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Promoted c001n01 (UNCLEAN) * ocf_msdummy:1 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n03 * ocf_msdummy:2 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n02 * ocf_msdummy:3 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n08 * ocf_msdummy:4 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): 
Unpromoted c001n01 (UNCLEAN) * ocf_msdummy:5 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n03 * ocf_msdummy:6 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n02 * ocf_msdummy:7 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n08 Transition Summary: * Fence (reboot) c001n01 'peer is no longer part of the cluster' * Move DcIPaddr ( c001n01 -> c001n03 ) * Move ocf_192.168.100.181 ( c001n03 -> c001n02 ) * Move heartbeat_192.168.100.182 ( c001n03 -> c001n02 ) * Move ocf_192.168.100.183 ( c001n03 -> c001n02 ) * Move lsb_dummy ( c001n02 -> c001n08 ) * Move rsc_c001n01 ( c001n01 -> c001n03 ) * Stop child_DoFencing:0 ( c001n01 ) due to node availability - * Stop ocf_msdummy:0 ( Promoted c001n01 ) due to node availability - * Stop ocf_msdummy:4 ( Unpromoted c001n01 ) due to node availability + * Stop ocf_msdummy:0 ( Promoted c001n01 ) due to node availability + * Stop ocf_msdummy:4 ( Unpromoted c001n01 ) due to node availability Executing Cluster Transition: * Pseudo action: group-1_stop_0 * Resource action: ocf_192.168.100.183 stop on c001n03 * Resource action: lsb_dummy stop on c001n02 * Resource action: child_DoFencing:2 monitor on c001n08 * Resource action: child_DoFencing:2 monitor on c001n03 * Resource action: child_DoFencing:3 monitor on c001n03 * Resource action: child_DoFencing:3 monitor on c001n02 * Pseudo action: DoFencing_stop_0 * Resource action: ocf_msdummy:4 monitor on c001n08 * Resource action: ocf_msdummy:4 monitor on c001n03 * Resource action: ocf_msdummy:4 monitor on c001n02 * Resource action: ocf_msdummy:5 monitor on c001n08 * Resource action: ocf_msdummy:5 monitor on c001n02 * Resource action: ocf_msdummy:6 monitor on c001n08 * Resource action: ocf_msdummy:6 monitor on c001n03 * Resource action: ocf_msdummy:7 monitor on c001n03 * Resource action: ocf_msdummy:7 monitor on c001n02 * Pseudo action: master_rsc_1_demote_0 * Fencing c001n01 (reboot) * Pseudo action: DcIPaddr_stop_0 * Resource action: heartbeat_192.168.100.182 stop on c001n03 * Resource action: lsb_dummy start on c001n08 * Pseudo action: rsc_c001n01_stop_0 * Pseudo action: child_DoFencing:0_stop_0 * Pseudo action: DoFencing_stopped_0 * Pseudo action: ocf_msdummy:0_demote_0 * Pseudo action: master_rsc_1_demoted_0 * Pseudo action: master_rsc_1_stop_0 * Resource action: DcIPaddr start on c001n03 * Resource action: ocf_192.168.100.181 stop on c001n03 * Resource action: lsb_dummy monitor=5000 on c001n08 * Resource action: rsc_c001n01 start on c001n03 * Pseudo action: ocf_msdummy:0_stop_0 * Pseudo action: ocf_msdummy:4_stop_0 * Pseudo action: master_rsc_1_stopped_0 * Resource action: DcIPaddr monitor=5000 on c001n03 * Pseudo action: group-1_stopped_0 * Pseudo action: group-1_start_0 * Resource action: ocf_192.168.100.181 start on c001n02 * Resource action: heartbeat_192.168.100.182 start on c001n02 * Resource action: ocf_192.168.100.183 start on c001n02 * Resource action: rsc_c001n01 monitor=5000 on c001n03 * Pseudo action: group-1_running_0 * Resource action: ocf_192.168.100.181 monitor=5000 on c001n02 * Resource action: heartbeat_192.168.100.182 monitor=5000 on c001n02 * Resource action: ocf_192.168.100.183 monitor=5000 on c001n02 Revised Cluster Status: * Node List: * Online: [ c001n02 c001n03 c001n08 ] * OFFLINE: [ c001n01 ] * Full List of Resources: * DcIPaddr (ocf:heartbeat:IPaddr): Started c001n03 * Resource Group: group-1: * ocf_192.168.100.181 (ocf:heartbeat:IPaddr): Started c001n02 * heartbeat_192.168.100.182 (ocf:heartbeat:IPaddr): Started c001n02 * 
ocf_192.168.100.183 (ocf:heartbeat:IPaddr): Started c001n02 * lsb_dummy (lsb:/usr/lib/heartbeat/cts/LSBDummy): Started c001n08 * rsc_c001n01 (ocf:heartbeat:IPaddr): Started c001n03 * rsc_c001n08 (ocf:heartbeat:IPaddr): Started c001n08 * rsc_c001n02 (ocf:heartbeat:IPaddr): Started c001n02 * rsc_c001n03 (ocf:heartbeat:IPaddr): Started c001n03 * Clone Set: DoFencing [child_DoFencing] (unique): * child_DoFencing:0 (stonith:ssh): Stopped * child_DoFencing:1 (stonith:ssh): Started c001n03 * child_DoFencing:2 (stonith:ssh): Started c001n02 * child_DoFencing:3 (stonith:ssh): Started c001n08 * Clone Set: master_rsc_1 [ocf_msdummy] (promotable) (unique): * ocf_msdummy:0 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Stopped * ocf_msdummy:1 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n03 * ocf_msdummy:2 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n02 * ocf_msdummy:3 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n08 * ocf_msdummy:4 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Stopped * ocf_msdummy:5 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n03 * ocf_msdummy:6 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n02 * ocf_msdummy:7 (ocf:heartbeat:/usr/lib/heartbeat/cts/OCFMSDummy): Unpromoted c001n08 diff --git a/cts/scheduler/summary/promoted-asymmetrical-order.summary b/cts/scheduler/summary/promoted-asymmetrical-order.summary index df6e00c9c2..e10568e898 100644 --- a/cts/scheduler/summary/promoted-asymmetrical-order.summary +++ b/cts/scheduler/summary/promoted-asymmetrical-order.summary @@ -1,37 +1,37 @@ 2 of 4 resource instances DISABLED and 0 BLOCKED from further action due to failure Current cluster status: * Node List: * Online: [ node1 node2 ] * Full List of Resources: * Clone Set: ms1 [rsc1] (promotable) (disabled): * Promoted: [ node1 ] * Unpromoted: [ node2 ] * Clone Set: ms2 [rsc2] (promotable): * Promoted: [ node2 ] * Unpromoted: [ node1 ] Transition Summary: - * Stop rsc1:0 ( Promoted node1 ) due to node availability - * Stop rsc1:1 ( Unpromoted node2 ) due to node availability + * Stop rsc1:0 ( Promoted node1 ) due to node availability + * Stop rsc1:1 ( Unpromoted node2 ) due to node availability Executing Cluster Transition: * Pseudo action: ms1_demote_0 * Resource action: rsc1:0 demote on node1 * Pseudo action: ms1_demoted_0 * Pseudo action: ms1_stop_0 * Resource action: rsc1:0 stop on node1 * Resource action: rsc1:1 stop on node2 * Pseudo action: ms1_stopped_0 Revised Cluster Status: * Node List: * Online: [ node1 node2 ] * Full List of Resources: * Clone Set: ms1 [rsc1] (promotable) (disabled): * Stopped (disabled): [ node1 node2 ] * Clone Set: ms2 [rsc2] (promotable): * Promoted: [ node2 ] * Unpromoted: [ node1 ] diff --git a/cts/scheduler/summary/promoted-demote-2.summary b/cts/scheduler/summary/promoted-demote-2.summary index daea66ae8b..115da9aaaf 100644 --- a/cts/scheduler/summary/promoted-demote-2.summary +++ b/cts/scheduler/summary/promoted-demote-2.summary @@ -1,75 +1,75 @@ Current cluster status: * Node List: * Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started pcmk-1 * Resource Group: group-1: * r192.168.122.105 (ocf:heartbeat:IPaddr): Stopped * r192.168.122.106 (ocf:heartbeat:IPaddr): Stopped * r192.168.122.107 (ocf:heartbeat:IPaddr): Stopped * rsc_pcmk-1 (ocf:heartbeat:IPaddr): Started pcmk-1 * rsc_pcmk-2 (ocf:heartbeat:IPaddr): Started pcmk-2 * rsc_pcmk-3 (ocf:heartbeat:IPaddr): Started pcmk-3 * rsc_pcmk-4 
(ocf:heartbeat:IPaddr): Started pcmk-4 * lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Stopped * migrator (ocf:pacemaker:Dummy): Started pcmk-4 * Clone Set: Connectivity [ping-1]: * Started: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] * Clone Set: master-1 [stateful-1] (promotable): * stateful-1 (ocf:pacemaker:Stateful): FAILED pcmk-1 * Unpromoted: [ pcmk-2 pcmk-3 pcmk-4 ] Transition Summary: * Start r192.168.122.105 ( pcmk-2 ) * Start r192.168.122.106 ( pcmk-2 ) * Start r192.168.122.107 ( pcmk-2 ) * Start lsb-dummy ( pcmk-2 ) - * Recover stateful-1:0 ( Unpromoted pcmk-1 ) + * Recover stateful-1:0 ( Unpromoted pcmk-1 ) * Promote stateful-1:1 ( Unpromoted -> Promoted pcmk-2 ) Executing Cluster Transition: * Resource action: stateful-1:0 cancel=15000 on pcmk-2 * Pseudo action: master-1_stop_0 * Resource action: stateful-1:1 stop on pcmk-1 * Pseudo action: master-1_stopped_0 * Pseudo action: master-1_start_0 * Resource action: stateful-1:1 start on pcmk-1 * Pseudo action: master-1_running_0 * Resource action: stateful-1:1 monitor=15000 on pcmk-1 * Pseudo action: master-1_promote_0 * Resource action: stateful-1:0 promote on pcmk-2 * Pseudo action: master-1_promoted_0 * Pseudo action: group-1_start_0 * Resource action: r192.168.122.105 start on pcmk-2 * Resource action: r192.168.122.106 start on pcmk-2 * Resource action: r192.168.122.107 start on pcmk-2 * Resource action: stateful-1:0 monitor=16000 on pcmk-2 * Pseudo action: group-1_running_0 * Resource action: r192.168.122.105 monitor=5000 on pcmk-2 * Resource action: r192.168.122.106 monitor=5000 on pcmk-2 * Resource action: r192.168.122.107 monitor=5000 on pcmk-2 * Resource action: lsb-dummy start on pcmk-2 * Resource action: lsb-dummy monitor=5000 on pcmk-2 Revised Cluster Status: * Node List: * Online: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started pcmk-1 * Resource Group: group-1: * r192.168.122.105 (ocf:heartbeat:IPaddr): Started pcmk-2 * r192.168.122.106 (ocf:heartbeat:IPaddr): Started pcmk-2 * r192.168.122.107 (ocf:heartbeat:IPaddr): Started pcmk-2 * rsc_pcmk-1 (ocf:heartbeat:IPaddr): Started pcmk-1 * rsc_pcmk-2 (ocf:heartbeat:IPaddr): Started pcmk-2 * rsc_pcmk-3 (ocf:heartbeat:IPaddr): Started pcmk-3 * rsc_pcmk-4 (ocf:heartbeat:IPaddr): Started pcmk-4 * lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started pcmk-2 * migrator (ocf:pacemaker:Dummy): Started pcmk-4 * Clone Set: Connectivity [ping-1]: * Started: [ pcmk-1 pcmk-2 pcmk-3 pcmk-4 ] * Clone Set: master-1 [stateful-1] (promotable): * Promoted: [ pcmk-2 ] * Unpromoted: [ pcmk-1 pcmk-3 pcmk-4 ] diff --git a/cts/scheduler/summary/promoted-failed-demote-2.summary b/cts/scheduler/summary/promoted-failed-demote-2.summary index 198d9ad3ee..c8504e9e1d 100644 --- a/cts/scheduler/summary/promoted-failed-demote-2.summary +++ b/cts/scheduler/summary/promoted-failed-demote-2.summary @@ -1,47 +1,47 @@ Current cluster status: * Node List: * Online: [ dl380g5a dl380g5b ] * Full List of Resources: * Clone Set: ms-sf [group] (promotable) (unique): * Resource Group: group:0: * stateful-1:0 (ocf:heartbeat:Stateful): FAILED dl380g5b * stateful-2:0 (ocf:heartbeat:Stateful): Stopped * Resource Group: group:1: * stateful-1:1 (ocf:heartbeat:Stateful): Unpromoted dl380g5a * stateful-2:1 (ocf:heartbeat:Stateful): Unpromoted dl380g5a Transition Summary: - * Stop stateful-1:0 ( Unpromoted dl380g5b ) due to node availability + * Stop stateful-1:0 ( Unpromoted dl380g5b ) due to node availability * Promote stateful-1:1 ( Unpromoted -> Promoted dl380g5a 
) * Promote stateful-2:1 ( Unpromoted -> Promoted dl380g5a ) Executing Cluster Transition: * Resource action: stateful-1:1 cancel=20000 on dl380g5a * Resource action: stateful-2:1 cancel=20000 on dl380g5a * Pseudo action: ms-sf_stop_0 * Pseudo action: group:0_stop_0 * Resource action: stateful-1:0 stop on dl380g5b * Pseudo action: group:0_stopped_0 * Pseudo action: ms-sf_stopped_0 * Pseudo action: ms-sf_promote_0 * Pseudo action: group:1_promote_0 * Resource action: stateful-1:1 promote on dl380g5a * Resource action: stateful-2:1 promote on dl380g5a * Pseudo action: group:1_promoted_0 * Resource action: stateful-1:1 monitor=10000 on dl380g5a * Resource action: stateful-2:1 monitor=10000 on dl380g5a * Pseudo action: ms-sf_promoted_0 Revised Cluster Status: * Node List: * Online: [ dl380g5a dl380g5b ] * Full List of Resources: * Clone Set: ms-sf [group] (promotable) (unique): * Resource Group: group:0: * stateful-1:0 (ocf:heartbeat:Stateful): Stopped * stateful-2:0 (ocf:heartbeat:Stateful): Stopped * Resource Group: group:1: * stateful-1:1 (ocf:heartbeat:Stateful): Promoted dl380g5a * stateful-2:1 (ocf:heartbeat:Stateful): Promoted dl380g5a diff --git a/cts/scheduler/summary/promoted-failed-demote.summary b/cts/scheduler/summary/promoted-failed-demote.summary index 884a380063..f071025528 100644 --- a/cts/scheduler/summary/promoted-failed-demote.summary +++ b/cts/scheduler/summary/promoted-failed-demote.summary @@ -1,64 +1,64 @@ Current cluster status: * Node List: * Online: [ dl380g5a dl380g5b ] * Full List of Resources: * Clone Set: ms-sf [group] (promotable) (unique): * Resource Group: group:0: * stateful-1:0 (ocf:heartbeat:Stateful): FAILED dl380g5b * stateful-2:0 (ocf:heartbeat:Stateful): Stopped * Resource Group: group:1: * stateful-1:1 (ocf:heartbeat:Stateful): Unpromoted dl380g5a * stateful-2:1 (ocf:heartbeat:Stateful): Unpromoted dl380g5a Transition Summary: - * Stop stateful-1:0 ( Unpromoted dl380g5b ) due to node availability + * Stop stateful-1:0 ( Unpromoted dl380g5b ) due to node availability * Promote stateful-1:1 ( Unpromoted -> Promoted dl380g5a ) * Promote stateful-2:1 ( Unpromoted -> Promoted dl380g5a ) Executing Cluster Transition: * Resource action: stateful-1:1 cancel=20000 on dl380g5a * Resource action: stateful-2:1 cancel=20000 on dl380g5a * Pseudo action: ms-sf_pre_notify_stop_0 * Resource action: stateful-1:0 notify on dl380g5b * Resource action: stateful-1:1 notify on dl380g5a * Resource action: stateful-2:1 notify on dl380g5a * Pseudo action: ms-sf_confirmed-pre_notify_stop_0 * Pseudo action: ms-sf_stop_0 * Pseudo action: group:0_stop_0 * Resource action: stateful-1:0 stop on dl380g5b * Pseudo action: group:0_stopped_0 * Pseudo action: ms-sf_stopped_0 * Pseudo action: ms-sf_post_notify_stopped_0 * Resource action: stateful-1:1 notify on dl380g5a * Resource action: stateful-2:1 notify on dl380g5a * Pseudo action: ms-sf_confirmed-post_notify_stopped_0 * Pseudo action: ms-sf_pre_notify_promote_0 * Resource action: stateful-1:1 notify on dl380g5a * Resource action: stateful-2:1 notify on dl380g5a * Pseudo action: ms-sf_confirmed-pre_notify_promote_0 * Pseudo action: ms-sf_promote_0 * Pseudo action: group:1_promote_0 * Resource action: stateful-1:1 promote on dl380g5a * Resource action: stateful-2:1 promote on dl380g5a * Pseudo action: group:1_promoted_0 * Pseudo action: ms-sf_promoted_0 * Pseudo action: ms-sf_post_notify_promoted_0 * Resource action: stateful-1:1 notify on dl380g5a * Resource action: stateful-2:1 notify on dl380g5a * Pseudo action: 
ms-sf_confirmed-post_notify_promoted_0 * Resource action: stateful-1:1 monitor=10000 on dl380g5a * Resource action: stateful-2:1 monitor=10000 on dl380g5a Revised Cluster Status: * Node List: * Online: [ dl380g5a dl380g5b ] * Full List of Resources: * Clone Set: ms-sf [group] (promotable) (unique): * Resource Group: group:0: * stateful-1:0 (ocf:heartbeat:Stateful): Stopped * stateful-2:0 (ocf:heartbeat:Stateful): Stopped * Resource Group: group:1: * stateful-1:1 (ocf:heartbeat:Stateful): Promoted dl380g5a * stateful-2:1 (ocf:heartbeat:Stateful): Promoted dl380g5a diff --git a/cts/scheduler/summary/remote-connection-unrecoverable.summary b/cts/scheduler/summary/remote-connection-unrecoverable.summary index bd1adfcfa4..3cfb64565a 100644 --- a/cts/scheduler/summary/remote-connection-unrecoverable.summary +++ b/cts/scheduler/summary/remote-connection-unrecoverable.summary @@ -1,54 +1,54 @@ Current cluster status: * Node List: * Node node1: UNCLEAN (offline) * Online: [ node2 ] * RemoteOnline: [ remote1 ] * Full List of Resources: * remote1 (ocf:pacemaker:remote): Started node1 (UNCLEAN) * killer (stonith:fence_xvm): Started node2 * rsc1 (ocf:pacemaker:Dummy): Started remote1 * Clone Set: rsc2-master [rsc2] (promotable): * rsc2 (ocf:pacemaker:Stateful): Promoted node1 (UNCLEAN) * Promoted: [ node2 ] * Stopped: [ remote1 ] Transition Summary: * Fence (reboot) remote1 'resources are active and the connection is unrecoverable' * Fence (reboot) node1 'peer is no longer part of the cluster' * Stop remote1 ( node1 ) due to node availability * Restart killer ( node2 ) due to resource definition change * Move rsc1 ( remote1 -> node2 ) - * Stop rsc2:0 ( Promoted node1 ) due to node availability + * Stop rsc2:0 ( Promoted node1 ) due to node availability Executing Cluster Transition: * Pseudo action: remote1_stop_0 * Resource action: killer stop on node2 * Resource action: rsc1 monitor on node2 * Fencing node1 (reboot) * Fencing remote1 (reboot) * Resource action: killer start on node2 * Resource action: killer monitor=60000 on node2 * Pseudo action: rsc1_stop_0 * Pseudo action: rsc2-master_demote_0 * Resource action: rsc1 start on node2 * Pseudo action: rsc2_demote_0 * Pseudo action: rsc2-master_demoted_0 * Pseudo action: rsc2-master_stop_0 * Resource action: rsc1 monitor=10000 on node2 * Pseudo action: rsc2_stop_0 * Pseudo action: rsc2-master_stopped_0 Revised Cluster Status: * Node List: * Online: [ node2 ] * OFFLINE: [ node1 ] * RemoteOFFLINE: [ remote1 ] * Full List of Resources: * remote1 (ocf:pacemaker:remote): Stopped * killer (stonith:fence_xvm): Started node2 * rsc1 (ocf:pacemaker:Dummy): Started node2 * Clone Set: rsc2-master [rsc2] (promotable): * Promoted: [ node2 ] * Stopped: [ node1 remote1 ] diff --git a/cts/scheduler/summary/remote-recover-all.summary b/cts/scheduler/summary/remote-recover-all.summary index 176c1de8b3..18d10730bf 100644 --- a/cts/scheduler/summary/remote-recover-all.summary +++ b/cts/scheduler/summary/remote-recover-all.summary @@ -1,146 +1,146 @@ Using the original execution date of: 2017-05-03 13:33:24Z Current cluster status: * Node List: * Node controller-1: UNCLEAN (offline) * Online: [ controller-0 controller-2 ] * RemoteOnline: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * Full List of Resources: * messaging-0 (ocf:pacemaker:remote): Started controller-0 * messaging-1 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * messaging-2 (ocf:pacemaker:remote): Started controller-0 * galera-0 (ocf:pacemaker:remote): Started controller-1 
(UNCLEAN) * galera-1 (ocf:pacemaker:remote): Started controller-0 * galera-2 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * Clone Set: rabbitmq-clone [rabbitmq]: * Started: [ messaging-0 messaging-1 messaging-2 ] * Stopped: [ controller-0 controller-1 controller-2 galera-0 galera-1 galera-2 ] * Clone Set: galera-master [galera] (promotable): * Promoted: [ galera-0 galera-1 galera-2 ] * Stopped: [ controller-0 controller-1 controller-2 messaging-0 messaging-1 messaging-2 ] * Clone Set: redis-master [redis] (promotable): * redis (ocf:heartbeat:redis): Unpromoted controller-1 (UNCLEAN) * Promoted: [ controller-0 ] * Unpromoted: [ controller-2 ] * Stopped: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * ip-192.168.24.6 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.102 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * ip-172.17.1.17 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * ip-172.17.3.15 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * Clone Set: haproxy-clone [haproxy]: * haproxy (systemd:haproxy): Started controller-1 (UNCLEAN) * Started: [ controller-0 controller-2 ] * Stopped: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * stonith-fence_ipmilan-525400bbf613 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400b4f6bd (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-5254005bdbb5 (stonith:fence_ipmilan): Started controller-1 (UNCLEAN) Transition Summary: * Fence (reboot) messaging-1 'resources are active and the connection is unrecoverable' * Fence (reboot) galera-2 'resources are active and the connection is unrecoverable' * Fence (reboot) controller-1 'peer is no longer part of the cluster' * Stop messaging-1 ( controller-1 ) due to node availability * Move galera-0 ( controller-1 -> controller-2 ) * Stop galera-2 ( controller-1 ) due to node availability * Stop rabbitmq:2 ( messaging-1 ) due to node availability - * Stop galera:1 ( Promoted galera-2 ) due to node availability - * Stop redis:0 ( Unpromoted controller-1 ) due to node availability + * Stop galera:1 ( Promoted galera-2 ) due to node availability + * Stop redis:0 ( Unpromoted controller-1 ) due to node availability * Move ip-172.17.1.14 ( controller-1 -> controller-2 ) * Move ip-172.17.1.17 ( controller-1 -> controller-2 ) * Move ip-172.17.4.11 ( controller-1 -> controller-2 ) * Stop haproxy:0 ( controller-1 ) due to node availability * Move stonith-fence_ipmilan-5254005bdbb5 ( controller-1 -> controller-2 ) Executing Cluster Transition: * Pseudo action: messaging-1_stop_0 * Pseudo action: galera-0_stop_0 * Pseudo action: galera-2_stop_0 * Pseudo action: galera-master_demote_0 * Pseudo action: redis-master_pre_notify_stop_0 * Pseudo action: stonith-fence_ipmilan-5254005bdbb5_stop_0 * Fencing controller-1 (reboot) * Pseudo action: redis_post_notify_stop_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 * Pseudo action: redis-master_confirmed-pre_notify_stop_0 * Pseudo action: redis-master_stop_0 * Pseudo action: haproxy-clone_stop_0 * Fencing galera-2 (reboot) * Pseudo action: galera_demote_0 * Pseudo action: galera-master_demoted_0 * Pseudo action: galera-master_stop_0 * Pseudo action: redis_stop_0 * Pseudo action: redis-master_stopped_0 
* Pseudo action: haproxy_stop_0 * Pseudo action: haproxy-clone_stopped_0 * Fencing messaging-1 (reboot) * Resource action: galera-0 start on controller-2 * Pseudo action: rabbitmq_post_notify_stop_0 * Pseudo action: rabbitmq-clone_stop_0 * Pseudo action: galera_stop_0 * Resource action: galera monitor=10000 on galera-0 * Pseudo action: galera-master_stopped_0 * Pseudo action: redis-master_post_notify_stopped_0 * Pseudo action: ip-172.17.1.14_stop_0 * Pseudo action: ip-172.17.1.17_stop_0 * Pseudo action: ip-172.17.4.11_stop_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2 * Resource action: galera-0 monitor=20000 on controller-2 * Resource action: rabbitmq notify on messaging-2 * Resource action: rabbitmq notify on messaging-0 * Pseudo action: rabbitmq_notified_0 * Pseudo action: rabbitmq_stop_0 * Pseudo action: rabbitmq-clone_stopped_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 * Pseudo action: redis-master_confirmed-post_notify_stopped_0 * Resource action: ip-172.17.1.14 start on controller-2 * Resource action: ip-172.17.1.17 start on controller-2 * Resource action: ip-172.17.4.11 start on controller-2 * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2 * Pseudo action: redis_notified_0 * Resource action: ip-172.17.1.14 monitor=10000 on controller-2 * Resource action: ip-172.17.1.17 monitor=10000 on controller-2 * Resource action: ip-172.17.4.11 monitor=10000 on controller-2 Using the original execution date of: 2017-05-03 13:33:24Z Revised Cluster Status: * Node List: * Online: [ controller-0 controller-2 ] * OFFLINE: [ controller-1 ] * RemoteOnline: [ galera-0 galera-1 messaging-0 messaging-2 ] * RemoteOFFLINE: [ galera-2 messaging-1 ] * Full List of Resources: * messaging-0 (ocf:pacemaker:remote): Started controller-0 * messaging-1 (ocf:pacemaker:remote): Stopped * messaging-2 (ocf:pacemaker:remote): Started controller-0 * galera-0 (ocf:pacemaker:remote): Started controller-2 * galera-1 (ocf:pacemaker:remote): Started controller-0 * galera-2 (ocf:pacemaker:remote): Stopped * Clone Set: rabbitmq-clone [rabbitmq]: * Started: [ messaging-0 messaging-2 ] * Stopped: [ controller-0 controller-1 controller-2 galera-0 galera-1 galera-2 messaging-1 ] * Clone Set: galera-master [galera] (promotable): * Promoted: [ galera-0 galera-1 ] * Stopped: [ controller-0 controller-1 controller-2 galera-2 messaging-0 messaging-1 messaging-2 ] * Clone Set: redis-master [redis] (promotable): * Promoted: [ controller-0 ] * Unpromoted: [ controller-2 ] * Stopped: [ controller-1 galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * ip-192.168.24.6 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.102 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.1.17 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.3.15 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-2 * Clone Set: haproxy-clone [haproxy]: * Started: [ controller-0 controller-2 ] * Stopped: [ controller-1 galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * stonith-fence_ipmilan-525400bbf613 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400b4f6bd (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-5254005bdbb5 (stonith:fence_ipmilan): Started 
controller-2 diff --git a/cts/scheduler/summary/remote-recover-connection.summary b/cts/scheduler/summary/remote-recover-connection.summary index fd6900dd96..a9723bc5e1 100644 --- a/cts/scheduler/summary/remote-recover-connection.summary +++ b/cts/scheduler/summary/remote-recover-connection.summary @@ -1,132 +1,132 @@ Using the original execution date of: 2017-05-03 13:33:24Z Current cluster status: * Node List: * Node controller-1: UNCLEAN (offline) * Online: [ controller-0 controller-2 ] * RemoteOnline: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * Full List of Resources: * messaging-0 (ocf:pacemaker:remote): Started controller-0 * messaging-1 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * messaging-2 (ocf:pacemaker:remote): Started controller-0 * galera-0 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * galera-1 (ocf:pacemaker:remote): Started controller-0 * galera-2 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * Clone Set: rabbitmq-clone [rabbitmq]: * Started: [ messaging-0 messaging-1 messaging-2 ] * Stopped: [ controller-0 controller-1 controller-2 galera-0 galera-1 galera-2 ] * Clone Set: galera-master [galera] (promotable): * Promoted: [ galera-0 galera-1 galera-2 ] * Stopped: [ controller-0 controller-1 controller-2 messaging-0 messaging-1 messaging-2 ] * Clone Set: redis-master [redis] (promotable): * redis (ocf:heartbeat:redis): Unpromoted controller-1 (UNCLEAN) * Promoted: [ controller-0 ] * Unpromoted: [ controller-2 ] * Stopped: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * ip-192.168.24.6 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.102 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * ip-172.17.1.17 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * ip-172.17.3.15 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * Clone Set: haproxy-clone [haproxy]: * haproxy (systemd:haproxy): Started controller-1 (UNCLEAN) * Started: [ controller-0 controller-2 ] * Stopped: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * stonith-fence_ipmilan-525400bbf613 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400b4f6bd (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-5254005bdbb5 (stonith:fence_ipmilan): Started controller-1 (UNCLEAN) Transition Summary: * Fence (reboot) controller-1 'peer is no longer part of the cluster' * Move messaging-1 ( controller-1 -> controller-2 ) * Move galera-0 ( controller-1 -> controller-2 ) * Move galera-2 ( controller-1 -> controller-2 ) - * Stop redis:0 ( Unpromoted controller-1 ) due to node availability + * Stop redis:0 ( Unpromoted controller-1 ) due to node availability * Move ip-172.17.1.14 ( controller-1 -> controller-2 ) * Move ip-172.17.1.17 ( controller-1 -> controller-2 ) * Move ip-172.17.4.11 ( controller-1 -> controller-2 ) * Stop haproxy:0 ( controller-1 ) due to node availability * Move stonith-fence_ipmilan-5254005bdbb5 ( controller-1 -> controller-2 ) Executing Cluster Transition: * Pseudo action: messaging-1_stop_0 * Pseudo action: galera-0_stop_0 * Pseudo action: galera-2_stop_0 * Pseudo action: redis-master_pre_notify_stop_0 * Pseudo action: stonith-fence_ipmilan-5254005bdbb5_stop_0 * Fencing controller-1 (reboot) * Resource action: messaging-1 start on controller-2 
* Resource action: galera-0 start on controller-2 * Resource action: galera-2 start on controller-2 * Resource action: rabbitmq monitor=10000 on messaging-1 * Resource action: galera monitor=10000 on galera-2 * Resource action: galera monitor=10000 on galera-0 * Pseudo action: redis_post_notify_stop_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 * Pseudo action: redis-master_confirmed-pre_notify_stop_0 * Pseudo action: redis-master_stop_0 * Pseudo action: haproxy-clone_stop_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2 * Resource action: messaging-1 monitor=20000 on controller-2 * Resource action: galera-0 monitor=20000 on controller-2 * Resource action: galera-2 monitor=20000 on controller-2 * Pseudo action: redis_stop_0 * Pseudo action: redis-master_stopped_0 * Pseudo action: haproxy_stop_0 * Pseudo action: haproxy-clone_stopped_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2 * Pseudo action: redis-master_post_notify_stopped_0 * Pseudo action: ip-172.17.1.14_stop_0 * Pseudo action: ip-172.17.1.17_stop_0 * Pseudo action: ip-172.17.4.11_stop_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 * Pseudo action: redis-master_confirmed-post_notify_stopped_0 * Resource action: ip-172.17.1.14 start on controller-2 * Resource action: ip-172.17.1.17 start on controller-2 * Resource action: ip-172.17.4.11 start on controller-2 * Pseudo action: redis_notified_0 * Resource action: ip-172.17.1.14 monitor=10000 on controller-2 * Resource action: ip-172.17.1.17 monitor=10000 on controller-2 * Resource action: ip-172.17.4.11 monitor=10000 on controller-2 Using the original execution date of: 2017-05-03 13:33:24Z Revised Cluster Status: * Node List: * Online: [ controller-0 controller-2 ] * OFFLINE: [ controller-1 ] * RemoteOnline: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * Full List of Resources: * messaging-0 (ocf:pacemaker:remote): Started controller-0 * messaging-1 (ocf:pacemaker:remote): Started controller-2 * messaging-2 (ocf:pacemaker:remote): Started controller-0 * galera-0 (ocf:pacemaker:remote): Started controller-2 * galera-1 (ocf:pacemaker:remote): Started controller-0 * galera-2 (ocf:pacemaker:remote): Started controller-2 * Clone Set: rabbitmq-clone [rabbitmq]: * Started: [ messaging-0 messaging-1 messaging-2 ] * Stopped: [ controller-0 controller-1 controller-2 galera-0 galera-1 galera-2 ] * Clone Set: galera-master [galera] (promotable): * Promoted: [ galera-0 galera-1 galera-2 ] * Stopped: [ controller-0 controller-1 controller-2 messaging-0 messaging-1 messaging-2 ] * Clone Set: redis-master [redis] (promotable): * Promoted: [ controller-0 ] * Unpromoted: [ controller-2 ] * Stopped: [ controller-1 galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * ip-192.168.24.6 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.102 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.1.17 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.3.15 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-2 * Clone Set: haproxy-clone [haproxy]: * Started: [ controller-0 controller-2 ] * Stopped: [ controller-1 galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * 
stonith-fence_ipmilan-525400bbf613 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400b4f6bd (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-5254005bdbb5 (stonith:fence_ipmilan): Started controller-2 diff --git a/cts/scheduler/summary/remote-recover-no-resources.summary b/cts/scheduler/summary/remote-recover-no-resources.summary index 332d1c4123..d7d9ef942c 100644 --- a/cts/scheduler/summary/remote-recover-no-resources.summary +++ b/cts/scheduler/summary/remote-recover-no-resources.summary @@ -1,137 +1,137 @@ Using the original execution date of: 2017-05-03 13:33:24Z Current cluster status: * Node List: * Node controller-1: UNCLEAN (offline) * Online: [ controller-0 controller-2 ] * RemoteOnline: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * Full List of Resources: * messaging-0 (ocf:pacemaker:remote): Started controller-0 * messaging-1 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * messaging-2 (ocf:pacemaker:remote): Started controller-0 * galera-0 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * galera-1 (ocf:pacemaker:remote): Started controller-0 * galera-2 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * Clone Set: rabbitmq-clone [rabbitmq]: * Started: [ messaging-0 messaging-1 messaging-2 ] * Stopped: [ controller-0 controller-1 controller-2 galera-0 galera-1 galera-2 ] * Clone Set: galera-master [galera] (promotable): * Promoted: [ galera-0 galera-1 ] * Stopped: [ controller-0 controller-1 controller-2 galera-2 messaging-0 messaging-1 messaging-2 ] * Clone Set: redis-master [redis] (promotable): * redis (ocf:heartbeat:redis): Unpromoted controller-1 (UNCLEAN) * Promoted: [ controller-0 ] * Unpromoted: [ controller-2 ] * Stopped: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * ip-192.168.24.6 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.102 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * ip-172.17.1.17 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * ip-172.17.3.15 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * Clone Set: haproxy-clone [haproxy]: * haproxy (systemd:haproxy): Started controller-1 (UNCLEAN) * Started: [ controller-0 controller-2 ] * Stopped: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * stonith-fence_ipmilan-525400bbf613 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400b4f6bd (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-5254005bdbb5 (stonith:fence_ipmilan): Started controller-1 (UNCLEAN) Transition Summary: * Fence (reboot) messaging-1 'resources are active and the connection is unrecoverable' * Fence (reboot) controller-1 'peer is no longer part of the cluster' * Stop messaging-1 ( controller-1 ) due to node availability * Move galera-0 ( controller-1 -> controller-2 ) * Stop galera-2 ( controller-1 ) due to node availability * Stop rabbitmq:2 ( messaging-1 ) due to node availability - * Stop redis:0 ( Unpromoted controller-1 ) due to node availability + * Stop redis:0 ( Unpromoted controller-1 ) due to node availability * Move ip-172.17.1.14 ( controller-1 -> controller-2 ) * Move ip-172.17.1.17 ( controller-1 -> controller-2 ) * Move ip-172.17.4.11 ( controller-1 -> controller-2 ) * Stop haproxy:0 ( controller-1 ) due to node 
availability * Move stonith-fence_ipmilan-5254005bdbb5 ( controller-1 -> controller-2 ) Executing Cluster Transition: * Pseudo action: messaging-1_stop_0 * Pseudo action: galera-0_stop_0 * Pseudo action: galera-2_stop_0 * Pseudo action: redis-master_pre_notify_stop_0 * Pseudo action: stonith-fence_ipmilan-5254005bdbb5_stop_0 * Fencing controller-1 (reboot) * Pseudo action: redis_post_notify_stop_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 * Pseudo action: redis-master_confirmed-pre_notify_stop_0 * Pseudo action: redis-master_stop_0 * Pseudo action: haproxy-clone_stop_0 * Fencing messaging-1 (reboot) * Resource action: galera-0 start on controller-2 * Pseudo action: rabbitmq_post_notify_stop_0 * Pseudo action: rabbitmq-clone_stop_0 * Resource action: galera monitor=10000 on galera-0 * Pseudo action: redis_stop_0 * Pseudo action: redis-master_stopped_0 * Pseudo action: haproxy_stop_0 * Pseudo action: haproxy-clone_stopped_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2 * Resource action: galera-0 monitor=20000 on controller-2 * Resource action: rabbitmq notify on messaging-2 * Resource action: rabbitmq notify on messaging-0 * Pseudo action: rabbitmq_notified_0 * Pseudo action: rabbitmq_stop_0 * Pseudo action: rabbitmq-clone_stopped_0 * Pseudo action: redis-master_post_notify_stopped_0 * Pseudo action: ip-172.17.1.14_stop_0 * Pseudo action: ip-172.17.1.17_stop_0 * Pseudo action: ip-172.17.4.11_stop_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 * Pseudo action: redis-master_confirmed-post_notify_stopped_0 * Resource action: ip-172.17.1.14 start on controller-2 * Resource action: ip-172.17.1.17 start on controller-2 * Resource action: ip-172.17.4.11 start on controller-2 * Pseudo action: redis_notified_0 * Resource action: ip-172.17.1.14 monitor=10000 on controller-2 * Resource action: ip-172.17.1.17 monitor=10000 on controller-2 * Resource action: ip-172.17.4.11 monitor=10000 on controller-2 Using the original execution date of: 2017-05-03 13:33:24Z Revised Cluster Status: * Node List: * Online: [ controller-0 controller-2 ] * OFFLINE: [ controller-1 ] * RemoteOnline: [ galera-0 galera-1 messaging-0 messaging-2 ] * RemoteOFFLINE: [ galera-2 messaging-1 ] * Full List of Resources: * messaging-0 (ocf:pacemaker:remote): Started controller-0 * messaging-1 (ocf:pacemaker:remote): Stopped * messaging-2 (ocf:pacemaker:remote): Started controller-0 * galera-0 (ocf:pacemaker:remote): Started controller-2 * galera-1 (ocf:pacemaker:remote): Started controller-0 * galera-2 (ocf:pacemaker:remote): Stopped * Clone Set: rabbitmq-clone [rabbitmq]: * Started: [ messaging-0 messaging-2 ] * Stopped: [ controller-0 controller-1 controller-2 galera-0 galera-1 galera-2 messaging-1 ] * Clone Set: galera-master [galera] (promotable): * Promoted: [ galera-0 galera-1 ] * Stopped: [ controller-0 controller-1 controller-2 galera-2 messaging-0 messaging-1 messaging-2 ] * Clone Set: redis-master [redis] (promotable): * Promoted: [ controller-0 ] * Unpromoted: [ controller-2 ] * Stopped: [ controller-1 galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * ip-192.168.24.6 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.102 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.1.17 (ocf:heartbeat:IPaddr2): Started 
controller-2 * ip-172.17.3.15 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-2 * Clone Set: haproxy-clone [haproxy]: * Started: [ controller-0 controller-2 ] * Stopped: [ controller-1 galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * stonith-fence_ipmilan-525400bbf613 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400b4f6bd (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-5254005bdbb5 (stonith:fence_ipmilan): Started controller-2 diff --git a/cts/scheduler/summary/remote-recover-unknown.summary b/cts/scheduler/summary/remote-recover-unknown.summary index ac5143a16e..4f3d045284 100644 --- a/cts/scheduler/summary/remote-recover-unknown.summary +++ b/cts/scheduler/summary/remote-recover-unknown.summary @@ -1,139 +1,139 @@ Using the original execution date of: 2017-05-03 13:33:24Z Current cluster status: * Node List: * Node controller-1: UNCLEAN (offline) * Online: [ controller-0 controller-2 ] * RemoteOnline: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * Full List of Resources: * messaging-0 (ocf:pacemaker:remote): Started controller-0 * messaging-1 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * messaging-2 (ocf:pacemaker:remote): Started controller-0 * galera-0 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * galera-1 (ocf:pacemaker:remote): Started controller-0 * galera-2 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * Clone Set: rabbitmq-clone [rabbitmq]: * Started: [ messaging-0 messaging-1 messaging-2 ] * Stopped: [ controller-0 controller-1 controller-2 galera-0 galera-1 galera-2 ] * Clone Set: galera-master [galera] (promotable): * Promoted: [ galera-0 galera-1 ] * Stopped: [ controller-0 controller-1 controller-2 galera-2 messaging-0 messaging-1 messaging-2 ] * Clone Set: redis-master [redis] (promotable): * redis (ocf:heartbeat:redis): Unpromoted controller-1 (UNCLEAN) * Promoted: [ controller-0 ] * Unpromoted: [ controller-2 ] * Stopped: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * ip-192.168.24.6 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.102 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * ip-172.17.1.17 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * ip-172.17.3.15 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * Clone Set: haproxy-clone [haproxy]: * haproxy (systemd:haproxy): Started controller-1 (UNCLEAN) * Started: [ controller-0 controller-2 ] * Stopped: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * stonith-fence_ipmilan-525400bbf613 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400b4f6bd (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-5254005bdbb5 (stonith:fence_ipmilan): Started controller-1 (UNCLEAN) Transition Summary: * Fence (reboot) galera-2 'resources are in an unknown state and the connection is unrecoverable' * Fence (reboot) messaging-1 'resources are active and the connection is unrecoverable' * Fence (reboot) controller-1 'peer is no longer part of the cluster' * Stop messaging-1 ( controller-1 ) due to node availability * Move galera-0 ( controller-1 -> controller-2 ) * 
Stop galera-2 ( controller-1 ) due to node availability * Stop rabbitmq:2 ( messaging-1 ) due to node availability - * Stop redis:0 ( Unpromoted controller-1 ) due to node availability + * Stop redis:0 ( Unpromoted controller-1 ) due to node availability * Move ip-172.17.1.14 ( controller-1 -> controller-2 ) * Move ip-172.17.1.17 ( controller-1 -> controller-2 ) * Move ip-172.17.4.11 ( controller-1 -> controller-2 ) * Stop haproxy:0 ( controller-1 ) due to node availability * Move stonith-fence_ipmilan-5254005bdbb5 ( controller-1 -> controller-2 ) Executing Cluster Transition: * Pseudo action: messaging-1_stop_0 * Pseudo action: galera-0_stop_0 * Pseudo action: galera-2_stop_0 * Pseudo action: redis-master_pre_notify_stop_0 * Pseudo action: stonith-fence_ipmilan-5254005bdbb5_stop_0 * Fencing controller-1 (reboot) * Pseudo action: redis_post_notify_stop_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 * Pseudo action: redis-master_confirmed-pre_notify_stop_0 * Pseudo action: redis-master_stop_0 * Pseudo action: haproxy-clone_stop_0 * Fencing galera-2 (reboot) * Fencing messaging-1 (reboot) * Resource action: galera-0 start on controller-2 * Pseudo action: rabbitmq_post_notify_stop_0 * Pseudo action: rabbitmq-clone_stop_0 * Resource action: galera monitor=10000 on galera-0 * Pseudo action: redis_stop_0 * Pseudo action: redis-master_stopped_0 * Pseudo action: haproxy_stop_0 * Pseudo action: haproxy-clone_stopped_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2 * Resource action: galera-0 monitor=20000 on controller-2 * Resource action: rabbitmq notify on messaging-2 * Resource action: rabbitmq notify on messaging-0 * Pseudo action: rabbitmq_notified_0 * Pseudo action: rabbitmq_stop_0 * Pseudo action: rabbitmq-clone_stopped_0 * Pseudo action: redis-master_post_notify_stopped_0 * Pseudo action: ip-172.17.1.14_stop_0 * Pseudo action: ip-172.17.1.17_stop_0 * Pseudo action: ip-172.17.4.11_stop_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 * Pseudo action: redis-master_confirmed-post_notify_stopped_0 * Resource action: ip-172.17.1.14 start on controller-2 * Resource action: ip-172.17.1.17 start on controller-2 * Resource action: ip-172.17.4.11 start on controller-2 * Pseudo action: redis_notified_0 * Resource action: ip-172.17.1.14 monitor=10000 on controller-2 * Resource action: ip-172.17.1.17 monitor=10000 on controller-2 * Resource action: ip-172.17.4.11 monitor=10000 on controller-2 Using the original execution date of: 2017-05-03 13:33:24Z Revised Cluster Status: * Node List: * Online: [ controller-0 controller-2 ] * OFFLINE: [ controller-1 ] * RemoteOnline: [ galera-0 galera-1 messaging-0 messaging-2 ] * RemoteOFFLINE: [ galera-2 messaging-1 ] * Full List of Resources: * messaging-0 (ocf:pacemaker:remote): Started controller-0 * messaging-1 (ocf:pacemaker:remote): Stopped * messaging-2 (ocf:pacemaker:remote): Started controller-0 * galera-0 (ocf:pacemaker:remote): Started controller-2 * galera-1 (ocf:pacemaker:remote): Started controller-0 * galera-2 (ocf:pacemaker:remote): Stopped * Clone Set: rabbitmq-clone [rabbitmq]: * Started: [ messaging-0 messaging-2 ] * Stopped: [ controller-0 controller-1 controller-2 galera-0 galera-1 galera-2 messaging-1 ] * Clone Set: galera-master [galera] (promotable): * Promoted: [ galera-0 galera-1 ] * Stopped: [ controller-0 controller-1 
controller-2 galera-2 messaging-0 messaging-1 messaging-2 ] * Clone Set: redis-master [redis] (promotable): * Promoted: [ controller-0 ] * Unpromoted: [ controller-2 ] * Stopped: [ controller-1 galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * ip-192.168.24.6 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.102 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.1.17 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.3.15 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-2 * Clone Set: haproxy-clone [haproxy]: * Started: [ controller-0 controller-2 ] * Stopped: [ controller-1 galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * stonith-fence_ipmilan-525400bbf613 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400b4f6bd (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-5254005bdbb5 (stonith:fence_ipmilan): Started controller-2 diff --git a/cts/scheduler/summary/remote-recovery.summary b/cts/scheduler/summary/remote-recovery.summary index fd6900dd96..a9723bc5e1 100644 --- a/cts/scheduler/summary/remote-recovery.summary +++ b/cts/scheduler/summary/remote-recovery.summary @@ -1,132 +1,132 @@ Using the original execution date of: 2017-05-03 13:33:24Z Current cluster status: * Node List: * Node controller-1: UNCLEAN (offline) * Online: [ controller-0 controller-2 ] * RemoteOnline: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * Full List of Resources: * messaging-0 (ocf:pacemaker:remote): Started controller-0 * messaging-1 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * messaging-2 (ocf:pacemaker:remote): Started controller-0 * galera-0 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * galera-1 (ocf:pacemaker:remote): Started controller-0 * galera-2 (ocf:pacemaker:remote): Started controller-1 (UNCLEAN) * Clone Set: rabbitmq-clone [rabbitmq]: * Started: [ messaging-0 messaging-1 messaging-2 ] * Stopped: [ controller-0 controller-1 controller-2 galera-0 galera-1 galera-2 ] * Clone Set: galera-master [galera] (promotable): * Promoted: [ galera-0 galera-1 galera-2 ] * Stopped: [ controller-0 controller-1 controller-2 messaging-0 messaging-1 messaging-2 ] * Clone Set: redis-master [redis] (promotable): * redis (ocf:heartbeat:redis): Unpromoted controller-1 (UNCLEAN) * Promoted: [ controller-0 ] * Unpromoted: [ controller-2 ] * Stopped: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * ip-192.168.24.6 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.102 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * ip-172.17.1.17 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * ip-172.17.3.15 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-1 (UNCLEAN) * Clone Set: haproxy-clone [haproxy]: * haproxy (systemd:haproxy): Started controller-1 (UNCLEAN) * Started: [ controller-0 controller-2 ] * Stopped: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * stonith-fence_ipmilan-525400bbf613 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400b4f6bd (stonith:fence_ipmilan): Started controller-0 * 
stonith-fence_ipmilan-5254005bdbb5 (stonith:fence_ipmilan): Started controller-1 (UNCLEAN) Transition Summary: * Fence (reboot) controller-1 'peer is no longer part of the cluster' * Move messaging-1 ( controller-1 -> controller-2 ) * Move galera-0 ( controller-1 -> controller-2 ) * Move galera-2 ( controller-1 -> controller-2 ) - * Stop redis:0 ( Unpromoted controller-1 ) due to node availability + * Stop redis:0 ( Unpromoted controller-1 ) due to node availability * Move ip-172.17.1.14 ( controller-1 -> controller-2 ) * Move ip-172.17.1.17 ( controller-1 -> controller-2 ) * Move ip-172.17.4.11 ( controller-1 -> controller-2 ) * Stop haproxy:0 ( controller-1 ) due to node availability * Move stonith-fence_ipmilan-5254005bdbb5 ( controller-1 -> controller-2 ) Executing Cluster Transition: * Pseudo action: messaging-1_stop_0 * Pseudo action: galera-0_stop_0 * Pseudo action: galera-2_stop_0 * Pseudo action: redis-master_pre_notify_stop_0 * Pseudo action: stonith-fence_ipmilan-5254005bdbb5_stop_0 * Fencing controller-1 (reboot) * Resource action: messaging-1 start on controller-2 * Resource action: galera-0 start on controller-2 * Resource action: galera-2 start on controller-2 * Resource action: rabbitmq monitor=10000 on messaging-1 * Resource action: galera monitor=10000 on galera-2 * Resource action: galera monitor=10000 on galera-0 * Pseudo action: redis_post_notify_stop_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 * Pseudo action: redis-master_confirmed-pre_notify_stop_0 * Pseudo action: redis-master_stop_0 * Pseudo action: haproxy-clone_stop_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2 * Resource action: messaging-1 monitor=20000 on controller-2 * Resource action: galera-0 monitor=20000 on controller-2 * Resource action: galera-2 monitor=20000 on controller-2 * Pseudo action: redis_stop_0 * Pseudo action: redis-master_stopped_0 * Pseudo action: haproxy_stop_0 * Pseudo action: haproxy-clone_stopped_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2 * Pseudo action: redis-master_post_notify_stopped_0 * Pseudo action: ip-172.17.1.14_stop_0 * Pseudo action: ip-172.17.1.17_stop_0 * Pseudo action: ip-172.17.4.11_stop_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 * Pseudo action: redis-master_confirmed-post_notify_stopped_0 * Resource action: ip-172.17.1.14 start on controller-2 * Resource action: ip-172.17.1.17 start on controller-2 * Resource action: ip-172.17.4.11 start on controller-2 * Pseudo action: redis_notified_0 * Resource action: ip-172.17.1.14 monitor=10000 on controller-2 * Resource action: ip-172.17.1.17 monitor=10000 on controller-2 * Resource action: ip-172.17.4.11 monitor=10000 on controller-2 Using the original execution date of: 2017-05-03 13:33:24Z Revised Cluster Status: * Node List: * Online: [ controller-0 controller-2 ] * OFFLINE: [ controller-1 ] * RemoteOnline: [ galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * Full List of Resources: * messaging-0 (ocf:pacemaker:remote): Started controller-0 * messaging-1 (ocf:pacemaker:remote): Started controller-2 * messaging-2 (ocf:pacemaker:remote): Started controller-0 * galera-0 (ocf:pacemaker:remote): Started controller-2 * galera-1 (ocf:pacemaker:remote): Started controller-0 * galera-2 (ocf:pacemaker:remote): Started controller-2 * Clone Set: rabbitmq-clone [rabbitmq]: * Started: [ messaging-0 messaging-1 messaging-2 ] * Stopped: [ 
controller-0 controller-1 controller-2 galera-0 galera-1 galera-2 ] * Clone Set: galera-master [galera] (promotable): * Promoted: [ galera-0 galera-1 galera-2 ] * Stopped: [ controller-0 controller-1 controller-2 messaging-0 messaging-1 messaging-2 ] * Clone Set: redis-master [redis] (promotable): * Promoted: [ controller-0 ] * Unpromoted: [ controller-2 ] * Stopped: [ controller-1 galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * ip-192.168.24.6 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-10.0.0.102 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.1.14 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.1.17 (ocf:heartbeat:IPaddr2): Started controller-2 * ip-172.17.3.15 (ocf:heartbeat:IPaddr2): Started controller-0 * ip-172.17.4.11 (ocf:heartbeat:IPaddr2): Started controller-2 * Clone Set: haproxy-clone [haproxy]: * Started: [ controller-0 controller-2 ] * Stopped: [ controller-1 galera-0 galera-1 galera-2 messaging-0 messaging-1 messaging-2 ] * openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 * stonith-fence_ipmilan-525400bbf613 (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-525400b4f6bd (stonith:fence_ipmilan): Started controller-0 * stonith-fence_ipmilan-5254005bdbb5 (stonith:fence_ipmilan): Started controller-2 diff --git a/cts/scheduler/summary/rsc-sets-promoted.summary b/cts/scheduler/summary/rsc-sets-promoted.summary index a45e4b16e8..3db15881a0 100644 --- a/cts/scheduler/summary/rsc-sets-promoted.summary +++ b/cts/scheduler/summary/rsc-sets-promoted.summary @@ -1,49 +1,49 @@ Current cluster status: * Node List: * Node node1: standby (with active resources) * Online: [ node2 ] * Full List of Resources: * Clone Set: ms-rsc [rsc] (promotable): * Promoted: [ node1 ] * Unpromoted: [ node2 ] * rsc1 (ocf:pacemaker:Dummy): Started node1 * rsc2 (ocf:pacemaker:Dummy): Started node1 * rsc3 (ocf:pacemaker:Dummy): Started node1 Transition Summary: - * Stop rsc:0 ( Promoted node1 ) due to node availability + * Stop rsc:0 ( Promoted node1 ) due to node availability * Promote rsc:1 ( Unpromoted -> Promoted node2 ) * Move rsc1 ( node1 -> node2 ) * Move rsc2 ( node1 -> node2 ) * Move rsc3 ( node1 -> node2 ) Executing Cluster Transition: * Resource action: rsc1 stop on node1 * Resource action: rsc2 stop on node1 * Resource action: rsc3 stop on node1 * Pseudo action: ms-rsc_demote_0 * Resource action: rsc:0 demote on node1 * Pseudo action: ms-rsc_demoted_0 * Pseudo action: ms-rsc_stop_0 * Resource action: rsc:0 stop on node1 * Pseudo action: ms-rsc_stopped_0 * Pseudo action: ms-rsc_promote_0 * Resource action: rsc:1 promote on node2 * Pseudo action: ms-rsc_promoted_0 * Resource action: rsc1 start on node2 * Resource action: rsc2 start on node2 * Resource action: rsc3 start on node2 Revised Cluster Status: * Node List: * Node node1: standby * Online: [ node2 ] * Full List of Resources: * Clone Set: ms-rsc [rsc] (promotable): * Promoted: [ node2 ] * Stopped: [ node1 ] * rsc1 (ocf:pacemaker:Dummy): Started node2 * rsc2 (ocf:pacemaker:Dummy): Started node2 * rsc3 (ocf:pacemaker:Dummy): Started node2 diff --git a/cts/scheduler/summary/ticket-promoted-14.summary b/cts/scheduler/summary/ticket-promoted-14.summary index ee8912b2e9..80ff84346b 100644 --- a/cts/scheduler/summary/ticket-promoted-14.summary +++ b/cts/scheduler/summary/ticket-promoted-14.summary @@ -1,31 +1,31 @@ Current cluster status: * Node List: * Online: [ node1 node2 ] * Full List of Resources: * rsc_stonith (stonith:null): Started node1 * Clone 
Set: ms1 [rsc1] (promotable): * Promoted: [ node1 ] * Unpromoted: [ node2 ] Transition Summary: - * Stop rsc1:0 ( Promoted node1 ) due to node availability - * Stop rsc1:1 ( Unpromoted node2 ) due to node availability + * Stop rsc1:0 ( Promoted node1 ) due to node availability + * Stop rsc1:1 ( Unpromoted node2 ) due to node availability Executing Cluster Transition: * Pseudo action: ms1_demote_0 * Resource action: rsc1:1 demote on node1 * Pseudo action: ms1_demoted_0 * Pseudo action: ms1_stop_0 * Resource action: rsc1:1 stop on node1 * Resource action: rsc1:0 stop on node2 * Pseudo action: ms1_stopped_0 Revised Cluster Status: * Node List: * Online: [ node1 node2 ] * Full List of Resources: * rsc_stonith (stonith:null): Started node1 * Clone Set: ms1 [rsc1] (promotable): * Stopped: [ node1 node2 ] diff --git a/cts/scheduler/summary/ticket-promoted-15.summary b/cts/scheduler/summary/ticket-promoted-15.summary index ee8912b2e9..80ff84346b 100644 --- a/cts/scheduler/summary/ticket-promoted-15.summary +++ b/cts/scheduler/summary/ticket-promoted-15.summary @@ -1,31 +1,31 @@ Current cluster status: * Node List: * Online: [ node1 node2 ] * Full List of Resources: * rsc_stonith (stonith:null): Started node1 * Clone Set: ms1 [rsc1] (promotable): * Promoted: [ node1 ] * Unpromoted: [ node2 ] Transition Summary: - * Stop rsc1:0 ( Promoted node1 ) due to node availability - * Stop rsc1:1 ( Unpromoted node2 ) due to node availability + * Stop rsc1:0 ( Promoted node1 ) due to node availability + * Stop rsc1:1 ( Unpromoted node2 ) due to node availability Executing Cluster Transition: * Pseudo action: ms1_demote_0 * Resource action: rsc1:1 demote on node1 * Pseudo action: ms1_demoted_0 * Pseudo action: ms1_stop_0 * Resource action: rsc1:1 stop on node1 * Resource action: rsc1:0 stop on node2 * Pseudo action: ms1_stopped_0 Revised Cluster Status: * Node List: * Online: [ node1 node2 ] * Full List of Resources: * rsc_stonith (stonith:null): Started node1 * Clone Set: ms1 [rsc1] (promotable): * Stopped: [ node1 node2 ] diff --git a/cts/scheduler/summary/ticket-promoted-21.summary b/cts/scheduler/summary/ticket-promoted-21.summary index f116a2eea0..788573facb 100644 --- a/cts/scheduler/summary/ticket-promoted-21.summary +++ b/cts/scheduler/summary/ticket-promoted-21.summary @@ -1,36 +1,36 @@ Current cluster status: * Node List: * Online: [ node1 node2 ] * Full List of Resources: * rsc_stonith (stonith:null): Started node1 * Clone Set: ms1 [rsc1] (promotable): * Promoted: [ node1 ] * Unpromoted: [ node2 ] Transition Summary: * Fence (reboot) node1 'deadman ticket was lost' * Move rsc_stonith ( node1 -> node2 ) - * Stop rsc1:0 ( Promoted node1 ) due to node availability + * Stop rsc1:0 ( Promoted node1 ) due to node availability Executing Cluster Transition: * Pseudo action: rsc_stonith_stop_0 * Pseudo action: ms1_demote_0 * Fencing node1 (reboot) * Resource action: rsc_stonith start on node2 * Pseudo action: rsc1:1_demote_0 * Pseudo action: ms1_demoted_0 * Pseudo action: ms1_stop_0 * Pseudo action: rsc1:1_stop_0 * Pseudo action: ms1_stopped_0 Revised Cluster Status: * Node List: * Online: [ node2 ] * OFFLINE: [ node1 ] * Full List of Resources: * rsc_stonith (stonith:null): Started node2 * Clone Set: ms1 [rsc1] (promotable): * Unpromoted: [ node2 ] * Stopped: [ node1 ] diff --git a/cts/scheduler/summary/ticket-promoted-3.summary b/cts/scheduler/summary/ticket-promoted-3.summary index ee8912b2e9..80ff84346b 100644 --- a/cts/scheduler/summary/ticket-promoted-3.summary +++ 
b/cts/scheduler/summary/ticket-promoted-3.summary
@@ -1,31 +1,31 @@
Current cluster status:
* Node List:
* Online: [ node1 node2 ]
* Full List of Resources:
* rsc_stonith (stonith:null): Started node1
* Clone Set: ms1 [rsc1] (promotable):
* Promoted: [ node1 ]
* Unpromoted: [ node2 ]
Transition Summary:
- * Stop rsc1:0 ( Promoted node1 ) due to node availability
- * Stop rsc1:1 ( Unpromoted node2 ) due to node availability
+ * Stop rsc1:0 ( Promoted node1 ) due to node availability
+ * Stop rsc1:1 ( Unpromoted node2 ) due to node availability
Executing Cluster Transition:
* Pseudo action: ms1_demote_0
* Resource action: rsc1:1 demote on node1
* Pseudo action: ms1_demoted_0
* Pseudo action: ms1_stop_0
* Resource action: rsc1:1 stop on node1
* Resource action: rsc1:0 stop on node2
* Pseudo action: ms1_stopped_0
Revised Cluster Status:
* Node List:
* Online: [ node1 node2 ]
* Full List of Resources:
* rsc_stonith (stonith:null): Started node1
* Clone Set: ms1 [rsc1] (promotable):
* Stopped: [ node1 node2 ]
diff --git a/cts/scheduler/summary/ticket-promoted-9.summary b/cts/scheduler/summary/ticket-promoted-9.summary
index f116a2eea0..788573facb 100644
--- a/cts/scheduler/summary/ticket-promoted-9.summary
+++ b/cts/scheduler/summary/ticket-promoted-9.summary
@@ -1,36 +1,36 @@
Current cluster status:
* Node List:
* Online: [ node1 node2 ]
* Full List of Resources:
* rsc_stonith (stonith:null): Started node1
* Clone Set: ms1 [rsc1] (promotable):
* Promoted: [ node1 ]
* Unpromoted: [ node2 ]
Transition Summary:
* Fence (reboot) node1 'deadman ticket was lost'
* Move rsc_stonith ( node1 -> node2 )
- * Stop rsc1:0 ( Promoted node1 ) due to node availability
+ * Stop rsc1:0 ( Promoted node1 ) due to node availability
Executing Cluster Transition:
* Pseudo action: rsc_stonith_stop_0
* Pseudo action: ms1_demote_0
* Fencing node1 (reboot)
* Resource action: rsc_stonith start on node2
* Pseudo action: rsc1:1_demote_0
* Pseudo action: ms1_demoted_0
* Pseudo action: ms1_stop_0
* Pseudo action: rsc1:1_stop_0
* Pseudo action: ms1_stopped_0
Revised Cluster Status:
* Node List:
* Online: [ node2 ]
* OFFLINE: [ node1 ]
* Full List of Resources:
* rsc_stonith (stonith:null): Started node2
* Clone Set: ms1 [rsc1] (promotable):
* Unpromoted: [ node2 ]
* Stopped: [ node1 ]
diff --git a/cts/scheduler/summary/whitebox-ms-ordering-move.summary b/cts/scheduler/summary/whitebox-ms-ordering-move.summary
index 6a5fb6eaeb..c9b13e032d 100644
--- a/cts/scheduler/summary/whitebox-ms-ordering-move.summary
+++ b/cts/scheduler/summary/whitebox-ms-ordering-move.summary
@@ -1,107 +1,107 @@
Current cluster status:
* Node List:
* Online: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ]
* GuestOnline: [ lxc1@rhel7-1 lxc2@rhel7-1 ]
* Full List of Resources:
* Fencing (stonith:fence_xvm): Started rhel7-3
* FencingPass (stonith:fence_dummy): Started rhel7-4
* FencingFail (stonith:fence_dummy): Started rhel7-5
* rsc_rhel7-1 (ocf:heartbeat:IPaddr2): Started rhel7-1
* rsc_rhel7-2 (ocf:heartbeat:IPaddr2): Started rhel7-2
* rsc_rhel7-3 (ocf:heartbeat:IPaddr2): Started rhel7-3
* rsc_rhel7-4 (ocf:heartbeat:IPaddr2): Started rhel7-4
* rsc_rhel7-5 (ocf:heartbeat:IPaddr2): Started rhel7-5
* migrator (ocf:pacemaker:Dummy): Started rhel7-4
* Clone Set: Connectivity [ping-1]:
* Started: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ]
* Stopped: [ lxc1 lxc2 ]
* Clone Set: master-1 [stateful-1] (promotable):
* Promoted: [ rhel7-3 ]
* Unpromoted: [ rhel7-1 rhel7-2 rhel7-4 rhel7-5 ]
* Resource Group: group-1:
* r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel7-3
* petulant (service:DummySD): Started rhel7-3
* r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel7-3
* lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started rhel7-3
* container1 (ocf:heartbeat:VirtualDomain): Started rhel7-1
* container2 (ocf:heartbeat:VirtualDomain): Started rhel7-1
* Clone Set: lxc-ms-master [lxc-ms] (promotable):
* Promoted: [ lxc1 ]
* Unpromoted: [ lxc2 ]
Transition Summary:
* Move container1 ( rhel7-1 -> rhel7-2 )
- * Restart lxc-ms:0 ( Promoted lxc1 ) due to required container1 start
+ * Restart lxc-ms:0 ( Promoted lxc1 ) due to required container1 start
* Move lxc1 ( rhel7-1 -> rhel7-2 )
Executing Cluster Transition:
* Resource action: rsc_rhel7-1 monitor on lxc2
* Resource action: rsc_rhel7-2 monitor on lxc2
* Resource action: rsc_rhel7-3 monitor on lxc2
* Resource action: rsc_rhel7-4 monitor on lxc2
* Resource action: rsc_rhel7-5 monitor on lxc2
* Resource action: migrator monitor on lxc2
* Resource action: ping-1 monitor on lxc2
* Resource action: stateful-1 monitor on lxc2
* Resource action: r192.168.122.207 monitor on lxc2
* Resource action: petulant monitor on lxc2
* Resource action: r192.168.122.208 monitor on lxc2
* Resource action: lsb-dummy monitor on lxc2
* Pseudo action: lxc-ms-master_demote_0
* Resource action: lxc1 monitor on rhel7-5
* Resource action: lxc1 monitor on rhel7-4
* Resource action: lxc1 monitor on rhel7-3
* Resource action: lxc1 monitor on rhel7-2
* Resource action: lxc2 monitor on rhel7-5
* Resource action: lxc2 monitor on rhel7-4
* Resource action: lxc2 monitor on rhel7-3
* Resource action: lxc2 monitor on rhel7-2
* Resource action: lxc-ms demote on lxc1
* Pseudo action: lxc-ms-master_demoted_0
* Pseudo action: lxc-ms-master_stop_0
* Resource action: lxc-ms stop on lxc1
* Pseudo action: lxc-ms-master_stopped_0
* Pseudo action: lxc-ms-master_start_0
* Resource action: lxc1 stop on rhel7-1
* Resource action: container1 stop on rhel7-1
* Resource action: container1 start on rhel7-2
* Resource action: lxc1 start on rhel7-2
* Resource action: lxc-ms start on lxc1
* Pseudo action: lxc-ms-master_running_0
* Resource action: lxc1 monitor=30000 on rhel7-2
* Pseudo action: lxc-ms-master_promote_0
* Resource action: lxc-ms promote on lxc1
* Pseudo action: lxc-ms-master_promoted_0
Revised Cluster Status:
* Node List:
* Online: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ]
* GuestOnline: [ lxc1@rhel7-2 lxc2@rhel7-1 ]
* Full List of Resources:
* Fencing (stonith:fence_xvm): Started rhel7-3
* FencingPass (stonith:fence_dummy): Started rhel7-4
* FencingFail (stonith:fence_dummy): Started rhel7-5
* rsc_rhel7-1 (ocf:heartbeat:IPaddr2): Started rhel7-1
* rsc_rhel7-2 (ocf:heartbeat:IPaddr2): Started rhel7-2
* rsc_rhel7-3 (ocf:heartbeat:IPaddr2): Started rhel7-3
* rsc_rhel7-4 (ocf:heartbeat:IPaddr2): Started rhel7-4
* rsc_rhel7-5 (ocf:heartbeat:IPaddr2): Started rhel7-5
* migrator (ocf:pacemaker:Dummy): Started rhel7-4
* Clone Set: Connectivity [ping-1]:
* Started: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ]
* Stopped: [ lxc1 lxc2 ]
* Clone Set: master-1 [stateful-1] (promotable):
* Promoted: [ rhel7-3 ]
* Unpromoted: [ rhel7-1 rhel7-2 rhel7-4 rhel7-5 ]
* Resource Group: group-1:
* r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel7-3
* petulant (service:DummySD): Started rhel7-3
* r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel7-3
* lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started rhel7-3
* container1 (ocf:heartbeat:VirtualDomain): Started rhel7-2
* container2 (ocf:heartbeat:VirtualDomain): Started rhel7-1
*
Clone Set: lxc-ms-master [lxc-ms] (promotable): * Promoted: [ lxc1 ] * Unpromoted: [ lxc2 ] diff --git a/cts/scheduler/summary/whitebox-ms-ordering.summary b/cts/scheduler/summary/whitebox-ms-ordering.summary index 921f6d068d..4d23221fa6 100644 --- a/cts/scheduler/summary/whitebox-ms-ordering.summary +++ b/cts/scheduler/summary/whitebox-ms-ordering.summary @@ -1,73 +1,73 @@ Current cluster status: * Node List: * Online: [ 18node1 18node2 18node3 ] * Full List of Resources: * shooter (stonith:fence_xvm): Started 18node2 * container1 (ocf:heartbeat:VirtualDomain): FAILED * container2 (ocf:heartbeat:VirtualDomain): FAILED * Clone Set: lxc-ms-master [lxc-ms] (promotable): * Stopped: [ 18node1 18node2 18node3 ] Transition Summary: * Fence (reboot) lxc2 (resource: container2) 'guest is unclean' * Fence (reboot) lxc1 (resource: container1) 'guest is unclean' * Start container1 ( 18node1 ) * Start container2 ( 18node1 ) - * Recover lxc-ms:0 ( Promoted lxc1 ) - * Recover lxc-ms:1 ( Unpromoted lxc2 ) + * Recover lxc-ms:0 ( Promoted lxc1 ) + * Recover lxc-ms:1 ( Unpromoted lxc2 ) * Start lxc1 ( 18node1 ) * Start lxc2 ( 18node1 ) Executing Cluster Transition: * Resource action: container1 monitor on 18node3 * Resource action: container1 monitor on 18node2 * Resource action: container1 monitor on 18node1 * Resource action: container2 monitor on 18node3 * Resource action: container2 monitor on 18node2 * Resource action: container2 monitor on 18node1 * Resource action: lxc-ms monitor on 18node3 * Resource action: lxc-ms monitor on 18node2 * Resource action: lxc-ms monitor on 18node1 * Pseudo action: lxc-ms-master_demote_0 * Resource action: lxc1 monitor on 18node3 * Resource action: lxc1 monitor on 18node2 * Resource action: lxc1 monitor on 18node1 * Resource action: lxc2 monitor on 18node3 * Resource action: lxc2 monitor on 18node2 * Resource action: lxc2 monitor on 18node1 * Pseudo action: stonith-lxc2-reboot on lxc2 * Pseudo action: stonith-lxc1-reboot on lxc1 * Resource action: container1 start on 18node1 * Resource action: container2 start on 18node1 * Pseudo action: lxc-ms_demote_0 * Pseudo action: lxc-ms-master_demoted_0 * Pseudo action: lxc-ms-master_stop_0 * Resource action: lxc1 start on 18node1 * Resource action: lxc2 start on 18node1 * Pseudo action: lxc-ms_stop_0 * Pseudo action: lxc-ms_stop_0 * Pseudo action: lxc-ms-master_stopped_0 * Pseudo action: lxc-ms-master_start_0 * Resource action: lxc1 monitor=30000 on 18node1 * Resource action: lxc2 monitor=30000 on 18node1 * Resource action: lxc-ms start on lxc1 * Resource action: lxc-ms start on lxc2 * Pseudo action: lxc-ms-master_running_0 * Resource action: lxc-ms monitor=10000 on lxc2 * Pseudo action: lxc-ms-master_promote_0 * Resource action: lxc-ms promote on lxc1 * Pseudo action: lxc-ms-master_promoted_0 Revised Cluster Status: * Node List: * Online: [ 18node1 18node2 18node3 ] * GuestOnline: [ lxc1@18node1 lxc2@18node1 ] * Full List of Resources: * shooter (stonith:fence_xvm): Started 18node2 * container1 (ocf:heartbeat:VirtualDomain): Started 18node1 * container2 (ocf:heartbeat:VirtualDomain): Started 18node1 * Clone Set: lxc-ms-master [lxc-ms] (promotable): * Promoted: [ lxc1 ] * Unpromoted: [ lxc2 ] diff --git a/cts/scheduler/summary/whitebox-orphan-ms.summary b/cts/scheduler/summary/whitebox-orphan-ms.summary index 0d0007dcc6..7e1b45b272 100644 --- a/cts/scheduler/summary/whitebox-orphan-ms.summary +++ b/cts/scheduler/summary/whitebox-orphan-ms.summary @@ -1,87 +1,87 @@ Current cluster status: * Node List: * Online: [ 18node1 18node2 
18node3 ] * GuestOnline: [ lxc1@18node1 lxc2@18node1 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started 18node2 * FencingPass (stonith:fence_dummy): Started 18node3 * FencingFail (stonith:fence_dummy): Started 18node3 * rsc_18node1 (ocf:heartbeat:IPaddr2): Started 18node1 * rsc_18node2 (ocf:heartbeat:IPaddr2): Started 18node2 * rsc_18node3 (ocf:heartbeat:IPaddr2): Started 18node3 * migrator (ocf:pacemaker:Dummy): Started 18node1 * Clone Set: Connectivity [ping-1]: * Started: [ 18node1 18node2 18node3 ] * Clone Set: master-1 [stateful-1] (promotable): * Promoted: [ 18node1 ] * Unpromoted: [ 18node2 18node3 ] * Resource Group: group-1: * r192.168.122.87 (ocf:heartbeat:IPaddr2): Started 18node1 * r192.168.122.88 (ocf:heartbeat:IPaddr2): Started 18node1 * r192.168.122.89 (ocf:heartbeat:IPaddr2): Started 18node1 * lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started 18node1 * container2 (ocf:heartbeat:VirtualDomain): ORPHANED Started 18node1 * lxc1 (ocf:pacemaker:remote): ORPHANED Started 18node1 * lxc-ms (ocf:pacemaker:Stateful): ORPHANED Promoted [ lxc1 lxc2 ] * lxc2 (ocf:pacemaker:remote): ORPHANED Started 18node1 * container1 (ocf:heartbeat:VirtualDomain): ORPHANED Started 18node1 Transition Summary: * Move FencingFail ( 18node3 -> 18node1 ) * Stop container2 ( 18node1 ) due to node availability * Stop lxc1 ( 18node1 ) due to node availability - * Stop lxc-ms ( Promoted lxc1 ) due to node availability - * Stop lxc-ms ( Promoted lxc2 ) due to node availability + * Stop lxc-ms ( Promoted lxc1 ) due to node availability + * Stop lxc-ms ( Promoted lxc2 ) due to node availability * Stop lxc2 ( 18node1 ) due to node availability * Stop container1 ( 18node1 ) due to node availability Executing Cluster Transition: * Resource action: FencingFail stop on 18node3 * Resource action: lxc-ms demote on lxc2 * Resource action: lxc-ms demote on lxc1 * Resource action: FencingFail start on 18node1 * Resource action: lxc-ms stop on lxc2 * Resource action: lxc-ms stop on lxc1 * Resource action: lxc-ms delete on 18node3 * Resource action: lxc-ms delete on 18node2 * Resource action: lxc-ms delete on 18node1 * Resource action: lxc2 stop on 18node1 * Resource action: lxc2 delete on 18node3 * Resource action: lxc2 delete on 18node2 * Resource action: lxc2 delete on 18node1 * Resource action: container2 stop on 18node1 * Resource action: container2 delete on 18node3 * Resource action: container2 delete on 18node2 * Resource action: container2 delete on 18node1 * Resource action: lxc1 stop on 18node1 * Resource action: lxc1 delete on 18node3 * Resource action: lxc1 delete on 18node2 * Resource action: lxc1 delete on 18node1 * Resource action: container1 stop on 18node1 * Resource action: container1 delete on 18node3 * Resource action: container1 delete on 18node2 * Resource action: container1 delete on 18node1 Revised Cluster Status: * Node List: * Online: [ 18node1 18node2 18node3 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started 18node2 * FencingPass (stonith:fence_dummy): Started 18node3 * FencingFail (stonith:fence_dummy): Started 18node1 * rsc_18node1 (ocf:heartbeat:IPaddr2): Started 18node1 * rsc_18node2 (ocf:heartbeat:IPaddr2): Started 18node2 * rsc_18node3 (ocf:heartbeat:IPaddr2): Started 18node3 * migrator (ocf:pacemaker:Dummy): Started 18node1 * Clone Set: Connectivity [ping-1]: * Started: [ 18node1 18node2 18node3 ] * Clone Set: master-1 [stateful-1] (promotable): * Promoted: [ 18node1 ] * Unpromoted: [ 18node2 18node3 ] * Resource Group: group-1: * 
r192.168.122.87 (ocf:heartbeat:IPaddr2): Started 18node1 * r192.168.122.88 (ocf:heartbeat:IPaddr2): Started 18node1 * r192.168.122.89 (ocf:heartbeat:IPaddr2): Started 18node1 * lsb-dummy (lsb:/usr/share/pacemaker/tests/cts/LSBDummy): Started 18node1 diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 081a337c52..9fefa15a64 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -1,1977 +1,1986 @@ /* * Copyright 2012-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include // Check whether we have a high-resolution monotonic clock #undef PCMK__TIME_USE_CGT #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC) # define PCMK__TIME_USE_CGT # include /* clock_gettime */ #endif #include #include #include #include #include #include #include #include "pacemaker-execd.h" #define EXIT_REASON_MAX_LEN 128 GHashTable *rsc_list = NULL; typedef struct lrmd_cmd_s { int timeout; guint interval_ms; int start_delay; int timeout_orig; int call_id; int exec_rc; int lrmd_op_status; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *exit_reason; char *output; char *userdata_str; /* We can track operation queue time and run time, to be saved with the CIB * resource history (and displayed in cluster status). We need * high-resolution monotonic time for this purpose, so we use * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature * is disabled). * * However, we also need epoch timestamps for recording the time the command * last ran and the time its return value last changed, for use in time * displays (as opposed to interval calculations). We keep time_t values for * this purpose. * * The last run time is used for both purposes, so we keep redundant * monotonic and epoch values for this. Technically the two could represent * different times, but since time_t has only second resolution and the * values are used for distinct purposes, that is not significant. */ #ifdef PCMK__TIME_USE_CGT /* Recurring and systemd operations may involve more than one executor * command per operation, so they need info about the original and the most * recent. */ struct timespec t_first_run; // When op first ran struct timespec t_run; // When op most recently ran struct timespec t_first_queue; // When op was first queued struct timespec t_queue; // When op was most recently queued #endif time_t epoch_last_run; // Epoch timestamp of when op last ran time_t epoch_rcchange; // Epoch timestamp of when rc last changed - int first_notify_sent; + bool first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); #ifdef PCMK__TIME_USE_CGT /*! * \internal * \brief Check whether a struct timespec has been set * * \param[in] timespec Time to check * * \return true if timespec has been set (i.e. 
is nonzero), false otherwise */ static inline bool time_is_set(struct timespec *timespec) { return (timespec != NULL) && ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0)); } /* * \internal * \brief Set a timespec (and its original if unset) to the current time * * \param[out] t_current Where to store current time * \param[out] t_orig Where to copy t_current if unset */ static void get_current_time(struct timespec *t_current, struct timespec *t_orig) { clock_gettime(CLOCK_MONOTONIC, t_current); if ((t_orig != NULL) && !time_is_set(t_orig)) { *t_orig = *t_current; } } /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or unset) * * \note Can overflow on 32bit machines when the differences is around * 24 days or more. */ static int time_diff_ms(struct timespec *now, struct timespec *old) { int diff_ms = 0; if (time_is_set(old)) { struct timespec local_now = { 0, }; if (now == NULL) { clock_gettime(CLOCK_MONOTONIC, &local_now); now = &local_now; } diff_ms = (now->tv_sec - old->tv_sec) * 1000 + (now->tv_nsec - old->tv_nsec) / 1000000; } return diff_ms; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * * \param[in] cmd Executor command object to reset * * \note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif +static inline bool +action_matches(lrmd_cmd_t *cmd, const char *action, guint interval_ms) +{ + return (cmd->interval_ms == interval_ms) + && pcmk__str_eq(cmd->action, action, pcmk__str_casei); +} + static void log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time) { char pid_str[32] = { 0, }; int log_level = LOG_INFO; if (cmd->last_pid) { snprintf(pid_str, 32, "%d", cmd->last_pid); } if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { log_level = LOG_DEBUG; } #ifdef PCMK__TIME_USE_CGT do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d" " (execution time %dms, queue time %dms)", cmd->rsc_id, cmd->action, cmd->call_id, (cmd->last_pid? ", PID " : ""), pid_str, cmd->exec_rc, exec_time, queue_time); #else do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d" cmd->rsc_id, cmd->action, cmd->call_id, (cmd->last_pid? 
", PID " : ""), pid_str, cmd->exec_rc); #endif } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (pcmk__str_eq(action, "monitor", pcmk__str_casei) && pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) { return "status"; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); rsc->st_probe_rc = -ENODEV; // if stonith, initialize to "not running" return rsc; } static lrmd_cmd_t * create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) { crm_debug("Setting flag to leave pid group on timeout and " "only kill action pid for " PCMK__OP_FMT, cmd->rsc_id, cmd->action, cmd->interval_ms); cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action", cmd->action, 0, SVC_ACTION_LEAVE_GROUP, "SVC_ACTION_LEAVE_GROUP"); } return cmd; } static void stop_recurring_timer(lrmd_cmd_t *cmd) { if (cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = 0; } } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { stop_recurring_timer(cmd); if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->output); free(cmd->exit_reason); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = 
g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); return FALSE; } static inline void start_recurring_timer(lrmd_cmd_t *cmd) { if (cmd && (cmd->interval_ms > 0)) { cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms, stonith_recurring_op_helper, cmd); } } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } -static gboolean +/*! + * \internal + * \brief Check whether a list already contains the equivalent of a given action + */ +static lrmd_cmd_t * +find_duplicate_action(GList *action_list, lrmd_cmd_t *cmd) +{ + for (GList *item = action_list; item != NULL; item = item->next) { + lrmd_cmd_t *dup = item->data; + + if (action_matches(cmd, dup->action, dup->interval_ms)) { + return dup; + } + } + return NULL; +} + +static bool merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { - GList *gIter = NULL; lrmd_cmd_t * dup = NULL; - gboolean dup_pending = FALSE; + bool dup_pending = true; if (cmd->interval_ms == 0) { - return 0; + return false; } - for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { - dup = gIter->data; - if (pcmk__str_eq(cmd->action, dup->action, pcmk__str_casei) - && (cmd->interval_ms == dup->interval_ms)) { - dup_pending = TRUE; - goto merge_dup; + // Search for a duplicate of this action (in-flight or not) + dup = find_duplicate_action(rsc->pending_ops, cmd); + if (dup == NULL) { + dup_pending = false; + dup = find_duplicate_action(rsc->recurring_ops, cmd); + if (dup == NULL) { + return false; } } - /* if dup is in recurring_ops list, that means it has already executed - * and is in the interval loop. we can't just remove it in this case. */ - for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { - dup = gIter->data; - if (pcmk__str_eq(cmd->action, dup->action, pcmk__str_casei) - && (cmd->interval_ms == dup->interval_ms)) { - if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - if (dup->lrmd_op_status == PCMK_LRM_OP_CANCELLED) { - /* Fencing monitors marked for cancellation will not be merged to respond to cancellation. */ - return FALSE; - } - } - goto merge_dup; - } + /* Do not merge fencing monitors marked for cancellation, so we can reply to + * the cancellation separately. + */ + if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, + pcmk__str_casei) + && (dup->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) { + return false; } - return FALSE; -merge_dup: - - /* This should not occur. If it does, we need to investigate how something * like this is possible in the controller. 
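 *
 * Illustrative sketch (not part of this patch): a "duplicate" here is a
 * recurring command whose action name matches case-insensitively and whose
 * interval is identical, which is exactly the test action_matches() encodes.
 * Assuming two hypothetical commands "incoming" and "existing":
 *
 *     if (action_matches(incoming, existing->action, existing->interval_ms)) {
 *         // merge call ID and user data instead of scheduling a second copy
 *     }
 *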
*/ crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT "), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); - /* merge */ - dup->first_notify_sent = 0; + // Merge new action's call ID and user data into existing action + dup->first_notify_sent = false; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; + free_lrmd_cmd(cmd); + cmd = NULL; - if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - /* if we are waiting for the next interval, kick it off now */ - if (dup_pending == TRUE) { - stop_recurring_timer(cmd); - stonith_recurring_op_helper(cmd); + /* If dup is not pending, that means it has already executed at least once + * and is waiting in the interval. In that case, stop waiting and initiate + * a new instance now. + */ + if (!dup_pending) { + if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, + pcmk__str_casei)) { + stop_recurring_timer(dup); + stonith_recurring_op_helper(dup); + } else { + services_action_kick(rsc->rsc_id, + normalize_action_name(rsc, dup->action), + dup->interval_ms); } - - } else if (dup_pending == FALSE) { - /* if we've already handed this to the service lib, kick off an early execution */ - services_action_kick(rsc->rsc_id, - normalize_action_name(rsc, dup->action), - dup->interval_ms); } - free_lrmd_cmd(cmd); - - return TRUE; + return true; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { - gboolean dup_processed = FALSE; CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); - dup_processed = merge_recurring_duplicate(rsc, cmd); - if (dup_processed) { - /* duplicate recurring cmd found, cmds merged */ + if (merge_recurring_duplicate(rsc, cmd)) { + // Equivalent of cmd has already been scheduled return; } /* The controller expects the executor to automatically cancel * recurring operations before a resource stops. */ if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static xmlNode * create_lrmd_reply(const char *origin, int rc, int call_id) { xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, origin); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); return reply; } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; pcmk__client_t *client = value; int rc; int log_level = LOG_WARNING; const char *msg = NULL; CRM_CHECK(client != NULL, return); if (client->name == NULL) { crm_trace("Skipping notification to client without name"); return; } if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) { /* We only want to notify clients of the executor IPC API. If we are * running as Pacemaker Remote, we may have clients proxied to other * IPC services in the cluster, so skip those. 
*/ crm_trace("Skipping executor API notification to client %s", pcmk__client_name(client)); return; } rc = lrmd_server_send_notify(client, update_msg); if (rc == pcmk_rc_ok) { return; } switch (rc) { case ENOTCONN: case EPIPE: // Client exited without waiting for notification log_level = LOG_INFO; msg = "Disconnected"; break; default: msg = pcmk_rc_str(rc); break; } do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d", pcmk__client_name(client), msg, rc); } static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { xmlNode *notify = NULL; #ifdef PCMK__TIME_USE_CGT int exec_time = time_diff_ms(NULL, &(cmd->t_run)); int queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue)); log_finished(cmd, exec_time, queue_time); #else log_finished(cmd, 0, 0); #endif /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if (cmd->last_notify_rc == cmd->exec_rc && cmd->last_notify_op_status == cmd->lrmd_op_status) { /* only send changes */ return; } } - cmd->first_notify_sent = 1; + cmd->first_notify_sent = true; cmd->last_notify_rc = cmd->exec_rc; cmd->last_notify_op_status = cmd->lrmd_op_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __func__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME, (long long) cmd->epoch_last_run); crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME, (long long) cmd->epoch_rcchange); #ifdef PCMK__TIME_USE_CGT crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output); crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason); if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else { pcmk__foreach_ipc_client(send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { if (pcmk__ipc_client_count() != 0) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = 
crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __func__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); pcmk__foreach_ipc_client(send_client_notify, notify); free_xml(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->lrmd_op_status = 0; cmd->last_pid = 0; #ifdef PCMK__TIME_USE_CGT memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); #endif cmd->epoch_last_run = 0; free(cmd->exit_reason); cmd->exit_reason = NULL; free(cmd->output); cmd->output = NULL; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if (cmd->interval_ms && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval_ms == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } static int ocf2uniform_rc(int rc) { switch (rc) { case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: break; default: if (rc < 0 || rc > PCMK_OCF_FAILED_PROMOTED) { return PCMK_OCF_UNKNOWN_ERROR; } } return rc; } static int stonith2uniform_rc(const char *action, int rc) { switch (rc) { case pcmk_ok: rc = PCMK_OCF_OK; break; case -ENODEV: /* This should be possible only for probes in practice, but * interpret for all actions to be safe. 
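 *
 * Hedged illustration (not part of this patch), following the mapping below:
 *
 *     stonith2uniform_rc("monitor", -ENODEV);  // PCMK_OCF_NOT_RUNNING
 *     stonith2uniform_rc("stop", -ENODEV);     // PCMK_OCF_OK
 *     stonith2uniform_rc("start", -ENODEV);    // PCMK_OCF_NOT_INSTALLED
 *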
*/ if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) { rc = PCMK_OCF_NOT_RUNNING; } else if (pcmk__str_eq(action, "stop", pcmk__str_casei)) { rc = PCMK_OCF_OK; } else { rc = PCMK_OCF_NOT_INSTALLED; } break; case -EOPNOTSUPP: rc = PCMK_OCF_UNIMPLEMENT_FEATURE; break; case -ETIME: case -ETIMEDOUT: rc = PCMK_OCF_TIMEOUT; break; default: rc = PCMK_OCF_UNKNOWN_ERROR; break; } return rc; } #if SUPPORT_NAGIOS static int nagios2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } switch (rc) { case NAGIOS_STATE_OK: return PCMK_OCF_OK; case NAGIOS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case NAGIOS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case NAGIOS_STATE_WARNING: case NAGIOS_STATE_CRITICAL: case NAGIOS_STATE_UNKNOWN: case NAGIOS_STATE_DEPENDENT: default: return PCMK_OCF_UNKNOWN_ERROR; } return PCMK_OCF_UNKNOWN_ERROR; } #endif static int get_uniform_rc(const char *standard, const char *action, int rc) { if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) { return ocf2uniform_rc(rc); } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { return stonith2uniform_rc(action, rc); } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { return rc; } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_casei)) { return rc; #if SUPPORT_NAGIOS } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { return nagios2uniform_rc(action, rc); #endif } else { return services_get_ocf_exitcode(action, rc); } } static int action_get_uniform_rc(svc_action_t * action) { lrmd_cmd_t *cmd = action->cb_data; return get_uniform_rc(action->standard, cmd->action, action->rc); } struct notify_new_client_data { xmlNode *notify; pcmk__client_t *new_client; }; static void notify_one_client(gpointer key, gpointer value, gpointer user_data) { pcmk__client_t *client = value; struct notify_new_client_data *data = user_data; if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) { send_client_notify(key, (gpointer) client, (gpointer) data->notify); } } void notify_of_new_client(pcmk__client_t *new_client) { struct notify_new_client_data data; data.new_client = new_client; data.notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__); crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); pcmk__foreach_ipc_client(notify_one_client, &data); free_xml(data.notify); } static char * parse_exit_reason(const char *output) { const char *cur = NULL; const char *last = NULL; static int cookie_len = 0; char *eol = NULL; size_t reason_len = EXIT_REASON_MAX_LEN; if (output == NULL) { return NULL; } if (!cookie_len) { cookie_len = strlen(PCMK_OCF_REASON_PREFIX); } cur = strstr(output, PCMK_OCF_REASON_PREFIX); for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) { /* skip over the cookie delimiter string */ cur += cookie_len; last = cur; } if (last == NULL) { return NULL; } // Truncate everything after a new line, and limit reason string size eol = strchr(last, '\n'); if (eol) { reason_len = QB_MIN(reason_len, eol - last); } return strndup(last, reason_len); } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * 
it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; const char *rclass = NULL; #ifdef PCMK__TIME_USE_CGT bool goagain = false; #endif if (!cmd) { crm_err("Completed executor action (%s) does not match any known operations", action->id); return; } #ifdef PCMK__TIME_USE_CGT if (cmd->exec_rc != action->rc) { cmd->epoch_rcchange = time(NULL); } #endif cmd->last_pid = action->pid; cmd->exec_rc = action_get_uniform_rc(action); cmd->lrmd_op_status = action->status; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { rclass = resources_find_service_class(rsc->type); } else if(rsc) { rclass = rsc->class; } #ifdef PCMK__TIME_USE_CGT if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { if ((cmd->exec_rc == PCMK_OCF_OK) && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) { /* systemd returns from start and stop actions after the action * begins, not after it completes. We have to jump through a few * hoops so that we don't report 'complete' to the rest of pacemaker * until it's actually done. */ goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if (cmd->real_action != NULL) { // This is follow-up monitor to check whether start/stop completed if ((cmd->lrmd_op_status == PCMK_LRM_OP_DONE) && (cmd->exec_rc == PCMK_OCF_PENDING)) { goagain = true; } else if ((cmd->exec_rc == PCMK_OCF_OK) && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { goagain = true; } else { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s systemd %s is now complete (elapsed=%dms, " "remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc); cmd_original_times(cmd); // Monitors may return "not running", but start/stop shouldn't if ((cmd->lrmd_op_status == PCMK_LRM_OP_DONE) && (cmd->exec_rc == PCMK_OCF_NOT_RUNNING)) { if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) { cmd->exec_rc = PCMK_OCF_UNKNOWN_ERROR; } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { cmd->exec_rc = PCMK_OCF_OK; } } } } } #endif #if SUPPORT_NAGIOS if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { - if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei) && - (cmd->interval_ms == 0) && cmd->exec_rc == PCMK_OCF_OK) { + if (action_matches(cmd, "monitor", 0) + && (cmd->exec_rc == PCMK_OCF_OK)) { /* Successfully executed --version for the nagios plugin */ cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei) && cmd->exec_rc != PCMK_OCF_OK) { #ifdef PCMK__TIME_USE_CGT goagain = true; #endif } } #endif #ifdef PCMK__TIME_USE_CGT if (goagain) { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; int delay = cmd->timeout_orig / 10; if(delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if(cmd->exec_rc == PCMK_OCF_OK) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if(cmd->exec_rc == 
PCMK_OCF_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay); } cmd_reset(cmd); if(rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left); cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; cmd->exec_rc = PCMK_OCF_TIMEOUT; cmd_original_times(cmd); } } #endif if (action->stderr_data) { cmd->output = strdup(action->stderr_data); cmd->exit_reason = parse_exit_reason(action->stderr_data); } else if (action->stdout_data) { cmd->output = strdup(action->stdout_data); } cmd_finalize(cmd, rsc); } /*! * \internal * \brief Determine operation status of a stonith operation * * Non-stonith resource operations get their operation status directly from the * service library, but the fencer does not have an equivalent, so we must infer * an operation status from the fencer API's return code. * * \param[in] action Name of action performed on stonith resource * \param[in] interval_ms Action interval * \param[in] rc Action result from fencer * * \return Operation status corresponding to fencer API return code */ static int stonith_rc2status(const char *action, guint interval_ms, int rc) { int status = PCMK_LRM_OP_DONE; switch (rc) { case pcmk_ok: break; case -EOPNOTSUPP: case -EPROTONOSUPPORT: status = PCMK_LRM_OP_NOTSUPPORTED; break; case -ETIME: case -ETIMEDOUT: status = PCMK_LRM_OP_TIMEOUT; break; case -ENOTCONN: case -ECOMM: // Couldn't talk to fencer status = PCMK_LRM_OP_ERROR; break; case -ENODEV: // The device is not registered with the fencer status = PCMK_LRM_OP_ERROR; break; default: break; } return status; } static void stonith_action_complete(lrmd_cmd_t * cmd, int rc) { // This can be NULL if resource was removed before command completed lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); cmd->exec_rc = stonith2uniform_rc(cmd->action, rc); /* This function may be called with status already set to cancelled, if a * pending action was aborted. Otherwise, we need to determine status from * the fencer return code. */ if (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED) { cmd->lrmd_op_status = stonith_rc2status(cmd->action, cmd->interval_ms, rc); // Certain successful actions change the known state of the resource if (rsc && (cmd->exec_rc == PCMK_OCF_OK)) { if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING } } } /* The recurring timer should not be running at this point in any case, but * as a failsafe, stop it if it is. */ stop_recurring_timer(cmd); /* Reschedule this command if appropriate. If a recurring command is *not* * rescheduled, its status must be PCMK_LRM_OP_CANCELLED, otherwise it will * not be removed from recurring_ops by cmd_finalize(). 
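 *
 * Sketch for illustration only (not part of this patch) -- the decision
 * below reduces to:
 *
 *     recurring (interval_ms > 0) and not cancelled -> start_recurring_timer()
 *     recurring and cancelled                       -> cmd_finalize() frees it
 *     one-shot (interval_ms == 0)                   -> cmd_finalize() frees it
 *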
*/ if (rsc && (cmd->interval_ms > 0) && (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED)) { start_recurring_timer(cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { stonith_action_complete(data->userdata, data->rc); } void stonith_connection_failed(void) { GHashTableIter iter; GList *cmd_list = NULL; GList *cmd_iter = NULL; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { /* If we registered this fence device, we don't know whether the * fencer still has the registration or not. Cause future probes to * return PCMK_OCF_UNKNOWN_ERROR until the resource is stopped or * started successfully. This is especially important if the * controller also went away (possibly due to a cluster layer * restart) and won't receive our client notification of any * monitors finalized below. */ if (rsc->st_probe_rc == pcmk_ok) { rsc->st_probe_rc = pcmk_err_generic; } if (rsc->active) { cmd_list = g_list_append(cmd_list, rsc->active); } if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, rsc->recurring_ops); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, rsc->pending_ops); } rsc->pending_ops = rsc->recurring_ops = NULL; } } if (!cmd_list) { return; } crm_err("Connection to fencer failed, finalizing %d pending operations", g_list_length(cmd_list)); for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { stonith_action_complete(cmd_iter->data, -ENOTCONN); } g_list_free(cmd_list); } /*! * \internal * \brief Execute a stonith resource "start" action * * Start a stonith resource by registering it with the fencer. * (Stonith agents don't have a start command.) * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to start * \param[in] cmd Start command to execute * * \return pcmk_ok on success, -errno otherwise */ static int execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; int rc = pcmk_ok; // Convert command parameters to stonith API key/values if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* The fencer will automatically register devices via CIB notifications * when the CIB changes, but to avoid a possible race condition between * the fencer receiving the notification and the executor requesting that * resource, the executor registers the device as well. The fencer knows how * to handle duplicate registrations. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); return rc; } /*! * \internal * \brief Execute a stonith resource "stop" action * * Stop a stonith resource by unregistering it with the fencer. * (Stonith agents don't have a stop command.) 
* * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to stop * * \return pcmk_ok on success, -errno otherwise */ static inline int execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc) { /* @TODO Failure would indicate a problem communicating with fencer; * perhaps we should try reconnecting and retrying a few times? */ return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, rsc->rsc_id); } /*! * \internal * \brief Initiate a stonith resource agent recurring "monitor" action * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to monitor * \param[in] cmd Monitor command being executed * * \return pcmk_ok if monitor was successfully initiated, -errno otherwise */ static inline int execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); if (rc == TRUE) { rsc->active = cmd; rc = pcmk_ok; } else { rc = -pcmk_err_generic; } return rc; } static void lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { int rc = 0; bool do_monitor = FALSE; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { rc = -ENOTCONN; } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { rc = execd_stonith_start(stonith_api, rsc, cmd); if (rc == 0) { do_monitor = TRUE; } } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { rc = execd_stonith_stop(stonith_api, rsc); } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { if (cmd->interval_ms > 0) { do_monitor = TRUE; } else { rc = rsc->st_probe_rc; } } if (do_monitor) { rc = execd_stonith_monitor(stonith_api, rsc, cmd); if (rc == pcmk_ok) { // Don't clean up yet, we will find out result of the monitor later return; } } stonith_action_complete(cmd, rc); } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei) && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { cmd->exec_rc = PCMK_OCF_OK; goto exec_done; } #endif params_copy = pcmk__str_table_dup(cmd->params); action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval_ms, cmd->timeout, params_copy, cmd->service_flags); if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; goto exec_done; } action->cb_data = cmd; /* 'cmd' may not be valid after this point if * services_action_async() returned TRUE * * Upstart and systemd both synchronously determine monitor/status * results and call action_complete (which may free 'cmd') if necessary. 
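 *
 * Illustrative sketch (not part of this patch): callers that still need
 * anything from 'cmd' must copy it before the hand-off, as
 * process_lrmd_rsc_exec() does with the call ID:
 *
 *     int call_id = cmd->call_id;   // copy before scheduling
 *     schedule_lrmd_cmd(rsc, cmd);  // cmd may be merged and freed here
 *     return call_id;
 *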
*/ if (services_action_async(action, action_complete)) { return TRUE; } cmd->exec_rc = action->rc; if(action->status != PCMK_LRM_OP_DONE) { cmd->lrmd_op_status = action->status; } else { cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } services_action_free(action); action = NULL; exec_done: cmd_finalize(cmd, rsc); return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t * rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_run), &(cmd->t_first_run)); #endif cmd->epoch_last_run = time(NULL); } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval_ms) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GList *gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei); gIter = rsc->pending_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; /* If a stonith command is in-flight, just mark it as cancelled; * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out. */ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval_ms); } gIter = next; } /* frees list, but not list elements. 
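 *
 * Illustration only (not part of this patch): g_list_free() releases the
 * GList cells; the lrmd_cmd_t entries themselves are freed elsewhere, by
 * cmd_finalize()/free_lrmd_cmd() in the loop above or from the service
 * library's cancellation callback.
 *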
*/ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, xmlNode **reply) { int rc = pcmk_ok; const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_MIN_PROTOCOL_VERSION, protocol_version); rc = -EPROTO; } if (crm_is_true(is_ipc_provider)) { #ifdef PCMK__COMPILE_REMOTE if ((client->remote != NULL) && client->remote->tls_handshake_complete) { // This is a remote connection from a cluster node's controller ipc_proxy_add_provider(client); } else { rc = -EACCES; } #else rc = -EPROTONOSUPPORT; #endif } *reply = create_lrmd_reply(__func__, rc, call_id); crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(*reply, F_LRMD_CLIENTID, client->id); crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); return rc; } static int process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) && pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) { crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Cached agent information for '%s'", rsc->rsc_id); return rc; } static xmlNode * process_lrmd_get_rsc_info(xmlNode *request, int call_id) { int rc = pcmk_ok; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; if (rsc_id == NULL) { rc = -ENODEV; } else { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Agent information for '%s' not in cache", rsc_id); rc = -ENODEV; } } reply = create_lrmd_reply(__func__, rc, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } return reply; } static int process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Ignoring unregistration of resource '%s', which is not registered", rsc_id); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation (0x%p) still in progress for unregistered resource %s", rsc->active, rsc_id); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = 
crm_element_value(rsc_xml, F_LRMD_RSC_ID); int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } static int cancel_op(const char *rsc_id, const char *action, guint interval_ms) { GList *gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; - if (pcmk__str_eq(cmd->action, action, pcmk__str_casei) - && (cmd->interval_ms == interval_ms)) { - + if (action_matches(cmd, action, interval_ms)) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; - if (pcmk__str_eq(cmd->action, action, pcmk__str_casei) - && (cmd->interval_ms == interval_ms)) { - + if (action_matches(cmd, action, interval_ms)) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval_ms) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. 
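 *
 * A hedged sketch of the pattern (not part of this patch) -- iterate over a
 * snapshot and afterwards free only the copied cells:
 *
 *     GList *snapshot = g_list_copy(rsc->recurring_ops);
 *     for (GList *item = snapshot; item != NULL; item = item->next) {
 *         // cancel_op() may unlink entries from rsc->recurring_ops here
 *     }
 *     g_list_free(snapshot);   // frees the copy, not the commands
 *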
If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval_ms == 0) { continue; } if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); guint interval_ms = 0; crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval_ms); } static void add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc) { xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC); crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id); for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) { lrmd_cmd_t *cmd = item->data; xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP); crm_xml_add(op_xml, F_LRMD_RSC_ACTION, (cmd->real_action? cmd->real_action : cmd->action)); crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig); } } static xmlNode * process_lrmd_get_recurring(xmlNode *request, int call_id) { int rc = pcmk_ok; const char *rsc_id = NULL; lrmd_rsc_t *rsc = NULL; xmlNode *reply = NULL; xmlNode *rsc_xml = NULL; // Resource ID is optional rsc_xml = first_named_child(request, F_LRMD_CALLDATA); if (rsc_xml) { rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC); } if (rsc_xml) { rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); } // If resource ID is specified, resource must exist if (rsc_id != NULL) { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; } } reply = create_lrmd_reply(__func__, rc, call_id); // If resource ID is not specified, check all resources if (rsc_id == NULL) { GHashTableIter iter; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rsc)) { add_recurring_op_xml(reply, rsc); } } else if (rsc) { add_recurring_op_xml(reply, rsc); } return reply; } void process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; xmlNode *reply = NULL; /* Certain IPC commands may be done only by privileged users (i.e. root or * hacluster), because they would otherwise provide a means of bypassing * ACLs. 
*/ bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged); crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) { #ifdef PCMK__COMPILE_REMOTE if (allowed) { ipc_proxy_forward_client(client, request); } else { rc = -EACCES; } #else rc = -EPROTONOSUPPORT; #endif do_reply = 1; } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { rc = process_lrmd_signon(client, request, call_id, &reply); do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) { if (allowed) { reply = process_lrmd_get_rsc_info(request, call_id); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_exec(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_cancel(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) { do_notify = 1; do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) { if (allowed) { xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA); CRM_LOG_ASSERT(data != NULL); pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG)); } else { rc = -EACCES; } } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) { if (allowed) { rc = process_lrmd_alert_exec(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) { if (allowed) { reply = process_lrmd_get_recurring(request, call_id); } else { rc = -EACCES; } do_reply = 1; } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown IPC request '%s' from client %s", op, pcmk__client_name(client)); } if (rc == -EACCES) { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", op, pcmk__client_name(client)); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", op, client->id, rc, do_reply, do_notify); if (do_reply) { int send_rc = pcmk_rc_ok; if (reply == NULL) { reply = create_lrmd_reply(__func__, rc, call_id); } send_rc = lrmd_server_send_reply(client, id, reply); free_xml(reply); if (send_rc != pcmk_rc_ok) { crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d", pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc); } } if (do_notify) { send_generic_notify(rc, request); } } diff --git a/extra/alerts/alert_file.sh.sample b/extra/alerts/alert_file.sh.sample index 7844c77e60..f6c2211513 100644 --- a/extra/alerts/alert_file.sh.sample +++ b/extra/alerts/alert_file.sh.sample @@ -1,109 +1,122 @@ #!/bin/sh # # Copyright 2015-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. 
# ############################################################################## # Sample configuration (cib fragment in xml notation) # ================================ # # # # # # # # # # # # # +# Explicitly list all environment variables used, to make static analysis happy +: ${CRM_alert_version:=""} +: ${CRM_alert_recipient:=""} +: ${CRM_alert_node_sequence:=""} +: ${CRM_alert_timestamp:=""} +: ${CRM_alert_kind:=""} +: ${CRM_alert_node:=""} +: ${CRM_alert_desc:=""} +: ${CRM_alert_task:=""} +: ${CRM_alert_rsc:=""} +: ${CRM_alert_attribute_name:=""} +: ${CRM_alert_attribute_value:=""} + # No one will probably ever see this echo, unless they run the script manually. # An alternative would be to log to the system log, or similar. (We can't send # this to the configured recipient, because that variable won't be defined in # this case either.) if [ -z $CRM_alert_version ]; then echo "$0 must be run by Pacemaker version 1.1.15 or later" exit 0 fi # Alert agents must always handle the case where no recipients are defined, # even if it's a no-op (a recipient might be added to the configuration later). if [ -z "${CRM_alert_recipient}" ]; then echo "$0 requires a recipient configured with a full filename path" exit 0 fi debug_exec_order_default="false" # Pacemaker passes instance attributes to alert agents as environment variables. # It is completely up to the agent what instance attributes to support. # Here, we define an instance attribute "debug_exec_order". : ${debug_exec_order=${debug_exec_order_default}} if [ "${debug_exec_order}" = "true" ]; then tstamp=`printf "%04d. " "$CRM_alert_node_sequence"` if [ ! -z "$CRM_alert_timestamp" ]; then tstamp="${tstamp} $CRM_alert_timestamp (`date "+%H:%M:%S.%06N"`): " fi else if [ ! -z "$CRM_alert_timestamp" ]; then tstamp="$CRM_alert_timestamp: " fi fi case $CRM_alert_kind in node) echo "${tstamp}Node '${CRM_alert_node}' is now '${CRM_alert_desc}'" >> "${CRM_alert_recipient}" ;; fencing) # Other keys: # # CRM_alert_node # CRM_alert_task # CRM_alert_rc # echo "${tstamp}Fencing ${CRM_alert_desc}" >> "${CRM_alert_recipient}" ;; resource) # Other keys: # # CRM_alert_target_rc # CRM_alert_status # CRM_alert_rc # if [ ${CRM_alert_interval} = "0" ]; then CRM_alert_interval="" else CRM_alert_interval=" (${CRM_alert_interval})" fi if [ ${CRM_alert_target_rc} = "0" ]; then CRM_alert_target_rc="" else CRM_alert_target_rc=" (target: ${CRM_alert_target_rc})" fi case ${CRM_alert_desc} in Cancelled) ;; *) echo "${tstamp}Resource operation '${CRM_alert_task}${CRM_alert_interval}' for '${CRM_alert_rsc}' on '${CRM_alert_node}': ${CRM_alert_desc}${CRM_alert_target_rc}" >> "${CRM_alert_recipient}" ;; esac ;; attribute) # echo "${tstamp}Attribute '${CRM_alert_attribute_name}' on node '${CRM_alert_node}' was updated to '${CRM_alert_attribute_value}'" >> "${CRM_alert_recipient}" ;; *) echo "${tstamp}Unhandled $CRM_alert_kind alert" >> "${CRM_alert_recipient}" env | grep CRM_alert >> "${CRM_alert_recipient}" ;; esac diff --git a/extra/alerts/alert_smtp.sh.sample b/extra/alerts/alert_smtp.sh.sample index 5da83ade4a..62bfc41517 100644 --- a/extra/alerts/alert_smtp.sh.sample +++ b/extra/alerts/alert_smtp.sh.sample @@ -1,106 +1,118 @@ #!/bin/sh # -# Copyright 2016-2017 the Pacemaker project contributors +# Copyright 2016-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. 
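Note on the alert_file changes above: the added ": ${CRM_alert_*:=""}" lines predeclare every environment variable the sample reads, so static analysis does not flag references to keys that Pacemaker may not export for a given alert type. A minimal standalone illustration of that parameter-expansion idiom (not part of the patch, variable names chosen for the example):

    # ":" is a no-op that still evaluates its arguments, so the assignment
    # below happens only when the variable is unset or empty.
    unset CRM_alert_kind
    : ${CRM_alert_kind:=""}            # now defined (empty); later tests are safe
    : ${debug_exec_order:="false"}     # instance attribute with a default value
    echo "kind='${CRM_alert_kind}' debug_exec_order='${debug_exec_order}'"
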
# ############################################################################## # # Sample configuration (cib fragment in xml notation) # ================================ # # # # # # # # # # # # +# Explicitly list all environment variables used, to make static analysis happy +: ${CRM_alert_version:=""} +: ${CRM_alert_recipient:=""} +: ${CRM_alert_timestamp:=""} +: ${CRM_alert_kind:=""} +: ${CRM_alert_node:=""} +: ${CRM_alert_desc:=""} +: ${CRM_alert_task:=""} +: ${CRM_alert_rsc:=""} +: ${CRM_alert_attribute_name:=""} +: ${CRM_alert_attribute_value:=""} + email_client_default="sendmail" email_sender_default="hacluster" email_recipient_default="root" : ${email_client=${email_client_default}} : ${email_sender=${email_sender_default}} email_recipient="${CRM_alert_recipient-${email_recipient_default}}" node_name=`uname -n` cluster_name=`crm_attribute --query -n cluster-name -q` email_body=`env | grep CRM_alert_` if [ ! -z "${email_sender##*@*}" ]; then email_sender="${email_sender}@${node_name}" fi if [ ! -z "${email_recipient##*@*}" ]; then email_recipient="${email_recipient}@${node_name}" fi if [ -z ${CRM_alert_version} ]; then email_subject="Pacemaker version 1.1.15 or later is required for alerts" else case ${CRM_alert_kind} in node) email_subject="${CRM_alert_timestamp} ${cluster_name}: Node '${CRM_alert_node}' is now '${CRM_alert_desc}'" ;; fencing) email_subject="${CRM_alert_timestamp} ${cluster_name}: Fencing ${CRM_alert_desc}" ;; resource) if [ ${CRM_alert_interval} = "0" ]; then CRM_alert_interval="" else CRM_alert_interval=" (${CRM_alert_interval})" fi if [ ${CRM_alert_target_rc} = "0" ]; then CRM_alert_target_rc="" else CRM_alert_target_rc=" (target: ${CRM_alert_target_rc})" fi case ${CRM_alert_desc} in Cancelled) ;; *) email_subject="${CRM_alert_timestamp} ${cluster_name}: Resource operation '${CRM_alert_task}${CRM_alert_interval}' for '${CRM_alert_rsc}' on '${CRM_alert_node}': ${CRM_alert_desc}${CRM_alert_target_rc}" ;; esac ;; attribute) # email_subject="${CRM_alert_timestamp} ${cluster_name}: The '${CRM_alert_attribute_name}' attribute of the '${CRM_alert_node}' node was updated in '${CRM_alert_attribute_value}'" ;; *) email_subject="${CRM_alert_timestamp} ${cluster_name}: Unhandled $CRM_alert_kind alert" ;; esac fi if [ ! -z "${email_subject}" ]; then case $email_client in # This sample script supports only sendmail for sending the email. # Support for additional senders can easily be added by adding # new cases here. sendmail) sendmail -t -r "${email_sender}" <<__EOF__ From: ${email_sender} To: ${email_recipient} Return-Path: ${email_sender} Subject: ${email_subject} ${email_body} __EOF__ ;; *) ;; esac fi diff --git a/extra/alerts/alert_snmp.sh.sample b/extra/alerts/alert_snmp.sh.sample index 8354f82308..841f30f0c7 100644 --- a/extra/alerts/alert_snmp.sh.sample +++ b/extra/alerts/alert_snmp.sh.sample @@ -1,183 +1,186 @@ #!/bin/sh # # Copyright 2013 Florian CROUZAT # Later changes copyright 2013-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # # Description: Manages a SNMP trap, provided by NTT OSSC as a # script under Pacemaker control # ############################################################################## # This sample script assumes that only users who already have # hacluster-equivalent access to the cluster nodes can edit the CIB. 
Otherwise, # a malicious user could run commands as hacluster by inserting shell code into # the trap_options or timestamp-format parameters. # # Sample configuration (cib fragment in xml notation) # ================================ # # # # # # # # # # # # # # ================================ # # This uses the official Pacemaker MIB. # 1.3.6.1.4.1.32723 has been assigned to the project by IANA: # http://www.iana.org/assignments/enterprise-numbers -if [ -z "$CRM_alert_version" ]; then - echo "$0 must be run by Pacemaker version 1.1.15 or later" - exit 0 -fi - -if [ -z "$CRM_alert_recipient" ]; then - echo "$0 requires a recipient configured with the SNMP server IP address" - exit 0 -fi - # Defaults for user-configurable values trap_binary_default="/usr/bin/snmptrap" trap_version_default="2c" trap_options_default="" trap_community_default="public" trap_node_states_default="all" trap_fencing_tasks_default="all" trap_resource_tasks_default="all" trap_monitor_success_default="false" trap_add_hires_timestamp_oid_default="true" trap_snmp_persistent_dir_default="/var/lib/pacemaker/snmp" trap_ignore_int32_default=2147483647 # maximum Integer32 value trap_ignore_string_default="n/a" # doesn't conflict with valid XML IDs # Ensure all user-provided variables have values. : ${trap_binary=${trap_binary_default}} : ${trap_version=${trap_version_default}} : ${trap_options=${trap_options_default}} : ${trap_community=${trap_community_default}} : ${trap_node_states=${trap_node_states_default}} : ${trap_fencing_tasks=${trap_fencing_tasks_default}} : ${trap_resource_tasks=${trap_resource_tasks_default}} : ${trap_monitor_success=${trap_monitor_success_default}} : ${trap_add_hires_timestamp_oid=${trap_add_hires_timestamp_oid_default}} : ${trap_snmp_persistent_dir=${trap_snmp_persistent_dir_default}} : ${trap_ignore_int32=${trap_ignore_int32_default}} : ${trap_ignore_string=${trap_ignore_string_default}} # Ensure all cluster-provided variables have values, regardless of alert type. : ${CRM_alert_node=${trap_ignore_string}} : ${CRM_alert_rsc=${trap_ignore_string}} : ${CRM_alert_task=${trap_ignore_string}} : ${CRM_alert_desc=${trap_ignore_string}} : ${CRM_alert_status=${trap_ignore_int32}} : ${CRM_alert_rc=${trap_ignore_int32}} : ${CRM_alert_target_rc=${trap_ignore_int32}} : ${CRM_alert_attribute_name=${trap_ignore_string}} : ${CRM_alert_attribute_value=${trap_ignore_string}} +: ${CRM_alert_version:=""} +: ${CRM_alert_recipient:=""} +: ${CRM_alert_kind:=""} + +if [ -z "$CRM_alert_version" ]; then + echo "$0 must be run by Pacemaker version 1.1.15 or later" + exit 0 +fi + +if [ -z "$CRM_alert_recipient" ]; then + echo "$0 requires a recipient configured with the SNMP server IP address" + exit 0 +fi # Echo a high-resolution equivalent of the Pacemaker-provided time values # using NetSNMP's DateAndTime specification ("%Y-%m-%d,%H:%M:%S.%01N"). 
get_system_date() { : ${CRM_alert_timestamp_epoch=$(date +%s)} : ${CRM_alert_timestamp_usec=0} YMDHMS=$(date --date="@${CRM_alert_timestamp_epoch}" +"%Y-%m-%d,%H:%M:%S") USEC=$(echo ${CRM_alert_timestamp_usec} | cut -b 1) echo "${YMDHMS}.${USEC}" } is_in_list() { item_list=`echo "$1" | tr ',' ' '` if [ "${item_list}" = "all" ]; then return 0 else for act in $item_list do act=`echo "$act" | tr A-Z a-z` [ "$act" != "$2" ] && continue return 0 done fi return 1 } send_pacemaker_trap() { PREFIX="PACEMAKER-MIB::pacemakerNotification" OUTPUT=$("${trap_binary}" -v "${trap_version}" ${trap_options} \ -c "${trap_community}" "${CRM_alert_recipient}" "" \ "${PREFIX}Trap" \ "${PREFIX}Node" s "${CRM_alert_node}" \ "${PREFIX}Resource" s "${CRM_alert_rsc}" \ "${PREFIX}Operation" s "${CRM_alert_task}" \ "${PREFIX}Description" s "${CRM_alert_desc}" \ "${PREFIX}Status" i "${CRM_alert_status}" \ "${PREFIX}ReturnCode" i "${CRM_alert_rc}" \ "${PREFIX}TargetReturnCode" i "${CRM_alert_target_rc}" \ "${PREFIX}AttributeName" s "${CRM_alert_attribute_name}" \ "${PREFIX}AttributeValue" s "${CRM_alert_attribute_value}" \ ${hires_timestamp} 2>&1) if [ $? -ne 0 ]; then echo "${trap_binary} returned error : rc=$? $OUTPUT" fi } if [ "${trap_add_hires_timestamp_oid}" = "true" ]; then hires_timestamp="HOST-RESOURCES-MIB::hrSystemDate s $(get_system_date)" fi if [ -z ${SNMP_PERSISTENT_DIR} ]; then export SNMP_PERSISTENT_DIR="${trap_snmp_persistent_dir}" # mkdir for snmp trap tools. if [ ! -d ${SNMP_PERSISTENT_DIR} ]; then mkdir -p ${SNMP_PERSISTENT_DIR} fi fi case "$CRM_alert_kind" in node) if is_in_list "${trap_node_states}" "${CRM_alert_desc}"; then send_pacemaker_trap fi ;; fencing) if is_in_list "${trap_fencing_tasks}" "${CRM_alert_task}"; then send_pacemaker_trap fi ;; resource) if is_in_list "${trap_resource_tasks}" "${CRM_alert_task}" && \ [ "${CRM_alert_desc}" != "Cancelled" ] ; then if [ "${trap_monitor_success}" = "false" ] && \ [ "${CRM_alert_rc}" = "${CRM_alert_target_rc}" ] && \ [ "${CRM_alert_task}" = "monitor" ]; then exit 0 fi send_pacemaker_trap fi ;; attribute) send_pacemaker_trap ;; *) ;; esac diff --git a/extra/resources/ClusterMon.in b/extra/resources/ClusterMon.in index 2f1f8bc1c4..94c4c92f9b 100755 --- a/extra/resources/ClusterMon.in +++ b/extra/resources/ClusterMon.in @@ -1,272 +1,268 @@ #!@BASH_PATH@ # # ocf:pacemaker:ClusterMon resource agent # # Original copyright 2004 SUSE LINUX AG, Lars Marowsky-Bre -# Later changes copyright 2008-2019 the Pacemaker project contributors +# Later changes copyright 2008-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # Starts crm_mon in background which logs cluster status as # html to the specified file. -# -# OCF instance parameters: -# OCF_RESKEY_user -# OCF_RESKEY_pidfile -# OCF_RESKEY_update -# OCF_RESKEY_extra_options -# OCF_RESKEY_htmlfile ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . 
"${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} +# Explicitly list all environment variables used, to make static analysis happy +: ${OCF_RESKEY_user:=""} +: ${OCF_RESKEY_pidfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"} +: ${OCF_RESKEY_update:="15000"} +: ${OCF_RESKEY_extra_options:=""} +: ${OCF_RESKEY_htmlfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"} + ####################################################################### meta_data() { cat < 1.0 This is a ClusterMon Resource Agent. It outputs current cluster status to the html. Runs crm_mon in the background, recording the cluster status to an HTML file The user we want to run crm_mon as The user we want to run crm_mon as How frequently should we update the cluster status (in milliseconds). For compatibility with old documentation, values less than 1000 will be treated as seconds. Update interval in milliseconds Additional options to pass to crm_mon. Eg. -n -r Extra options PID file location to ensure only one instance is running PID file Location to write HTML output to. HTML output END } ####################################################################### ClusterMon_usage() { cat </dev/null | \ grep -qE "[c]rm_mon.*${OCF_RESKEY_pidfile}" case $? in 0) exit $OCF_SUCCESS;; 1) exit $OCF_NOT_RUNNING;; *) exit $OCF_ERR_GENERIC;; esac fi fi exit $OCF_NOT_RUNNING } CheckOptions() { while getopts Vi:nrh:cdp: OPTION do case "$OPTION" in V|n|r|c|d);; i) ocf_log warn "You should not have specified the -i option, since OCF_RESKEY_update is set already!";; h) ocf_log warn "You should not have specified the -h option, since OCF_RESKEY_htmlfile is set already!";; p) ocf_log warn "You should not have specified the -p option, since OCF_RESKEY_pidfile is set already!";; *) return $OCF_ERR_ARGS;; esac done if [ $? -ne 0 ]; then return $OCF_ERR_ARGS fi # We should have eaten all options at this stage shift $(($OPTIND -1)) if [ $# -gt 0 ]; then false else true fi } ClusterMon_validate() { # Existence of the user if [ -n "$OCF_RESKEY_user" ]; then getent passwd "$OCF_RESKEY_user" >/dev/null if [ $? -eq 0 ]; then : Yes, user exists. We can further check his permission on crm_mon if necessary else ocf_log err "The user $OCF_RESKEY_user does not exist!" exit $OCF_ERR_ARGS fi fi # Pidfile better be an absolute path case "$OCF_RESKEY_pidfile" in /*) ;; *) ocf_log warn "You should have pidfile($OCF_RESKEY_pidfile) of absolute path!" ;; esac # Check the update interval if ocf_is_decimal "$OCF_RESKEY_update" && [ $OCF_RESKEY_update -gt 0 ]; then : else ocf_log err "Invalid update interval $OCF_RESKEY_update. It should be positive integer!" exit $OCF_ERR_ARGS fi if CheckOptions $OCF_RESKEY_extra_options; then : else ocf_log err "Invalid options $OCF_RESKEY_extra_options!" exit $OCF_ERR_ARGS fi # Htmlfile better be an absolute path case "$OCF_RESKEY_htmlfile" in /*) ;; *) ocf_log warn "You should have htmlfile($OCF_RESKEY_htmlfile) of absolute path!" 
;; esac echo "Validate OK" return $OCF_SUCCESS } if [ $# -ne 1 ]; then ClusterMon_usage exit $OCF_ERR_ARGS fi -: ${OCF_RESKEY_update:="15000"} -: ${OCF_RESKEY_pidfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"} -: ${OCF_RESKEY_htmlfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"} - if [ ${OCF_RESKEY_update} -ge 1000 ]; then OCF_RESKEY_update=$(( $OCF_RESKEY_update / 1000 )) fi CMON_CMD="${HA_SBIN_DIR}/crm_mon -p \"$OCF_RESKEY_pidfile\" -d -i $OCF_RESKEY_update $OCF_RESKEY_extra_options -h \"$OCF_RESKEY_htmlfile\"" case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) ClusterMon_start ;; stop) ClusterMon_stop ;; monitor) ClusterMon_monitor ;; validate-all) ClusterMon_validate ;; usage|help) ClusterMon_usage exit $OCF_SUCCESS ;; *) ClusterMon_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? # vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/extra/resources/Dummy b/extra/resources/Dummy index ae58d77bc5..096e12ed70 100755 --- a/extra/resources/Dummy +++ b/extra/resources/Dummy @@ -1,317 +1,324 @@ #!/bin/sh # # ocf:pacemaker:Dummy resource agent # # Original copyright 2004 SUSE LINUX AG, Lars Marowsky-Bre # Later changes copyright 2008-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # The Dummy agent is intended primarily for testing, and has various options to # make actions intentionally fail or take a long time. It may also be used as a # template for resource agent writers, in which case: # # - Replace all occurrences of "dummy" and "Dummy" with your agent name. # - Update the meta-data appropriately for your agent, such as the description # and supported options. Pay particular attention to the timeouts specified in # the actions section; they should be meaningful for the kind of service the # agent manages. They should be the minimum advised timeouts, but shouldn't # try to cover _all_ possible instances. So, try to be neither overly generous # nor too stingy, but moderate. The minimum timeouts should never be below 10 # seconds. # - Don't copy the stuff here that is just for testing, such as the # sigterm_handler() or dump_env(). # - You don't need the state file stuff here if you have a better way of # determining whether your service is running. It's only useful for agents # such as health agents that don't actually correspond to a running service. # - Implement the actions appropriately for your service. Your monitor action # must differentiate correctly between running, not running, and failed (that # is THREE states, not just yes/no). The migrate_to, migrate_from, and reload # actions are optional and not appropriate to all services. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . 
"${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} +# Explicitly list all environment variables used, to make static analysis happy +: ${OCF_RESKEY_fake:="dummy"} +: ${OCF_RESKEY_op_sleep:=0} +: ${OCF_RESKEY_CRM_meta_interval:=0} +: ${OCF_RESKEY_CRM_meta_globally_unique:="false"} +: ${OCF_RESKEY_envfile:=""} +: ${OCF_RESKEY_fail_start_on:=""} +: ${OCF_RESKEY_migrate_source:=""} +: ${OCF_RESKEY_migrate_target:=""} +: ${OCF_RESKEY_envfile:=""} +: ${OCF_RESKEY_state:=""} + ####################################################################### meta_data() { cat < 1.1 This is a dummy OCF resource agent. It does absolutely nothing except keep track of whether it is running or not, and can be configured so that actions fail or take a long time. Its purpose is primarily for testing, and to serve as a template for resource agent writers. Example stateless resource agent Location to store the resource state in. State file Fake password field Password Fake attribute that can be changed to cause an agent reload Fake attribute that can be changed to cause an agent reload Number of seconds to sleep during operations. This can be used to test how the cluster reacts to operation timeouts. Operation sleep duration in seconds. Start, migrate_from, and reload-agent actions will return failure if running on the host specified here, but the resource will run successfully anyway (future monitor calls will find it running). This can be used to test on-fail=ignore. Report bogus start failure on specified host If this is set, the environment will be dumped to this file for every call. Environment dump file END } ####################################################################### # don't exit on TERM, to test that pacemaker-execd makes sure that we do exit trap sigterm_handler TERM sigterm_handler() { ocf_log info "They use TERM to bring us down. No such luck." # Since we're likely going to get KILLed, clean up any monitor # serialization in progress, so the next probe doesn't return an error. rm -f "${VERIFY_SERIALIZED_FILE}" return } dummy_usage() { cat <> "${OCF_RESKEY_envfile}" fi } dummy_start() { dummy_monitor DS_RETVAL=$? if [ $DS_RETVAL -eq $OCF_SUCCESS ]; then if [ "$(uname -n)" = "${OCF_RESKEY_fail_start_on}" ]; then DS_RETVAL=$OCF_ERR_GENERIC fi return $DS_RETVAL fi touch "${OCF_RESKEY_state}" DS_RETVAL=$? if [ "$(uname -n)" = "${OCF_RESKEY_fail_start_on}" ]; then DS_RETVAL=$OCF_ERR_GENERIC fi return $DS_RETVAL } dummy_stop() { dummy_monitor --force if [ $? -eq $OCF_SUCCESS ]; then rm "${OCF_RESKEY_state}" fi rm -f "${VERIFY_SERIALIZED_FILE}" return $OCF_SUCCESS } dummy_monitor() { if [ $OCF_RESKEY_op_sleep -ne 0 ]; then if [ "$1" = "" ] && [ -f "${VERIFY_SERIALIZED_FILE}" ]; then # two monitor ops have occurred at the same time. # This verifies a condition in pacemaker-execd regression tests. 
ocf_log err "$VERIFY_SERIALIZED_FILE exists already" ocf_exit_reason "alternate universe collision" return $OCF_ERR_GENERIC fi touch "${VERIFY_SERIALIZED_FILE}" sleep ${OCF_RESKEY_op_sleep} rm "${VERIFY_SERIALIZED_FILE}" fi if [ -f "${OCF_RESKEY_state}" ]; then # Multiple monitor levels are defined to support various tests case "$OCF_CHECK_LEVEL" in 10) # monitor level with delay, useful for testing timeouts sleep 30 ;; 20) # monitor level that fails intermittently n=$(expr "$(dd if=/dev/urandom bs=1 count=1 2>/dev/null | od | head -1 | cut -f2 -d' ')" % 5) if [ $n -eq 1 ]; then ocf_exit_reason "smoke detected near CPU fan" return $OCF_ERR_GENERIC fi ;; 30) # monitor level that always fails ocf_exit_reason "hyperdrive quota reached" return $OCF_ERR_GENERIC ;; 40) # monitor level that returns error code from state file rc=$(cat ${OCF_RESKEY_state}) [ -n "$rc" ] && ocf_exit_reason "CPU ejected. Observed leaving the Kronosnet galaxy at $rc times the speed of light." && return $rc ;; *) ;; esac return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } dummy_validate() { # If specified, is op_sleep an integer? case "$OCF_RESKEY_op_sleep" in ""|*[0-9]*) ;; *) return $OCF_ERR_CONFIGURED ;; esac # Host-specific checks if [ "$OCF_CHECK_LEVEL" = "10" ]; then # Is the state directory writable? state_dir=$(dirname "$OCF_RESKEY_state") [ -d "$state_dir" ] && [ -w "$state_dir" ] && [ -x "$state_dir" ] if [ $? -ne 0 ]; then return $OCF_ERR_ARGS fi # If specified, is the environment file directory writable? if [ -n "$OCF_RESKEY_envfile" ]; then envfile_dir=$(dirname "$OCF_RESKEY_envfile") [ -d "$envfile_dir" ] && [ -w "$envfile_dir" ] && [ -x "$envfile_dir" ] if [ $? -ne 0 ]; then return $OCF_ERR_ARGS fi fi fi return $OCF_SUCCESS } -: ${OCF_RESKEY_fake:="dummy"} -: ${OCF_RESKEY_op_sleep:=0} -: ${OCF_RESKEY_CRM_meta_interval:=0} -: ${OCF_RESKEY_CRM_meta_globally_unique:="false"} - if [ -z "$OCF_RESKEY_state" ]; then OCF_RESKEY_state="${HA_VARRUN%%/}/Dummy-${OCF_RESOURCE_INSTANCE}.state" if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then # Strip off the trailing clone marker (note + is not portable in sed) OCF_RESKEY_state=$(echo $OCF_RESKEY_state | sed s/:[0-9][0-9]*\.state/.state/) fi fi VERIFY_SERIALIZED_FILE="${OCF_RESKEY_state}.serialized" dump_env case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) dummy_start;; stop) dummy_stop;; monitor) dummy_monitor;; migrate_to) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}." dummy_stop ;; migrate_from) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}." dummy_start ;; reload) ocf_log debug "Reloading $OCF_RESOURCE_INSTANCE (service)" exit $OCF_SUCCESS ;; reload-agent) ocf_log err "Reloading $OCF_RESOURCE_INSTANCE (agent)" dummy_start ;; validate-all) dummy_validate;; usage|help) dummy_usage exit $OCF_SUCCESS ;; *) dummy_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? 
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc # vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/extra/resources/HealthSMART.in b/extra/resources/HealthSMART.in index f8fc9d08a3..efc7ebc764 100755 --- a/extra/resources/HealthSMART.in +++ b/extra/resources/HealthSMART.in @@ -1,317 +1,325 @@ #!@BASH_PATH@ # # ocf:pacemaker:HealthSMART resource agent # -# Copyright 2009-2019 the Pacemaker project contributors +# Copyright 2009-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # # Checks the S.M.A.R.T. status of all given drives and writes the #health-smart # status into the CIB # ####################################################################### ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} # SMARTCTL=/usr/sbin/smartctl ATTRDUP=/usr/sbin/attrd_updater +# Explicitly list all environment variables used, to make static analysis happy +: ${OCF_RESKEY_CRM_meta_interval:=0} +: ${OCF_RESKEY_CRM_meta_globally_unique:="true"} +: ${OCF_RESKEY_temp_warning:=""} +: ${OCF_RESKEY_temp_lower_limit:=""} +: ${OCF_RESKEY_temp_upper_limit:=""} +: ${OCF_RESKEY_drives:=""} +: ${OCF_RESKEY_devices:=""} +: ${OCF_RESKEY_state:=""} + ####################################################################### meta_data() { cat < 1.0 System health agent that checks the S.M.A.R.T. status of the given drives and updates the #health-smart attribute. SMART health status Location to store the resource state in. State file The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda". Drives to check The device type(s) to assume for the drive(s) being tested as a SPACE separated list. Device types Lower limit of the temperature in deg C of the drive(s). Below this limit the status will be red. Lower limit for the red smart attribute Upper limit of the temperature if deg C of the drives(s). If the drive reports a temperature higher than this value the status of #health-smart will be red. Upper limit for red smart attribute Number of deg C below/above the upper/lower temp limits at which point the status of #health-smart will change to yellow. 
Deg C below/above the upper limits for yellow smart attribute END } ####################################################################### check_temperature() { if [ $1 -lt ${lower_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too cold: ${1} C" "$ATTRDUP" -n "#health-smart" -U "red" -d "5s" return 1 fi if [ $1 -gt ${upper_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too hot: ${1} C" "$ATTRDUP" -n "#health-smart" -U "red" -d "5s" return 1 fi if [ $1 -lt ${lower_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite cold: ${1} C" "$ATTRDUP" -n "#health-smart" -U "yellow" -d "5s" return 1 fi if [ $1 -gt ${upper_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite hot: ${1} C" "$ATTRDUP" -n "#health-smart" -U "yellow" -d "5s" return 1 fi } init_smart() { #Set temperature defaults if [ -z "${OCF_RESKEY_temp_warning}" ]; then yellow_threshold=5 else yellow_threshold=${OCF_RESKEY_temp_warning} fi if [ -z "${OCF_RESKEY_temp_lower_limit}" ] ; then lower_red_limit=0 else lower_red_limit=${OCF_RESKEY_temp_lower_limit} fi lower_yellow_limit=$((${lower_red_limit}+${yellow_threshold})) if [ -z "${OCF_RESKEY_temp_upper_limit}" ] ; then upper_red_limit=60 else upper_red_limit=${OCF_RESKEY_temp_upper_limit} fi upper_yellow_limit=$((${upper_red_limit}-${yellow_threshold})) #Set disk defaults if [ -z "${OCF_RESKEY_drives}" ] ; then DRIVES="/dev/sda" else DRIVES=${OCF_RESKEY_drives} fi #Test for presence of smartctl if [ ! -x "$SMARTCTL" ] ; then ocf_log err "${SMARTCTL} not installed." exit $OCF_ERR_INSTALLED fi for DRIVE in $DRIVES; do if [ -n "${OCF_RESKEY_devices}" ]; then for DEVICE in ${OCF_RESKEY_devices}; do "$SMARTCTL" -d "$DEVICE" -i "${DRIVE}" | grep -q "SMART support is: Enabled" if [ $? -ne 0 ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi done else "$SMARTCTL" -i "${DRIVE}" | grep -q "SMART support is: Enabled" if [ $? -ne 0 ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi fi done } HealthSMART_usage() { cat < -1.0 +1.1 -This is an example resource agent that implements two states +This is an example resource agent that implements Promoted and Unpromoted roles Example stateful resource agent - + Location to store the resource state in State file - + If this is set, the environment will be dumped to this file for every call. Environment dump file - + The notify action will sleep for this many seconds before returning, to simulate a long-running notify. Notify delay in seconds + END exit $OCF_SUCCESS } ####################################################################### stateful_usage() { cat < + +where is one of: meta-data validate-all start stop monitor + promote demote notify reload-agent -Expects to have a fully populated OCF RA-compliant environment set. +This conforms to the OCF Resource Agent API version 1.1, and expects +to have OCF-compliant environment variables provided. END exit $1 } stateful_update() { echo $1 > "${OCF_RESKEY_state}" } stateful_check_state() { target="$1" if [ -f "${OCF_RESKEY_state}" ]; then state=$(cat "${OCF_RESKEY_state}") if [ "$target" = "$state" ]; then return 0 fi else if [ -z "$target" ]; then return 0 fi fi return 1 } dump_env() { if [ "${OCF_RESKEY_envfile}" != "" ]; then echo "### ${__OCF_ACTION} @ $(date) ### $(env | sort) ###" >> "${OCF_RESKEY_envfile}" fi } set_promotion_score() { "${HA_SBIN_DIR}/crm_attribute" --promotion -v "$1" } stateful_start() { stateful_check_state Promoted if [ $? 
-eq 0 ]; then # CRM Error - Should never happen return $OCF_RUNNING_PROMOTED fi stateful_update Unpromoted set_promotion_score $SCORE_UNPROMOTED return 0 } stateful_demote() { stateful_check_state if [ $? -eq 0 ]; then # CRM Error - Should never happen return $OCF_NOT_RUNNING fi stateful_update Unpromoted set_promotion_score $SCORE_UNPROMOTED return 0 } stateful_promote() { stateful_check_state if [ $? -eq 0 ]; then return $OCF_NOT_RUNNING fi stateful_update Promoted set_promotion_score $SCORE_PROMOTED return 0 } stateful_stop() { "${HA_SBIN_DIR}/crm_attribute" --promotion -D stateful_check_state Promoted if [ $? -eq 0 ]; then # CRM Error - Should never happen return $OCF_RUNNING_PROMOTED fi if [ -f "${OCF_RESKEY_state}" ]; then rm "${OCF_RESKEY_state}" fi return 0 } stateful_monitor() { # for testing if [ -f "${OCF_RESKEY_state}.rc" ]; then rc=$(cat "${OCF_RESKEY_state}.rc") ocf_exit_reason "$rc GB redirected to /dev/null" exit $rc fi stateful_check_state Promoted if [ $? -eq 0 ]; then if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then # Restore the promotion score during probes set_promotion_score $SCORE_PROMOTED fi return $OCF_RUNNING_PROMOTED fi stateful_check_state Unpromoted if [ $? -eq 0 ]; then if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then # Restore the promotion score during probes set_promotion_score $SCORE_UNPROMOTED fi return $OCF_SUCCESS fi if [ -f "${OCF_RESKEY_state}" ]; then echo "File '${OCF_RESKEY_state}' exists but contains unexpected contents" cat "${OCF_RESKEY_state}" return $OCF_ERR_GENERIC fi return 7 } stateful_notify() { if [ "${OCF_RESKEY_notify_delay}" != "0" ]; then sleep "${OCF_RESKEY_notify_delay}" fi return $OCF_SUCCESS } stateful_validate() { exit $OCF_SUCCESS } -: ${OCF_RESKEY_CRM_meta_interval:=0} -: ${OCF_RESKEY_notify_delay:=0} -: ${OCF_RESKEY_CRM_meta_globally_unique:="false"} +stateful_reload_agent() { + return $OCF_SUCCESS +} if [ -z "$OCF_RESKEY_state" ]; then if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then state="${HA_VARRUN%%/}/Stateful-${OCF_RESOURCE_INSTANCE}.state" # Strip off the trailing clone marker OCF_RESKEY_state=$(echo $state | sed s/:[0-9][0-9]*\.state/.state/) else OCF_RESKEY_state="${HA_VARRUN%%/}/Stateful-${OCF_RESOURCE_INSTANCE}.state" fi fi dump_env case "$__OCF_ACTION" in meta-data) meta_data;; start) stateful_start;; promote) stateful_promote;; demote) stateful_demote;; notify) stateful_notify ;; stop) stateful_stop;; monitor) stateful_monitor;; validate-all) stateful_validate;; +reload-agent) stateful_reload_agent;; usage|help) stateful_usage $OCF_SUCCESS;; *) stateful_usage $OCF_ERR_UNIMPLEMENTED;; esac exit $? # vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/extra/resources/SysInfo.in b/extra/resources/SysInfo.in index 4780ac6082..6845bd03d2 100755 --- a/extra/resources/SysInfo.in +++ b/extra/resources/SysInfo.in @@ -1,375 +1,381 @@ #!@BASH_PATH@ # # ocf:pacemaker:SysInfo resource agent # # Original copyright 2004 SUSE LINUX AG, Lars Marowsky-Bre -# Later changes copyright 2008-2019 the Pacemaker project contributors +# Later changes copyright 2008-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. 
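The Stateful sample above records its role by writing Promoted or Unpromoted to the state file and mirrors that with a promotion score via crm_attribute --promotion, the same call the agent itself makes. A condensed sketch of that pattern for reference; the helper names promote_self/demote_self are illustrative only, while HA_SBIN_DIR, OCF_RESKEY_state, SCORE_PROMOTED and SCORE_UNPROMOTED are the variables the agent already uses:

    set_promotion_score() {
        # store this node's promotion score for the resource in the CIB
        "${HA_SBIN_DIR}/crm_attribute" --promotion -v "$1"
    }

    promote_self() {
        echo "Promoted" > "${OCF_RESKEY_state}"    # remember the role locally
        set_promotion_score "$SCORE_PROMOTED"      # advertise it to the scheduler
    }

    demote_self() {
        echo "Unpromoted" > "${OCF_RESKEY_state}"
        set_promotion_score "$SCORE_UNPROMOTED"
    }
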
# # # This agent records (in the CIB) various attributes of a node # ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} +# Explicitly list all environment variables used, to make static analysis happy +: ${OCF_RESKEY_CRM_meta_globally_unique:="false"} +: ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/SysInfo-${OCF_RESOURCE_INSTANCE}"} +: ${OCF_RESKEY_disk_unit:="G"} +: ${OCF_RESKEY_clone:="0"} +: ${OCF_RESKEY_disks:=""} +: ${OCF_RESKEY_delay:=""} +: ${OCF_RESKEY_mind_disk_free:=""} + ####################################################################### meta_data() { cat < 1.0 This is a SysInfo Resource Agent. It records (in the CIB) various attributes of a node Sample Linux output: arch: i686 os: Linux-2.4.26-gentoo-r14 free_swap: 1999 cpu_info: Intel(R) Celeron(R) CPU 2.40GHz cpu_speed: 4771.02 cpu_cores: 1 cpu_load: 0.00 ram_total: 513 ram_free: 117 root_free: 2.4 #health_disk: red Sample Darwin output: arch: i386 os: Darwin-8.6.2 cpu_info: Intel Core Duo cpu_speed: 2.16 cpu_cores: 2 cpu_load: 0.18 ram_total: 2016 ram_free: 787 root_free: 13 #health_disk: green Units: free_swap: MB ram_*: MB cpu_speed (Linux): bogomips cpu_speed (Darwin): GHz *_free: GB (or user-defined: disk_unit) SysInfo resource agent PID file PID file Interval to allow values to stabilize Dampening Delay Filesystems or Paths to be queried for free disk space as a SPACE separated list - e.g "/dev/sda1 /tmp". Results will be written to an attribute with leading slashes removed, and other slashes replaced with underscore, and the word 'free' appended - e.g for /dev/sda1 it would be 'dev_sda1_free'. Note: The root filesystem '/' is always queried to an attribute named 'root_free' List of Filesytems/Paths to query for free disk space Unit to report disk free space in. Can be one of: B, K, M, G, T, P (case-insensitive) Unit to report disk free space in The amount of free space required in monitored disks. If any of the monitored disks has less than this amount of free space, , with the node attribute "#health_disk" changing to "red", all resources will move away from the node. Set the node-health-strategy property appropriately for this to take effect. If the unit is not specified, it defaults to disk_unit. 
minimum disk free space required END } ####################################################################### UpdateStat() { name="$1"; shift value="$*" printf "%s:\t%s\n" "$name" "$value" if [ "$__OCF_ACTION" = "start" ] ; then "${HA_SBIN_DIR}/attrd_updater" ${OCF_RESKEY_delay} -S status -n $name -B "$value" else "${HA_SBIN_DIR}/attrd_updater" ${OCF_RESKEY_delay} -S status -n $name -v "$value" fi } SysInfoStats() { local DISK_STATUS="green" UpdateStat arch "$(uname -m)" UpdateStat os "$(uname -s)-$(uname -r)" case $(uname -s) in "Darwin") mem=$(top -l 1 | grep Mem: | awk '{print $10}') mem_used=$(top -l 1 | grep Mem: | awk '{print $8}') mem=$(SysInfo_mem_units "$mem") mem_used=$(SysInfo_mem_units "$mem_used") mem_total=$(expr $mem_used + $mem) cpu_type=$(system_profiler SPHardwareDataType | awk -F': ' '/^CPU Type/ {print $2; exit}') cpu_speed=$(system_profiler SPHardwareDataType | awk -F': ' '/^CPU Speed/ {print $2; exit}') cpu_cores=$(system_profiler SPHardwareDataType | awk -F': ' '/^Number Of/ {print $2; exit}') cpu_load=$(uptime | awk '{ print $10 }') ;; "FreeBSD") cpu_type=$(sysctl -in hw.model) cpu_speed=$(sysctl -in dev.cpu.0.freq) cpu_cores=$(sysctl -in hw.ncpu) cpu_load=$(sysctl -in vm.loadavg | awk '{ print $4 }') free_pages=$(sysctl -in vm.stats.vm.v_free_count) page_count=$(sysctl -in vm.stats.vm.v_page_count) page_size=$(sysctl -in vm.stats.vm.v_page_size) mem=$(expr $free_pages \* $page_size / 1024 / 1024)M mem_total=$(expr $page_count \* $page_size / 1024 / 1024)M ;; "Linux") if [ -f /proc/cpuinfo ]; then cpu_type=$(awk -F': ' '/model name/ {print $2; exit}' /proc/cpuinfo) cpu_speed=$(awk -F': ' '/bogomips/ {print $2; exit}' /proc/cpuinfo) cpu_cores=$(grep "^processor" /proc/cpuinfo | wc -l) fi cpu_load=$(uptime | awk '{ print $10 }') if [ -f /proc/meminfo ]; then # meminfo results are in kB mem=$(grep "SwapFree" /proc/meminfo | awk '{print $2"k"}') if [ -n "$mem" ]; then UpdateStat free_swap "$(SysInfo_mem_units "$mem")" fi mem=$(grep "Inactive" /proc/meminfo | awk '{print $2"k"}') mem_total=$(grep "MemTotal" /proc/meminfo | awk '{print $2"k"}') else mem=$(top -n 1 | grep Mem: | awk '{print $7}') fi ;; *) esac if [ -n "$cpu_type" ]; then UpdateStat cpu_info "$cpu_type" fi if [ -n "$cpu_speed" ]; then UpdateStat cpu_speed "$cpu_speed" fi if [ -n "$cpu_cores" ]; then UpdateStat cpu_cores "$cpu_cores" fi if [ -n "$cpu_load" ]; then UpdateStat cpu_load "$cpu_load" fi if [ -n "$mem" ]; then # Massage the memory values UpdateStat ram_total "$(SysInfo_mem_units "$mem_total")" UpdateStat ram_free "$(SysInfo_mem_units "$mem")" fi # Portability notes: # o tail: explicit "-n" not available in Solaris; instead simplify # 'tail -n ' to the equivalent 'tail -'. 
for disk in "/" ${OCF_RESKEY_disks}; do unset disk_free disk_label disk_free=$(df -h "${disk}" | tail -1 | awk '{print $4}') if [ -n "$disk_free" ]; then disk_label=$(echo $disk | sed -e 's#^/$#root#;s#^/*##;s#/#_#g') disk_free=$(SysInfo_hdd_units "$disk_free") UpdateStat "${disk_label}_free" $disk_free if [ -n "$MIN_FREE" ] && [ $disk_free -le $MIN_FREE ]; then DISK_STATUS="red" fi fi done UpdateStat "#health_disk" "$DISK_STATUS" } SysInfo_megabytes() { # Size in megabytes echo $1 | awk '{ n = $0; sub( /[0-9]+(.[0-9]+)?/, "" ); if ( $0 == "" ) { $0 = "G" }; # Do not change previous behavior `if ($0 == "G" || $0 == "") { n *= 1024 };` split( n, a, $0 ); n = a[1]; if ( /^[pP]i?[bB]?/ ) { n *= 1024 * 1024 * 1024 }; if ( /^[tT]i?[bB]?/ ) { n *= 1024 * 1024 }; if ( /^[gG]i?[bB]?/ ) { n *= 1024 }; if ( /^[mM]i?[bB]?/ ) { n *= 1 }; if ( /^[kK]i?[bB]?/ ) { n /= 1024 }; if ( /^[bB]i?/ ) { n /= 1024 * 1024 }; printf "%d\n", n }' # Intentionally round to an integer } SysInfo_mem_units() { mem="$1" if [ -z "$1" ]; then return fi mem=$(SysInfo_megabytes "$1") # Round to the next multiple of 50 r=$(($mem % 50)) if [ $r -ne 0 ]; then mem=$(($mem + 50 - $r)) fi echo $mem } SysInfo_hdd_units() { # Defauts to size in gigabytes case "$OCF_RESKEY_disk_unit" in [Pp]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024 / 1024));; [Tt]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024));; [Gg]) echo $(($(SysInfo_megabytes "$1") / 1024));; [Mm]) echo "$(SysInfo_megabytes "$1")" ;; [Kk]) echo $(($(SysInfo_megabytes "$1") * 1024));; [Bb]) echo $(($(SysInfo_megabytes "$1") * 1024 * 1024));; *) ocf_log err "Invalid value for disk_unit: $OCF_RESKEY_disk_unit" echo $(($(SysInfo_megabytes "$1") / 1024));; esac } SysInfo_usage() { cat < "$OCF_RESKEY_pidfile" SysInfoStats exit $OCF_SUCCESS } SysInfo_stop() { rm "$OCF_RESKEY_pidfile" exit $OCF_SUCCESS } SysInfo_monitor() { if [ -f "$OCF_RESKEY_pidfile" ]; then clone=$(cat "$OCF_RESKEY_pidfile") fi if [ -z "$clone" ]; then rm "$OCF_RESKEY_pidfile" exit $OCF_NOT_RUNNING elif [ "$clone" = "$OCF_RESKEY_clone" ]; then SysInfoStats exit $OCF_SUCCESS elif ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then SysInfoStats exit $OCF_SUCCESS fi exit $OCF_NOT_RUNNING } SysInfo_validate() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then SysInfo_usage exit $OCF_ERR_ARGS fi -: ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/SysInfo-${OCF_RESOURCE_INSTANCE}"} -: ${OCF_RESKEY_disk_unit:="G"} -: ${OCF_RESKEY_clone:="0"} if [ -n "${OCF_RESKEY_delay}" ]; then OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" else OCF_RESKEY_delay="-d 0" fi MIN_FREE="" if [ -n "$OCF_RESKEY_min_disk_free" ]; then ocf_is_decimal "$OCF_RESKEY_min_disk_free" && OCF_RESKEY_min_disk_free="$OCF_RESKEY_min_disk_free$OCF_RESKEY_disk_unit" MIN_FREE=$(SysInfo_hdd_units $OCF_RESKEY_min_disk_free) fi case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) SysInfo_start ;; stop) SysInfo_stop ;; monitor) SysInfo_monitor ;; validate-all) SysInfo_validate ;; usage|help) SysInfo_usage exit $OCF_SUCCESS ;; *) SysInfo_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/extra/resources/controld b/extra/resources/controld index ea74e80699..ed43326df9 100755 --- a/extra/resources/controld +++ b/extra/resources/controld @@ -1,288 +1,292 @@ #!/bin/sh # # ocf:pacemaker:controld resource agent # -# Copyright 2008-2019 the Pacemaker project contributors +# Copyright 2008-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # Manages the DLM controld process # ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} +# Explicitly list all environment variables used, to make static analysis happy +: ${OCF_RESKEY_CRM_meta_globally_unique:="false"} +: ${OCF_RESKEY_allow_stonith_disabled:="false"} +: ${OCF_RESKEY_sctp:="false"} +: ${OCF_RESOURCE_INSTANCE:=""} + +case "$OCF_RESOURCE_INSTANCE" in + *[gG][fF][sS]*) + : ${OCF_RESKEY_args=-g 0} + : ${OCF_RESKEY_daemon:=gfs_controld} + ;; + *[dD][lL][mM]*) + : ${OCF_RESKEY_args=-s 0} + : ${OCF_RESKEY_daemon:=dlm_controld} + ;; + *) + : ${OCF_RESKEY_args=-s 0} + : ${OCF_RESKEY_daemon:=dlm_controld} +esac + + ####################################################################### if [ -e "$OCF_ROOT/resource.d/heartbeat/controld" ]; then ocf_log info "Using heartbeat controld agent" "$OCF_ROOT/resource.d/heartbeat/controld" "$1" exit $? fi meta_data() { cat < 1.0 This Resource Agent can control the dlm_controld services needed by cluster-aware file systems. It assumes that dlm_controld is in your default PATH. In most cases, it should be run as an anonymous clone. DLM Agent for cluster file systems Any additional options to start the dlm_controld service with DLM Options The daemon to start - supports gfs_controld and dlm_controld The daemon to start Allow DLM start-up even if STONITH/fencing is disabled in the cluster. Setting this option to true will cause cluster malfunction and hangs on fail-over for DLM clients that require fencing (such as GFS2, OCFS2, and cLVM2). This option is advanced use only. Allow start-up even without STONITH/fencing END } ####################################################################### CONFIGFS_DIR="/sys/kernel/config" DLM_CONFIGFS_DIR="${CONFIGFS_DIR}/dlm" DLM_SYSFS_DIR="/sys/kernel/dlm" controld_usage() { cat <&1) if [ $? -eq 0 ]; then if [ -n "$CUL_TMP" ]; then ocf_log err "Uncontrolled lockspace exists, system must reboot. Executing suicide fencing" stonith_admin --reboot="$(crm_node -n)" --tag controld exit $OCF_ERR_GENERIC fi fi } controld_start() { controld_monitor; rc=$? case $rc in - $OCF_SUCCESS) return $OCF_SUCCESS;; - $OCF_NOT_RUNNING) ;; + "$OCF_SUCCESS") return $OCF_SUCCESS;; + "$OCF_NOT_RUNNING") ;; *) return $OCF_ERR_GENERIC;; esac # Ensure configfs is mounted if [ ! -e "$CONFIGFS_DIR" ]; then modprobe configfs if [ ! -e "$CONFIGFS_DIR" ]; then ocf_log err "$CONFIGFS_DIR not available" return $OCF_ERR_INSTALLED fi fi mount -t configfs | grep " $CONFIGFS_DIR " >/dev/null 2>/dev/null if [ $? -ne 0 ]; then mount -t configfs none "$CONFIGFS_DIR" fi # Ensure DLM is available if [ ! -e "$DLM_CONFIGFS_DIR" ]; then modprobe dlm if [ ! -e "$DLM_CONFIGFS_DIR" ]; then ocf_log err "$DLM_CONFIGFS_DIR not available" return $OCF_ERR_INSTALLED fi fi if ! 
ocf_is_true "$OCF_RESKEY_allow_stonith_disabled" && \ ! ocf_is_true "$(crm_attribute --type=crm_config --name=stonith-enabled --query --quiet --default=true)"; then ocf_log err "The cluster property stonith-enabled may not be deactivated to use the DLM" return $OCF_ERR_CONFIGURED fi "${OCF_RESKEY_daemon}" $OCF_RESKEY_args while true do sleep 1 controld_monitor; rc=$? case $rc in - $OCF_SUCCESS) + "$OCF_SUCCESS") CS_ADDR_LIST="$(cat "${DLM_CONFIGFS_DIR}"/cluster/comms/*/addr_list 2>/dev/null)" if [ $? -eq 0 ] && [ -n "$CS_ADDR_LIST" ]; then return $OCF_SUCCESS fi ;; - $OCF_NOT_RUNNING) + "$OCF_NOT_RUNNING") return $OCF_NOT_RUNNING ;; *) return $OCF_ERR_GENERIC ;; esac ocf_log debug "Waiting for ${OCF_RESKEY_daemon} to be ready" done } controld_stop() { controld_monitor; rc=$? if [ $rc -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi killall -TERM "${OCF_RESKEY_daemon}"; rc=$? if [ $rc -ne 0 ]; then return $OCF_ERR_GENERIC fi rc=$OCF_SUCCESS while [ $rc -eq $OCF_SUCCESS ]; do controld_monitor; rc=$? sleep 1 done if [ $rc -eq $OCF_NOT_RUNNING ]; then rc=$OCF_SUCCESS fi return $rc } controld_monitor() { killall -0 ${OCF_RESKEY_daemon} >/dev/null 2>&1 ; CM_RC=$? case $CM_RC in 0) smw=$(dlm_tool status -v | grep "stateful_merge_wait=" | cut -d= -f2) if [ -n "$smw" ] && [ $smw -eq 1 ]; then ocf_log err "DLM status is: stateful_merge_wait" CM_RC=$OCF_ERR_GENERIC elif [ -z "$smw" ] && dlm_tool ls | grep -q "wait fencing" && \ ! stonith_admin -H '*' --output-as xml | grep -q "extended-status=\"pending\""; then ocf_log err "DLM status is: wait fencing" CM_RC=$OCF_ERR_GENERIC else CM_RC=$OCF_SUCCESS fi ;; 1) CM_RC=$OCF_NOT_RUNNING;; *) CM_RC=$OCF_ERR_GENERIC;; esac # if the dlm is not successfully running, but # dlm lockspace bits are left over, we self must fence. if [ $CM_RC -ne $OCF_SUCCESS ]; then check_uncontrolled_locks fi return $CM_RC } controld_validate() { check_binary killall check_binary "${OCF_RESKEY_daemon}" case "${OCF_RESKEY_CRM_meta_globally_unique}" in yes|Yes|true|True|1) ocf_log err "$OCF_RESOURCE_INSTANCE must be configured with the globally_unique=false meta attribute" exit $OCF_ERR_CONFIGURED ;; esac [ -d /var/run/cluster ] || mkdir /var/run/cluster return $OCF_SUCCESS } -: ${OCF_RESKEY_sctp:="false"} -: ${OCF_RESKEY_CRM_meta_globally_unique:="false"} - -case "$OCF_RESOURCE_INSTANCE" in - *[gG][fF][sS]*) - : ${OCF_RESKEY_args=-g 0} - : ${OCF_RESKEY_daemon:=gfs_controld} - ;; - *[dD][lL][mM]*) - : ${OCF_RESKEY_args=-s 0} - : ${OCF_RESKEY_daemon:=dlm_controld} - ;; - *) - : ${OCF_RESKEY_args=-s 0} - : ${OCF_RESKEY_daemon:=dlm_controld} -esac - case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) controld_validate; controld_start;; stop) controld_stop;; monitor) controld_validate; controld_monitor;; validate-all) controld_validate;; usage|help) controld_usage exit $OCF_SUCCESS ;; *) controld_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? exit $rc # vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/extra/resources/ifspeed.in b/extra/resources/ifspeed.in index 4d8f93e1a9..78b1029890 100755 --- a/extra/resources/ifspeed.in +++ b/extra/resources/ifspeed.in @@ -1,550 +1,555 @@ #!@BASH_PATH@ # # ocf:pacemaker:ifspeed resource agent # -# Copyright 2011-2019 the Pacemaker project contributors +# Copyright 2011-2021 the Pacemaker project contributors # # The version control history for this file may have further details. 
# # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # # # OCF resource agent which monitors state of network interface and records it # as a node attribute in the CIB based on the sum of speeds of its active (up, # link detected, not blocked) underlying interfaces. # # Partially based on 'ping' RA by Andrew Beekhof # # Change on 2017 by Tomer Azran : # Add "ip" parameter to detect network interface name by ip address: # http://lists.clusterlabs.org/pipermail/users/2017-August/006224.html # # OCF instance parameters: # OCF_RESKEY_name: name of attribute to set in CIB # OCF_RESKEY_ip ip address to check # OCF_RESKEY_iface: network interface to monitor # OCF_RESKEY_bridge_ports: if not null and OCF_RESKEY_iface is a bridge, list of # bridge ports to consider. # Default is all ports which have designated_bridge=root_id # OCF_RESKEY_weight_base: Relative weight of 1Gbps. This can be used to tune # value of resulting CIB attribute. # # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} # If these aren't available, we can still show help, # which is all that is needed to build the man pages. [ -r "${OCF_FUNCTIONS}" ] && . "${OCF_FUNCTIONS}" [ -r "${OCF_FUNCTIONS_DIR}/findif.sh" ] && . "${OCF_FUNCTIONS_DIR}/findif.sh" : ${OCF_SUCCESS:=0} : ${__OCF_ACTION:=$1} FINDIF=findif # Defaults OCF_RESKEY_name_default="ifspeed" OCF_RESKEY_bridge_ports_default="detect" OCF_RESKEY_weight_base_default=1000 OCF_RESKEY_dampen_default=5 +# Explicitly list all environment variables used, to make static analysis happy : ${OCF_RESKEY_name:=${OCF_RESKEY_name_default}} : ${OCF_RESKEY_bridge_ports:=${OCF_RESKEY_bridge_ports_default}} : ${OCF_RESKEY_weight_base:=${OCF_RESKEY_weight_base_default}} : ${OCF_RESKEY_dampen:=${OCF_RESKEY_dampen_default}} +: ${OCF_RESKEY_iface:=""} +: ${OCF_RESKEY_ip:=""} +: ${OCF_RESKEY_debug:="false"} + meta_data() { cat < 1.0 Every time the monitor action is run, this resource agent records (in the CIB) (relative) speed of network interface it monitors. This RA can monitor physical interfaces, bonds, bridges, vlans and (hopefully) any combination of them. Examples: *) Bridge on top of one 10Gbps interface (eth2) and 802.3ad bonding (bond0) built on two 1Gbps interfaces (eth0 and eth1). *) Active-backup bonding built on top of one physical interface and one vlan on another interface. For STP-enabled bridges this RA tries to some-how guess network topology and by default looks only on ports which are connected to upstream switch. This can be overridden by 'bridge_ports' parameter. Active interfaces in this case are those in "forwarding" state. For balancing bonds this RA summs speeds of underlying "up" slave interfaces (and applies coefficient 0.8 to result). For non-balancing bonds ('active-backup' and probably 'broadcast'), only the speed of the currently active slave is used. Network interface speed monitor The name of the attribute to set. This is the name to be used in the constraints. Attribute name Network interface to monitor. Network interface Try to detect interface name by detecting the interface that holds the IP address. The IPv4 (dotted quad notation) or IPv6 address (colon hexadecimal notation) example IPv4 "192.168.1.1". example IPv6 "2001:db8:DC28:0:0:FC57:D4C8:1FFF". IPv4 or IPv6 address If not null and OCF_RESKEY_iface is a bridge, list of bridge ports to consider. Default is all ports which have designated_bridge=root_id. 
Bridge ports Relative weight of 1Gbps in interface speed. Can be used to tune how big attribute value will be. Weight of 1Gbps The time to wait (dampening) for further changes to occur. Dampening interval Log what have been done more verbosely. Verbose logging END } usage() { cat </dev/null) test -n "$MOUNTOUT" return $? } # # Unload a filesystem driver. # Be careful to notice if the driver is built-in and do nothing. # # 0 is success, 1 is error, 2 is already unloaded. # -unload_filesystem() -{ - if [ $# -ne 1 -o -z "$1" ] +unload_filesystem() { + if [ $# -ne 1 ] || [ -z "$1" ] then ocf_log err "unload_filesystem(): Missing an argument" return 1 fi FSNAME="$1" driver_filesystem "$FSNAME" || return 2 MODOUT=$(awk '$1 ~ /^'$FSNAME'$/{print $1,$3;exit}' < /proc/modules 2>/dev/null) if [ -z "$MODOUT" ]; then # The driver is built in, we can't unload it. return 0 fi case "$MODOUT" in - $FSNAME\ 0) + "$FSNAME 0") ;; - $FSNAME\ *) + "$FSNAME "*) # The driver is busy, leave it alone ocf_log err "Module $FSNAME is still in use" return 1 ;; *) ocf_log err "Invalid module parsing! " return 1 ;; esac modprobe -rs "$FSNAME" if [ $? -ne 0 ]; then ocf_log err "Unable to unload module: $FSNAME" return 1 fi return 0 } -status_daemon() -{ +status_daemon() { PID=$(pidof "$DAEMON") if [ -n "$PID" ]; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } -bringup_daemon() -{ +bringup_daemon() { if [ ! -e "$DAEMON" ]; then ocf_log err "Required binary not found: $DAEMON" return $OCF_ERR_INSTALLED fi "$DAEMON"; rc=$? if [ $rc -ne 0 ]; then ocf_log err "Could not start $DAEMON" return $OCF_ERR_GENERIC fi sleep 1 COUNT=0 rc=$OCF_NOT_RUNNING while [ $rc -eq $OCF_NOT_RUNNING ]; do COUNT=$(expr $COUNT + 1) if [ $COUNT -gt $OCF_RESKEY_daemon_timeout ]; then ocf_log err "$(basename $DAEMON) did not come up" return $OCF_ERR_GENERIC fi status_daemon; rc=$? sleep 1 done return $rc } -kill_daemon() -{ +kill_daemon() { status_daemon; rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi ocf_log info "Stopping $(basename "$DAEMON")" killproc "$DAEMON" while [ $rc -eq $OCF_NOT_RUNNING ]; do sleep 1 status_daemon; rc=$? done return $OCF_SUCCESS } # # Unload a module # 0 is success, 1 is error, 2 is not loaded # -unload_module() -{ - if [ $# -lt 1 -o -z "$1" ] +unload_module() { + if [ $# -lt 1 ] || [ -z "$1" ] then ocf_log err "unload_module(): Requires an argument" return 1 fi MODNAME="$1" MODOUT=$(awk '$1 ~ /^'$MODNAME'$/{print $1,$3;exit}' < /proc/modules 2>/dev/null) if [ -z "$MODOUT" ] then return 2 fi case "$MODOUT" in - $MODNAME\ 0) + "$MODNAME 0") ;; - $MODNAME\ *) + "$MODNAME "*) return 2 ;; *) ocf_log err "Invalid module parsing!" return 1 ;; esac modprobe -rs "$MODNAME" if [ $? -ne 0 ]; then ocf_log err "Unable to unload module \"$MODNAME\"" return 1 fi return 0 } o2cb_start() { o2cb_monitor; rc=$? if [ $rc -ne $OCF_NOT_RUNNING ]; then return $rc fi ocf_log info "Starting $OCF_RESOURCE_INSTANCE" if [ ! -e "$CLUSTER_STACK_FILE" ]; then modprobe -s ocfs2_stackglue if [ $? -ne 0 ]; then ocf_log err "Could not load ocfs2_stackglue" return $OCF_ERR_INSTALLED fi fi SP_OUT="$(awk '/^'user'$/{print; exit}' "$LOADED_PLUGINS_FILE" 2>/dev/null)" if [ -z "$SP_OUT" ] then modprobe -s ocfs2_stack_user if [ $? 
-ne 0 ]; then ocf_log err "Could not load ocfs2_stack_user" return $OCF_ERR_INSTALLED fi fi SP_OUT="$(awk '/^'user'$/{print; exit}' "$LOADED_PLUGINS_FILE" 2>/dev/null)" if [ -z "$SP_OUT" ]; then ocf_log err "Switch to userspace stack unsuccessful" return $OCF_ERR_INSTALLED fi if [ -f "$CLUSTER_STACK_FILE" ]; then echo "$OCF_RESKEY_stack" >"$CLUSTER_STACK_FILE" if [ $? -ne 0 ]; then ocf_log err "Userspace stack '$OCF_RESKEY_stack' not supported" return $OCF_ERR_INSTALLED fi else ocf_log err "Switch to userspace stack not supported" return $OCF_ERR_INSTALLED fi driver_filesystem ocfs2; rc=$? if [ $rc -ne 0 ]; then modprobe -s ocfs2 if [ $? -ne 0 ]; then ocf_log err "Unable to load ocfs2 module" return $OCF_ERR_INSTALLED fi fi bringup_daemon return $? } o2cb_stop() { o2cb_monitor; rc=$? case $rc in - $OCF_NOT_RUNNING) return $OCF_SUCCESS;; + "$OCF_NOT_RUNNING") return $OCF_SUCCESS;; esac ocf_log info "Stopping $OCF_RESOURCE_INSTANCE" kill_daemon if [ $? -ne 0 ]; then ocf_log err "Unable to unload modules: the cluster is still online" return $OCF_ERR_GENERIC fi unload_filesystem ocfs2 if [ $? -eq 1 ]; then ocf_log err "Unable to unload ocfs2 module" return $OCF_ERR_GENERIC fi # If we can't find the stack glue, we have nothing to do. [ ! -e "$LOADED_PLUGINS_FILE" ] && return $OCF_SUCCESS while read plugin do unload_module "ocfs2_stack_${plugin}" if [ $? -eq 1 ]; then ocf_log err "Unable to unload ocfs2_stack_${plugin}" return $OCF_ERR_GENERIC fi done <"$LOADED_PLUGINS_FILE" unload_module "ocfs2_stackglue" if [ $? -eq 1 ]; then ocf_log err "Unable to unload ocfs2_stackglue" return $OCF_ERR_GENERIC fi # Don't unmount configfs - it's always in use by libdlm } o2cb_monitor() { o2cb_validate # Assume that ocfs2_controld will terminate if any of the conditions below are met driver_filesystem configfs; rc=$? if [ $rc -ne 0 ]; then ocf_log info "configfs not loaded" return $OCF_NOT_RUNNING fi check_filesystem configfs "${OCF_RESKEY_configfs}"; rc=$? if [ $rc -ne 0 ]; then ocf_log info "configfs not mounted" return $OCF_NOT_RUNNING fi if [ ! -e "$LOADED_PLUGINS_FILE" ]; then ocf_log info "Stack glue driver not loaded" return $OCF_NOT_RUNNING fi grep user "$LOADED_PLUGINS_FILE" >/dev/null 2>&1; rc=$? if [ $rc -ne 0 ]; then ocf_log err "Wrong stack $(cat $LOADED_PLUGINS_FILE)" return $OCF_ERR_INSTALLED fi driver_filesystem ocfs2; rc=$? if [ $rc -ne 0 ]; then ocf_log info "ocfs2 not loaded" return $OCF_NOT_RUNNING fi status_daemon return $? } o2cb_usage() { echo "usage: $0 {start|stop|monitor|validate-all|meta-data}" echo " Expects to have a fully populated OCF RA-compliant environment set." echo " In particualr, a value for OCF_ROOT" } o2cb_validate() { check_binary ${DAEMON} case "${OCF_RESKEY_CRM_meta_globally_unique}" in yes|Yes|true|True|1) ocf_log err "$OCF_RESOURCE_INSTANCE must be configured with the globally_unique=false meta attribute" exit $OCF_ERR_CONFIGURED ;; esac return $OCF_SUCCESS } meta_data() { cat < 1.0 OCFS2 daemon resource agent This Resource Agent controls the userspace daemon needed by OCFS2. Location where sysfs is mounted Sysfs location Location where configfs is mounted Configfs location Which userspace stack to use. 
Known values: pcmk Userspace stack Number of seconds to allow the control daemon to come up Daemon Timeout END } case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) o2cb_start ;; stop) o2cb_stop ;; monitor) o2cb_monitor ;; validate-all) o2cb_validate ;; usage|help) o2cb_usage exit $OCF_SUCCESS ;; *) o2cb_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? # vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/extra/resources/ping b/extra/resources/ping index ab22e795e3..3cf8dfe7e0 100755 --- a/extra/resources/ping +++ b/extra/resources/ping @@ -1,418 +1,421 @@ #!/bin/sh # # ocf:pacemaker:ping resource agent # -# Copyright 2009-2019 the Pacemaker project contributors +# Copyright 2009-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} +# Explicitly list all environment variables used, to make static analysis happy +: ${OCF_RESKEY_CRM_meta_timeout:="20000"} +: ${OCF_RESKEY_CRM_meta_globally_unique:="false"} +: ${OCF_RESKEY_name:="pingd"} +: ${OCF_RESKEY_dampen:="5s"} +: ${OCF_RESKEY_attempts:="3"} +: ${OCF_RESKEY_multiplier:="1"} +: ${OCF_RESKEY_debug:="false"} +: ${OCF_RESKEY_failure_score:="0"} +: ${OCF_RESKEY_use_fping:="1"} +: ${OCF_RESKEY_host_list:=""} +: ${OCF_RESKEY_options:=""} +: ${OCF_RESKEY_timeout:=""} + ####################################################################### meta_data() { cat < 1.0 Every time the monitor action is run, this resource agent records (in the CIB) the current number of nodes the host can connect to using the system fping (preferred) or ping tool. node connectivity PID file PID file The time to wait (dampening) further changes occur Dampening interval The name of the attributes to set. This is the name to be used in the constraints. Attribute name The number by which to multiply the number of connected ping nodes by Value multiplier A space separated list of ping nodes to count. Host list Number of ping attempts, per host, before declaring it dead no. of ping attempts How long, in seconds, to wait before declaring a ping lost ping timeout in seconds A catch all for any other options that need to be passed to ping. Extra Options Resource is failed if the score is less than failure_score. Default never fails. failure_score Use fping rather than ping, if found. If set to 0, fping will not be used even if present. Use fping if available Enables to use default attrd_updater verbose logging on every call. Verbose logging END } ####################################################################### ping_conditional_log() { level="$1"; shift if [ "${OCF_RESKEY_debug}" = "true" ]; then ocf_log "$level" "$*" fi } ping_usage() { cat <&1); rc=$? 
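    # fping reports one "<host> is alive" line per reachable host; counting
    # those lines below yields the number of currently active ping peers.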
active=$(echo "$output" | grep "is alive" | wc -l) case $rc in 0) ;; 1) for h in $(echo "$output" | grep "is unreachable" | awk '{print $1}'); do ping_conditional_log warn "$h is inactive" done ;; *) ocf_log err "Unexpected result for '$cmd' $rc: $(echo "$output" | tr '\n' ';')" ;; esac return $active } ping_check() { active=0 for host in $OCF_RESKEY_host_list; do p_exe=ping case $(uname) in Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";; Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; FreeBSD) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; *) ocf_log err "Unknown host type: $(uname)"; exit $OCF_ERR_INSTALLED;; esac case "$host" in *:*) p_exe=ping6 esac p_out=$($p_exe $p_args $OCF_RESKEY_options $host 2>&1); rc=$? case $rc in 0) active=$(expr $active + 1);; 1) ping_conditional_log warn "$host is inactive: $p_out";; *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";; esac done return $active } ping_update() { if use_fping; then fping_check active=$? else ping_check active=$? fi score=$(expr $active \* $OCF_RESKEY_multiplier) if [ "$__OCF_ACTION" = "start" ] ; then attrd_updater -n "$OCF_RESKEY_name" -B "$score" -d "$OCF_RESKEY_dampen" $attrd_options else attrd_updater -n "$OCF_RESKEY_name" -v "$score" -d "$OCF_RESKEY_dampen" $attrd_options fi rc=$? case $rc in 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;; *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";; esac if [ $rc -ne 0 ]; then return $rc fi if [ -n "$OCF_RESKEY_failure_score" ] && [ "$score" -lt "$OCF_RESKEY_failure_score" ]; then ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)" return 1 fi return 0 } use_fping() { ocf_is_true "$OCF_RESKEY_use_fping" && have_binary fping; } # return values: # 4 IPv4 # 6 IPv6 # 0 indefinite (i.e. hostname) host_family() { case $1 in *[0-9].*[0-9].*[0-9].*[0-9]) return 4 ;; *:*) return 6 ;; *) return 0 ;; esac } # return values same as host_family plus # 99 ambiguous families hosts_family() { # For fping allow only same IP versions or hostnames family=0 for host in $OCF_RESKEY_host_list; do host_family "$host" f=$? 
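        # Track the address family seen so far (4 or 6); hostnames (0) are
        # neutral.  A mismatch between two hosts marks the whole list as
        # mixed (99), since fping requires a single address family.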
if [ $family -ne 0 ] && [ $f -ne 0 ] && [ $f -ne $family ] ; then family=99 break fi [ $f -ne 0 ] && family=$f done return $family } -: ${OCF_RESKEY_name:="pingd"} -: ${OCF_RESKEY_dampen:="5s"} -: ${OCF_RESKEY_attempts:="3"} -: ${OCF_RESKEY_multiplier:="1"} -: ${OCF_RESKEY_debug:="false"} -: ${OCF_RESKEY_failure_score:="0"} -: ${OCF_RESKEY_use_fping:="1"} - -: ${OCF_RESKEY_CRM_meta_timeout:="20000"} -: ${OCF_RESKEY_CRM_meta_globally_unique:="false"} - integer=$(echo ${OCF_RESKEY_timeout} | egrep -o '[0-9]*') case "${OCF_RESKEY_timeout}" in *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=$(expr $integer / 1000);; *[0-9]m|*[0-9]min) OCF_RESKEY_timeout=$(expr $integer \* 60);; *[0-9]h|*[0-9]hr) OCF_RESKEY_timeout=$(expr $integer \* 60 \* 60);; *) OCF_RESKEY_timeout=$integer;; esac if [ -z "${OCF_RESKEY_timeout}" ]; then if [ -n "$OCF_RESKEY_host_list" ]; then host_count=$(echo $OCF_RESKEY_host_list | awk '{print NF}') OCF_RESKEY_timeout=$(expr $OCF_RESKEY_CRM_meta_timeout / $host_count / $OCF_RESKEY_attempts) OCF_RESKEY_timeout=$(expr $OCF_RESKEY_timeout / 1100) # Convert to seconds and finish 10% early else OCF_RESKEY_timeout=5 fi fi if [ ${OCF_RESKEY_timeout} -lt 1 ]; then OCF_RESKEY_timeout=5 elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then # ping actually complains if this value is too high, 5 minutes is plenty OCF_RESKEY_timeout=300 fi if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/ping-${OCF_RESKEY_name}"} else : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/ping-${OCF_RESOURCE_INSTANCE}"} fi # Check the debug option case "${OCF_RESKEY_debug}" in true|True|TRUE|1) OCF_RESKEY_debug=true;; false|False|FALSE|0) OCF_RESKEY_debug=false;; *) ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}" OCF_RESKEY_debug=false ;; esac attrd_options='-q' if [ "${OCF_RESKEY_debug}" = "true" ]; then attrd_options='' fi case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) ping_start;; stop) ping_stop;; monitor) ping_monitor;; validate-all) ping_validate;; usage|help) ping_usage exit $OCF_SUCCESS ;; *) ping_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? # vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h index bc728e46d4..32496f3cbd 100644 --- a/include/crm/pengine/pe_types.h +++ b/include/crm/pengine/pe_types.h @@ -1,543 +1,544 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PE_TYPES__H # define PE_TYPES__H #ifdef __cplusplus extern "C" { #endif /*! 
* \file * \brief Data types for cluster status * \ingroup pengine */ # include // bool # include // time_t +# include // xmlNode # include // gboolean, guint, GList, GHashTable # include # include typedef struct pe_node_s pe_node_t; typedef struct pe_action_s pe_action_t; typedef struct pe_resource_s pe_resource_t; typedef struct pe_working_set_s pe_working_set_t; enum pe_obj_types { pe_unknown = -1, pe_native = 0, pe_group = 1, pe_clone = 2, pe_container = 3, }; typedef struct resource_object_functions_s { gboolean (*unpack) (pe_resource_t*, pe_working_set_t*); pe_resource_t *(*find_rsc) (pe_resource_t *parent, const char *search, const pe_node_t *node, int flags); /* parameter result must be free'd */ char *(*parameter) (pe_resource_t*, pe_node_t*, gboolean, const char*, pe_working_set_t*); //! \deprecated will be removed in a future release void (*print) (pe_resource_t*, const char*, long, void*); gboolean (*active) (pe_resource_t*, gboolean); enum rsc_role_e (*state) (const pe_resource_t*, gboolean); pe_node_t *(*location) (const pe_resource_t*, GList**, int); void (*free) (pe_resource_t*); void (*count) (pe_resource_t*); gboolean (*is_filtered) (pe_resource_t*, GList *, gboolean); } resource_object_functions_t; typedef struct resource_alloc_functions_s resource_alloc_functions_t; enum pe_quorum_policy { no_quorum_freeze, no_quorum_stop, no_quorum_ignore, no_quorum_suicide, no_quorum_demote }; enum node_type { node_ping, node_member, node_remote }; //! \deprecated will be removed in a future release enum pe_restart { pe_restart_restart, pe_restart_ignore }; //! Determine behavior of pe_find_resource_with_flags() enum pe_find { pe_find_renamed = 0x001, //!< match resource ID or LRM history ID pe_find_anon = 0x002, //!< match base name of anonymous clone instances pe_find_clone = 0x004, //!< match only clone instances pe_find_current = 0x008, //!< match resource active on specified node pe_find_inactive = 0x010, //!< match resource not running anywhere pe_find_any = 0x020, //!< match base name of any clone instance }; // @TODO Make these an enum # define pe_flag_have_quorum 0x00000001ULL # define pe_flag_symmetric_cluster 0x00000002ULL # define pe_flag_maintenance_mode 0x00000008ULL # define pe_flag_stonith_enabled 0x00000010ULL # define pe_flag_have_stonith_resource 0x00000020ULL # define pe_flag_enable_unfencing 0x00000040ULL # define pe_flag_concurrent_fencing 0x00000080ULL # define pe_flag_stop_rsc_orphans 0x00000100ULL # define pe_flag_stop_action_orphans 0x00000200ULL # define pe_flag_stop_everything 0x00000400ULL # define pe_flag_start_failure_fatal 0x00001000ULL //! \deprecated # define pe_flag_remove_after_stop 0x00002000ULL # define pe_flag_startup_fencing 0x00004000ULL # define pe_flag_shutdown_lock 0x00008000ULL # define pe_flag_startup_probes 0x00010000ULL # define pe_flag_have_status 0x00020000ULL # define pe_flag_have_remote_nodes 0x00040000ULL # define pe_flag_quick_location 0x00100000ULL # define pe_flag_sanitized 0x00200000ULL //! \deprecated # define pe_flag_stdout 0x00400000ULL //! Don't count total, disabled and blocked resource instances # define pe_flag_no_counts 0x00800000ULL /*! Skip deprecated code that is kept solely for backward API compatibility. * (Internal code should always set this.) 
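 *
 * A minimal illustration, assuming a pe_working_set_t *data_set as used
 * elsewhere in this API (direct flag manipulation is shown only for brevity):
 * \code
 *     data_set->flags |= pe_flag_no_compat;
 * \endcode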
*/ # define pe_flag_no_compat 0x01000000ULL # define pe_flag_show_scores 0x02000000ULL # define pe_flag_show_utilization 0x04000000ULL struct pe_working_set_s { xmlNode *input; crm_time_t *now; /* options extracted from the input */ char *dc_uuid; pe_node_t *dc_node; const char *stonith_action; const char *placement_strategy; unsigned long long flags; int stonith_timeout; enum pe_quorum_policy no_quorum_policy; GHashTable *config_hash; GHashTable *tickets; // Actions for which there can be only one (e.g. fence nodeX) GHashTable *singletons; GList *nodes; GList *resources; GList *placement_constraints; GList *ordering_constraints; GList *colocation_constraints; GList *ticket_constraints; GList *actions; xmlNode *failed; xmlNode *op_defaults; xmlNode *rsc_defaults; /* stats */ int num_synapse; int max_valid_nodes; //! Deprecated (will be removed in a future release) int order_id; int action_id; /* final output */ xmlNode *graph; GHashTable *template_rsc_sets; const char *localhost; GHashTable *tags; int blocked_resources; int disabled_resources; GList *param_check; // History entries that need to be checked GList *stop_needed; // Containers that need stop actions time_t recheck_by; // Hint to controller to re-run scheduler by this time int ninstances; // Total number of resource instances guint shutdown_lock;// How long (seconds) to lock resources to shutdown node int priority_fencing_delay; // Priority fencing delay void *priv; }; enum pe_check_parameters { /* Clear fail count if parameters changed for un-expired start or monitor * last_failure. */ pe_check_last_failure, /* Clear fail count if parameters changed for start, monitor, promote, or * migrate_from actions for active resources. */ pe_check_active, }; struct pe_node_shared_s { const char *id; const char *uname; enum node_type type; /* @TODO convert these flags into a bitfield */ gboolean online; gboolean standby; gboolean standby_onfail; gboolean pending; gboolean unclean; gboolean unseen; gboolean shutdown; gboolean expected_up; gboolean is_dc; gboolean maintenance; gboolean rsc_discovery_enabled; gboolean remote_requires_reset; gboolean remote_was_fenced; gboolean remote_maintenance; /* what the remote-rsc is thinking */ gboolean unpacked; int num_resources; pe_resource_t *remote_rsc; GList *running_rsc; /* pe_resource_t* */ GList *allocated_rsc; /* pe_resource_t* */ GHashTable *attrs; /* char* => char* */ GHashTable *utilization; GHashTable *digest_cache; //!< cache of calculated resource digests int priority; // calculated based on the priority of resources running on the node }; struct pe_node_s { int weight; gboolean fixed; int count; struct pe_node_shared_s *details; int rsc_discover_mode; }; # define pe_rsc_orphan 0x00000001ULL # define pe_rsc_managed 0x00000002ULL # define pe_rsc_block 0x00000004ULL # define pe_rsc_orphan_container_filler 0x00000008ULL # define pe_rsc_notify 0x00000010ULL # define pe_rsc_unique 0x00000020ULL # define pe_rsc_fence_device 0x00000040ULL # define pe_rsc_promotable 0x00000080ULL # define pe_rsc_provisional 0x00000100ULL # define pe_rsc_allocating 0x00000200ULL # define pe_rsc_merging 0x00000400ULL # define pe_rsc_stop 0x00001000ULL # define pe_rsc_reload 0x00002000ULL # define pe_rsc_allow_remote_remotes 0x00004000ULL # define pe_rsc_critical 0x00008000ULL # define pe_rsc_failed 0x00010000ULL # define pe_rsc_runnable 0x00040000ULL # define pe_rsc_start_pending 0x00080000ULL # define pe_rsc_starting 0x00100000ULL # define pe_rsc_stopping 0x00200000ULL # define pe_rsc_allow_migrate 0x00800000ULL 
# define pe_rsc_failure_ignored 0x01000000ULL # define pe_rsc_maintenance 0x04000000ULL # define pe_rsc_is_container 0x08000000ULL # define pe_rsc_needs_quorum 0x10000000ULL # define pe_rsc_needs_fencing 0x20000000ULL # define pe_rsc_needs_unfencing 0x40000000ULL enum pe_graph_flags { pe_graph_none = 0x00000, pe_graph_updated_first = 0x00001, pe_graph_updated_then = 0x00002, pe_graph_disable = 0x00004, }; /* *INDENT-OFF* */ enum pe_action_flags { pe_action_pseudo = 0x00001, pe_action_runnable = 0x00002, pe_action_optional = 0x00004, pe_action_print_always = 0x00008, pe_action_have_node_attrs = 0x00010, pe_action_implied_by_stonith = 0x00040, pe_action_migrate_runnable = 0x00080, pe_action_dumped = 0x00100, pe_action_processed = 0x00200, pe_action_clear = 0x00400, pe_action_dangle = 0x00800, /* This action requires one or more of its dependencies to be runnable. * We use this to clear the runnable flag before checking dependencies. */ pe_action_requires_any = 0x01000, pe_action_reschedule = 0x02000, pe_action_tracking = 0x04000, pe_action_dedup = 0x08000, //! Internal state tracking when creating graph pe_action_dc = 0x10000, //! Action may run on DC instead of target }; /* *INDENT-ON* */ struct pe_resource_s { char *id; char *clone_name; xmlNode *xml; xmlNode *orig_xml; xmlNode *ops_xml; pe_working_set_t *cluster; pe_resource_t *parent; enum pe_obj_types variant; void *variant_opaque; resource_object_functions_t *fns; resource_alloc_functions_t *cmds; enum rsc_recovery_type recovery_type; // @TODO only pe_restart_restart is of interest, so merge into flags enum pe_restart restart_type; //!< \deprecated will be removed in future release int priority; int stickiness; int sort_index; int failure_timeout; int migration_threshold; guint remote_reconnect_ms; char *pending_task; unsigned long long flags; // @TODO merge these into flags gboolean is_remote_node; gboolean exclusive_discover; //!@{ //! This field should be treated as internal to Pacemaker GList *rsc_cons_lhs; // List of pcmk__colocation_t* GList *rsc_cons; // List of pcmk__colocation_t* GList *rsc_location; // List of pe__location_t* GList *actions; // List of pe_action_t* GList *rsc_tickets; // List of rsc_ticket* //!@} pe_node_t *allocated_to; pe_node_t *partial_migration_target; pe_node_t *partial_migration_source; GList *running_on; /* pe_node_t* */ GHashTable *known_on; /* pe_node_t* */ GHashTable *allowed_nodes; /* pe_node_t* */ enum rsc_role_e role; enum rsc_role_e next_role; GHashTable *meta; GHashTable *parameters; //! \deprecated Use pe_rsc_params() instead GHashTable *utilization; GList *children; /* pe_resource_t* */ GList *dangling_migrations; /* pe_node_t* */ pe_resource_t *container; GList *fillers; pe_node_t *pending_node; // Node on which pending_task is happening pe_node_t *lock_node; // Resource is shutdown-locked to this node time_t lock_time; // When shutdown lock started /* Resource parameters may have node-attribute-based rules, which means the * values can vary by node. This table is a cache of parameter name/value * tables for each node (as needed). Use pe_rsc_params() to get the table * for a given node. 
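 *
 * A hedged lookup sketch; the exact pe_rsc_params() signature is assumed
 * here rather than declared in this header:
 * \code
 *     GHashTable *params = pe_rsc_params(rsc, node, data_set);
 *     const char *value = g_hash_table_lookup(params, "some-parameter");
 * \endcode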
*/ GHashTable *parameter_cache; // Key = node name, value = parameters table #if ENABLE_VERSIONED_ATTRS xmlNode *versioned_parameters; #endif }; #if ENABLE_VERSIONED_ATTRS // Used as action->action_details if action->rsc is not NULL typedef struct pe_rsc_action_details_s { xmlNode *versioned_parameters; xmlNode *versioned_meta; } pe_rsc_action_details_t; #endif struct pe_action_s { int id; int priority; pe_resource_t *rsc; pe_node_t *node; xmlNode *op_entry; char *task; char *uuid; char *cancel_task; char *reason; enum pe_action_flags flags; enum rsc_start_requirement needs; enum action_fail_response on_fail; enum rsc_role_e fail_role; GHashTable *meta; GHashTable *extra; /* * These two varables are associated with the constraint logic * that involves first having one or more actions runnable before * then allowing this action to execute. * * These varables are used with features such as 'clone-min' which * requires at minimum X number of cloned instances to be running * before an order dependency can run. Another option that uses * this is 'require-all=false' in ordering constrants. This option * says "only require one instance of a resource to start before * allowing dependencies to start" -- basically, require-all=false is * the same as clone-min=1. */ /* current number of known runnable actions in the before list. */ int runnable_before; /* the number of "before" runnable actions required for this action * to be considered runnable */ int required_runnable_before; GList *actions_before; /* pe_action_wrapper_t* */ GList *actions_after; /* pe_action_wrapper_t* */ /* Some of the above fields could be moved to the details, * except for API backward compatibility. */ void *action_details; // varies by type of action }; typedef struct pe_ticket_s { char *id; gboolean granted; time_t last_granted; gboolean standby; GHashTable *state; } pe_ticket_t; typedef struct pe_tag_s { char *id; GList *refs; } pe_tag_t; //! Internal tracking for transition graph creation enum pe_link_state { pe_link_not_dumped, //! Internal tracking for transition graph creation pe_link_dumped, //! Internal tracking for transition graph creation pe_link_dup, //! \deprecated No longer used by Pacemaker }; enum pe_discover_e { pe_discover_always = 0, pe_discover_never, pe_discover_exclusive, }; /* *INDENT-OFF* */ enum pe_ordering { pe_order_none = 0x0, /* deleted */ pe_order_optional = 0x1, /* pure ordering, nothing implied */ pe_order_apply_first_non_migratable = 0x2, /* Only apply this constraint's ordering if first is not migratable. */ pe_order_implies_first = 0x10, /* If 'then' is required, ensure 'first' is too */ pe_order_implies_then = 0x20, /* If 'first' is required, ensure 'then' is too */ pe_order_promoted_implies_first = 0x40, /* If 'then' is required and then's rsc is promoted, ensure 'first' becomes required too */ /* first requires then to be both runnable and migrate runnable. */ pe_order_implies_first_migratable = 0x80, pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */ pe_order_pseudo_left = 0x200, /* 'then' can only be pseudo if 'first' is runnable */ pe_order_implies_then_on_node = 0x400, /* If 'first' is required on 'nodeX', * ensure instances of 'then' on 'nodeX' are too. 
* Only really useful if 'then' is a clone and 'first' is not */ pe_order_probe = 0x800, /* If 'first->rsc' is * - running but about to stop, ignore the constraint * - otherwise, behave as runnable_left */ pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */ pe_order_stonith_stop = 0x2000, /* only applies if the action is non-pseudo */ pe_order_serialize_only = 0x4000, /* serialize */ pe_order_same_node = 0x8000, /* applies only if 'first' and 'then' are on same node */ pe_order_implies_first_printed = 0x10000, /* Like ..implies_first but only ensures 'first' is printed, not mandatory */ pe_order_implies_then_printed = 0x20000, /* Like ..implies_then but only ensures 'then' is printed, not mandatory */ pe_order_asymmetrical = 0x100000, /* Indicates asymmetrical one way ordering constraint. */ pe_order_load = 0x200000, /* Only relevant if... */ pe_order_one_or_more = 0x400000, /* 'then' is runnable only if one or more of its dependencies are too */ pe_order_anti_colocation = 0x800000, pe_order_preserve = 0x1000000, /* Hack for breaking user ordering constraints with container resources */ pe_order_then_cancels_first = 0x2000000, // if 'then' becomes required, 'first' becomes optional pe_order_trace = 0x4000000, /* test marker */ #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) // \deprecated Use pe_order_promoted_implies_first instead pe_order_implies_first_master = pe_order_promoted_implies_first, #endif }; /* *INDENT-ON* */ typedef struct pe_action_wrapper_s { enum pe_ordering type; enum pe_link_state state; pe_action_t *action; } pe_action_wrapper_t; #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) #include #endif #ifdef __cplusplus } #endif #endif // PE_TYPES__H diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index 1201aa701d..648d5e2fd7 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -1,105 +1,106 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PENGINE_STATUS__H # define PENGINE_STATUS__H #ifdef __cplusplus extern "C" { #endif /*! * \file * \brief Cluster status and scheduling * \ingroup pengine */ # include // gboolean # include // bool +# include // pcmk_is_set() # include # include # include // pe_node_t, pe_resource_t, etc. # include const char *rsc_printable_id(pe_resource_t *rsc); gboolean cluster_status(pe_working_set_t * data_set); pe_working_set_t *pe_new_working_set(void); void pe_free_working_set(pe_working_set_t *data_set); void set_working_set_defaults(pe_working_set_t * data_set); void cleanup_calculations(pe_working_set_t * data_set); void pe_reset_working_set(pe_working_set_t *data_set); pe_resource_t *pe_find_resource(GList *rsc_list, const char *id_rh); pe_resource_t *pe_find_resource_with_flags(GList *rsc_list, const char *id, enum pe_find flags); pe_node_t *pe_find_node(GList *node_list, const char *uname); pe_node_t *pe_find_node_id(GList *node_list, const char *id); pe_node_t *pe_find_node_any(GList *node_list, const char *id, const char *uname); GList *find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set); int pe_bundle_replicas(const pe_resource_t *rsc); #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t *pe_rsc_action_details(pe_action_t *action); #endif /*! 
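 * A hedged illustration of the clone helpers declared below (the rsc
 * variable is a placeholder):
 * \code
 *     if (pe_rsc_is_clone(rsc) && !pe_rsc_is_unique_clone(rsc)) {
 *         // same condition that pe_rsc_is_anon_clone(rsc) encapsulates
 *     }
 * \endcode
 *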
* \brief Check whether a resource is any clone type * * \param[in] rsc Resource to check * * \return TRUE if resource is clone, FALSE otherwise */ static inline bool pe_rsc_is_clone(pe_resource_t *rsc) { return rsc && (rsc->variant == pe_clone); } /*! * \brief Check whether a resource is a globally unique clone * * \param[in] rsc Resource to check * * \return TRUE if resource is unique clone, FALSE otherwise */ static inline bool pe_rsc_is_unique_clone(pe_resource_t *rsc) { return pe_rsc_is_clone(rsc) && pcmk_is_set(rsc->flags, pe_rsc_unique); } /*! * \brief Check whether a resource is an anonymous clone * * \param[in] rsc Resource to check * * \return TRUE if resource is anonymous clone, FALSE otherwise */ static inline bool pe_rsc_is_anon_clone(pe_resource_t *rsc) { return pe_rsc_is_clone(rsc) && !pcmk_is_set(rsc->flags, pe_rsc_unique); } /*! * \brief Check whether a resource is part of a bundle * * \param[in] rsc Resource to check * * \return TRUE if resource is part of a bundle, FALSE otherwise */ static inline bool pe_rsc_is_bundled(pe_resource_t *rsc) { return uber_parent(rsc)->parent != NULL; } #ifdef __cplusplus } #endif #endif diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c index f9429b6b56..fa1702c8df 100644 --- a/lib/cluster/cpg.c +++ b/lib/cluster/cpg.c @@ -1,1066 +1,1075 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* PCMK__SPECIAL_PID* */ #include "crmcluster_private.h" /* @TODO Once we can update the public API to require crm_cluster_t* in more * functions, we can ditch this in favor of cluster->cpg_handle. */ static cpg_handle_t pcmk_cpg_handle = 0; // @TODO These could be moved to crm_cluster_t* at that time as well static bool cpg_evicted = false; static GList *cs_message_queue = NULL; static int cs_message_timer = 0; struct pcmk__cpg_host_s { uint32_t id; uint32_t pid; gboolean local; enum crm_ais_msg_types type; uint32_t size; char uname[MAX_NAME]; } __attribute__ ((packed)); typedef struct pcmk__cpg_host_s pcmk__cpg_host_t; struct pcmk__cpg_msg_s { struct qb_ipc_response_header header __attribute__ ((aligned(8))); uint32_t id; gboolean is_compressed; pcmk__cpg_host_t host; pcmk__cpg_host_t sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__ ((packed)); typedef struct pcmk__cpg_msg_s pcmk__cpg_msg_t; static void crm_cs_flush(gpointer data); #define msg_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) #define cs_repeat(rc, counter, max, code) do { \ rc = code; \ if ((rc == CS_ERR_TRY_AGAIN) || (rc == CS_ERR_QUEUE_FULL)) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while (counter < max) /*! * \brief Disconnect from Corosync CPG * * \param[in] Cluster to disconnect */ void cluster_disconnect_cpg(crm_cluster_t *cluster) { pcmk_cpg_handle = 0; if (cluster->cpg_handle) { crm_trace("Disconnecting CPG"); cpg_leave(cluster->cpg_handle, &cluster->group); cpg_finalize(cluster->cpg_handle); cluster->cpg_handle = 0; } else { crm_info("No CPG connection"); } } /*! 
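 * A hedged usage sketch for the function documented here (error handling
 * kept minimal for clarity):
 * \code
 *     uint32_t nodeid = get_local_nodeid(0); // 0 means use a new connection
 *
 *     if (nodeid == 0) {
 *         crm_warn("Local Corosync node ID is not yet known");
 *     }
 * \endcode
 *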
* \brief Get the local Corosync node ID (via CPG) * * \param[in] handle CPG connection to use (or 0 to use new connection) * * \return Corosync ID of local node (or 0 if not known) */ uint32_t get_local_nodeid(cpg_handle_t handle) { cs_error_t rc = CS_OK; int retries = 0; static uint32_t local_nodeid = 0; cpg_handle_t local_handle = handle; cpg_callbacks_t cb = { }; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; if(local_nodeid != 0) { return local_nodeid; } if(handle == 0) { crm_trace("Creating connection"); cs_repeat(rc, retries, 5, cpg_initialize(&local_handle, &cb)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); return 0; } rc = cpg_fd_get(local_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CPG provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); goto bail; } } if (rc == CS_OK) { retries = 0; crm_trace("Performing lookup"); cs_repeat(rc, retries, 5, cpg_local_get(local_handle, &local_nodeid)); } if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API: %s (%d)", pcmk__cs_err_str(rc), rc); } bail: if(handle == 0) { crm_trace("Closing connection"); cpg_finalize(local_handle); } crm_debug("Local nodeid is %u", local_nodeid); return local_nodeid; } /*! * \internal * \brief Callback function for Corosync message queue timer * * \param[in] data CPG handle * * \return FALSE (to indicate to glib that timer should not be removed) */ static gboolean crm_cs_flush_cb(gpointer data) { cs_message_timer = 0; crm_cs_flush(data); return FALSE; } // Send no more than this many CPG messages in one flush #define CS_SEND_MAX 200 /*! 
* \internal * \brief Send messages in Corosync CPG message queue * * \param[in] data CPG handle */ static void crm_cs_flush(gpointer data) { unsigned int sent = 0; guint queue_len = 0; cs_error_t rc = 0; cpg_handle_t *handle = (cpg_handle_t *) data; if (*handle == 0) { crm_trace("Connection is dead"); return; } queue_len = g_list_length(cs_message_queue); if (((queue_len % 1000) == 0) && (queue_len > 1)) { crm_err("CPG queue has grown to %d", queue_len); } else if (queue_len == CS_SEND_MAX) { crm_warn("CPG queue has grown to %d", queue_len); } if (cs_message_timer != 0) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active %d", cs_message_timer); return; } while ((cs_message_queue != NULL) && (sent < CS_SEND_MAX)) { struct iovec *iov = cs_message_queue->data; rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1); if (rc != CS_OK) { break; } sent++; crm_trace("CPG message sent, size=%llu", (unsigned long long) iov->iov_len); cs_message_queue = g_list_remove(cs_message_queue, iov); free(iov->iov_base); free(iov); } queue_len -= sent; if ((sent > 1) || (cs_message_queue != NULL)) { crm_info("Sent %u CPG messages (%d remaining): %s (%d)", sent, queue_len, pcmk__cs_err_str(rc), (int) rc); } else { crm_trace("Sent %u CPG messages (%d remaining): %s (%d)", sent, queue_len, pcmk__cs_err_str(rc), (int) rc); } if (cs_message_queue) { uint32_t delay_ms = 100; if (rc != CS_OK) { /* Proportionally more if sending failed but cap at 1s */ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); } cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data); } } /*! * \internal * \brief Dispatch function for CPG handle * * \param[in] user_data Cluster object * * \return 0 on success, -1 on error (per mainloop_io_t interface) */ static int pcmk_cpg_dispatch(gpointer user_data) { cs_error_t rc = CS_OK; crm_cluster_t *cluster = (crm_cluster_t *) user_data; rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %s (%d)", pcmk__cs_err_str(rc), rc); cpg_finalize(cluster->cpg_handle); cluster->cpg_handle = 0; return -1; } else if (cpg_evicted) { crm_err("Evicted from CPG membership"); return -1; } return 0; } static inline const char * ais_dest(const pcmk__cpg_host_t *host) { if (host->local) { return "local"; } else if (host->size > 0) { return host->uname; } else { return ""; } } static inline const char * msg_type2text(enum crm_ais_msg_types type) { const char *text = "unknown"; switch (type) { case crm_msg_none: text = "unknown"; break; case crm_msg_ais: text = "ais"; break; case crm_msg_cib: text = "cib"; break; case crm_msg_crmd: text = "crmd"; break; case crm_msg_pe: text = "pengine"; break; case crm_msg_te: text = "tengine"; break; case crm_msg_lrmd: text = "lrmd"; break; case crm_msg_attrd: text = "attrd"; break; case crm_msg_stonithd: text = "stonithd"; break; case crm_msg_stonith_ng: text = "stonith-ng"; break; } return text; } /*! 
* \internal * \brief Check whether a Corosync CPG message is valid * * \param[in] msg Corosync CPG message to check * * \return true if \p msg is valid, otherwise false */ static bool check_message_sanity(const pcmk__cpg_msg_t *msg) { - gboolean sane = TRUE; - int dest = msg->host.type; - int tmp_size = msg->header.size - sizeof(pcmk__cpg_msg_t); - - if (sane && msg->header.size == 0) { - crm_warn("Message with no size"); - sane = FALSE; - } - - if (sane && msg->header.error != CS_OK) { - crm_warn("Message header contains an error: %d", msg->header.error); - sane = FALSE; - } - - if (sane && msg_data_len(msg) != tmp_size) { - crm_warn("Message payload size is incorrect: expected %d, got %d", msg_data_len(msg), - tmp_size); - sane = FALSE; + int32_t payload_size = msg->header.size - sizeof(pcmk__cpg_msg_t); + + if (payload_size < 1) { + crm_err("%sCPG message %d from %s invalid: " + "Claimed size of %d bytes is too small " + CRM_XS " from %s[%u] to %s@%s", + (msg->is_compressed? "Compressed " : ""), + msg->id, ais_dest(&(msg->sender)), + (int) msg->header.size, + msg_type2text(msg->sender.type), msg->sender.pid, + msg_type2text(msg->host.type), ais_dest(&(msg->host))); + return false; } - if (sane && msg_data_len(msg) == 0) { - crm_warn("Message with no payload"); - sane = FALSE; + if (msg->header.error != CS_OK) { + crm_err("%sCPG message %d from %s invalid: " + "Sender indicated error %d " + CRM_XS " from %s[%u] to %s@%s", + (msg->is_compressed? "Compressed " : ""), + msg->id, ais_dest(&(msg->sender)), + msg->header.error, + msg_type2text(msg->sender.type), msg->sender.pid, + msg_type2text(msg->host.type), ais_dest(&(msg->host))); + return false; } - if (sane && !msg->is_compressed && (msg->size > 0)) { - size_t str_size = strlen(msg->data) + 1; - - if (msg->size != str_size) { - crm_warn("Message payload is corrupted: expected %llu bytes, got %llu", - (unsigned long long) msg->size, - (unsigned long long) str_size); - sane = FALSE; - } + if (msg_data_len(msg) != payload_size) { + crm_err("%sCPG message %d from %s invalid: " + "Total size %d inconsistent with payload size %d " + CRM_XS " from %s[%u] to %s@%s", + (msg->is_compressed? 
"Compressed " : ""), + msg->id, ais_dest(&(msg->sender)), + (int) msg->header.size, (int) msg_data_len(msg), + msg_type2text(msg->sender.type), msg->sender.pid, + msg_type2text(msg->host.type), ais_dest(&(msg->host))); + return false; } - if (sane == FALSE) { - crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%u, compressed=%d, size=%d, total=%d)", - msg->id, ais_dest(&(msg->host)), msg_type2text(dest), - ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), - msg->sender.pid, msg->is_compressed, msg_data_len(msg), msg->header.size); - - } else { - crm_trace - ("Verified message %d: (dest=%s:%s, from=%s:%s.%u, compressed=%d, size=%d, total=%d)", - msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), - msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, - msg_data_len(msg), msg->header.size); + if (!msg->is_compressed && + /* msg->size != (strlen(msg->data) + 1) would be a stronger check, + * but checking the last byte or two should be quick + */ + (((msg->size > 1) && (msg->data[msg->size - 2] == '\0')) + || (msg->data[msg->size - 1] != '\0'))) { + crm_err("CPG message %d from %s invalid: " + "Payload does not end at byte %llu " + CRM_XS " from %s[%u] to %s@%s", + msg->id, ais_dest(&(msg->sender)), + (unsigned long long) msg->size, + msg_type2text(msg->sender.type), msg->sender.pid, + msg_type2text(msg->host.type), ais_dest(&(msg->host))); + return false; } - return sane; + crm_trace("Verified %d-byte %sCPG message %d from %s[%u]@%s to %s@%s", + (int) msg->header.size, (msg->is_compressed? "compressed " : ""), + msg->id, msg_type2text(msg->sender.type), msg->sender.pid, + ais_dest(&(msg->sender)), + msg_type2text(msg->host.type), ais_dest(&(msg->host))); + return true; } /*! * \brief Extract text data from a Corosync CPG message * * \param[in] handle CPG connection (to get local node ID if not yet known) * \param[in] nodeid Corosync ID of node that sent message * \param[in] pid Process ID of message sender (for logging only) * \param[in] content CPG message * \param[out] kind If not NULL, will be set to CPG header ID * (which should be an enum crm_ais_msg_class value, * currently always crm_class_cluster) * \param[out] from If not NULL, will be set to sender uname * (valid for the lifetime of \p content) * * \return Newly allocated string with message data * \note It is the caller's responsibility to free the return value with free(). 
*/ char * pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content, uint32_t *kind, const char **from) { char *data = NULL; pcmk__cpg_msg_t *msg = (pcmk__cpg_msg_t *) content; if(handle) { // Do filtering and field massaging uint32_t local_nodeid = get_local_nodeid(handle); const char *local_name = get_local_node_name(); if (msg->sender.id > 0 && msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, msg->sender.id); return NULL; } else if (msg->host.id != 0 && (local_nodeid != msg->host.id)) { /* Not for us */ crm_trace("Not for us: %u != %u", msg->host.id, local_nodeid); return NULL; } else if (msg->host.size != 0 && !pcmk__str_eq(msg->host.uname, local_name, pcmk__str_casei)) { /* Not for us */ crm_trace("Not for us: %s != %s", msg->host.uname, local_name); return NULL; } msg->sender.id = nodeid; if (msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); msg->sender.size = strlen(peer->uname); memset(msg->sender.uname, 0, MAX_NAME); memcpy(msg->sender.uname, peer->uname, msg->sender.size); } } } crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", msg_data_len(msg), msg->size, msg->compressed_size); if (kind != NULL) { *kind = msg->header.id; } if (from != NULL) { *from = msg->sender.uname; } if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (!check_message_sanity(msg)) { goto badmsg; } crm_trace("Decompressing message data"); uncompressed = calloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", bz2_strerror(rc), rc); free(uncompressed); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (!check_message_sanity(msg)) { goto badmsg; } else { data = strdup(msg->data); } // Is this necessary? crm_get_peer(msg->sender.id, msg->sender.uname); crm_trace("Payload: %.200s", data); return data; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(pcmk__cpg_msg_t), msg->header.size, msg->size, msg->compressed_size); free(data); return NULL; } /*! * \internal * \brief Compare cpg_address objects by node ID * * \param[in] first First cpg_address structure to compare * \param[in] second Second cpg_address structure to compare * * \return Negative number if first's node ID is lower, * positive number if first's node ID is greater, * or 0 if both node IDs are equal */ static int cmp_member_list_nodeid(const void *first, const void *second) { const struct cpg_address *const a = *((const struct cpg_address **) first), *const b = *((const struct cpg_address **) second); if (a->nodeid < b->nodeid) { return -1; } else if (a->nodeid > b->nodeid) { return 1; } /* don't bother with "reason" nor "pid" */ return 0; } /*! 
* \internal * \brief Get a readable string equivalent of a cpg_reason_t value * * \param[in] reason CPG reason value * * \return Readable string suitable for logging */ static const char * cpgreason2str(cpg_reason_t reason) { switch (reason) { case CPG_REASON_JOIN: return " via cpg_join"; case CPG_REASON_LEAVE: return " via cpg_leave"; case CPG_REASON_NODEDOWN: return " via cluster exit"; case CPG_REASON_NODEUP: return " via cluster join"; case CPG_REASON_PROCDOWN: return " for unknown reason"; default: break; } return ""; } /*! * \internal * \brief Get a log-friendly node name * * \param[in] peer Node to check * * \return Node's uname, or readable string if not known */ static inline const char * peer_name(crm_node_t *peer) { if (peer == NULL) { return "unknown node"; } else if (peer->uname == NULL) { return "peer node"; } else { return peer->uname; } } /*! * \brief Handle a CPG configuration change event * * \param[in] handle CPG connection * \param[in] cpg_name CPG group name * \param[in] member_list List of current CPG members * \param[in] member_list_entries Number of entries in \p member_list * \param[in] left_list List of CPG members that left * \param[in] left_list_entries Number of entries in \p left_list * \param[in] joined_list List of CPG members that joined * \param[in] joined_list_entries Number of entries in \p joined_list */ void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; gboolean found = FALSE; static int counter = 0; uint32_t local_nodeid = get_local_nodeid(handle); const struct cpg_address *key, **sorted; sorted = malloc(member_list_entries * sizeof(const struct cpg_address *)); CRM_ASSERT(sorted != NULL); for (size_t iter = 0; iter < member_list_entries; iter++) { sorted[iter] = member_list + iter; } /* so that the cross-matching multiply-subscribed nodes is then cheap */ qsort(sorted, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); for (i = 0; i < left_list_entries; i++) { crm_node_t *peer = pcmk__search_cluster_node_cache(left_list[i].nodeid, NULL); const struct cpg_address **rival = NULL; /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation and not playing by this rule may go wild in case of multiple residual instances of the same pacemaker daemon at the same node -- we must ensure that the possible local rival(s) won't make us cry out and bail (e.g. when they quit themselves), since all the surrounding logic denies this simple fact that the full membership is discriminated also per the PID of the process beside mere node ID (and implicitly, group ID); practically, this will be sound in terms of not preventing progress, since all the CPG joiners are also API end-point carriers, and that's what matters locally (who's the winner); remotely, we will just compare leave_list and member_list and if the left process has its node retained in member_list (under some other PID, anyway) we will just ignore it as well XXX: long-term fix is to establish in-out PID-aware tracking? 
*/ if (peer) { key = &left_list[i]; rival = bsearch(&key, sorted, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); } if (rival == NULL) { crm_info("Group %s event %d: %s (node %u pid %u) left%s", groupName->value, counter, peer_name(peer), left_list[i].nodeid, left_list[i].pid, cpgreason2str(left_list[i].reason)); if (peer) { crm_update_peer_proc(__func__, peer, crm_proc_cpg, OFFLINESTATUS); } } else if (left_list[i].nodeid == local_nodeid) { crm_warn("Group %s event %d: duplicate local pid %u left%s", groupName->value, counter, left_list[i].pid, cpgreason2str(left_list[i].reason)); } else { crm_warn("Group %s event %d: " "%s (node %u) duplicate pid %u left%s (%u remains)", groupName->value, counter, peer_name(peer), left_list[i].nodeid, left_list[i].pid, cpgreason2str(left_list[i].reason), (*rival)->pid); } } free(sorted); sorted = NULL; for (i = 0; i < joined_list_entries; i++) { crm_info("Group %s event %d: node %u pid %u joined%s", groupName->value, counter, joined_list[i].nodeid, joined_list[i].pid, cpgreason2str(joined_list[i].reason)); } for (i = 0; i < member_list_entries; i++) { crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); if (member_list[i].nodeid == local_nodeid && member_list[i].pid != getpid()) { /* see the note above */ crm_warn("Group %s event %d: detected duplicate local pid %u", groupName->value, counter, member_list[i].pid); continue; } crm_info("Group %s event %d: %s (node %u pid %u) is member", groupName->value, counter, peer_name(peer), member_list[i].nodeid, member_list[i].pid); /* If the caller left auto-reaping enabled, this will also update the * state to member. */ peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg, ONLINESTATUS); if (peer && peer->state && strcmp(peer->state, CRM_NODE_MEMBER)) { /* The node is a CPG member, but we currently think it's not a * cluster member. This is possible only if auto-reaping was * disabled. The node may be joining, and we happened to get the CPG * notification before the quorum notification; or the node may have * just died, and we are processing its final messages; or a bug * has affected the peer cache. */ time_t now = time(NULL); if (peer->when_lost == 0) { // Track when we first got into this contradictory state peer->when_lost = now; } else if (now > (peer->when_lost + 60)) { // If it persists for more than a minute, update the state crm_warn("Node %u is member of group %s but was believed offline", member_list[i].nodeid, groupName->value); pcmk__update_peer_state(__func__, peer, CRM_NODE_MEMBER, 0); } } if (local_nodeid == member_list[i].nodeid) { found = TRUE; } } if (!found) { crm_err("Local node was evicted from group %s", groupName->value); cpg_evicted = true; } counter++; } /*! 
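 * A hedged setup sketch for the function documented here; my_deliver_cb and
 * my_destroy_cb are caller-supplied placeholders, not defined in this file:
 * \code
 *     crm_cluster_t cluster = { 0, };
 *
 *     cluster.cpg.cpg_deliver_fn = my_deliver_cb;
 *     cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
 *     cluster.destroy = my_destroy_cb;
 *     if (cluster_connect_cpg(&cluster)) {
 *         // cluster.cpg_handle and cluster.nodeid are now populated
 *     }
 * \endcode
 *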
* \brief Connect to Corosync CPG * * \param[in] cluster Cluster object * * \return TRUE on success, otherwise FALSE */ gboolean cluster_connect_cpg(crm_cluster_t *cluster) { cs_error_t rc; int fd = -1; int retries = 0; uint32_t id = 0; crm_node_t *peer = NULL; cpg_handle_t handle = 0; const char *message_name = pcmk__message_name(crm_system_name); uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = cluster->destroy, }; cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = cluster->cpg.cpg_deliver_fn, .cpg_confchg_fn = cluster->cpg.cpg_confchg_fn, /* .cpg_deliver_fn = pcmk_cpg_deliver, */ /* .cpg_confchg_fn = pcmk_cpg_membership, */ }; cpg_evicted = false; cluster->group.length = 0; cluster->group.value[0] = 0; /* group.value is char[128] */ strncpy(cluster->group.value, message_name, 127); cluster->group.value[127] = 0; cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value)); cs_repeat(rc, retries, 30, cpg_initialize(&handle, &cpg_callbacks)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); goto bail; } rc = cpg_fd_get(handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CPG provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = CS_ERR_ACCESS; goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); rc = CS_ERR_ACCESS; goto bail; } id = get_local_nodeid(handle); if (id == 0) { crm_err("Could not get local node id from the CPG API"); goto bail; } cluster->nodeid = id; retries = 0; cs_repeat(rc, retries, 30, cpg_join(handle, &cluster->group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", message_name, rc); goto bail; } pcmk_cpg_handle = handle; cluster->cpg_handle = handle; mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks); bail: if (rc != CS_OK) { cpg_finalize(handle); return FALSE; } peer = crm_get_peer(id, NULL); crm_update_peer_proc(__func__, peer, crm_proc_cpg, ONLINESTATUS); return TRUE; } /*! * \internal * \brief Send an XML message via Corosync CPG * * \param[in] msg XML message to send * \param[in] node Cluster node to send message to * \param[in] dest Type of message to send * * \return TRUE on success, otherwise FALSE */ gboolean pcmk__cpg_send_xml(xmlNode *msg, crm_node_t *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; data = dump_xml_unformatted(msg); rc = send_cluster_text(crm_class_cluster, data, FALSE, node, dest); free(data); return rc; } /*! 
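 * A hedged broadcast example for the function documented here; passing a
 * NULL node targets the whole group, per the target handling below:
 * \code
 *     send_cluster_text(crm_class_cluster, "example payload", FALSE,
 *                       NULL, crm_msg_crmd);
 * \endcode
 *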
* \internal * \brief Send string data via Corosync CPG * * \param[in] msg_class Message class (to set as CPG header ID) * \param[in] data Data to send * \param[in] local What to set as host "local" value (which is never used) * \param[in] node Cluster node to send message to * \param[in] dest Type of message to send * * \return TRUE on success, otherwise FALSE */ gboolean send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; static int local_name_len = 0; static const char *local_name = NULL; char *target = NULL; struct iovec *iov; pcmk__cpg_msg_t *msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); switch (msg_class) { case crm_class_cluster: break; default: crm_err("Invalid message class: %d", msg_class); return FALSE; } CRM_CHECK(dest != crm_msg_ais, return FALSE); if (local_name == NULL) { local_name = get_local_node_name(); } if ((local_name_len == 0) && (local_name != NULL)) { local_name_len = strlen(local_name); } if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } msg = calloc(1, sizeof(pcmk__cpg_msg_t)); msg_id++; msg->id = msg_id; msg->header.id = msg_class; msg->header.error = CS_OK; msg->host.type = dest; msg->host.local = local; if (node) { if (node->uname) { target = strdup(node->uname); msg->host.size = strlen(node->uname); memset(msg->host.uname, 0, MAX_NAME); memcpy(msg->host.uname, node->uname, msg->host.size); } else { target = crm_strdup_printf("%u", node->id); } msg->host.id = node->id; } else { target = strdup("all"); } msg->sender.id = 0; msg->sender.type = sender; msg->sender.pid = local_pid; msg->sender.size = local_name_len; memset(msg->sender.uname, 0, MAX_NAME); if ((local_name != NULL) && (msg->sender.size != 0)) { memcpy(msg->sender.uname, local_name, msg->sender.size); } msg->size = 1 + strlen(data); msg->header.size = sizeof(pcmk__cpg_msg_t) + msg->size; if (msg->size < CRM_BZ2_THRESHOLD) { msg = pcmk__realloc(msg, msg->header.size); memcpy(msg->data, data, msg->size); } else { char *compressed = NULL; unsigned int new_size = 0; char *uncompressed = strdup(data); if (pcmk__compress(uncompressed, (unsigned int) msg->size, 0, &compressed, &new_size) == pcmk_rc_ok) { msg->header.size = sizeof(pcmk__cpg_msg_t) + new_size; msg = pcmk__realloc(msg, msg->header.size); memcpy(msg->data, compressed, new_size); msg->is_compressed = TRUE; msg->compressed_size = new_size; } else { // cppcheck seems not to understand the abort logic in pcmk__realloc // cppcheck-suppress memleak msg = pcmk__realloc(msg, msg->header.size); memcpy(msg->data, data, msg->size); } free(uncompressed); free(compressed); } iov = calloc(1, sizeof(struct iovec)); iov->iov_base = msg; iov->iov_len = msg->header.size; if (msg->compressed_size) { crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes compressed payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->compressed_size, data); } else { crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->size, data); } free(target); cs_message_queue = g_list_append(cs_message_queue, iov); crm_cs_flush(&pcmk_cpg_handle); return TRUE; } /*! 
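 * For example, based on the mappings implemented below, both of these calls
 * return crm_msg_crmd:
 * \code
 *     enum crm_ais_msg_types a = text2msg_type(CRM_SYSTEM_CRMD);
 *     enum crm_ais_msg_types b = text2msg_type(CRM_SYSTEM_DC);
 * \endcode
 *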
* \brief Get the message type equivalent of a string * * \param[in] text String of message type * * \return Message type equivalent of \p text */ enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); text = pcmk__message_name(text); if (pcmk__str_eq(text, "ais", pcmk__str_casei)) { type = crm_msg_ais; } else if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_casei)) { type = crm_msg_cib; } else if (pcmk__strcase_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) { type = crm_msg_crmd; } else if (pcmk__str_eq(text, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { type = crm_msg_te; } else if (pcmk__str_eq(text, CRM_SYSTEM_PENGINE, pcmk__str_casei)) { type = crm_msg_pe; } else if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_casei)) { type = crm_msg_lrmd; } else if (pcmk__str_eq(text, CRM_SYSTEM_STONITHD, pcmk__str_casei)) { type = crm_msg_stonithd; } else if (pcmk__str_eq(text, "stonith-ng", pcmk__str_casei)) { type = crm_msg_stonith_ng; } else if (pcmk__str_eq(text, "attrd", pcmk__str_casei)) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1 || type <= crm_msg_stonith_ng) { /* Ensure it's sane */ type = crm_msg_none; } } return type; } diff --git a/lib/common/output_text.c b/lib/common/output_text.c index 4408425620..3454a8cbdb 100644 --- a/lib/common/output_text.c +++ b/lib/common/output_text.c @@ -1,433 +1,433 @@ /* * Copyright 2019-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include static gboolean fancy = FALSE; GOptionEntry pcmk__text_output_entries[] = { { "text-fancy", 0, 0, G_OPTION_ARG_NONE, &fancy, "Use more highly formatted output (requires --output-as=text)", NULL }, { NULL } }; typedef struct text_list_data_s { unsigned int len; char *singular_noun; char *plural_noun; } text_list_data_t; typedef struct private_data_s { GQueue *parent_q; } private_data_t; static void text_free_priv(pcmk__output_t *out) { private_data_t *priv = out->priv; if (priv == NULL) { return; } g_queue_free(priv->parent_q); free(priv); out->priv = NULL; } static bool text_init(pcmk__output_t *out) { private_data_t *priv = NULL; /* If text_init was previously called on this output struct, just return. 
*/ if (out->priv != NULL) { return true; } else { out->priv = calloc(1, sizeof(private_data_t)); if (out->priv == NULL) { return false; } priv = out->priv; } priv->parent_q = g_queue_new(); return true; } static void text_finish(pcmk__output_t *out, crm_exit_t exit_status, bool print, void **copy_dest) { fflush(out->dest); } static void text_reset(pcmk__output_t *out) { CRM_ASSERT(out != NULL); if (out->dest != stdout) { out->dest = freopen(NULL, "w", out->dest); } CRM_ASSERT(out->dest != NULL); text_free_priv(out); text_init(out); } static void text_subprocess_output(pcmk__output_t *out, int exit_status, const char *proc_stdout, const char *proc_stderr) { CRM_ASSERT(out != NULL); if (proc_stdout != NULL) { fprintf(out->dest, "%s\n", proc_stdout); } if (proc_stderr != NULL) { fprintf(out->dest, "%s\n", proc_stderr); } } static void text_version(pcmk__output_t *out, bool extended) { CRM_ASSERT(out != NULL); if (extended) { fprintf(out->dest, "Pacemaker %s (Build: %s): %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); } else { fprintf(out->dest, "Pacemaker %s\n", PACEMAKER_VERSION); fprintf(out->dest, "Written by Andrew Beekhof\n"); } } G_GNUC_PRINTF(2, 3) static void text_err(pcmk__output_t *out, const char *format, ...) { va_list ap; int len = 0; CRM_ASSERT(out != NULL); va_start(ap, format); /* Informational output does not get indented, to separate it from other * potentially indented list output. */ len = vfprintf(stderr, format, ap); CRM_ASSERT(len >= 0); va_end(ap); /* Add a newline. */ fprintf(stderr, "\n"); } G_GNUC_PRINTF(2, 3) static int text_info(pcmk__output_t *out, const char *format, ...) { va_list ap; int len = 0; CRM_ASSERT(out != NULL); if (out->is_quiet(out)) { return pcmk_rc_no_output; } va_start(ap, format); /* Informational output does not get indented, to separate it from other * potentially indented list output. */ len = vfprintf(out->dest, format, ap); CRM_ASSERT(len >= 0); va_end(ap); /* Add a newline. */ fprintf(out->dest, "\n"); return pcmk_rc_ok; } static void text_output_xml(pcmk__output_t *out, const char *name, const char *buf) { CRM_ASSERT(out != NULL); pcmk__indented_printf(out, "%s", buf); } G_GNUC_PRINTF(4, 5) static void text_begin_list(pcmk__output_t *out, const char *singular_noun, const char *plural_noun, const char *format, ...) { private_data_t *priv = NULL; text_list_data_t *new_list = NULL; va_list ap; CRM_ASSERT(out != NULL && out->priv != NULL); priv = out->priv; va_start(ap, format); if (fancy && format) { pcmk__indented_vprintf(out, format, ap); fprintf(out->dest, ":\n"); } va_end(ap); new_list = calloc(1, sizeof(text_list_data_t)); new_list->len = 0; new_list->singular_noun = singular_noun == NULL ? NULL : strdup(singular_noun); new_list->plural_noun = plural_noun == NULL ? NULL : strdup(plural_noun); g_queue_push_tail(priv->parent_q, new_list); } G_GNUC_PRINTF(3, 4) static void text_list_item(pcmk__output_t *out, const char *id, const char *format, ...) { va_list ap; CRM_ASSERT(out != NULL); va_start(ap, format); if (fancy) { if (id != NULL) { /* Not really a good way to do this all in one call, so make it two. * The first handles the indentation and list styling. The second * just prints right after that one. 
*/ pcmk__indented_printf(out, "%s: ", id); vfprintf(out->dest, format, ap); } else { pcmk__indented_vprintf(out, format, ap); } } else { pcmk__indented_vprintf(out, format, ap); } fputc('\n', out->dest); fflush(out->dest); va_end(ap); out->increment_list(out); } static void text_increment_list(pcmk__output_t *out) { private_data_t *priv = NULL; gpointer tail; CRM_ASSERT(out != NULL && out->priv != NULL); priv = out->priv; tail = g_queue_peek_tail(priv->parent_q); CRM_ASSERT(tail != NULL); ((text_list_data_t *) tail)->len++; } static void text_end_list(pcmk__output_t *out) { private_data_t *priv = NULL; text_list_data_t *node = NULL; CRM_ASSERT(out != NULL && out->priv != NULL); priv = out->priv; node = g_queue_pop_tail(priv->parent_q); if (node->singular_noun != NULL && node->plural_noun != NULL) { if (node->len == 1) { pcmk__indented_printf(out, "%d %s found\n", node->len, node->singular_noun); } else { pcmk__indented_printf(out, "%d %s found\n", node->len, node->plural_noun); } } free(node); } static bool text_is_quiet(pcmk__output_t *out) { CRM_ASSERT(out != NULL); return out->quiet; } static void text_spacer(pcmk__output_t *out) { CRM_ASSERT(out != NULL); fprintf(out->dest, "\n"); } static void text_progress(pcmk__output_t *out, bool end) { CRM_ASSERT(out != NULL); if (out->dest == stdout) { fprintf(out->dest, "."); if (end) { fprintf(out->dest, "\n"); } } } pcmk__output_t * pcmk__mk_text_output(char **argv) { pcmk__output_t *retval = calloc(1, sizeof(pcmk__output_t)); if (retval == NULL) { return NULL; } retval->fmt_name = "text"; retval->request = argv == NULL ? NULL : g_strjoinv(" ", argv); retval->init = text_init; retval->free_priv = text_free_priv; retval->finish = text_finish; retval->reset = text_reset; retval->register_message = pcmk__register_message; retval->message = pcmk__call_message; retval->subprocess_output = text_subprocess_output; retval->version = text_version; retval->info = text_info; retval->err = text_err; retval->output_xml = text_output_xml; retval->begin_list = text_begin_list; retval->list_item = text_list_item; retval->increment_list = text_increment_list; retval->end_list = text_end_list; retval->is_quiet = text_is_quiet; retval->spacer = text_spacer; retval->progress = text_progress; retval->prompt = pcmk__text_prompt; return retval; } G_GNUC_PRINTF(2, 0) void pcmk__formatted_vprintf(pcmk__output_t *out, const char *format, va_list args) { int len = 0; CRM_ASSERT(out != NULL); len = vfprintf(out->dest, format, args); CRM_ASSERT(len >= 0); } G_GNUC_PRINTF(2, 3) void pcmk__formatted_printf(pcmk__output_t *out, const char *format, ...) { va_list ap; CRM_ASSERT(out != NULL); va_start(ap, format); pcmk__formatted_vprintf(out, format, ap); va_end(ap); } G_GNUC_PRINTF(2, 0) void pcmk__indented_vprintf(pcmk__output_t *out, const char *format, va_list args) { CRM_ASSERT(out != NULL); if (!pcmk__str_eq(out->fmt_name, "text", pcmk__str_none)) { return; } if (fancy) { int level = 0; private_data_t *priv = out->priv; CRM_ASSERT(priv != NULL); level = g_queue_get_length(priv->parent_q); for (int i = 0; i < level; i++) { fprintf(out->dest, " "); } if (level > 0) { fprintf(out->dest, "* "); } } pcmk__formatted_vprintf(out, format, args); } G_GNUC_PRINTF(2, 3) void pcmk__indented_printf(pcmk__output_t *out, const char *format, ...) 
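/* Hypothetical usage sketch for the text formatter defined in this file
 * (assumes the usual Pacemaker output headers; in practice the object is
 * normally obtained via the higher-level output API rather than wired up by
 * hand):
 *
 *     pcmk__output_t *out = pcmk__mk_text_output(NULL);
 *
 *     out->dest = stdout;   // destination is normally set by the caller
 *     out->init(out);       // allocates the private list queue
 *     out->begin_list(out, "item", "items", "Example list");
 *     out->list_item(out, NULL, "value %d", 1);
 *     out->end_list(out);   // prints "1 item found"
 *     out->finish(out, CRM_EX_OK, true, NULL);
 */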
{ va_list ap; CRM_ASSERT(out != NULL); va_start(ap, format); pcmk__indented_vprintf(out, format, ap); va_end(ap); } void pcmk__text_prompt(const char *prompt, bool echo, char **dest) { int rc = 0; struct termios settings; tcflag_t orig_c_lflag = 0; CRM_ASSERT(prompt != NULL); CRM_ASSERT(dest != NULL); if (!echo) { rc = tcgetattr(0, &settings); if (rc == 0) { orig_c_lflag = settings.c_lflag; settings.c_lflag &= ~ECHO; rc = tcsetattr(0, TCSANOW, &settings); } } if (rc == 0) { fprintf(stderr, "%s: ", prompt); if (*dest != NULL) { free(*dest); *dest = NULL; } #if SSCANF_HAS_M rc = scanf("%ms", dest); #else *dest = calloc(1, 1024); rc = scanf("%1023s", *dest); #endif fprintf(stderr, "\n"); } if (rc < 1) { free(*dest); *dest = NULL; } if (orig_c_lflag != 0) { settings.c_lflag = orig_c_lflag; - rc = tcsetattr(0, TCSANOW, &settings); + /* rc = */ tcsetattr(0, TCSANOW, &settings); } } diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 5a1d2bd2f4..b6ea12022a 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2708 +1,2708 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(stonith); struct stonith_action_s { /*! user defined data */ char *agent; char *action; char *victim; GHashTable *args; int timeout; int async; void *userdata; void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data); void (*fork_cb) (GPid pid, gpointer user_data); svc_action_t *svc_action; /*! 
internal timing information */ time_t initial_start_time; int tries; int remaining_timeout; int max_retries; /* device output data */ GPid pid; int rc; char *output; char *error; }; typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; int notify_refcnt; bool notify_deletes; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); bool delete; } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); bool stonith_dispatch(stonith_t * st); xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static int stonith_send_command(stonith_t *stonith, const char *op, xmlNode *data, xmlNode **output_data, int call_options, int timeout); static void stonith_connection_destroy(gpointer user_data); static void stonith_send_notification(gpointer data, gpointer user_data); static int internal_stonith_action_execute(stonith_action_t * action); static void log_action(stonith_action_t *action, pid_t pid); /*! * \brief Get agent namespace by name * * \param[in] namespace_s Name of namespace as string * * \return Namespace as enum value */ enum stonith_namespace stonith_text2namespace(const char *namespace_s) { - if (pcmk__str_eq(namespace_s, "any", pcmk__str_null_matches)) { + if ((namespace_s == NULL) || !strcmp(namespace_s, "any")) { return st_namespace_any; } else if (!strcmp(namespace_s, "redhat") || !strcmp(namespace_s, "stonith-ng")) { return st_namespace_rhcs; } else if (!strcmp(namespace_s, "internal")) { return st_namespace_internal; } else if (!strcmp(namespace_s, "heartbeat")) { return st_namespace_lha; } return st_namespace_invalid; } /*! * \brief Get agent namespace name * * \param[in] namespace Namespace as enum value * * \return Namespace name as string */ const char * stonith_namespace2text(enum stonith_namespace st_namespace) { switch (st_namespace) { case st_namespace_any: return "any"; case st_namespace_rhcs: return "stonith-ng"; case st_namespace_internal: return "internal"; case st_namespace_lha: return "heartbeat"; default: break; } return "unsupported"; } /*! 
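 * \note Hypothetical round trip through the two helpers above:
 * \code
 * enum stonith_namespace ns = stonith_text2namespace("redhat");
 * const char *label = stonith_namespace2text(ns);   // "stonith-ng"
 * \endcode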
* \brief Determine namespace of a fence agent * * \param[in] agent Fence agent type * \param[in] namespace_s Name of agent namespace as string, if known * * \return Namespace of specified agent, as enum value */ enum stonith_namespace stonith_get_namespace(const char *agent, const char *namespace_s) { if (pcmk__str_eq(namespace_s, "internal", pcmk__str_casei)) { return st_namespace_internal; } if (stonith__agent_is_rhcs(agent)) { return st_namespace_rhcs; } #if HAVE_STONITH_STONITH_H if (stonith__agent_is_lha(agent)) { return st_namespace_lha; } #endif crm_err("Unknown fence agent: %s", agent); return st_namespace_invalid; } static void log_action(stonith_action_t *action, pid_t pid) { if (action->output) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid); crm_log_output(LOG_TRACE, prefix, action->output); free(prefix); } if (action->error) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid); crm_log_output(LOG_WARNING, prefix, action->error); free(prefix); } } /* when cycling through the list we don't want to delete items so just mark them and when we know nobody is using the list loop over it to remove the marked items */ static void foreach_notify_entry (stonith_private_t *private, GFunc func, gpointer user_data) { private->notify_refcnt++; g_list_foreach(private->notify_list, func, user_data); private->notify_refcnt--; if ((private->notify_refcnt == 0) && private->notify_deletes) { GList *list_item = private->notify_list; private->notify_deletes = FALSE; while (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; GList *next = g_list_next(list_item); if (list_client->delete) { free(list_client); private->notify_list = g_list_delete_link(private->notify_list, list_item); } list_item = next; } } } static void stonith_connection_destroy(gpointer user_data) { stonith_t *stonith = user_data; stonith_private_t *native = NULL; struct notify_blob_s blob; crm_trace("Sending destroyed notification"); blob.stonith = stonith; blob.xml = create_xml_node(NULL, "notify"); native = stonith->st_private; native->ipc = NULL; native->source = NULL; free(native->token); native->token = NULL; stonith->state = stonith_disconnected; crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT); foreach_notify_entry(native, stonith_send_notification, &blob); free_xml(blob.xml); } xmlNode * create_device_registration_xml(const char *id, enum stonith_namespace namespace, const char *agent, stonith_key_value_t *params, const char *rsc_provides) { xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); #if HAVE_STONITH_STONITH_H if (namespace == st_namespace_any) { namespace = stonith_get_namespace(agent, NULL); } if (namespace == st_namespace_lha) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } #endif crm_xml_add(data, XML_ATTR_ID, id); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, "agent", agent); if ((namespace != st_namespace_any) && (namespace != st_namespace_invalid)) { crm_xml_add(data, "namespace", stonith_namespace2text(namespace)); } if (rsc_provides) { crm_xml_add(data, "rsc_provides", rsc_provides); } for (; params; params = params->next) { hash2field((gpointer) params->key, (gpointer) params->value, args); } return data; } static int 
stonith_api_register_device(stonith_t * st, int call_options, const char *id, const char *namespace, const char *agent, stonith_key_value_t * params) { int rc = 0; xmlNode *data = NULL; data = create_device_registration_xml(id, stonith_text2namespace(namespace), agent, params, NULL); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_device(stonith_t * st, int call_options, const char *name) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, XML_ATTR_ID, name); rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level) { int rc = 0; xmlNode *data = NULL; CRM_CHECK(node || pattern || (attr && value), return -EINVAL); data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); if (node) { crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); } else if (pattern) { crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern); } else { crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr); crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value); } crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0); free_xml(data); return rc; } static int stonith_api_remove_level(stonith_t * st, int options, const char *node, int level) { return stonith_api_remove_level_full(st, options, node, NULL, NULL, NULL, level); } /*! * \internal * \brief Create XML for fence topology level registration request * * \param[in] node If not NULL, target level by this node name * \param[in] pattern If not NULL, target by node name using this regex * \param[in] attr If not NULL, target by this node attribute * \param[in] value If not NULL, target by this node attribute value * \param[in] level Index number of level to register * \param[in] device_list List of devices in level * * \return Newly allocated XML tree on success, NULL otherwise * * \note The caller should set only one of node, pattern or attr/value. 
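 * \note A minimal sketch of building a level-1 registration for one node
 *       (hypothetical node and device IDs; the caller still owns, and must
 *       free, both the returned XML and the key/value list):
 * \code
 * stonith_key_value_t *devices = NULL;
 *
 * devices = stonith_key_value_add(devices, NULL, "fence_ipmi_node1");
 * devices = stonith_key_value_add(devices, NULL, "fence_pdu_node1");
 * xmlNode *lvl = create_level_registration_xml("node1", NULL, NULL, NULL,
 *                                              1, devices);
 * \endcode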
*/ xmlNode * create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list) { size_t len = 0; char *list = NULL; xmlNode *data; CRM_CHECK(node || pattern || (attr && value), return NULL); data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); CRM_CHECK(data, return NULL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); if (node) { crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); } else if (pattern) { crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern); } else { crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr); crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value); } // cppcheck seems not to understand the abort logic behind pcmk__realloc // cppcheck-suppress memleak for (; device_list; device_list = device_list->next) { pcmk__add_separated_word(&list, &len, device_list->value, ","); } crm_xml_add(data, XML_ATTR_STONITH_DEVICES, list); free(list); return data; } static int stonith_api_register_level_full(stonith_t * st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list) { int rc = 0; xmlNode *data = create_level_registration_xml(node, pattern, attr, value, level, device_list); CRM_CHECK(data != NULL, return -EINVAL); rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0); free_xml(data); return rc; } static int stonith_api_register_level(stonith_t * st, int options, const char *node, int level, stonith_key_value_t * device_list) { return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL, level, device_list); } static void append_config_arg(gpointer key, gpointer value, gpointer user_data) { /* The fencer will filter "action" out when it registers the device, * but ignore it here in case any external API users don't. * * Also filter out parameters handled directly by Pacemaker. */ if (!pcmk__str_eq(key, STONITH_ATTR_ACTION_OP, pcmk__str_casei) && !pcmk_stonith_param(key) && (strstr(key, CRM_META) == NULL) && !pcmk__str_eq(key, "crm_feature_set", pcmk__str_casei)) { crm_trace("Passing %s=%s with fence action", (const char *) key, (const char *) (value? value : "")); g_hash_table_insert((GHashTable *) user_data, strdup(key), strdup(value? value : "")); } } static GHashTable * make_args(const char *agent, const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args, GHashTable * port_map, const char *host_arg) { GHashTable *arg_list = NULL; const char *value = NULL; CRM_CHECK(action != NULL, return NULL); arg_list = pcmk__strkey_table(free, free); // Add action to arguments (using an alias if requested) if (device_args) { char buffer[512]; snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action); value = g_hash_table_lookup(device_args, buffer); if (value) { crm_debug("Substituting '%s' for fence action %s targeting %s", value, action, victim); action = value; } } g_hash_table_insert(arg_list, strdup(STONITH_ATTR_ACTION_OP), strdup(action)); /* If this is a fencing operation against another node, add more standard * arguments. 
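 * Specifically: the target's node name is always passed as "nodename", its
 * numeric ID is added as "nodeid" when known, and, unless the device sets
 * pcmk_host_argument=none, the target (or its port-map alias) is also passed
 * under the configured host argument (default "port") when that argument is
 * unset or set to "dynamic".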
*/ if (victim && device_args) { const char *param = NULL; /* Always pass the target's name, per * https://github.com/ClusterLabs/fence-agents/blob/master/doc/FenceAgentAPI.md */ g_hash_table_insert(arg_list, strdup("nodename"), strdup(victim)); // If the target's node ID was specified, pass it, too if (victim_nodeid) { char *nodeid = crm_strdup_printf("%" PRIu32, victim_nodeid); // cts-fencing looks for this log message crm_info("Passing '%s' as nodeid with fence action '%s' targeting %s", nodeid, action, victim); g_hash_table_insert(arg_list, strdup("nodeid"), nodeid); } // Check whether target must be specified in some other way param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT); if (!pcmk__str_eq(agent, "fence_legacy", pcmk__str_none) && !pcmk__str_eq(param, "none", pcmk__str_casei)) { if (param == NULL) { /* Use the caller's default for pcmk_host_argument, or "port" if * none was given */ param = (host_arg == NULL)? "port" : host_arg; } value = g_hash_table_lookup(device_args, param); if (pcmk__str_eq(value, "dynamic", pcmk__str_casei|pcmk__str_null_matches)) { /* If the host argument was "dynamic" or not explicitly specified, * add it with the target */ const char *alias = NULL; if (port_map) { alias = g_hash_table_lookup(port_map, victim); } if (alias == NULL) { alias = victim; } crm_debug("Passing %s='%s' with fence action %s targeting %s", param, alias, action, victim); g_hash_table_insert(arg_list, strdup(param), strdup(alias)); } } } if (device_args) { g_hash_table_foreach(device_args, append_config_arg, arg_list); } return arg_list; } /*! * \internal * \brief Free all memory used by a stonith action * * \param[in,out] action Action to free */ void stonith__destroy_action(stonith_action_t *action) { if (action) { free(action->agent); if (action->args) { g_hash_table_destroy(action->args); } free(action->action); free(action->victim); if (action->svc_action) { services_action_free(action->svc_action); } free(action->output); free(action->error); free(action); } } /*! * \internal * \brief Get the result of an executed stonith action * * \param[in,out] action Executed action * \param[out] rc Where to store result code (or NULL) * \param[out] output Where to store standard output (or NULL) * \param[out] error_output Where to store standard error output (or NULL) * * \note If output or error_output is not NULL, the caller is responsible for * freeing the memory. */ void stonith__action_result(stonith_action_t *action, int *rc, char **output, char **error_output) { if (rc) { *rc = pcmk_ok; } if (output) { *output = NULL; } if (error_output) { *error_output = NULL; } if (action != NULL) { if (rc) { *rc = action->rc; } if (output && action->output) { *output = action->output; action->output = NULL; // hand off memory management to caller } if (error_output && action->error) { *error_output = action->error; action->error = NULL; // hand off memory management to caller } } } #define FAILURE_MAX_RETRIES 2 stonith_action_t * stonith_action_create(const char *agent, const char *_action, const char *victim, uint32_t victim_nodeid, int timeout, GHashTable * device_args, GHashTable * port_map, const char *host_arg) { stonith_action_t *action; action = calloc(1, sizeof(stonith_action_t)); action->args = make_args(agent, _action, victim, victim_nodeid, device_args, port_map, host_arg); crm_debug("Preparing '%s' action for %s using agent %s", _action, (victim? 
victim : "no target"), agent); action->agent = strdup(agent); action->action = strdup(_action); if (victim) { action->victim = strdup(victim); } action->timeout = action->remaining_timeout = timeout; action->max_retries = FAILURE_MAX_RETRIES; if (device_args) { char buffer[512]; const char *value = NULL; snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action); value = g_hash_table_lookup(device_args, buffer); if (value) { action->max_retries = atoi(value); } } return action; } static gboolean update_remaining_timeout(stonith_action_t * action) { int diff = time(NULL) - action->initial_start_time; if (action->tries >= action->max_retries) { crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed", action->agent, action->action, action->max_retries); action->remaining_timeout = 0; } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) { /* only set remaining timeout period if there is 30% * or greater of the original timeout period left */ action->remaining_timeout = action->timeout - diff; } else { action->remaining_timeout = 0; } return action->remaining_timeout ? TRUE : FALSE; } static int svc_action_to_errno(svc_action_t *svc_action) { int rv = pcmk_ok; if (svc_action->rc > 0) { /* Try to provide a useful error code based on the fence agent's * error output. */ if (svc_action->rc == PCMK_OCF_TIMEOUT) { rv = -ETIME; } else if (svc_action->stderr_data == NULL) { rv = -ENODATA; } else if (strstr(svc_action->stderr_data, "imed out")) { /* Some agents have their own internal timeouts */ rv = -ETIME; } else if (strstr(svc_action->stderr_data, "Unrecognised action")) { rv = -EOPNOTSUPP; } else { rv = -pcmk_err_generic; } } return rv; } static void stonith_action_async_done(svc_action_t *svc_action) { stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; action->rc = svc_action_to_errno(svc_action); action->output = svc_action->stdout_data; svc_action->stdout_data = NULL; action->error = svc_action->stderr_data; svc_action->stderr_data = NULL; svc_action->params = NULL; crm_debug("Child process %d performing action '%s' exited with rc %d", action->pid, action->action, svc_action->rc); log_action(action, action->pid); if (action->rc != pcmk_ok && update_remaining_timeout(action)) { int rc = internal_stonith_action_execute(action); if (rc == pcmk_ok) { return; } } if (action->done_cb) { action->done_cb(action->pid, action->rc, action->output, action->userdata); } action->svc_action = NULL; // don't remove our caller stonith__destroy_action(action); } static void stonith_action_async_forked(svc_action_t *svc_action) { stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; action->pid = svc_action->pid; action->svc_action = svc_action; if (action->fork_cb) { (action->fork_cb) (svc_action->pid, action->userdata); } crm_trace("Child process %d performing action '%s' successfully forked", action->pid, action->action); } static int internal_stonith_action_execute(stonith_action_t * action) { int rc = -EPROTO; int is_retry = 0; svc_action_t *svc_action = NULL; static int stonith_sequence = 0; char *buffer = NULL; if (!action->tries) { action->initial_start_time = time(NULL); } action->tries++; if (action->tries > 1) { crm_info("Attempt %d to execute %s (%s). 
remaining timeout is %d", action->tries, action->agent, action->action, action->remaining_timeout); is_retry = 1; } if (action->args == NULL || action->agent == NULL) goto fail; buffer = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s", basename(action->agent)); svc_action = services_action_create_generic(buffer, NULL); free(buffer); svc_action->timeout = 1000 * action->remaining_timeout; svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH); svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent), action->action, action->tries); svc_action->agent = strdup(action->agent); svc_action->sequence = stonith_sequence++; svc_action->params = action->args; svc_action->cb_data = (void *) action; svc_action->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action", svc_action->id, svc_action->flags, SVC_ACTION_NON_BLOCKED, "SVC_ACTION_NON_BLOCKED"); /* keep retries from executing out of control and free previous results */ if (is_retry) { free(action->output); action->output = NULL; free(action->error); action->error = NULL; sleep(1); } if (action->async) { /* async */ if(services_action_async_fork_notify(svc_action, &stonith_action_async_done, &stonith_action_async_forked) == FALSE) { services_action_free(svc_action); svc_action = NULL; } else { rc = 0; } } else { /* sync */ if (services_action_sync(svc_action)) { rc = 0; action->rc = svc_action_to_errno(svc_action); action->output = svc_action->stdout_data; svc_action->stdout_data = NULL; action->error = svc_action->stderr_data; svc_action->stderr_data = NULL; } else { action->rc = -ECONNABORTED; rc = action->rc; } svc_action->params = NULL; services_action_free(svc_action); } fail: return rc; } /*! * \internal * \brief Kick off execution of an async stonith action * * \param[in,out] action Action to be executed * \param[in,out] userdata Datapointer to be passed to callbacks * \param[in] done Callback to notify action has failed/succeeded * \param[in] fork_callback Callback to notify successful fork of child * * \return pcmk_ok if ownership of action has been taken, -errno otherwise */ int stonith_action_execute_async(stonith_action_t * action, void *userdata, void (*done) (GPid pid, int rc, const char *output, gpointer user_data), void (*fork_cb) (GPid pid, gpointer user_data)) { if (!action) { return -EINVAL; } action->userdata = userdata; action->done_cb = done; action->fork_cb = fork_cb; action->async = 1; return internal_stonith_action_execute(action); } /*! 
* \internal * \brief Execute a stonith action * * \param[in,out] action Action to execute * * \return pcmk_ok on success, -errno otherwise */ int stonith__execute(stonith_action_t *action) { int rc = pcmk_ok; CRM_CHECK(action != NULL, return -EINVAL); // Keep trying until success, max retries, or timeout do { rc = internal_stonith_action_execute(action); } while ((rc != pcmk_ok) && update_remaining_timeout(action)); return rc; } static int stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace, stonith_key_value_t ** devices, int timeout) { int count = 0; enum stonith_namespace ns = stonith_text2namespace(namespace); if (devices == NULL) { crm_err("Parameter error: stonith_api_device_list"); return -EFAULT; } #if HAVE_STONITH_STONITH_H // Include Linux-HA agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_lha)) { count += stonith__list_lha_agents(devices); } #endif // Include Red Hat agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) { count += stonith__list_rhcs_agents(devices); } return count; } static int stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, const char *namespace, char **output, int timeout) { /* By executing meta-data directly, we can get it from stonith_admin when * the cluster is not running, which is important for higher-level tools. */ enum stonith_namespace ns = stonith_get_namespace(agent, namespace); crm_trace("Looking up metadata for %s agent %s", stonith_namespace2text(ns), agent); switch (ns) { case st_namespace_rhcs: return stonith__rhcs_metadata(agent, timeout, output); #if HAVE_STONITH_STONITH_H case st_namespace_lha: return stonith__lha_metadata(agent, timeout, output); #endif default: crm_err("Can't get fence agent '%s' meta-data: No such agent", agent); break; } return -ENODEV; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObjectPtr xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, F_STONITH_TARGET, target); crm_xml_add(data, F_STONITH_ACTION, "off"); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = xpath_search(output, "//@agent"); if (xpathObj) { max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { xmlChar *match_path = xmlGetNodePath(match); crm_info("%s[%d] = %s", "//@agent", lpc, match_path); free(match_path); *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); } } freeXpathObject(xpathObj); } free_xml(output); free_xml(data); return max; } static int stonith_api_call(stonith_t * stonith, int call_options, const char *id, const char *action, const char *victim, int timeout, xmlNode ** output) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, F_STONITH_DEVICE, id); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add(data, F_STONITH_TARGET, victim); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout); free_xml(data); return rc; } static int stonith_api_list(stonith_t * stonith, int 
call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = crm_element_value(output, "st_output"); if (list_str) { *list_info = strdup(list_str); } } if (output) { free_xml(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL); } static int stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance, int delay) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, __func__); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance); crm_xml_add_int(data, F_STONITH_DELAY, delay); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); free_xml(data); return rc; } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { return stonith_api_fence_with_delay(stonith, call_options, node, action, timeout, tolerance, 0); } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { stonith__set_call_options(call_options, target, st_opt_manual_ack); return stonith_api_fence(stonith, call_options, target, "off", 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = create_xml_node(NULL, __func__); crm_xml_add(data, F_STONITH_TARGET, node); } stonith__set_call_options(call_options, node, st_opt_sync_call); rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options, timeout); free_xml(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_NEVER); for (op = pcmk__xml_first_child(reply); op != NULL; op = pcmk__xml_next(op)) { stonith_history_t *kvp; long long completed; kvp = calloc(1, sizeof(stonith_history_t)); kvp->target = crm_element_value_copy(op, F_STONITH_TARGET); kvp->action = crm_element_value_copy(op, F_STONITH_ACTION); kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN); kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE); kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME); crm_element_value_ll(op, F_STONITH_DATE, &completed); kvp->completed = (time_t) completed; crm_element_value_int(op, F_STONITH_STATE, &kvp->state); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } free_xml(output); return rc; } void stonith_history_free(stonith_history_t *history) { stonith_history_t *hp, *hp_old; for (hp = history; hp; hp_old = hp, hp = hp->next, free(hp_old)) { free(hp->target); free(hp->action); free(hp->origin); free(hp->delegate); free(hp->client); } } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t 
*a_client = a; const stonith_notify_client_t *b_client = b; if (a_client->delete || b_client->delete) { /* make entries marked for deletion not findable */ return -1; } CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = create_xml_node(NULL, "stonith_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command"); crm_xml_add(op_msg, F_TYPE, T_STONITH_NG); crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_STONITH_OPERATION, op); crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options); if (data != NULL) { add_message_xml(op_msg, F_STONITH_CALLDATA, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->st_private; crm_debug("Disconnecting from the fencer"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } free(native->token); native->token = NULL; stonith->state = stonith_disconnected; return pcmk_ok; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->st_private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { pcmk__intkey_table_remove(private->stonith_op_callback_table, call_id); } return pcmk_ok; } static void invoke_callback(stonith_t * st, int call_id, int rc, void *userdata, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = rc; data.userdata = userdata; callback(st, &data); } static void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc) { stonith_private_t *private = NULL; stonith_callback_client_t *blob = NULL; stonith_callback_client_t local_blob; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->st_private != NULL, return); private = stonith->st_private; local_blob.id = NULL; local_blob.callback = NULL; local_blob.user_data = NULL; local_blob.only_success = FALSE; if (msg != NULL) { crm_element_value_int(msg, F_STONITH_RC, &rc); crm_element_value_int(msg, F_STONITH_CALLID, 
&call_id); } CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result")); blob = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); if (blob != NULL) { local_blob = *blob; blob = NULL; stonith_api_del_callback(stonith, call_id, FALSE); } else { crm_trace("No callback found for call %d", call_id); local_blob.callback = NULL; } if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback); } else if (private->op_callback == NULL && rc != pcmk_ok) { crm_warn("Fencing command failed: %s", pcmk_strerror(rc)); crm_log_xml_debug(msg, "Failed fence update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_callback(stonith, call_id, rc, NULL, private->op_callback); } crm_trace("OP callback activated."); } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = calloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->st_private; callback = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->st_private; blob.stonith = st; blob.xml = string2xml(buffer); if (blob.xml == NULL) { crm_warn("Received malformed message from fencer: %s", buffer); return 0; } /* do callbacks */ type = crm_element_value(blob.xml, F_TYPE); crm_trace("Activating %s callbacks...", type); if (pcmk__str_eq(type, T_STONITH_NG, pcmk__str_casei)) { stonith_perform_callback(st, blob.xml, 0, 0); } else if (pcmk__str_eq(type, T_STONITH_NOTIFY, pcmk__str_casei)) { foreach_notify_entry(private, stonith_send_notification, &blob); } else if (pcmk__str_eq(type, T_STONITH_TIMEOUT_VALUE, pcmk__str_casei)) { int call_id = 0; int timeout = 0; crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout); crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } free_xml(blob.xml); return 1; } static int 
stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = NULL; const char *display_name = name? name : "client"; struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; CRM_CHECK(stonith != NULL, return -EINVAL); native = stonith->st_private; CRM_ASSERT(native != NULL); crm_debug("Attempting fencer connection by %s with%s mainloop", display_name, (stonith_fd? "out" : "")); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *stonith_fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_ipc_close(native->ipc); crm_ipc_destroy(native->ipc); native->ipc = NULL; } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { rc = -ENOTCONN; } else { xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "stonith_command"); crm_xml_add(hello, F_TYPE, T_STONITH_NG); crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_STONITH_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_debug("Couldn't register with the fencer: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); rc = -ECOMM; } else if (reply == NULL) { crm_debug("Couldn't register with the fencer: no reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION); native->token = crm_element_value_copy(reply, F_STONITH_CLIENTID); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_debug("Couldn't register with the fencer: invalid reply type '%s'", (msg_type? 
msg_type : "(missing)")); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else if (native->token == NULL) { crm_debug("Couldn't register with the fencer: no token in reply"); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else { #if HAVE_MSGFROMIPC_TIMEOUT stonith->call_timeout = PCMK__IPC_TIMEOUT; #endif crm_debug("Connection to fencer by %s succeeded (registration token: %s)", display_name, native->token); rc = pcmk_ok; } } free_xml(reply); free_xml(hello); } if (rc != pcmk_ok) { crm_debug("Connection attempt to fencer by %s failed: %s " CRM_XS " rc=%d", display_name, pcmk_strerror(rc), rc); stonith->cmds->disconnect(stonith); } return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = create_xml_node(NULL, __func__); stonith_private_t *native = stonith->st_private; if (stonith->state != stonith_disconnected) { crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY); if (enabled) { crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } else { rc = pcmk_ok; } } free_xml(notify_msg); return rc; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->st_private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static int stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; crm_debug("Removing callback for %s events", event); private = stonith->st_private; new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; if (private->notify_refcnt) { list_client->delete = TRUE; private->notify_deletes = TRUE; } else { private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); } crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); 
CRM_CHECK(stonith->st_private != NULL, return -EINVAL); private = stonith->st_private; if (call_id == 0) { private->op_callback = callback; } else if (call_id < 0) { if (!(options & st_opt_report_only_success)) { crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); invoke_callback(stonith, call_id, call_id, user_data, callback); } else { crm_warn("Fencer call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = calloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } pcmk__intkey_table_insert(private->stonith_op_callback_table, call_id, blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, crm_str(blob->id)); } void stonith_dump_pending_callbacks(stonith_t * stonith) { stonith_private_t *private = stonith->st_private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } /* */ static stonith_event_t * xml_to_event(xmlNode * msg) { stonith_event_t *event = calloc(1, sizeof(stonith_event_t)); const char *ntype = crm_element_value(msg, F_SUBTYPE); char *data_addr = crm_strdup_printf("//%s", ntype); xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG); crm_log_xml_trace(msg, "stonith_notify"); crm_element_value_int(msg, F_STONITH_RC, &(event->result)); if (pcmk__str_eq(ntype, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) { event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION); if (data) { event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN); event->action = crm_element_value_copy(data, F_STONITH_ACTION); event->target = crm_element_value_copy(data, F_STONITH_TARGET); event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE); event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID); event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME); event->device = crm_element_value_copy(data, F_STONITH_DEVICE); } else { crm_err("No data for %s event", ntype); crm_log_xml_notice(msg, "BadEvent"); } } free(data_addr); return event; } static void event_free(stonith_event_t * event) { free(event->id); free(event->type); free(event->message); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(blob->xml, F_SUBTYPE); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->delete) { crm_trace("Skipping callback - marked for deletion"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (!pcmk__str_eq(entry->event, event, 
pcmk__str_casei)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml); crm_trace("Invoking callback for %p/%s event...", entry, event); entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } /*! * \internal * \brief Create and send an API request * * \param[in] stonith Stonith connection * \param[in] op API operation to request * \param[in] data Data to attach to request * \param[out] output_data If not NULL, will be set to reply if synchronous * \param[in] call_options Bitmask of stonith_call_options to use * \param[in] timeout Error if not completed within this many seconds * * \return pcmk_ok (for synchronous requests) or positive call ID * (for asynchronous requests) on success, -errno otherwise */ static int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = NULL; CRM_ASSERT(stonith && stonith->st_private && op); native = stonith->st_private; if (output_data != NULL) { *output_data = NULL; } if ((stonith->state == stonith_disconnected) || (native->token == NULL)) { return -ENOTCONN; } /* Increment the call ID, which must be positive to avoid conflicting with * error codes. This shouldn't be a problem unless the client mucked with * it or the counter wrapped around. */ stonith->call_id++; if (stonith->call_id < 1) { stonith->call_id = 1; } op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout); crm_trace("Sending %s message to fencer with timeout %ds", op, timeout); if (data) { const char *delay_s = crm_element_value(data, F_STONITH_DELAY); if (delay_s) { crm_xml_add(op_msg, F_STONITH_DELAY, delay_s); } } { enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; if (call_options & st_opt_sync_call) { pcmk__set_ipc_flags(ipc_flags, "stonith command", crm_ipc_client_response); } rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); } free_xml(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); free_xml(op_reply); return stonith->call_id; } rc = pcmk_ok; crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id); if (reply_id == stonith->call_id) { crm_trace("Synchronous reply %d received", reply_id); if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) { rc = -ENOMSG; } if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); free_xml(op_reply); rc = -ENOMSG; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); free_xml(op_reply); rc = -ENOMSG; } done: if (crm_ipc_connected(native->ipc) == FALSE) { crm_err("Fencer disconnected"); free(native->token); native->token = NULL; stonith->state = stonith_disconnected; } free_xml(op_reply); return rc; } /* Not used with mainloop */ bool 
stonith_dispatch(stonith_t * st) { gboolean stay_connected = TRUE; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->st_private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), st); } if (crm_ipc_connected(private->ipc) == FALSE) { crm_err("Connection closed"); stay_connected = FALSE; } } return stay_connected; } static int stonith_api_free(stonith_t * stonith) { int rc = pcmk_ok; crm_trace("Destroying %p", stonith); if (stonith->state != stonith_disconnected) { crm_trace("Disconnecting %p first", stonith); rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->st_private; crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); g_hash_table_destroy(private->stonith_op_callback_table); crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); g_list_free_full(private->notify_list, free); free(stonith->st_private); free(stonith->cmds); free(stonith); } else { crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); } return rc; } void stonith_api_delete(stonith_t * stonith) { crm_trace("Destroying %p", stonith); if(stonith) { stonith->cmds->free(stonith); } } static int stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, stonith_key_value_t *params, int timeout, char **output, char **error_output) { /* Validation should be done directly via the agent, so we can get it from * stonith_admin when the cluster is not running, which is important for * higher-level tools. */ int rc = pcmk_ok; /* Use a dummy node name in case the agent requires a target. We assume the * actual target doesn't matter for validation purposes (if in practice, * that is incorrect, we will need to allow the caller to pass the target). 
*/ const char *target = "node1"; const char *host_arg = NULL; GHashTable *params_table = pcmk__strkey_table(free, free); // Convert parameter list to a hash table for (; params; params = params->next) { if (pcmk__str_eq(params->key, PCMK_STONITH_HOST_ARGUMENT, pcmk__str_casei)) { host_arg = params->value; } if (!pcmk_stonith_param(params->key)) { g_hash_table_insert(params_table, strdup(params->key), strdup(params->value)); } } #if SUPPORT_CIBSECRETS rc = pcmk__substitute_secrets(rsc_id, params_table); if (rc != pcmk_rc_ok) { crm_warn("Could not replace secret parameters for validation of %s: %s", agent, pcmk_rc_str(rc)); // rc is standard return value, don't return it in this function } #endif if (output) { *output = NULL; } if (error_output) { *error_output = NULL; } switch (stonith_get_namespace(agent, namespace_s)) { case st_namespace_rhcs: rc = stonith__rhcs_validate(st, call_options, target, agent, params_table, host_arg, timeout, output, error_output); break; #if HAVE_STONITH_STONITH_H case st_namespace_lha: rc = stonith__lha_validate(st, call_options, target, agent, params_table, timeout, output, error_output); break; #endif default: rc = -EINVAL; errno = EINVAL; crm_perror(LOG_ERR, "Agent %s not found or does not support validation", agent); break; } g_hash_table_destroy(params_table); return rc; } stonith_t * stonith_api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); if (new_stonith == NULL) { return NULL; } private = calloc(1, sizeof(stonith_private_t)); if (private == NULL) { free(new_stonith); return NULL; } new_stonith->st_private = private; private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); private->notify_list = NULL; private->notify_refcnt = 0; private->notify_deletes = FALSE; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); if (new_stonith->cmds == NULL) { free(new_stonith->st_private); free(new_stonith); return NULL; } /* *INDENT-OFF* */ new_stonith->cmds->free = stonith_api_free; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->fence_with_delay = stonith_api_fence_with_delay; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->remove_level_full = stonith_api_remove_level_full; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->register_level_full = stonith_api_register_level_full; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; new_stonith->cmds->validate = stonith_api_validate; /* *INDENT-ON* */ return new_stonith; } /*! 
* \brief Make a blocking connection attempt to the fencer * * \param[in,out] st Fencer API object * \param[in] name Client name to use with fencer * \param[in] max_attempts Return error if this many attempts fail * * \return pcmk_ok on success, result of last attempt otherwise */ int stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts) { int rc = -EINVAL; // if max_attempts is not positive for (int attempt = 1; attempt <= max_attempts; attempt++) { rc = st->cmds->connect(st, name, NULL); if (rc == pcmk_ok) { return pcmk_ok; } else if (attempt < max_attempts) { crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s " CRM_XS " rc=%d", attempt, max_attempts, pcmk_strerror(rc), rc); sleep(2); } } crm_notice("Could not connect to fencer: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); return rc; } stonith_key_value_t * stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value) { stonith_key_value_t *p, *end; p = calloc(1, sizeof(stonith_key_value_t)); if (key) { p->key = strdup(key); } if (value) { p->value = strdup(value); } end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values) { stonith_key_value_t *p; while (head) { p = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = p; } } #define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON) #define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __func__, args) int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off) { int rc = pcmk_ok; stonith_t *st = stonith_api_new(); const char *action = off? "off" : "reboot"; api_log_open(); if (st == NULL) { api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s", action, nodeid, uname); return -EPROTO; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); int opts = 0; stonith__set_call_options(opts, name, st_opt_sync_call|st_opt_allow_suicide); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->fence(st, opts, name, action, timeout, 0); free(name); if (rc != pcmk_ok) { api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action); } } stonith_api_delete(st); return rc; } time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) { int rc = pcmk_ok; time_t when = 0; stonith_t *st = stonith_api_new(); stonith_history_t *history = NULL, *hp = NULL; if (st == NULL) { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: " "API initialization failed", nodeid, uname); return when; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc); } else { int entries = 0; int progress = 0; int completed = 0; int opts = 0; char *name = (uname == NULL)? 
pcmk__itoa(nodeid) : strdup(uname); stonith__set_call_options(opts, name, st_opt_sync_call); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->history(st, opts, name, &history, 120); free(name); for (hp = history; hp; hp = hp->next) { entries++; if (in_progress) { progress++; if (hp->state != st_done && hp->state != st_failed) { when = time(NULL); } } else if (hp->state == st_done) { completed++; if (hp->completed > when) { when = hp->completed; } } } stonith_history_free(history); if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc); } } stonith_api_delete(st); if(when) { api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when); } return when; } bool stonith_agent_exists(const char *agent, int timeout) { stonith_t *st = NULL; stonith_key_value_t *devices = NULL; stonith_key_value_t *dIter = NULL; bool rc = FALSE; if (agent == NULL) { return rc; } st = stonith_api_new(); if (st == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return FALSE; } st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout == 0 ? 120 : timeout); for (dIter = devices; dIter != NULL; dIter = dIter->next) { if (pcmk__str_eq(dIter->value, agent, pcmk__str_none)) { rc = TRUE; break; } } stonith_key_value_freeall(devices, 1, 1); stonith_api_delete(st); return rc; } const char * stonith_action_str(const char *action) { if (action == NULL) { return "fencing"; } else if (!strcmp(action, "on")) { return "unfencing"; } else if (!strcmp(action, "off")) { return "turning off"; } else { return action; } } /*! * \internal * \brief Parse a target name from one line of a target list string * * \param[in] line One line of a target list string * \param[in] len String length of line * \param[in,out] output List to add newly allocated target name to */ static void parse_list_line(const char *line, int len, GList **output) { size_t i = 0; size_t entry_start = 0; /* Skip complaints about additional parameters device doesn't understand * * @TODO Document or eliminate the implied restriction of target names */ if (strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping list output line: %s", line); return; } // Process line content, character by character for (i = 0; i <= len; i++) { if (isspace(line[i]) || (line[i] == ',') || (line[i] == ';') || (line[i] == '\0')) { // We've found a separator (i.e. the end of an entry) int rc = 0; char *entry = NULL; if (i == entry_start) { // Skip leading and sequential separators entry_start = i + 1; continue; } entry = calloc(i - entry_start + 1, sizeof(char)); CRM_ASSERT(entry != NULL); /* Read entry, stopping at first separator * * @TODO Document or eliminate these character restrictions */ rc = sscanf(line + entry_start, "%[a-zA-Z0-9_-.]", entry); if (rc != 1) { crm_warn("Could not parse list output entry: %s " CRM_XS " entry_start=%d position=%d", line + entry_start, entry_start, i); free(entry); } else if (pcmk__strcase_any_of(entry, "on", "off", NULL)) { /* Some agents print the target status in the list output, * though none are known now (the separate list-status command * is used for this, but it can also print "UNKNOWN"). To handle * this possibility, skip such entries.
* * @TODO Document or eliminate the implied restriction of target * names. */ free(entry); } else { // We have a valid entry *output = g_list_append(*output, entry); } entry_start = i + 1; } } } /*! * \internal * \brief Parse a list of targets from a string * * \param[in] list_output Target list as a string * * \return List of target names * \note The target list string format is flexible, to allow for user-specified * lists such pcmk_host_list and the output of an agent's list action * (whether direct or via the API, which escapes newlines). There may be * multiple lines, separated by either a newline or an escaped newline * (backslash n). Each line may have one or more target names, separated * by any combination of whitespace, commas, and semi-colons. Lines * containing "invalid" or "variable" will be ignored entirely. Target * names "on" or "off" (case-insensitive) will be ignored. Target names * may contain only alphanumeric characters, underbars (_), dashes (-), * and dots (.) (if any other character occurs in the name, it and all * subsequent characters in the name will be ignored). * \note The caller is responsible for freeing the result with * g_list_free_full(result, free). */ GList * stonith__parse_targets(const char *target_spec) { GList *targets = NULL; if (target_spec != NULL) { size_t out_len = strlen(target_spec); size_t line_start = 0; // Starting index of line being processed for (size_t i = 0; i <= out_len; ++i) { if ((target_spec[i] == '\n') || (target_spec[i] == '\0') || ((target_spec[i] == '\\') && (target_spec[i + 1] == 'n'))) { // We've reached the end of one line of output int len = i - line_start; if (len > 0) { char *line = strndup(target_spec + line_start, len); line[len] = '\0'; // Because it might be a newline parse_list_line(line, len, &targets); free(line); } if (target_spec[i] == '\\') { ++i; // backslash-n takes up two positions } line_start = i + 1; } } } return targets; } /*! * \internal * \brief Determine if a later stonith event succeeded. * * \note Before calling this function, use stonith__sort_history() to sort the * top_history argument. */ gboolean stonith__later_succeeded(stonith_history_t *event, stonith_history_t *top_history) { gboolean ret = FALSE; for (stonith_history_t *prev_hp = top_history; prev_hp; prev_hp = prev_hp->next) { if (prev_hp == event) { break; } if ((prev_hp->state == st_done) && pcmk__str_eq(event->target, prev_hp->target, pcmk__str_casei) && pcmk__str_eq(event->action, prev_hp->action, pcmk__str_casei) && pcmk__str_eq(event->delegate, prev_hp->delegate, pcmk__str_casei) && (event->completed < prev_hp->completed)) { ret = TRUE; break; } } return ret; } /*! 
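 *
 * An illustrative sketch of how stonith__sort_history() below and
 * stonith__later_succeeded() above are typically combined, assuming a
 * history list previously obtained via st->cmds->history():
 *
 *     history = stonith__sort_history(history);
 *     for (stonith_history_t *hp = history; hp != NULL; hp = hp->next) {
 *         if ((hp->state == st_failed)
 *             && stonith__later_succeeded(hp, history)) {
 *             // This failure was superseded by a more recent successful
 *             // attempt on the same target and action
 *         }
 *     }
 *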
* \internal * \brief Sort a stonith history list * * Completed actions are ordered by completion time, most recent first; * pending actions, which lack a completed time stamp, are gathered at the top. * * \param[in] history List of stonith actions * */ stonith_history_t * stonith__sort_history(stonith_history_t *history) { stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp; for (hp = history; hp; ) { tmp = hp->next; if ((hp->state == st_done) || (hp->state == st_failed)) { /* sort into new */ if ((!new) || (hp->completed > new->completed)) { hp->next = new; new = hp; } else { np = new; do { if ((!np->next) || (hp->completed > np->next->completed)) { hp->next = np->next; np->next = hp; break; } np = np->next; } while (1); } } else { /* put into pending */ hp->next = pending; pending = hp; } hp = tmp; } /* pending actions don't have a completed-stamp so make them go front */ if (pending) { stonith_history_t *last_pending = pending; while (last_pending->next) { last_pending = last_pending->next; } last_pending->next = new; new = pending; } return new; } /*! * \brief Return string equivalent of an operation state value * * \param[in] state Fencing operation state value * * \return Human-friendly string equivalent of state */ const char * stonith_op_state_str(enum op_state state) { switch (state) { case st_query: return "querying"; case st_exec: return "executing"; case st_done: return "completed"; case st_duplicate: return "duplicate"; case st_failed: return "failed"; } return "unknown"; } stonith_history_t * stonith__first_matching_event(stonith_history_t *history, bool (*matching_fn)(stonith_history_t *, void *), void *user_data) { for (stonith_history_t *hp = history; hp; hp = hp->next) { if (matching_fn(hp, user_data)) { return hp; } } return NULL; } bool stonith__event_state_pending(stonith_history_t *history, void *user_data) { return history->state != st_failed && history->state != st_done; } bool stonith__event_state_eq(stonith_history_t *history, void *user_data) { return history->state == GPOINTER_TO_INT(user_data); } bool stonith__event_state_neq(stonith_history_t *history, void *user_data) { return history->state != GPOINTER_TO_INT(user_data); } void stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name, xmlNode *metadata) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; CRM_CHECK((device_flags != NULL) && (metadata != NULL), return); xpath = xpath_search(metadata, "//parameter"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *parameter = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if (match == NULL) { continue; } parameter = crm_element_value(match, "name"); if (pcmk__str_eq(parameter, "plug", pcmk__str_casei)) { stonith__set_device_flags(*device_flags, device_name, st_device_supports_parameter_plug); } else if (pcmk__str_eq(parameter, "port", pcmk__str_casei)) { stonith__set_device_flags(*device_flags, device_name, st_device_supports_parameter_port); } } freeXpathObject(xpath); } // Deprecated functions kept only for backward API compatibility #include const char * get_stonith_provider(const char *agent, const char *provider) { return stonith_namespace2text(stonith_get_namespace(agent, provider)); } // End deprecated API diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index 8d660163fe..5b08957050 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,2103 +1,2108 @@ /* * Copyright 2012-2021 the Pacemaker project contributors * *
The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // uint32_t, uint64_t #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif #include #include #include #include #include #define MAX_TLS_RECV_WAIT 10000 CRM_TRACE_INIT_DATA(lrmd); static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); #ifdef HAVE_GNUTLS_GNUTLS_H # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ gnutls_psk_client_credentials_t psk_cred_s; int lrmd_tls_set_key(gnutls_datum_t * key); static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; static void lrmd_tls_connection_destroy(gpointer userdata); #endif typedef struct lrmd_private_s { uint64_t type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; pcmk__remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; #ifdef HAVE_GNUTLS_GNUTLS_H char *server; int port; gnutls_psk_client_credentials_t psk_cred_c; /* while the async connection is occurring, this is the id * of the connection timeout timer. */ int async_timer; int sock; /* since tls requires a round trip across the network for a * request/reply, there are times where we just want to be able * to send a request from the client and not wait around (or even care * about) what the reply is. */ int expected_late_replies; GList *pending_notify; crm_trigger_t *process_notify; #endif lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; char *peer_version; } lrmd_private_t; static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = calloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = calloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } /*! * Create a new lrmd_event_data_t object * * \param[in] rsc_id ID of resource involved in event * \param[in] task Action name * \param[in] interval_ms Action interval * * \return Newly allocated and initialized lrmd_event_data_t * \note This functions asserts on memory errors, so the return value is * guaranteed to be non-NULL. 
The caller is responsible for freeing the * result with lrmd_free_event(). */ lrmd_event_data_t * lrmd_new_event(const char *rsc_id, const char *task, guint interval_ms) { lrmd_event_data_t *event = calloc(1, sizeof(lrmd_event_data_t)); CRM_ASSERT(event != NULL); if (rsc_id != NULL) { event->rsc_id = strdup(rsc_id); CRM_ASSERT(event->rsc_id != NULL); } if (task != NULL) { event->op_type = strdup(task); CRM_ASSERT(event->op_type != NULL); } event->interval_ms = interval_ms; return event; } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = calloc(1, sizeof(lrmd_event_data_t)); /* This will get all the int values. * we just have to be careful not to leave any * dangling pointers to strings. */ memcpy(copy, event, sizeof(lrmd_event_data_t)); copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL; copy->op_type = event->op_type ? strdup(event->op_type) : NULL; copy->user_data = event->user_data ? strdup(event->user_data) : NULL; copy->output = event->output ? strdup(event->output) : NULL; copy->exit_reason = event->exit_reason ? strdup(event->exit_reason) : NULL; copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL; copy->params = pcmk__str_table_dup(event->params); return copy; } void lrmd_free_event(lrmd_event_data_t * event) { if (!event) { return; } /* free gives me grief if i try to cast */ free((char *)event->rsc_id); free((char *)event->op_type); free((char *)event->user_data); free((char *)event->output); free((char *)event->exit_reason); free((char *)event->remote_nodename); if (event->params) { g_hash_table_destroy(event->params); } free(event); } static int lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg) { const char *type; const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION); lrmd_private_t *native = lrmd->lrmd_private; lrmd_event_data_t event = { 0, }; if (proxy_session != NULL) { /* this is proxy business */ lrmd_internal_proxy_dispatch(lrmd, msg); return 1; } else if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return 1; } event.remote_nodename = native->remote_nodename; type = crm_element_value(msg, F_LRMD_OPERATION); crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id); event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID); if (pcmk__str_eq(type, LRMD_OP_RSC_REG, pcmk__str_none)) { event.type = lrmd_event_register; } else if (pcmk__str_eq(type, LRMD_OP_RSC_UNREG, pcmk__str_none)) { event.type = lrmd_event_unregister; } else if (pcmk__str_eq(type, LRMD_OP_RSC_EXEC, pcmk__str_none)) { time_t epoch = 0; crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout); crm_element_value_ms(msg, F_LRMD_RSC_INTERVAL, &event.interval_ms); crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay); crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc); crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status); crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted); crm_element_value_epoch(msg, F_LRMD_RSC_RUN_TIME, &epoch); event.t_run = (unsigned int) epoch; crm_element_value_epoch(msg, F_LRMD_RSC_RCCHANGE_TIME, &epoch); event.t_rcchange = (unsigned int) epoch; crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time); crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time); event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION); event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR); event.output = crm_element_value(msg, 
F_LRMD_RSC_OUTPUT); event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON); event.type = lrmd_event_exec_complete; event.params = xml2list(msg); } else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) { event.type = lrmd_event_new_client; } else if (pcmk__str_eq(type, LRMD_OP_POKE, pcmk__str_none)) { event.type = lrmd_event_poke; } else { return 1; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } return 1; } static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *msg; int rc; if (!native->callback) { /* no callback set */ return 1; } msg = string2xml(buffer); rc = lrmd_dispatch_internal(lrmd, msg); free_xml(msg); return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_free_xml(gpointer userdata) { free_xml((xmlNode *) userdata); } static int lrmd_tls_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote->tls_session) { return TRUE; } return FALSE; } static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *xml = NULL; int rc = pcmk_rc_ok; if (lrmd_tls_connected(lrmd) == FALSE) { crm_trace("TLS dispatch triggered after disconnect"); return 0; } crm_trace("TLS dispatch triggered"); /* First check if there are any pending notifies to process that came * while we were waiting for replies earlier. */ if (native->pending_notify) { GList *iter = NULL; crm_trace("Processing pending notifies"); for (iter = native->pending_notify; iter; iter = iter->next) { lrmd_dispatch_internal(lrmd, iter->data); } g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } /* Next read the current buffer and see if there are any messages to handle. */ switch (pcmk__remote_ready(native->remote, 0)) { case pcmk_rc_ok: rc = pcmk__read_remote_message(native->remote, -1); xml = pcmk__remote_message_xml(native->remote); break; case ETIME: // Nothing to read, check if a full message is already in buffer xml = pcmk__remote_message_xml(native->remote); break; default: rc = ENOTCONN; break; } while (xml) { const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { lrmd_dispatch_internal(lrmd, xml); } else if (pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { int reply_id = 0; crm_element_value_int(xml, F_LRMD_CALLID, &reply_id); /* if this happens, we want to know about it */ crm_err("Got outdated Pacemaker Remote reply %d", reply_id); } } free_xml(xml); xml = pcmk__remote_message_xml(native->remote); } if (rc == ENOTCONN) { crm_info("Lost %s executor connection while reading data", (native->remote_nodename? 
native->remote_nodename : "local")); lrmd_tls_disconnect(lrmd); return 0; } return 1; } #endif /* Not used with mainloop */ int lrmd_poll(lrmd_t * lrmd, int timeout) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_ready(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: if (native->pending_notify) { return 1; } else { int rc = pcmk__remote_ready(native->remote, 0); switch (rc) { case pcmk_rc_ok: return 1; case ETIME: return 0; default: return pcmk_rc2legacy(rc); } } #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; CRM_ASSERT(lrmd != NULL); private = lrmd->lrmd_private; switch (private->type) { case pcmk__client_ipc: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: lrmd_tls_dispatch(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode *data, int timeout, enum lrmd_call_options options) { xmlNode *op_msg = create_xml_node(NULL, "lrmd_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command"); crm_xml_add(op_msg, F_TYPE, T_LRMD); crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_LRMD_OPERATION, op); crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options); if (data != NULL) { add_message_xml(op_msg, F_LRMD_CALLDATA, data); } crm_trace("Created executor %s command with call options %.8lx (%d)", op, (long)options, options); return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; crm_info("IPC connection destroyed"); /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); } if (native->psk_cred_c) { gnutls_psk_free_client_credentials(native->psk_cred_c); } if (native->sock) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } free(native->remote->buffer); native->remote->buffer = NULL; native->source = 0; native->sock = 0; native->psk_cred_c = NULL; native->remote->tls_session = NULL; native->sock = 0; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); 
} return; } // \return Standard Pacemaker return code int lrmd_tls_send_msg(pcmk__remote_t *session, xmlNode *msg, uint32_t id, const char *msg_type) { crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type); return pcmk__remote_send_xml(session, msg); } static xmlNode * lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int *disconnected) { lrmd_private_t *native = lrmd->lrmd_private; xmlNode *xml = NULL; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) { total_timeout = MAX_TLS_RECV_WAIT; } while (!xml) { xml = pcmk__remote_message_xml(native->remote); if (!xml) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = total_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { crm_err("Never received the expected reply during the timeout period, disconnecting."); *disconnected = TRUE; return NULL; } if (pcmk__read_remote_message(native->remote, remaining_timeout) == ENOTCONN) { *disconnected = TRUE; } else { *disconnected = FALSE; } xml = pcmk__remote_message_xml(native->remote); if (!xml) { crm_err("Unable to receive expected reply, disconnecting."); *disconnected = TRUE; return NULL; } else if (*disconnected) { return NULL; } } CRM_ASSERT(xml != NULL); crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); free_xml(xml); xml = NULL; } else if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, xml); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } xml = NULL; } else if (!pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); free_xml(xml); xml = NULL; } else if (reply_id != expected_reply_id) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); } free_xml(xml); xml = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return xml; } static int lrmd_tls_send(lrmd_t * lrmd, xmlNode * msg) { int rc = 0; lrmd_private_t *native = lrmd->lrmd_private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd_tls_send_msg(native->remote, msg, global_remote_msg_id, "request"); if (rc != pcmk_rc_ok) { crm_err("Disconnecting because TLS message could not be sent to " "Pacemaker Remote: %s", pcmk_rc_str(rc)); lrmd_tls_disconnect(lrmd); return -ENOTCONN; } return pcmk_ok; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; int disconnected = 0; xmlNode *xml = NULL; if (lrmd_tls_connected(lrmd) == FALSE) { return -1; } rc = lrmd_tls_send(lrmd, msg); if (rc < 0) { return rc; } xml = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, 
&disconnected); if (disconnected) { crm_err("Pacemaker Remote disconnected while waiting for reply to request id %d", global_remote_msg_id); lrmd_tls_disconnect(lrmd); rc = -ENOTCONN; } else if (!xml) { crm_err("Did not receive reply from Pacemaker Remote for request id %d (timeout %dms)", global_remote_msg_id, timeout); rc = -ECOMM; } if (reply) { *reply = xml; } else { free_xml(xml); } return rc; } #endif static int lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = -1; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg) { int rc = -1; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_flags_none, 0, NULL); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_send(lrmd, msg); if (rc == pcmk_ok) { /* we don't want to wait around for the reply, but * since the request/reply protocol needs to behave the same * as libqb, a reply will eventually come later anyway. */ native->expected_late_replies++; } break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_api_is_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_connected(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: return lrmd_tls_connected(lrmd); #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } /*! * \internal * \brief Send a prepared API command to the executor * * \param[in] lrmd Existing connection to the executor * \param[in] op Name of API command to send * \param[in] data Command data XML to add to the sent command * \param[out] output_data If expecting a reply, it will be stored here * \param[in] timeout Timeout in milliseconds (if 0, defaults to * a sensible value per the type of connection, * standard vs. 
pacemaker remote); * also propagated to the command XML * \param[in] call_options Call options to pass to server when sending * \param[in] expect_reply If TRUE, wait for a reply from the server; * must be TRUE for IPC (as opposed to TLS) clients * * \return pcmk_ok on success, -errno on error */ static int lrmd_send_command(lrmd_t *lrmd, const char *op, xmlNode *data, xmlNode **output_data, int timeout, enum lrmd_call_options options, gboolean expect_reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_CHECK(native->token != NULL,; ); crm_trace("Sending %s op to executor", op); op_msg = lrmd_create_op(native->token, op, data, timeout, options); if (op_msg == NULL) { return -EINVAL; } if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); rc = -ECOMM; goto done; } else if(op_reply == NULL) { rc = -ENOMSG; goto done; } rc = pcmk_ok; crm_trace("%s op reply received", op); if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Executor disconnected"); } free_xml(op_msg); free_xml(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); free_xml(data); return rc < 0 ? rc : pcmk_ok; } int remote_proxy_check(lrmd_t * lrmd, GHashTable *hash) { int rc; const char *value; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = create_xml_node(NULL, F_LRMD_OPERATION); crm_xml_add(data, F_LRMD_ORIGIN, __func__); value = g_hash_table_lookup(hash, "stonith-watchdog-timeout"); crm_xml_add(data, F_LRMD_WATCHDOG, value); rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); free_xml(data); return rc < 0 ? 
rc : pcmk_ok; } static int lrmd_handshake(lrmd_t * lrmd, const char *name) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "lrmd_command"); crm_xml_add(hello, F_TYPE, T_LRMD); crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_LRMD_CLIENTNAME, name); crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); /* advertise that we are a proxy provider */ if (native->proxy_callback) { crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true"); } rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the executor API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION); const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); crm_element_value_int(reply, F_LRMD_RC, &rc); if (rc == -EPROTO) { crm_err("Executor protocol version mismatch between client (%s) and server (%s)", LRMD_PROTOCOL_VERSION, version); crm_log_xml_err(reply, "Protocol Error"); } else if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */ rc = pcmk_ok; } } free_xml(reply); free_xml(hello); if (rc != pcmk_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to executor"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_perror(LOG_ERR, "Connection to executor failed"); rc = -ENOTCONN; } } else { native->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the executor API"); rc = -ENOTCONN; } return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static void copy_gnutls_datum(gnutls_datum_t *dest, gnutls_datum_t *source) { dest->data = gnutls_malloc(source->size); CRM_ASSERT(dest->data); memcpy(dest->data, source->data, source->size); dest->size = source->size; } static void clear_gnutls_datum(gnutls_datum_t *datum) { gnutls_free(datum->data); datum->data = NULL; datum->size = 0; } #define KEY_READ_LEN 256 static int set_key(gnutls_datum_t * key, const char *location) { FILE *stream; size_t buf_len = KEY_READ_LEN; static gnutls_datum_t key_cache = { 0, }; static time_t key_cache_updated = 0; if (location == NULL) { return -1; } if (key_cache.data != NULL) { if ((time(NULL) - key_cache_updated) < 60) { copy_gnutls_datum(key, &key_cache); crm_debug("Using cached Pacemaker Remote key"); return 0; } else { clear_gnutls_datum(&key_cache); key_cache_updated = 0; crm_debug("Cleared Pacemaker Remote key 
cache"); } } stream = fopen(location, "r"); if (!stream) { return -1; } key->data = gnutls_malloc(buf_len); key->size = 0; while (!feof(stream)) { int next = fgetc(stream); if (next == EOF) { if (!feof(stream)) { crm_err("Error reading Pacemaker Remote key; copy in memory may be corrupted"); } break; } if (key->size == buf_len) { buf_len = key->size + KEY_READ_LEN; key->data = gnutls_realloc(key->data, buf_len); CRM_ASSERT(key->data); } key->data[key->size++] = (unsigned char) next; } fclose(stream); if (key->size == 0) { clear_gnutls_datum(key); return -1; } if (key_cache.data == NULL) { copy_gnutls_datum(&key_cache, key); key_cache_updated = time(NULL); crm_debug("Cached Pacemaker Remote key"); } return 0; } int lrmd_tls_set_key(gnutls_datum_t * key) { const char *specific_location = getenv("PCMK_authkey_location"); if (set_key(key, specific_location) == 0) { crm_debug("Using custom authkey location %s", specific_location); return pcmk_ok; } else if (specific_location) { crm_err("No valid Pacemaker Remote key found at %s, trying default location", specific_location); } if ((set_key(key, DEFAULT_REMOTE_KEY_LOCATION) != 0) && (set_key(key, ALT_REMOTE_KEY_LOCATION) != 0)) { crm_err("No valid Pacemaker Remote key found at %s", DEFAULT_REMOTE_KEY_LOCATION); return -ENOKEY; } return pcmk_ok; } static void lrmd_gnutls_global_init(void) { static int gnutls_init = 0; if (!gnutls_init) { crm_gnutls_global_init(); } gnutls_init = 1; } #endif static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->lrmd_private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static inline int lrmd__tls_client_handshake(pcmk__remote_t *remote) { return pcmk__tls_client_handshake(remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT); } static void lrmd_tcp_connect_cb(void *userdata, int rc, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; char *name; static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; gnutls_datum_t psk_key = { NULL, 0 }; native->async_timer = 0; if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } /* The TCP connection was successful, so establish the TLS connection. 
* @TODO make this async to avoid blocking code in client */ native->sock = sock; rc = lrmd_tls_set_key(&psk_key); if (rc != 0) { crm_warn("Could not set key for Pacemaker Remote at %s:%d " CRM_XS " rc=%d", native->server, native->port, rc); lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, rc); return; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = pcmk__new_tls_session(sock, GNUTLS_CLIENT, GNUTLS_CRD_PSK, native->psk_cred_c); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -EPROTO); return; } if (lrmd__tls_client_handshake(native->remote) != pcmk_rc_ok) { crm_warn("Disconnecting after TLS handshake with Pacemaker Remote server %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -EKEYREJECTED); return; } crm_info("TLS connection to Pacemaker Remote server %s:%d succeeded", native->server, native->port); name = crm_strdup_printf("pacemaker-remote-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); rc = lrmd_handshake(lrmd, name); free(name); report_async_connection_result(lrmd, rc); return; } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc; int timer_id = 0; lrmd_private_t *native = lrmd->lrmd_private; lrmd_gnutls_global_init(); native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, timeout, &timer_id, &(native->sock), lrmd, lrmd_tcp_connect_cb); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); return -1; } native->async_timer = timer_id; return pcmk_ok; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; int rc; lrmd_private_t *native = lrmd->lrmd_private; gnutls_datum_t psk_key = { NULL, 0 }; lrmd_gnutls_global_init(); native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, 0, NULL, &(native->sock), NULL, NULL); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); return -ENOTCONN; } rc = lrmd_tls_set_key(&psk_key); if (rc < 0) { lrmd_tls_connection_destroy(lrmd); return rc; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = pcmk__new_tls_session(native->sock, GNUTLS_CLIENT, GNUTLS_CRD_PSK, native->psk_cred_c); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); return -EPROTO; } if (lrmd__tls_client_handshake(native->remote) != pcmk_rc_ok) { crm_err("Session creation for %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session);
native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); return -EKEYREJECTED; } crm_info("Client TLS connection established with Pacemaker Remote server %s:%d", native->server, native->port); if (fd) { *fd = native->sock; } else { char *name = crm_strdup_printf("pacemaker-remote-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); free(name); } return pcmk_ok; } #endif static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = lrmd_ipc_connect(lrmd, fd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_connect(lrmd, fd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = 0; lrmd_private_t *native = lrmd->lrmd_private; CRM_CHECK(native && native->callback, return -1); switch (native->type) { case pcmk__client_ipc: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_connect_async(lrmd, timeout); if (rc) { /* connection failed, report rc now */ report_async_connection_result(lrmd, rc); } break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = 0; } if (native->async_timer) { g_source_remove(native->async_timer); native->async_timer = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock) { close(native->sock); native->sock = 0; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } #endif static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; crm_info("Disconnecting %s %s executor connection", pcmk__client_type_str(native->type), (native->remote_nodename? 
native->remote_nodename : "local")); switch (native->type) { case pcmk__client_ipc: lrmd_ipc_disconnect(lrmd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: lrmd_tls_disconnect(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } free(native->token); native->token = NULL; free(native->peer_version); native->peer_version = NULL; return 0; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && (provider == NULL)) { return -EINVAL; } data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_CLASS, class); crm_xml_add(data, F_LRMD_PROVIDER, provider); crm_xml_add(data, F_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } lrmd_rsc_info_t * lrmd_new_rsc_info(const char *rsc_id, const char *standard, const char *provider, const char *type) { lrmd_rsc_info_t *rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); CRM_ASSERT(rsc_info); if (rsc_id) { rsc_info->id = strdup(rsc_id); CRM_ASSERT(rsc_info->id); } if (standard) { rsc_info->standard = strdup(standard); CRM_ASSERT(rsc_info->standard); } if (provider) { rsc_info->provider = strdup(provider); CRM_ASSERT(rsc_info->provider); } if (type) { rsc_info->type = strdup(type); CRM_ASSERT(rsc_info->type); } return rsc_info; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { return lrmd_new_rsc_info(rsc_info->id, rsc_info->standard, rsc_info->provider, rsc_info->type); } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->standard); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE); free_xml(data); if (!output) { return NULL; } class = crm_element_value(output, F_LRMD_CLASS); provider = crm_element_value(output, F_LRMD_PROVIDER); type = crm_element_value(output, F_LRMD_TYPE); if (!class || !type) { free_xml(output); return NULL; } else if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && !provider) { free_xml(output); return NULL; } rsc_info = lrmd_new_rsc_info(rsc_id, class, provider, type); free_xml(output); return rsc_info; } void lrmd_free_op_info(lrmd_op_info_t *op_info) { if (op_info) { free(op_info->rsc_id); free(op_info->action); free(op_info->interval_ms_s); free(op_info->timeout_ms_s); free(op_info); } } static int lrmd_api_get_recurring_ops(lrmd_t 
*lrmd, const char *rsc_id, int timeout_ms, enum lrmd_call_options options, GList **output) { xmlNode *data = NULL; xmlNode *output_xml = NULL; int rc = pcmk_ok; if (output == NULL) { return -EINVAL; } *output = NULL; // Send request if (rsc_id) { data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); } rc = lrmd_send_command(lrmd, LRMD_OP_GET_RECURRING, data, &output_xml, timeout_ms, options, TRUE); if (data) { free_xml(data); } // Process reply if ((rc != pcmk_ok) || (output_xml == NULL)) { return rc; } for (xmlNode *rsc_xml = first_named_child(output_xml, F_LRMD_RSC); - rsc_xml != NULL; rsc_xml = crm_next_same_xml(rsc_xml)) { + (rsc_xml != NULL) && (rc == pcmk_ok); + rsc_xml = crm_next_same_xml(rsc_xml)) { rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (rsc_id == NULL) { crm_err("Could not parse recurring operation information from executor"); continue; } for (xmlNode *op_xml = first_named_child(rsc_xml, T_LRMD_RSC_OP); op_xml != NULL; op_xml = crm_next_same_xml(op_xml)) { lrmd_op_info_t *op_info = calloc(1, sizeof(lrmd_op_info_t)); - CRM_CHECK(op_info != NULL, break); + if (op_info == NULL) { + rc = -ENOMEM; + break; + } op_info->rsc_id = strdup(rsc_id); op_info->action = crm_element_value_copy(op_xml, F_LRMD_RSC_ACTION); op_info->interval_ms_s = crm_element_value_copy(op_xml, F_LRMD_RSC_INTERVAL); op_info->timeout_ms_s = crm_element_value_copy(op_xml, F_LRMD_TIMEOUT); *output = g_list_prepend(*output, op_info); } } free_xml(output_xml); return rc; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->lrmd_private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->lrmd_private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->lrmd_private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_t *stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not get fence agent meta-data: API memory allocation failed"); return -ENOMEM; } rc = stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); if ((rc == pcmk_ok) && (*output == NULL)) { rc = -EIO; } stonith_api->cmds->free(stonith_api); return rc; } static int lrmd_api_get_metadata(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options) { return lrmd->cmds->get_metadata_params(lrmd, standard, provider, type, output, options, NULL); } static int lrmd_api_get_metadata_params(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options, lrmd_key_value_t *params) { svc_action_t *action = NULL; GHashTable *params_table = NULL; if (!standard || !type) { lrmd_key_value_freeall(params); return -EINVAL; } if (pcmk__str_eq(standard, 
PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { lrmd_key_value_freeall(params); return stonith_get_metadata(provider, type, output); } params_table = pcmk__strkey_table(free, free); for (const lrmd_key_value_t *param = params; param; param = param->next) { g_hash_table_insert(params_table, strdup(param->key), strdup(param->value)); } action = resources_action_create(type, standard, provider, type, CRMD_ACTION_METADATA, 0, CRMD_METADATA_CALL_TIMEOUT, params_table, 0); lrmd_key_value_freeall(params); if (action == NULL) { crm_err("Unable to retrieve meta-data for %s:%s:%s", standard, provider, type); return -EINVAL; } if (!services_action_sync(action)) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to receive meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_exec(lrmd_t *lrmd, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_ms(data, F_LRMD_RSC_INTERVAL, interval_ms); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } /* timeout is in ms */ static int lrmd_api_exec_alert(lrmd_t *lrmd, const char *alert_id, const char *alert_path, int timeout, lrmd_key_value_t *params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_ALERT); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_ALERT_ID, alert_id); crm_xml_add(data, F_LRMD_ALERT_PATH, alert_path); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_ALERT_EXEC, data, NULL, timeout, lrmd_opt_notify_orig_only, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t *lrmd, const char *rsc_id, const char *action, guint interval_ms) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add_ms(data, F_LRMD_RSC_INTERVAL, interval_ms); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE); free_xml(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_t *stonith_api = stonith_api_new(); stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; if (stonith_api == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return 
-ENOMEM; } stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); stonith_api->cmds->free(stonith_api); for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; int stonith_count = 0; // Initially, whether to include stonith devices if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stonith_count = 1; } else { GList *gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { stonith_count = 1; } } if (stonith_count) { // Now, if stonith devices are included, how many there are stonith_count = list_stonith_agents(resources); if (stonith_count > 0) { rc += stonith_count; } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static int does_provider_have_agent(const char *agent, const char *provider, const char *class) { int found = 0; GList *agents = NULL; GList *gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (pcmk__str_eq(agent, gIter2->data, pcmk__str_casei)) { found = 1; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GList *gIter = NULL; ocf_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, PCMK_RESOURCE_CLASS_OCF)) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GList *gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH); rc++; } g_list_free_full(standards, free); return rc; } lrmd_t * lrmd_api_new(void) { lrmd_t *new_lrmd = NULL; lrmd_private_t *pvt = NULL; new_lrmd = calloc(1, sizeof(lrmd_t)); pvt = calloc(1, sizeof(lrmd_private_t)); pvt->remote = calloc(1, sizeof(pcmk__remote_t)); new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t)); pvt->type = pcmk__client_ipc; new_lrmd->lrmd_private = pvt; new_lrmd->cmds->connect = lrmd_api_connect; new_lrmd->cmds->connect_async = lrmd_api_connect_async; new_lrmd->cmds->is_connected = lrmd_api_is_connected; new_lrmd->cmds->poke_connection = lrmd_api_poke_connection; new_lrmd->cmds->disconnect = lrmd_api_disconnect; new_lrmd->cmds->register_rsc = lrmd_api_register_rsc; new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc; new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info; new_lrmd->cmds->get_recurring_ops = lrmd_api_get_recurring_ops; new_lrmd->cmds->set_callback = lrmd_api_set_callback; new_lrmd->cmds->get_metadata = 
lrmd_api_get_metadata; new_lrmd->cmds->exec = lrmd_api_exec; new_lrmd->cmds->cancel = lrmd_api_cancel; new_lrmd->cmds->list_agents = lrmd_api_list_agents; new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; new_lrmd->cmds->list_standards = lrmd_api_list_standards; new_lrmd->cmds->exec_alert = lrmd_api_exec_alert; new_lrmd->cmds->get_metadata_params = lrmd_api_get_metadata_params; return new_lrmd; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { #ifdef HAVE_GNUTLS_GNUTLS_H lrmd_t *new_lrmd = lrmd_api_new(); lrmd_private_t *native = new_lrmd->lrmd_private; if (!nodename && !server) { lrmd_api_delete(new_lrmd); return NULL; } native->type = pcmk__client_tls; native->remote_nodename = nodename ? strdup(nodename) : strdup(server); native->server = server ? strdup(server) : strdup(nodename); native->port = port; if (native->port == 0) { native->port = crm_default_remote_port(); } return new_lrmd; #else crm_err("Cannot communicate with Pacemaker Remote because GnuTLS is not enabled for this build"); return NULL; #endif } void lrmd_api_delete(lrmd_t * lrmd) { if (!lrmd) { return; } lrmd->cmds->disconnect(lrmd); /* no-op if already disconnected */ free(lrmd->cmds); if (lrmd->lrmd_private) { lrmd_private_t *native = lrmd->lrmd_private; #ifdef HAVE_GNUTLS_GNUTLS_H free(native->server); #endif free(native->remote_nodename); free(native->remote); free(native->token); free(native->peer_version); } free(lrmd->lrmd_private); free(lrmd); } diff --git a/maint/Makefile.am b/maint/Makefile.am index 11e4dd1f8f..cac3ee0088 100644 --- a/maint/Makefile.am +++ b/maint/Makefile.am @@ -1,116 +1,115 @@ # -# Copyright 2019 the Pacemaker project contributors +# Copyright 2019-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. 
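#
# The testcc_* targets below are a header-hygiene check: each one compiles the
# trivial testcc_helper.cc ("int main() { return 0; }") with the C++ compiler
# while force-including exactly one public header via -include, so a header
# that is not C++-clean fails the build. As a rough sketch (the real command
# line is generated by automake, so the flags will differ), one such rule
# behaves like:
#
#   g++ -include "crm/common/acl.h" -c testcc_helper.cc -o testcc_common_acl_h.o
#
# All of the checks can be run together via the `testcc` convenience target
# defined at the end of this file.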
# TESTCC_TARGETS = testcc_common_acl_h testcc_common_ipc_h \ testcc_common_iso8601_h testcc_common_mainloop_h testcc_common_nvpair_h \ testcc_common_results_h testcc_common_util_h testcc_common_xml_h \ testcc_compatibility_h testcc_pengine_common_h testcc_pengine_complex_h \ testcc_pe_types_h testcc_cib_types_h testcc_cib_util_h testcc_crm_h \ testcc_cib_h testcc_cluster_h testcc_lrmd_h testcc_msg_xml_h \ testcc_services_h testcc_stonith_ng_h testcc_pengine_status_h \ testcc_pengine_rules_h testcc_common_logging_h CLEANFILES = $(TESTCC_TARGETS) testcc_helper.cc -EXTRA_SCRIPTS = bumplibs.sh \ - travisci_build_coverity_scan.sh +EXTRA_SCRIPTS = bumplibs.sh EXTRA_PROGRAMS = $(TESTCC_TARGETS) EXTRA_DIST = README testcc_helper.cc: echo "int main() { return 0; }" > $@ nodist_testcc_common_acl_h_SOURCES = testcc_helper.cc testcc_common_acl_h_CXXFLAGS = -include "crm/common/acl.h" nodist_testcc_common_ipc_h_SOURCES = testcc_helper.cc testcc_common_ipc_h_CXXFLAGS = -include "crm/common/ipc.h" nodist_testcc_common_iso8601_h_SOURCES = testcc_helper.cc testcc_common_iso8601_h_CXXFLAGS = -include "crm/common/iso8601.h" nodist_testcc_common_mainloop_h_SOURCES = testcc_helper.cc testcc_common_mainloop_h_CXXFLAGS = -include "crm/common/mainloop.h" nodist_testcc_common_nvpair_h_SOURCES = testcc_helper.cc testcc_common_nvpair_h_CXXFLAGS = -include "crm/common/nvpair.h" nodist_testcc_common_results_h_SOURCES = testcc_helper.cc testcc_common_results_h_CXXFLAGS = -include "crm/common/results.h" nodist_testcc_common_util_h_SOURCES = testcc_helper.cc testcc_common_util_h_CXXFLAGS = -include "crm/common/util.h" nodist_testcc_common_xml_h_SOURCES = testcc_helper.cc testcc_common_xml_h_CXXFLAGS = -include "crm/common/xml.h" nodist_testcc_compatibility_h_SOURCES = testcc_helper.cc testcc_compatibility_h_CXXFLAGS = -include "crm/compatibility.h" nodist_testcc_pengine_common_h_SOURCES = testcc_helper.cc testcc_pengine_common_h_CXXFLAGS = -include "crm/pengine/common.h" nodist_testcc_pengine_complex_h_SOURCES = testcc_helper.cc testcc_pengine_complex_h_CXXFLAGS = -include "crm/pengine/complex.h" nodist_testcc_pe_types_h_SOURCES = testcc_helper.cc testcc_pe_types_h_CXXFLAGS = -include "crm/pengine/pe_types.h" nodist_testcc_cib_types_h_SOURCES = testcc_helper.cc testcc_cib_types_h_CXXFLAGS = -include "crm/cib/cib_types.h" nodist_testcc_cib_util_h_SOURCES = testcc_helper.cc testcc_cib_util_h_CXXFLAGS = -include "crm/cib/util.h" nodist_testcc_common_logging_h_SOURCES = testcc_helper.cc testcc_common_logging_h_CXXFLAGS = -include "crm/common/logging.h" # ones previously in testcc.cc except election.h not packaged nodist_testcc_crm_h_SOURCES = testcc_helper.cc testcc_crm_h_CXXFLAGS = -include "crm/crm.h" nodist_testcc_cib_h_SOURCES = testcc_helper.cc testcc_cib_h_CXXFLAGS = -include "crm/cib.h" nodist_testcc_cluster_h_SOURCES = testcc_helper.cc testcc_cluster_h_CXXFLAGS = -include "crm/cluster.h" nodist_testcc_lrmd_h_SOURCES = testcc_helper.cc testcc_lrmd_h_CXXFLAGS = -include "crm/lrmd.h" nodist_testcc_msg_xml_h_SOURCES = testcc_helper.cc testcc_msg_xml_h_CXXFLAGS = -include "crm/msg_xml.h" nodist_testcc_services_h_SOURCES = testcc_helper.cc testcc_services_h_CXXFLAGS = -include "crm/services.h" nodist_testcc_stonith_ng_h_SOURCES = testcc_helper.cc testcc_stonith_ng_h_CXXFLAGS = -include "crm/stonith-ng.h" nodist_testcc_pengine_status_h_SOURCES = testcc_helper.cc testcc_pengine_status_h_CXXFLAGS = -include "crm/pengine/status.h" nodist_testcc_pengine_rules_h_SOURCES = testcc_helper.cc testcc_pengine_rules_h_CXXFLAGS = 
-include "crm/pengine/rules.h" # c++-headers but not packaged # daemons/controld/controld_membership.h # include/crm/pengine/remote_internal.h # include/crm/common/cmdline_internal.h # include/crm/common/output.h # include/crm/common/ipcs_internal.h # include/crm/common/attrd_internal.h # include/pacemaker.h # include/pcmki/pcmki_output.h # include/pcmki/pcmki_transition.h # include/crm/cluster/election.h # lib/gnu/md5.h # tools/crm_resource_controller.h testcc: $(TESTCC_TARGETS) diff --git a/maint/travisci_build_coverity_scan.sh b/maint/travisci_build_coverity_scan.sh deleted file mode 100644 index f88f13643b..0000000000 --- a/maint/travisci_build_coverity_scan.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/sh -# -# Copyright 2014-2019 the Pacemaker project contributors -# -# The version control history for this file may have further details. -# -# This source code is licensed under the GNU General Public License version 2 -# or later (GPLv2+) WITHOUT ANY WARRANTY. -# - -set -e - -export RED="\033[33;1m" -export NONE="\033[0m" - -if [ -z "$PROJECT_NAME" ]; then - PROJECT_NAME=${TRAVIS_REPO_SLUG} -fi - -# Environment check -echo -e "${RED}Note: PROJECT_NAME and COVERITY_SCAN_TOKEN are available on Project Settings page on scan.coverity.com${NONE}" -[ -z "$PROJECT_NAME" ] && echo "ERROR: PROJECT_NAME must be set" && exit 1 -[ -z "$OWNER_EMAIL" ] && echo "ERROR: OWNER_EMAIL must be set" && exit 1 -[ -z "$COVERITY_SCAN_BRANCH_PATTERN" ] && echo "ERROR: COVERITY_SCAN_BRANCH_PATTERN must be set" && exit 1 -[ -z "$COVERITY_SCAN_BUILD_COMMAND" ] && echo "ERROR: COVERITY_SCAN_BUILD_COMMAND must be set" && exit 1 - -PLATFORM=`uname` -TOOL_ARCHIVE=/tmp/cov-analysis-${PLATFORM}.tgz -TOOL_URL=https://scan.coverity.com/download/${PLATFORM} -TOOL_BASE=/tmp/coverity-scan-analysis -UPLOAD_URL="http://scan5.coverity.com/cgi-bin/upload.py" -SCAN_URL="https://scan.coverity.com" - -# Do not run on pull requests -if [ "${TRAVIS_PULL_REQUEST}" = "true" ]; then - echo -e "${RED}INFO: Skipping Coverity Analysis: branch is a pull request.${NONE}" - exit 0 -fi - -# Verify this branch should run -IS_COVERITY_SCAN_BRANCH=`ruby -e "puts '${TRAVIS_BRANCH}' =~ /\\A$COVERITY_SCAN_BRANCH_PATTERN\\z/ ? 1 : 0"` -if [ "$IS_COVERITY_SCAN_BRANCH" = "1" ]; then - echo -e "${RED}Coverity Scan configured to run on branch ${TRAVIS_BRANCH}${NONE}" -else - echo -e "${RED}Coverity Scan NOT configured to run on branch ${TRAVIS_BRANCH}${NONE}" - exit 0 # Nothing to do, exit with success otherwise the build will be considered failed -fi - -# If COVERITY_SCAN_TOKEN isn't set, then we're probably running from somewhere -# other than ClusterLabs/pacemaker and coverity shouldn't be running anyway -[ -z "$COVERITY_SCAN_TOKEN" ] && echo "${RED}ERROR: COVERITY_SCAN_TOKEN must be set${NONE}" && exit 0 - -# Verify upload is permitted -AUTH_RES=`curl -s --form project="$PROJECT_NAME" --form token="$COVERITY_SCAN_TOKEN" $SCAN_URL/api/upload_permitted` -if [ "$AUTH_RES" = "Access denied" ]; then - echo -e "${RED}Coverity Scan API access denied. Check PROJECT_NAME and COVERITY_SCAN_TOKEN.${NONE}" - exit 1 -else - AUTH=`echo $AUTH_RES | ruby -e "require 'rubygems'; require 'json'; puts JSON[STDIN.read]['upload_permitted']"` - if [ "$AUTH" = "true" ]; then - echo -e "${RED}Coverity Scan analysis authorized per quota.${NONE}" - else - WHEN=`echo $AUTH_RES | ruby -e "require 'rubygems'; require 'json'; puts JSON[STDIN.read]['next_upload_permitted_at']"` - echo -e "${RED}Coverity Scan analysis NOT authorized until $WHEN.${NONE}" - exit 1 - fi -fi - -if [ ! 
-d $TOOL_BASE ]; then - # Download Coverity Scan Analysis Tool - if [ ! -e $TOOL_ARCHIVE ]; then - echo -e "${RED}Downloading Coverity Scan Analysis Tool...${NONE}" - wget -nv -O $TOOL_ARCHIVE $TOOL_URL --post-data "project=$PROJECT_NAME&token=$COVERITY_SCAN_TOKEN" - fi - - # Extract Coverity Scan Analysis Tool - echo -e "${RED}Extracting Coverity Scan Analysis Tool...${NONE}" - mkdir -p $TOOL_BASE - pushd $TOOL_BASE - tar xzf $TOOL_ARCHIVE - popd -fi - -TOOL_DIR=`find $TOOL_BASE -type d -name 'cov-analysis*'` -export PATH=$TOOL_DIR/bin:$PATH - -# Build -echo -e "${RED}Running Coverity Scan Analysis Tool...${NONE}" -COV_BUILD_OPTIONS="" -#COV_BUILD_OPTIONS="--return-emit-failures 8 --parse-error-threshold 85" -RESULTS_DIR="cov-int" -eval "${COVERITY_SCAN_BUILD_COMMAND_PREPEND}" -COVERITY_UNSUPPORTED=1 cov-build --dir $RESULTS_DIR $COV_BUILD_OPTIONS $COVERITY_SCAN_BUILD_COMMAND - -# Upload results -echo -e "${RED}Tarring Coverity Scan Analysis results...${NONE}" -RESULTS_ARCHIVE=analysis-results.tgz -tar czf $RESULTS_ARCHIVE $RESULTS_DIR -SHA=`git rev-parse --short HEAD` - -echo -e "${RED}Uploading Coverity Scan Analysis results...${NONE}" -curl \ - --progress-bar \ - --form project=$PROJECT_NAME \ - --form token=$COVERITY_SCAN_TOKEN \ - --form email=$OWNER_EMAIL \ - --form file=@$RESULTS_ARCHIVE \ - --form version=$SHA \ - --form description="Travis CI build" \ - $UPLOAD_URL diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c index 371c5bde03..1b6b9ef12d 100644 --- a/tools/crm_simulate.c +++ b/tools/crm_simulate.c @@ -1,1188 +1,1187 @@ /* * Copyright 2009-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "crm_simulate - simulate a Pacemaker cluster's response to events" struct { gboolean all_actions; char *dot_file; char *graph_file; gchar *input_file; guint modified; GList *node_up; GList *node_down; GList *node_fail; GList *op_fail; GList *op_inject; gchar *output_file; gboolean print_pending; gboolean process; char *quorum; long long repeat; gboolean show_attrs; gboolean show_failcounts; gboolean show_scores; gboolean show_utilization; gboolean simulate; gboolean store; gchar *test_dir; GList *ticket_grant; GList *ticket_revoke; GList *ticket_standby; GList *ticket_activate; char *use_date; char *watchdog; char *xml_file; } options = { .print_pending = TRUE, .repeat = 1 }; cib_t *global_cib = NULL; bool action_numbers = FALSE; char *temp_shadow = NULL; extern gboolean bringing_nodes_online; crm_exit_t exit_code = CRM_EX_OK; #define INDENT " " static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static gboolean in_place_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.store = TRUE; options.process = TRUE; options.simulate = TRUE; return TRUE; } static gboolean live_check_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.xml_file) { free(options.xml_file); } options.xml_file = NULL; return TRUE; } static gboolean node_down_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.node_down = g_list_append(options.node_down, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean node_fail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.node_fail = g_list_append(options.node_fail, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean node_up_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; bringing_nodes_online = TRUE; options.node_up = g_list_append(options.node_up, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean op_fail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.process = TRUE; options.simulate = TRUE; options.op_fail = g_list_append(options.op_fail, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean op_inject_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.op_inject = g_list_append(options.op_inject, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean quorum_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.quorum) { free(options.quorum); } options.modified++; options.quorum = strdup(optarg); return TRUE; } static gboolean save_dotfile_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.dot_file) { free(options.dot_file); } options.process = TRUE; options.dot_file = strdup(optarg); return TRUE; } static gboolean save_graph_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.graph_file) { free(options.graph_file); } options.process = TRUE; options.graph_file = strdup(optarg); return TRUE; } static gboolean show_scores_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError 
**error) { options.process = TRUE; options.show_scores = TRUE; return TRUE; } static gboolean simulate_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.process = TRUE; options.simulate = TRUE; return TRUE; } static gboolean ticket_activate_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.ticket_activate = g_list_append(options.ticket_activate, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean ticket_grant_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.ticket_grant = g_list_append(options.ticket_grant, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean ticket_revoke_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.ticket_revoke = g_list_append(options.ticket_revoke, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean ticket_standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.ticket_standby = g_list_append(options.ticket_standby, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean utilization_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.process = TRUE; options.show_utilization = TRUE; return TRUE; } static gboolean watchdog_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.watchdog) { free(options.watchdog); } options.modified++; options.watchdog = strdup(optarg); return TRUE; } static gboolean xml_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.xml_file) { free(options.xml_file); } options.xml_file = strdup(optarg); return TRUE; } static gboolean xml_pipe_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.xml_file) { free(options.xml_file); } options.xml_file = strdup("-"); return TRUE; } static GOptionEntry operation_entries[] = { { "run", 'R', 0, G_OPTION_ARG_NONE, &options.process, "Process the supplied input and show what actions the cluster will take in response", NULL }, { "simulate", 'S', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, simulate_cb, "Like --run, but also simulate taking those actions and show the resulting new status", NULL }, { "in-place", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, in_place_cb, "Like --simulate, but also store the results back to the input file", NULL }, { "show-attrs", 'A', 0, G_OPTION_ARG_NONE, &options.show_attrs, "Show node attributes", NULL }, { "show-failcounts", 'c', 0, G_OPTION_ARG_NONE, &options.show_failcounts, "Show resource fail counts", NULL }, { "show-scores", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_scores_cb, "Show allocation scores", NULL }, { "show-utilization", 'U', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, utilization_cb, "Show utilization information", NULL }, { "profile", 'P', 0, G_OPTION_ARG_FILENAME, &options.test_dir, "Process all the XML files in the named directory to create profiling data", "DIR" }, { "repeat", 'N', 0, G_OPTION_ARG_INT, &options.repeat, "With --profile, repeat each test N times and print timings", "N" }, /* Deprecated */ { "pending", 'j', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.print_pending, "Display pending state if 'record-pending' is enabled", NULL }, { NULL } }; static GOptionEntry synthetic_entries[] = { { "node-up", 'u', 0, G_OPTION_ARG_CALLBACK, node_up_cb, 
"Simulate bringing a node online", "NODE" }, { "node-down", 'd', 0, G_OPTION_ARG_CALLBACK, node_down_cb, "Simulate taking a node offline", "NODE" }, { "node-fail", 'f', 0, G_OPTION_ARG_CALLBACK, node_fail_cb, "Simulate a node failing", "NODE" }, { "op-inject", 'i', 0, G_OPTION_ARG_CALLBACK, op_inject_cb, "Generate a failure for the cluster to react to in the simulation.\n" INDENT "See `Operation Specification` help for more information.", "OPSPEC" }, { "op-fail", 'F', 0, G_OPTION_ARG_CALLBACK, op_fail_cb, "If the specified task occurs during the simulation, have it fail with return code ${rc}.\n" INDENT "The transition will normally stop at the failed action.\n" INDENT "Save the result with --save-output and re-run with --xml-file.\n" INDENT "See `Operation Specification` help for more information.", "OPSPEC" }, { "set-datetime", 't', 0, G_OPTION_ARG_STRING, &options.use_date, "Set date/time (ISO 8601 format, see https://en.wikipedia.org/wiki/ISO_8601)", "DATETIME" }, { "quorum", 'q', 0, G_OPTION_ARG_CALLBACK, quorum_cb, "Set to '1' (or 'true') to indicate cluster has quorum", "QUORUM" }, { "watchdog", 'w', 0, G_OPTION_ARG_CALLBACK, watchdog_cb, "Set to '1' (or 'true') to indicate cluster has an active watchdog device", "DEVICE" }, { "ticket-grant", 'g', 0, G_OPTION_ARG_CALLBACK, ticket_grant_cb, "Simulate granting a ticket", "TICKET" }, { "ticket-revoke", 'r', 0, G_OPTION_ARG_CALLBACK, ticket_revoke_cb, "Simulate revoking a ticket", "TICKET" }, { "ticket-standby", 'b', 0, G_OPTION_ARG_CALLBACK, ticket_standby_cb, "Simulate making a ticket standby", "TICKET" }, { "ticket-activate", 'e', 0, G_OPTION_ARG_CALLBACK, ticket_activate_cb, "Simulate activating a ticket", "TICKET" }, { NULL } }; static GOptionEntry artifact_entries[] = { { "save-input", 'I', 0, G_OPTION_ARG_FILENAME, &options.input_file, "Save the input configuration to the named file", "FILE" }, { "save-output", 'O', 0, G_OPTION_ARG_FILENAME, &options.output_file, "Save the output configuration to the named file", "FILE" }, { "save-graph", 'G', 0, G_OPTION_ARG_CALLBACK, save_graph_cb, "Save the transition graph (XML format) to the named file", "FILE" }, { "save-dotfile", 'D', 0, G_OPTION_ARG_CALLBACK, save_dotfile_cb, "Save the transition graph (DOT format) to the named file", "FILE" }, { "all-actions", 'a', 0, G_OPTION_ARG_NONE, &options.all_actions, "Display all possible actions in DOT graph (even if not part of transition)", NULL }, { NULL } }; static GOptionEntry source_entries[] = { { "live-check", 'L', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, live_check_cb, "Connect to CIB manager and use the current CIB contents as input", NULL }, { "xml-file", 'x', 0, G_OPTION_ARG_CALLBACK, xml_file_cb, "Retrieve XML from the named file", "FILE" }, { "xml-pipe", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, xml_pipe_cb, "Retrieve XML from stdin", NULL }, { NULL } }; static void get_date(pe_working_set_t *data_set, bool print_original, char *use_date) { pcmk__output_t *out = data_set->priv; time_t original_date = 0; crm_element_value_epoch(data_set->input, "execution-date", &original_date); if (use_date) { data_set->now = crm_time_new(use_date); out->info(out, "Setting effective cluster time: %s", use_date); crm_time_log(LOG_NOTICE, "Pretending 'now' is", data_set->now, crm_time_log_date | crm_time_log_timeofday); } else if (original_date) { data_set->now = crm_time_new(NULL); crm_time_set_timet(data_set->now, &original_date); if (print_original) { char *when = crm_time_as_string(data_set->now, 
crm_time_log_date|crm_time_log_timeofday); out->info(out, "Using the original execution date of: %s", when); free(when); } } } static void print_cluster_status(pe_working_set_t * data_set, unsigned int print_opts) { pcmk__output_t *out = data_set->priv; int rc = pcmk_rc_no_output; GList *all = NULL; all = g_list_prepend(all, strdup("*")); rc = out->message(out, "node-list", data_set->nodes, all, all, print_opts, FALSE, FALSE, FALSE); PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); rc = out->message(out, "resource-list", data_set, print_opts, FALSE, TRUE, FALSE, FALSE, all, all, FALSE); if (options.show_attrs) { out->message(out, "node-attribute-list", data_set, 0, rc == pcmk_rc_ok, FALSE, FALSE, FALSE, all, all); } if (options.show_failcounts) { out->message(out, "failed-action-list", data_set, all, all, rc == pcmk_rc_ok); } g_list_free_full(all, free); } static char * create_action_name(pe_action_t *action) { char *action_name = NULL; const char *prefix = ""; const char *action_host = NULL; const char *clone_name = NULL; const char *task = action->task; if (action->node) { action_host = action->node->details->uname; } else if (!pcmk_is_set(action->flags, pe_action_pseudo)) { action_host = ""; } if (pcmk__str_eq(action->task, RSC_CANCEL, pcmk__str_casei)) { prefix = "Cancel "; task = action->cancel_task; } if (action->rsc && action->rsc->clone_name) { clone_name = action->rsc->clone_name; } if (clone_name) { char *key = NULL; guint interval_ms = 0; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } if (pcmk__strcase_any_of(action->task, RSC_NOTIFY, RSC_NOTIFIED, NULL)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_key_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_key_operation"); CRM_ASSERT(n_type != NULL); CRM_ASSERT(n_task != NULL); key = pcmk__notify_key(clone_name, n_type, n_task); } else { key = pcmk__op_key(clone_name, task, interval_ms); } if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, key, action_host); } else { action_name = crm_strdup_printf("%s%s", prefix, key); } free(key); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { const char *op = g_hash_table_lookup(action->meta, "stonith_action"); action_name = crm_strdup_printf("%s%s '%s' %s", prefix, action->task, op, action_host); } else if (action->rsc && action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->uuid, action_host); } else if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->task, action_host); } else { action_name = crm_strdup_printf("%s", action->uuid); } if (action_numbers) { // i.e. 
verbose char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id); free(action_name); action_name = with_id; } return action_name; } static bool create_dotfile(pe_working_set_t * data_set, const char *dot_file, gboolean all_actions, GError **error) { GList *gIter = NULL; FILE *dot_strm = fopen(dot_file, "w"); if (dot_strm == NULL) { g_set_error(error, PCMK__RC_ERROR, errno, "Could not open %s for writing: %s", dot_file, pcmk_rc_str(errno)); return false; } fprintf(dot_strm, " digraph \"g\" {\n"); for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; const char *style = "dashed"; const char *font = "black"; const char *color = "black"; char *action_name = create_action_name(action); crm_trace("Action %d: %s %s %p", action->id, action_name, action->uuid, action); if (pcmk_is_set(action->flags, pe_action_pseudo)) { font = "orange"; } if (pcmk_is_set(action->flags, pe_action_dumped)) { style = "bold"; color = "green"; } else if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pe_rsc_managed)) { color = "red"; font = "purple"; if (all_actions == FALSE) { goto do_not_write; } } else if (pcmk_is_set(action->flags, pe_action_optional)) { color = "blue"; if (all_actions == FALSE) { goto do_not_write; } } else { color = "red"; CRM_CHECK(!pcmk_is_set(action->flags, pe_action_runnable), ;); } pe__set_action_flags(action, pe_action_dumped); crm_trace("\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]", action_name, style, color, font); fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n", action_name, style, color, font); do_not_write: free(action_name); } for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; GList *gIter2 = NULL; for (gIter2 = action->actions_before; gIter2 != NULL; gIter2 = gIter2->next) { pe_action_wrapper_t *before = (pe_action_wrapper_t *) gIter2->data; char *before_name = NULL; char *after_name = NULL; const char *style = "dashed"; gboolean optional = TRUE; if (before->state == pe_link_dumped) { optional = FALSE; style = "bold"; } else if (pcmk_is_set(action->flags, pe_action_pseudo) && (before->type & pe_order_stonith_stop)) { continue; } else if (before->type == pe_order_none) { continue; } else if (pcmk_is_set(before->action->flags, pe_action_dumped) && pcmk_is_set(action->flags, pe_action_dumped) && before->type != pe_order_load) { optional = FALSE; } if (all_actions || optional == FALSE) { before_name = create_action_name(before->action); after_name = create_action_name(action); crm_trace("\"%s\" -> \"%s\" [ style = %s]", before_name, after_name, style); fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n", before_name, after_name, style); free(before_name); free(after_name); } } } fprintf(dot_strm, "}\n"); fflush(dot_strm); fclose(dot_strm); return true; } static int setup_input(const char *input, const char *output, GError **error) { int rc = pcmk_rc_ok; cib_t *cib_conn = NULL; xmlNode *cib_object = NULL; char *local_output = NULL; if (input == NULL) { /* Use live CIB */ cib_conn = cib_new(); rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { rc = cib_conn->cmds->query(cib_conn, NULL, &cib_object, cib_scope_local | cib_sync_call); } cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); cib_conn = NULL; if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); g_set_error(error, PCMK__RC_ERROR, rc, "Live CIB query failed: %s (%d)", 
pcmk_rc_str(rc), rc); return rc; } else if (cib_object == NULL) { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_NOINPUT, "Live CIB query failed: empty result"); return pcmk_rc_no_input; } } else if (pcmk__str_eq(input, "-", pcmk__str_casei)) { cib_object = filename2xml(NULL); } else { cib_object = filename2xml(input); } if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); return pcmk_rc_transform_failed; } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); return pcmk_rc_schema_validation; } if (output == NULL) { char *pid = pcmk__getpid_s(); local_output = get_shadow_file(pid); temp_shadow = strdup(local_output); output = local_output; free(pid); } rc = write_xml_file(cib_object, output, FALSE); free_xml(cib_object); cib_object = NULL; if (rc < 0) { rc = pcmk_legacy2rc(rc); g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_CANTCREAT, "Could not create '%s': %s", output, pcmk_rc_str(rc)); return rc; } else { setenv("CIB_file", output, 1); free(local_output); return pcmk_rc_ok; } } static void profile_one(const char *xml_file, long long repeat, pe_working_set_t *data_set, char *use_date) { pcmk__output_t *out = data_set->priv; xmlNode *cib_object = NULL; clock_t start = 0; clock_t end; cib_object = filename2xml(xml_file); start = clock(); if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); return; } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); return; } for (int i = 0; i < repeat; ++i) { xmlNode *input = (repeat == 1)? cib_object : copy_xml(cib_object); data_set->input = input; get_date(data_set, false, use_date); pcmk__schedule_actions(data_set, input, NULL); pe_reset_working_set(data_set); } end = clock(); out->message(out, "profile", xml_file, start, end); } #ifndef FILENAME_MAX # define FILENAME_MAX 512 #endif static void profile_all(const char *dir, long long repeat, pe_working_set_t *data_set, char *use_date) { pcmk__output_t *out = data_set->priv; struct dirent **namelist; int file_num = scandir(dir, &namelist, 0, alphasort); if (file_num > 0) { struct stat prop; char buffer[FILENAME_MAX]; out->begin_list(out, NULL, NULL, "Timings"); while (file_num--) { if ('.' == namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (!pcmk__ends_with_ext(namelist[file_num]->d_name, ".xml")) { free(namelist[file_num]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", dir, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { profile_one(buffer, repeat, data_set, use_date); } free(namelist[file_num]); } free(namelist); out->end_list(out); } } PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t") static int profile_default(pcmk__output_t *out, va_list args) { const char *xml_file = va_arg(args, const char *); clock_t start = va_arg(args, clock_t); clock_t end = va_arg(args, clock_t); out->list_item(out, NULL, "Testing %s ... 
%.2f secs", xml_file, (end - start) / (float) CLOCKS_PER_SEC); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t") static int profile_xml(pcmk__output_t *out, va_list args) { const char *xml_file = va_arg(args, const char *); clock_t start = va_arg(args, clock_t); clock_t end = va_arg(args, clock_t); char *duration = pcmk__ftoa((end - start) / (float) CLOCKS_PER_SEC); pcmk__output_create_xml_node(out, "timing", "file", xml_file, "duration", duration, NULL); free(duration); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "profile", "default", profile_default, }, { "profile", "xml", profile_xml }, { NULL } }; static void crm_simulate_register_messages(pcmk__output_t *out) { pcmk__register_messages(out, fmt_functions); } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Display only essential output", NULL }, { NULL } }; const char *description = "Operation Specification:\n\n" "The OPSPEC in any command line option is of the form\n" "${resource}_${task}_${interval_in_ms}@${node}=${rc}\n" "(memcached_monitor_20000@bart.example.com=7, for example).\n" "${rc} is an OCF return code. For more information on these\n" "return codes, refer to https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/2.0/html/Pacemaker_Administration/s-ocf-return-codes.html\n\n" "Examples:\n\n" "Pretend a recurring monitor action found memcached stopped on node\n" "fred.example.com and, during recovery, that the memcached stop\n" "action failed:\n\n" "\tcrm_simulate -LS --op-inject memcached:0_monitor_20000@bart.example.com=7 " "--op-fail memcached:0_stop_0@fred.example.com=1 --save-output /tmp/memcached-test.xml\n\n" "Now see what the reaction to the stop failed would be:\n\n" "\tcrm_simulate -S --xml-file /tmp/memcached-test.xml\n\n"; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, description); pcmk__add_arg_group(context, "operations", "Operations:", "Show operations options", operation_entries); pcmk__add_arg_group(context, "synthetic", "Synthetic Cluster Events:", "Show synthetic cluster event options", synthetic_entries); pcmk__add_arg_group(context, "artifact", "Artifact Options:", "Show artifact options", artifact_entries); pcmk__add_arg_group(context, "source", "Data Source:", "Show data source options", source_entries); return context; } int main(int argc, char **argv) { int printed = pcmk_rc_no_output; int rc = pcmk_rc_ok; pe_working_set_t *data_set = NULL; pcmk__output_t *out = NULL; xmlNode *input = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "bdefgiqrtuwxDFGINO"); GOptionContext *context = build_arg_context(args, &output_group); /* This must come before g_option_context_parse_strv. 
*/ options.xml_file = strdup("-"); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } pcmk__cli_init_logging("crm_simulate", args->verbosity); rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { fprintf(stderr, "Error creating output format %s: %s\n", args->output_ty, pcmk_rc_str(rc)); exit_code = CRM_EX_ERROR; goto done; } if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches) && !options.show_scores && !options.show_utilization) { pcmk__force_args(context, &error, "%s --text-fancy", g_get_prgname()); } else if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) { pcmk__force_args(context, &error, "%s --xml-simple-list --xml-substitute", g_get_prgname()); } crm_simulate_register_messages(out); pe__register_messages(out); pcmk__register_lib_messages(out); out->quiet = args->quiet; if (args->version) { out->version(out, false); goto done; } if (args->verbosity > 0) { #ifdef PCMK__COMPAT_2_0 /* Redirect stderr to stdout so we can grep the output */ close(STDERR_FILENO); dup2(STDOUT_FILENO, STDERR_FILENO); #endif action_numbers = TRUE; } data_set = pe_new_working_set(); if (data_set == NULL) { rc = ENOMEM; g_set_error(&error, PCMK__RC_ERROR, rc, "Could not allocate working set"); goto done; } if (options.show_scores) { pe__set_working_set_flags(data_set, pe_flag_show_scores); } if (options.show_utilization) { pe__set_working_set_flags(data_set, pe_flag_show_utilization); } pe__set_working_set_flags(data_set, pe_flag_no_compat); if (options.test_dir != NULL) { data_set->priv = out; profile_all(options.test_dir, options.repeat, data_set, options.use_date); rc = pcmk_rc_ok; goto done; } rc = setup_input(options.xml_file, options.store ? options.xml_file : options.output_file, &error); if (rc != pcmk_rc_ok) { goto done; } global_cib = cib_new(); rc = global_cib->cmds->signon(global_cib, crm_system_name, cib_command); if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); g_set_error(&error, PCMK__RC_ERROR, rc, "Could not connect to the CIB: %s", pcmk_rc_str(rc)); goto done; } rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call | cib_scope_local); if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); g_set_error(&error, PCMK__RC_ERROR, rc, "Could not get local CIB: %s", pcmk_rc_str(rc)); goto done; } data_set->input = input; data_set->priv = out; get_date(data_set, true, options.use_date); if(options.xml_file) { pe__set_working_set_flags(data_set, pe_flag_sanitized); } if (options.show_scores) { pe__set_working_set_flags(data_set, pe_flag_show_scores); } if (options.show_utilization) { pe__set_working_set_flags(data_set, pe_flag_show_utilization); } cluster_status(data_set); if (!out->is_quiet(out)) { unsigned int opts = options.print_pending ? pe_print_pending : 0; if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) { printed = out->message(out, "maint-mode", data_set->flags); } if (data_set->disabled_resources || data_set->blocked_resources) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); printed = out->info(out, "%d of %d resource instances DISABLED and %d BLOCKED " "from further action due to failure", data_set->disabled_resources, data_set->ninstances, data_set->blocked_resources); } PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); /* Most formatted output headers use caps for each word, but this one * only has the first word capitalized for compatibility with pcs. 
*/ out->begin_list(out, NULL, NULL, "Current cluster status"); print_cluster_status(data_set, opts); out->end_list(out); printed = pcmk_rc_ok; } if (options.modified) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); modify_configuration(data_set, global_cib, options.quorum, options.watchdog, options.node_up, options.node_down, options.node_fail, options.op_inject, options.ticket_grant, options.ticket_revoke, options.ticket_standby, options.ticket_activate); printed = pcmk_rc_ok; rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call); if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); g_set_error(&error, PCMK__RC_ERROR, rc, "Could not get modified CIB: %s", pcmk_rc_str(rc)); goto done; } cleanup_calculations(data_set); data_set->input = input; data_set->priv = out; get_date(data_set, true, options.use_date); if(options.xml_file) { pe__set_working_set_flags(data_set, pe_flag_sanitized); } if (options.show_scores) { pe__set_working_set_flags(data_set, pe_flag_show_scores); } if (options.show_utilization) { pe__set_working_set_flags(data_set, pe_flag_show_utilization); } cluster_status(data_set); } if (options.input_file != NULL) { rc = write_xml_file(input, options.input_file, FALSE); if (rc < 0) { rc = pcmk_legacy2rc(rc); g_set_error(&error, PCMK__RC_ERROR, rc, "Could not create '%s': %s", options.input_file, pcmk_rc_str(rc)); goto done; } } if (options.process || options.simulate) { crm_time_t *local_date = NULL; pcmk__output_t *logger_out = NULL; if (pcmk_all_flags_set(data_set->flags, pe_flag_show_scores|pe_flag_show_utilization)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Allocation Scores and Utilization Information"); printed = pcmk_rc_ok; } else if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Allocation Scores"); printed = pcmk_rc_ok; } else if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Utilization Information"); printed = pcmk_rc_ok; } else { logger_out = pcmk__new_logger(); if (logger_out == NULL) { goto done; } data_set->priv = logger_out; } pcmk__schedule_actions(data_set, input, local_date); if (logger_out == NULL) { out->end_list(out); } else { logger_out->finish(logger_out, CRM_EX_OK, true, NULL); pcmk__output_free(logger_out); data_set->priv = out; } input = NULL; /* Don't try and free it twice */ if (options.graph_file != NULL) { write_xml_file(data_set->graph, options.graph_file, FALSE); } if (options.dot_file != NULL) { if (!create_dotfile(data_set, options.dot_file, options.all_actions, &error)) { goto done; } } if (!out->is_quiet(out)) { GList *gIter = NULL; PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Transition Summary"); LogNodeActions(data_set); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; LogActions(rsc, data_set); } out->end_list(out); printed = pcmk_rc_ok; } } rc = pcmk_rc_ok; if (options.simulate) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); if (run_simulation(data_set, global_cib, options.op_fail) != pcmk_rc_ok) { rc = pcmk_rc_error; } printed = pcmk_rc_ok; if (!out->is_quiet(out)) { get_date(data_set, true, options.use_date); PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Revised Cluster Status"); - printed = pcmk_rc_ok; if 
(options.show_scores) { pe__set_working_set_flags(data_set, pe_flag_show_scores); } if (options.show_utilization) { pe__set_working_set_flags(data_set, pe_flag_show_utilization); } cluster_status(data_set); print_cluster_status(data_set, 0); out->end_list(out); } } done: pcmk__output_and_clear_error(error, NULL); /* There sure is a lot to free in options. */ free(options.dot_file); free(options.graph_file); g_free(options.input_file); g_list_free_full(options.node_up, g_free); g_list_free_full(options.node_down, g_free); g_list_free_full(options.node_fail, g_free); g_list_free_full(options.op_fail, g_free); g_list_free_full(options.op_inject, g_free); g_free(options.output_file); free(options.quorum); g_free(options.test_dir); g_list_free_full(options.ticket_grant, g_free); g_list_free_full(options.ticket_revoke, g_free); g_list_free_full(options.ticket_standby, g_free); g_list_free_full(options.ticket_activate, g_free); free(options.use_date); free(options.watchdog); free(options.xml_file); pcmk__free_arg_context(context); g_strfreev(processed_args); if (data_set) { pe_free_working_set(data_set); } if (global_cib) { global_cib->cmds->signoff(global_cib); cib_delete(global_cib); } fflush(stderr); if (temp_shadow) { unlink(temp_shadow); free(temp_shadow); } if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); } if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } crm_exit(exit_code); } diff --git a/tools/pcmk_simtimes.in b/tools/pcmk_simtimes.in index f082922f1e..35686c0a0a 100644 --- a/tools/pcmk_simtimes.in +++ b/tools/pcmk_simtimes.in @@ -1,148 +1,151 @@ #!@PYTHON@ """ Timing comparisons for crm_simulate profiling output """ -__copyright__ = "Copyright 2019-2020 the Pacemaker project contributors" +__copyright__ = "Copyright 2019-2021 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import re import sys import errno import argparse DESC = """Compare timings from crm_simulate profiling output""" # These values must be kept in sync with include/crm/crm.h class CrmExit(object): OK = 0 BEFORE_HELP = """Output of "crm_simulate --profile cts/scheduler --repeat " from earlier Pacemaker build""" -# line like: * Testing cts/scheduler/1360.xml ... 0.07 secs +# line like: * Testing cts/scheduler/xml/1360.xml ... 
0.07 secs PATTERN = r"""^\s*\*\s+Testing\s+.*/([^/]+)\.xml\s+\.+\s+([.0-9]+)\s+secs\s*$""" def parse_args(argv=sys.argv): """ Parse command-line arguments """ parser = argparse.ArgumentParser(description=DESC) parser.add_argument('-V', '--verbose', action='count', help='Increase verbosity') parser.add_argument('-p', '--threshold-percent', type=float, default=0, help="Don't show tests with less than this percentage difference in times") parser.add_argument('-s', '--threshold-seconds', type=float, default=0, help="Don't show tests with less than this seconds difference in times") parser.add_argument('-S', '--sort', choices=['test', 'before', 'after', 'diff', 'percent'], default='test', help="Sort results by this column") parser.add_argument('-r', '--reverse', action='store_true', help="Sort results in descending order") parser.add_argument('before_file', metavar='BEFORE', type=argparse.FileType('r'), help=BEFORE_HELP) parser.add_argument('after_file', metavar='AFTER', type=argparse.FileType('r'), help='Output of same command from later Pacemaker build') return parser.parse_args(argv[1:]) def extract_times(infile): """ Extract test names and times into hash table from file """ result = {} for line in infile: match = re.search(PATTERN, line) if match is not None: result[match.group(1)] = match.group(2) return result def compare_test(test, before, after, args): """ Compare one test's timings """ try: before_time = float(before[test]) except KeyError: if args.verbose > 0: print("No previous test " + test + " to compare") return None after_time = float(after[test]) time_diff = after_time - before_time time_diff_percent = (time_diff / before_time) * 100 if ((abs(time_diff) >= args.threshold_seconds) and (abs(time_diff_percent) >= args.threshold_percent)): return { 'test': test, 'before': before_time, 'after': after_time, 'diff': time_diff, 'percent': time_diff_percent } return None def sort_diff(result): """ Sort two test results by time difference """ global sort_field return result[sort_field] def print_results(results, sort_reverse): """ Output the comparison results """ if results == []: return # Sort and print test differences results.sort(reverse=sort_reverse, key=sort_diff) for result in results: print("%-40s %6.2fs vs %6.2fs (%+.2fs = %+6.2f%%)" % (result['test'], result['before'], result['after'], result['diff'], result['percent'])) # Print average differences diff_total = sum(d['diff'] for d in results) percent_total = sum(d['percent'] for d in results) nresults = len(results) print("\nAverages: %+.2fs %+6.2f%%" % ((diff_total / nresults), (percent_total / nresults))) if __name__ == "__main__": global sort_field try: args = parse_args() + if args.verbose is None: + args.verbose = 0 + before = extract_times(args.before_file) after = extract_times(args.after_file) sort_field = args.sort # Build a list of test differences results = [] for test in after.keys(): result = compare_test(test, before, after, args) if result is not None: results = results + [ result ] print_results(results, sort_reverse=args.reverse) except KeyboardInterrupt: pass except IOError as e: if e.errno != errno.EPIPE: raise sys.exit(CrmExit.OK) # vim: set filetype=python expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=120:
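# Usage sketch for the comparison flow above (paths and file names here are
# illustrative, not mandated by the tool): capture profiling output from two
# builds, then diff them, showing the largest slowdowns first and ignoring
# changes under 0.05s:
#
#   crm_simulate --profile cts/scheduler/xml --repeat 10 > before.txt
#   crm_simulate --profile cts/scheduler/xml --repeat 10 > after.txt   # newer build
#   pcmk_simtimes -S diff -r -s 0.05 before.txt after.txt
#
# extract_times() turns each "* Testing .../1360.xml ... 0.07 secs" line into a
# {"1360": "0.07"} entry, and compare_test() reports only tests whose timing
# delta meets both the -s (seconds) and -p (percent) thresholds.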