diff --git a/agents/ocf/ClusterMon.in b/agents/ocf/ClusterMon.in index 722cd3218c..f1fd095e6f 100755 --- a/agents/ocf/ClusterMon.in +++ b/agents/ocf/ClusterMon.in @@ -1,276 +1,276 @@ #!@BASH_PATH@ # # ocf:pacemaker:ClusterMon resource agent # # Original copyright 2004 SUSE LINUX AG, Lars Marowsky-Bre # Later changes copyright 2008-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # Starts crm_mon in background which logs cluster status as # html to the specified file. ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} # Explicitly list all environment variables used, to make static analysis happy : ${OCF_RESKEY_user:=""} : ${OCF_RESKEY_pidfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"} : ${OCF_RESKEY_update:="15000"} : ${OCF_RESKEY_extra_options:=""} : ${OCF_RESKEY_htmlfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"} ####################################################################### meta_data() { cat < 1.1 This is a ClusterMon Resource Agent. It outputs current cluster status to the html. Runs crm_mon in the background, recording the cluster status to an HTML file The user we want to run crm_mon as The user we want to run crm_mon as How frequently should we update the cluster status (in milliseconds). For compatibility with old documentation, values less than 1000 will be treated as seconds. Update interval in milliseconds Additional options to pass to crm_mon. Eg. -n -r Extra options PID file location to ensure only one instance is running PID file Location to write HTML output to. 
HTML output END } ####################################################################### ClusterMon_usage() { cat </dev/null | \ grep -qE "[c]rm_mon.*${OCF_RESKEY_pidfile}" case $? in 0) exit $OCF_SUCCESS;; 1) exit $OCF_NOT_RUNNING;; *) exit $OCF_ERR_GENERIC;; esac fi fi exit $OCF_NOT_RUNNING } CheckOptions() { while getopts Vi:nrh:cdp: OPTION do case "$OPTION" in V|n|r|c|d);; i) ocf_log warn "You should not have specified the -i option, since OCF_RESKEY_update is set already!";; h) ocf_log warn "You should not have specified the -h option, since OCF_RESKEY_htmlfile is set already!";; p) ocf_log warn "You should not have specified the -p option, since OCF_RESKEY_pidfile is set already!";; *) return $OCF_ERR_ARGS;; esac case "$OCF_RESKEY_extra_options" in *--output-as*) ocf_log warn "You should not have specified the -output-as option, since OCF_RESKEY_htmlfile is set already!";; *--output-to*) ocf_log warn "You should not have specified the -output-to option, since OCF_RESKEY_htmlfile is set already!";; esac done if [ $? -ne 0 ]; then return $OCF_ERR_ARGS fi # We should have eaten all options at this stage shift $(($OPTIND -1)) if [ $# -gt 0 ]; then false else true fi } ClusterMon_validate() { # Host-specific checks if [ "$OCF_CHECK_LEVEL" = "10" ]; then # Existence of the user if [ -n "$OCF_RESKEY_user" ]; then getent passwd "$OCF_RESKEY_user" >/dev/null if [ $? -eq 0 ]; then : Yes, user exists. We can further check his permission on crm_mon if necessary else ocf_log err "The user $OCF_RESKEY_user does not exist!" exit $OCF_ERR_ARGS fi fi fi # Pidfile should be an absolute path case "$OCF_RESKEY_pidfile" in /*) ;; *) ocf_log warn "pidfile ($OCF_RESKEY_pidfile) is not an absolute path" ;; esac # Check the update interval if ocf_is_decimal "$OCF_RESKEY_update" && [ $OCF_RESKEY_update -gt 0 ]; then : else ocf_log err "Invalid update interval $OCF_RESKEY_update. It should be positive integer!" 
exit $OCF_ERR_ARGS fi if CheckOptions $OCF_RESKEY_extra_options; then : else ocf_log err "Invalid options $OCF_RESKEY_extra_options!" exit $OCF_ERR_ARGS fi # Htmlfile should be an absolute path case "$OCF_RESKEY_htmlfile" in /*) ;; *) ocf_log warn "htmlfile ($OCF_RESKEY_htmlfile) is not an absolute path" ;; esac echo "Validate OK" return $OCF_SUCCESS } if [ $# -ne 1 ]; then ClusterMon_usage exit $OCF_ERR_ARGS fi if [ ${OCF_RESKEY_update} -ge 1000 ]; then OCF_RESKEY_update=$(( $OCF_RESKEY_update / 1000 )) fi CMON_CMD="${HA_SBIN_DIR}/crm_mon -p \"$OCF_RESKEY_pidfile\" -d -i $OCF_RESKEY_update $OCF_RESKEY_extra_options --output-as=html --output-to=\"$OCF_RESKEY_htmlfile\"" case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) ClusterMon_start ;; stop) ClusterMon_stop ;; monitor) ClusterMon_monitor ;; validate-all) ClusterMon_validate ;; usage|help) ClusterMon_usage exit $OCF_SUCCESS ;; *) ClusterMon_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? -# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/agents/ocf/Dummy.in b/agents/ocf/Dummy.in index ceaafad03c..d8ffccaebc 100755 --- a/agents/ocf/Dummy.in +++ b/agents/ocf/Dummy.in @@ -1,323 +1,323 @@ #!/bin/sh # # ocf:pacemaker:Dummy resource agent # # Original copyright 2004 SUSE LINUX AG, Lars Marowsky-Bre # Later changes copyright 2008-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # The Dummy agent is intended primarily for testing, and has various options to # make actions intentionally fail or take a long time. It may also be used as a # template for resource agent writers, in which case: # # - Replace all occurrences of "dummy" and "Dummy" with your agent name. 
# - Update the meta-data appropriately for your agent, such as the description # and supported options. Pay particular attention to the timeouts specified in # the actions section; they should be meaningful for the kind of service the # agent manages. They should be the minimum advised timeouts, but shouldn't # try to cover _all_ possible instances. So, try to be neither overly generous # nor too stingy, but moderate. The minimum timeouts should never be below 10 # seconds. # - Don't copy the stuff here that is just for testing, such as the # sigterm_handler() or dump_env(). # - You don't need the state file stuff here if you have a better way of # determining whether your service is running. It's only useful for agents # such as health agents that don't actually correspond to a running service. # - Implement the actions appropriately for your service. Your monitor action # must differentiate correctly between running, not running, and failed (that # is THREE states, not just yes/no). The migrate_to, migrate_from, and reload # actions are optional and not appropriate to all services. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} # Explicitly list all environment variables used, to make static analysis happy : ${OCF_RESKEY_fake:="dummy"} : ${OCF_RESKEY_op_sleep:=0} : ${OCF_RESKEY_CRM_meta_interval:=0} : ${OCF_RESKEY_CRM_meta_globally_unique:="false"} : ${OCF_RESKEY_envfile:=""} : ${OCF_RESKEY_fail_start_on:=""} : ${OCF_RESKEY_migrate_source:=""} : ${OCF_RESKEY_migrate_target:=""} : ${OCF_RESKEY_envfile:=""} : ${OCF_RESKEY_state:=""} ####################################################################### meta_data() { cat < 1.1 This is a dummy OCF resource agent. It does absolutely nothing except keep track of whether it is running or not, and can be configured so that actions fail or take a long time. 
Its purpose is primarily for testing, and to serve as a template for resource agent writers. Example stateless resource agent Location to store the resource state in. State file Fake password field Password Fake attribute that can be changed to cause an agent reload Fake attribute that can be changed to cause an agent reload Number of seconds to sleep during operations. This can be used to test how the cluster reacts to operation timeouts. Operation sleep duration in seconds. Start, migrate_from, and reload-agent actions will return failure if running on the host specified here, but the resource will run successfully anyway (future monitor calls will find it running). This can be used to test on-fail=ignore. Report bogus start failure on specified host If this is set, the environment will be dumped to this file for every call. Environment dump file END } ####################################################################### # don't exit on TERM, to test that pacemaker-execd makes sure that we do exit trap sigterm_handler TERM sigterm_handler() { ocf_log info "They use TERM to bring us down. No such luck." # Since we're likely going to get KILLed, clean up any monitor # serialization in progress, so the next probe doesn't return an error. rm -f "${VERIFY_SERIALIZED_FILE}" return } dummy_usage() { cat <> "${OCF_RESKEY_envfile}" fi } dummy_start() { dummy_monitor DS_RETVAL=$? if [ $DS_RETVAL -eq $OCF_SUCCESS ]; then if [ "$(uname -n)" = "${OCF_RESKEY_fail_start_on}" ]; then DS_RETVAL=$OCF_ERR_GENERIC fi return $DS_RETVAL fi touch "${OCF_RESKEY_state}" DS_RETVAL=$? if [ "$(uname -n)" = "${OCF_RESKEY_fail_start_on}" ]; then DS_RETVAL=$OCF_ERR_GENERIC fi return $DS_RETVAL } dummy_stop() { dummy_monitor --force if [ $? 
-eq $OCF_SUCCESS ]; then rm "${OCF_RESKEY_state}" fi rm -f "${VERIFY_SERIALIZED_FILE}" return $OCF_SUCCESS } dummy_monitor() { if [ $OCF_RESKEY_op_sleep -ne 0 ]; then if [ "$1" = "" ] && [ -f "${VERIFY_SERIALIZED_FILE}" ]; then # two monitor ops have occurred at the same time. # This verifies a condition in pacemaker-execd regression tests. ocf_log err "$VERIFY_SERIALIZED_FILE exists already" ocf_exit_reason "alternate universe collision" return $OCF_ERR_GENERIC fi touch "${VERIFY_SERIALIZED_FILE}" sleep ${OCF_RESKEY_op_sleep} rm "${VERIFY_SERIALIZED_FILE}" fi if [ -f "${OCF_RESKEY_state}" ]; then # Multiple monitor levels are defined to support various tests case "$OCF_CHECK_LEVEL" in 10) # monitor level with delay, useful for testing timeouts sleep 30 ;; 20) # monitor level that fails intermittently n=$(expr "$(dd if=/dev/urandom bs=1 count=1 2>/dev/null | od | head -1 | cut -f2 -d' ')" % 5) if [ $n -eq 1 ]; then ocf_exit_reason "smoke detected near CPU fan" return $OCF_ERR_GENERIC fi ;; 30) # monitor level that always fails ocf_exit_reason "hyperdrive quota reached" return $OCF_ERR_GENERIC ;; 40) # monitor level that returns error code from state file rc=$(cat ${OCF_RESKEY_state}) [ -n "$rc" ] && ocf_exit_reason "CPU ejected. Observed leaving the Kronosnet galaxy at $rc times the speed of light." && return $rc ;; *) ;; esac return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } dummy_validate() { # If specified, is op_sleep an integer? case "$OCF_RESKEY_op_sleep" in ""|*[0-9]*) ;; *) return $OCF_ERR_CONFIGURED ;; esac # Host-specific checks if [ "$OCF_CHECK_LEVEL" = "10" ]; then # Is the state directory writable? state_dir=$(dirname "$OCF_RESKEY_state") [ -d "$state_dir" ] && [ -w "$state_dir" ] && [ -x "$state_dir" ] if [ $? -ne 0 ]; then return $OCF_ERR_ARGS fi # If specified, is the environment file directory writable? 
if [ -n "$OCF_RESKEY_envfile" ]; then envfile_dir=$(dirname "$OCF_RESKEY_envfile") [ -d "$envfile_dir" ] && [ -w "$envfile_dir" ] && [ -x "$envfile_dir" ] if [ $? -ne 0 ]; then return $OCF_ERR_ARGS fi fi fi return $OCF_SUCCESS } if [ -z "$OCF_RESKEY_state" ]; then OCF_RESKEY_state="${HA_VARRUN%%/}/Dummy-${OCF_RESOURCE_INSTANCE}.state" if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then # Strip off the trailing clone marker (note + is not portable in sed) OCF_RESKEY_state=$(echo $OCF_RESKEY_state | sed s/:[0-9][0-9]*\.state/.state/) fi fi VERIFY_SERIALIZED_FILE="${OCF_RESKEY_state}.serialized" dump_env case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) dummy_start;; stop) dummy_stop;; monitor) dummy_monitor;; migrate_to) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}." dummy_stop ;; migrate_from) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}." dummy_start ;; reload) ocf_log debug "Reloading $OCF_RESOURCE_INSTANCE (service)" exit $OCF_SUCCESS ;; reload-agent) ocf_log err "Reloading $OCF_RESOURCE_INSTANCE (agent)" dummy_start ;; validate-all) dummy_validate;; usage|help) dummy_usage exit $OCF_SUCCESS ;; *) dummy_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc -# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/agents/ocf/HealthCPU.in b/agents/ocf/HealthCPU.in index 1a691a98b6..b3a4023679 100755 --- a/agents/ocf/HealthCPU.in +++ b/agents/ocf/HealthCPU.in @@ -1,221 +1,221 @@ #!/bin/sh # # ocf:pacemaker:HealthCPU resource agent # # Copyright 2004-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. 
# # # Measures CPUs idling and writes #health-cpu status into the CIB # ################################ # # TODO: Enter default values # Error handling in getting uptime # ################################## ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} : ${OCF_RESKEY_dampening:="30s"} ####################################################################### meta_data() { cat < 1.1 System health agent that measures the CPU idling and updates the #health-cpu attribute. System health CPU usage Location to store the resource state in. State file Lower (!) limit of idle percentage to switch the health attribute to yellow. I.e. the #health-cpu will go yellow if the %idle of the CPU falls below 50%. Lower limit for yellow health attribute Lower (!) limit of idle percentage to switch the health attribute to red. I.e. the #health-cpu will go red if the %idle of the CPU falls below 10%. Lower limit for red health attribute The time to wait (dampening) in seconds for further changes before writing The time to wait (dampening) in seconds for further changes before writing END } ####################################################################### healthcpu_usage() { cat < 1.1 System health agent that measures the CPU iowait via top and updates the #health-iowait attribute. System health based on CPU iowait measurement Location to store the resource state in. State file Upper limit of iowait percentage to switch the health attribute to yellow. I.e. the #health-iowait will go yellow if the %iowait of the CPU gets higher than 10%. Upper limit for yellow health attribute Upper limit of iowait percentage to switch the health attribute to red. I.e. the #health-iowait will go red if the %iowait of the CPU get higher than 15%. 
Upper limit for red health attribute The time to wait (dampening) in seconds for further changes before writing The time to wait (dampening) in seconds for further changes before writing END } ####################################################################### agent_usage() { cat < 1.1 System health agent that checks the S.M.A.R.T. status of the given drives and updates the #health-smart attribute. SMART health status Location to store the resource state in. State file The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda". Drives to check The device type(s) to assume for the drive(s) being tested as a SPACE separated list. Device types Lower limit of the temperature in deg C of the drive(s). Below this limit the status of #health-smart will be red. Lower limit for the red smart attribute Upper limit of the temperature if deg C of the drives(s). If the drive reports a temperature higher than this value the status of #health-smart will be red. Upper limit for red smart attribute Number of deg C below/above the upper/lower temp limits at which point the status of #health-smart will change to yellow. Deg C below/above the upper limits for yellow smart attribute The path to the smartctl program, used for querying device health. 
The path to the smartctl program The time to wait (dampening) for further changes to occur Dampening interval END } #######################################################################
check_temperature() { if [ -n "$1" ]; then if [ $1 -lt ${lower_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too cold: ${1} C" attrd_updater -n "#health-smart" -B "red" -d "${OCF_RESKEY_dampen}" return 1 fi if [ $1 -gt ${upper_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too hot: ${1} C" attrd_updater -n "#health-smart" -B "red" -d "${OCF_RESKEY_dampen}" return 1 fi if [ $1 -lt ${lower_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite cold: ${1} C" attrd_updater -n "#health-smart" -B "yellow" -d "${OCF_RESKEY_dampen}" return 1 fi if [ $1 -gt ${upper_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite hot: ${1} C" attrd_updater -n "#health-smart" -B "yellow" -d "${OCF_RESKEY_dampen}" return 1 fi fi }
# common_checks: sanity-check the DRIVES and DEVICES arrays.
#
# Exits the agent with $OCF_ERR_ARGS when:
#   - DEVICES is non-empty but its length differs from that of DRIVES, or
#   - any entry of DRIVES does not start with "/dev/".
# Returns normally when both checks pass.
common_checks() {
    # Each item in $OCF_RESKEY_drives must have a corresponding item in
    # $OCF_RESKEY_devices with the device type. Alternately,
    # $OCF_RESKEY_devices can be empty.
    drives_len=${#DRIVES[@]}
    devices_len=${#DEVICES[@]}

    if [ "${drives_len}" -ne "${devices_len}" ] && [ "${devices_len}" -gt 0 ]; then
        ocf_log err "OCF_RESKEY_devices must be empty or the same length as OCF_RESKEY_drives."
        exit $OCF_ERR_ARGS
    fi

    # Each item in $OCF_RESKEY_drives must look like a device node.
    for d in "${DRIVES[@]}"; do
        if [[ "$d" != /dev/* ]]; then
            # The entry being checked comes from the drive list, so name
            # that parameter in the message (not the device-type list).
            ocf_log err "Drive in OCF_RESKEY_drives does not look like a device node: $d"
            exit $OCF_ERR_ARGS
        fi
    done
}
init_smart() { #Set temperature defaults if [ -z "${OCF_RESKEY_temp_warning}" ]; then yellow_threshold=5 else yellow_threshold=${OCF_RESKEY_temp_warning} fi if [ -z "${OCF_RESKEY_temp_lower_limit}" ] ; then lower_red_limit=0 else lower_red_limit=${OCF_RESKEY_temp_lower_limit} fi lower_yellow_limit=$((${lower_red_limit}+${yellow_threshold})) if [ -z "${OCF_RESKEY_temp_upper_limit}" ] ; then upper_red_limit=60 else upper_red_limit=${OCF_RESKEY_temp_upper_limit} fi upper_yellow_limit=$((${upper_red_limit}-${yellow_threshold})) for ndx in ${!DRIVES[*]}; do DRIVE=${DRIVES[$ndx]} if [ -n "${OCF_RESKEY_devices}" ]; then DEVICE=${DEVICES[$ndx]} "${OCF_RESKEY_smartctl}" -d "${DEVICE}" -i "${DRIVE}" | grep -q "SMART support is: Enabled" if [ $? -ne 0 ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi else "${OCF_RESKEY_smartctl}" -i "${DRIVE}" | grep -q "SMART support is: Enabled" if [ $? -ne 0 ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi fi done } HealthSMART_usage() { cat < 1.1 This is an example resource agent that implements Promoted and Unpromoted roles Example stateful resource agent Location to store the resource state in State file If this is set, the environment will be dumped to this file for every call. Environment dump file The notify action will sleep for this many seconds before returning, to simulate a long-running notify. Notify delay in seconds END exit $OCF_SUCCESS } ####################################################################### stateful_usage() { cat < where is one of: meta-data validate-all start stop monitor promote demote notify reload-agent This conforms to the OCF Resource Agent API version 1.1, and expects to have OCF-compliant environment variables provided. 
END exit $1 } stateful_update() { echo $1 > "${OCF_RESKEY_state}" } stateful_check_state() { target="$1" if [ -f "${OCF_RESKEY_state}" ]; then state=$(cat "${OCF_RESKEY_state}") if [ "$target" = "$state" ]; then return 0 fi else if [ -z "$target" ]; then return 0 fi fi return 1 } dump_env() { if [ "${OCF_RESKEY_envfile}" != "" ]; then echo "### ${__OCF_ACTION} @ $(date) ### $(env | sort) ###" >> "${OCF_RESKEY_envfile}" fi } set_promotion_score() { "${HA_SBIN_DIR}/crm_attribute" --promotion -v "$1" } stateful_start() { stateful_check_state Promoted if [ $? -eq 0 ]; then # CRM Error - Should never happen return $OCF_RUNNING_PROMOTED fi stateful_update Unpromoted set_promotion_score $SCORE_UNPROMOTED return 0 } stateful_demote() { stateful_check_state if [ $? -eq 0 ]; then # CRM Error - Should never happen return $OCF_NOT_RUNNING fi stateful_update Unpromoted set_promotion_score $SCORE_UNPROMOTED return 0 } stateful_promote() { stateful_check_state if [ $? -eq 0 ]; then return $OCF_NOT_RUNNING fi stateful_update Promoted set_promotion_score $SCORE_PROMOTED return 0 } stateful_stop() { "${HA_SBIN_DIR}/crm_attribute" --promotion -D stateful_check_state Promoted if [ $? -eq 0 ]; then # CRM Error - Should never happen return $OCF_RUNNING_PROMOTED fi if [ -f "${OCF_RESKEY_state}" ]; then rm "${OCF_RESKEY_state}" fi return 0 } stateful_monitor() { # for testing if [ -f "${OCF_RESKEY_state}.rc" ]; then rc=$(cat "${OCF_RESKEY_state}.rc") ocf_exit_reason "$rc GB redirected to /dev/null" exit $rc fi stateful_check_state Promoted if [ $? -eq 0 ]; then if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then # Restore the promotion score during probes set_promotion_score $SCORE_PROMOTED fi return $OCF_RUNNING_PROMOTED fi stateful_check_state Unpromoted if [ $? 
-eq 0 ]; then if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then # Restore the promotion score during probes set_promotion_score $SCORE_UNPROMOTED fi return $OCF_SUCCESS fi if [ -f "${OCF_RESKEY_state}" ]; then echo "File '${OCF_RESKEY_state}' exists but contains unexpected contents" cat "${OCF_RESKEY_state}" return $OCF_ERR_GENERIC fi return 7 } stateful_notify() { if [ "${OCF_RESKEY_notify_delay}" != "0" ]; then sleep "${OCF_RESKEY_notify_delay}" fi return $OCF_SUCCESS } stateful_validate() { exit $OCF_SUCCESS } stateful_reload_agent() { return $OCF_SUCCESS } if [ -z "$OCF_RESKEY_state" ]; then if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then state="${HA_VARRUN%%/}/Stateful-${OCF_RESOURCE_INSTANCE}.state" # Strip off the trailing clone marker OCF_RESKEY_state=$(echo $state | sed s/:[0-9][0-9]*\.state/.state/) else OCF_RESKEY_state="${HA_VARRUN%%/}/Stateful-${OCF_RESOURCE_INSTANCE}.state" fi fi dump_env case "$__OCF_ACTION" in meta-data) meta_data;; start) stateful_start;; promote) stateful_promote;; demote) stateful_demote;; notify) stateful_notify ;; stop) stateful_stop;; monitor) stateful_monitor;; validate-all) stateful_validate;; reload-agent) stateful_reload_agent;; usage|help) stateful_usage $OCF_SUCCESS;; *) stateful_usage $OCF_ERR_UNIMPLEMENTED;; esac exit $? -# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/agents/ocf/SysInfo.in b/agents/ocf/SysInfo.in index d3a18f9cdc..3464f77aae 100755 --- a/agents/ocf/SysInfo.in +++ b/agents/ocf/SysInfo.in @@ -1,409 +1,409 @@ #!@BASH_PATH@ # # ocf:pacemaker:SysInfo resource agent # # Original copyright 2004 SUSE LINUX AG, Lars Marowsky-Bre # Later changes copyright 2008-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. 
# # # This agent records (in the CIB) various attributes of a node # ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} # Explicitly list all environment variables used, to make static analysis happy : ${OCF_RESKEY_CRM_meta_globally_unique:="false"} : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/SysInfo-${OCF_RESOURCE_INSTANCE}"} : ${OCF_RESKEY_disk_unit:="G"} : ${OCF_RESKEY_clone:="0"} : ${OCF_RESKEY_disks:=""} : ${OCF_RESKEY_delay:=""} : ${OCF_RESKEY_min_disk_free:=""} ####################################################################### meta_data() { cat < 1.1 This is a SysInfo Resource Agent. It records (in the CIB) various attributes of a node Sample Linux output: arch: i686 os: Linux-2.4.26-gentoo-r14 free_swap: 1999 cpu_info: Intel(R) Celeron(R) CPU 2.40GHz cpu_speed: 4771.02 cpu_cores: 1 cpu_load: 0.00 ram_total: 513 ram_free: 117 root_free: 2.4 #health_disk: red Sample Darwin output: arch: i386 os: Darwin-8.6.2 cpu_info: Intel Core Duo cpu_speed: 2.16 cpu_cores: 2 cpu_load: 0.18 ram_total: 2016 ram_free: 787 root_free: 13 #health_disk: green Units: free_swap: MB ram_*: MB cpu_speed (Linux): bogomips cpu_speed (Darwin): GHz *_free: GB (or user-defined: disk_unit) SysInfo resource agent PID file PID file Interval to allow values to stabilize Dampening Delay Filesystems or Paths to be queried for free disk space as a SPACE separated list - e.g "/dev/sda1 /tmp". Results will be written to an attribute with leading slashes removed, and other slashes replaced with underscore, and the word 'free' appended - e.g for /dev/sda1 it would be 'dev_sda1_free'. Note: The root filesystem '/' is always queried to an attribute named 'root_free' List of Filesytems/Paths to query for free disk space Unit to report disk free space in. 
Can be one of: B, K, M, G, T, P (case-insensitive) Unit to report disk free space in The amount of free space required in monitored disks. If any of the monitored disks has less than this amount of free space, , with the node attribute "#health_disk" changing to "red", all resources will move away from the node. Set the node-health-strategy property appropriately for this to take effect. If the unit is not specified, it defaults to disk_unit. minimum disk free space required END } #######################################################################
# UpdateStat NAME VALUE...
#
# Print "NAME:<TAB>VALUE" on stdout and push the same pair to the cluster
# with attrd_updater. All arguments after NAME are joined with spaces to
# form the value.
#
# The update is sent with -B when the current action is "start" and with -v
# for every other action (see attrd_updater(8) for the option semantics).
UpdateStat() {
    local update_flag

    name="$1"; shift
    value="$*"
    printf "%s:\t%s\n" "$name" "$value"

    if [ "$__OCF_ACTION" = "start" ]; then
        update_flag="-B"
    else
        update_flag="-v"
    fi

    # OCF_RESKEY_delay has already been rewritten by the top-level code to
    # the complete option string "-d <value>", so it is expanded unquoted
    # (to split into two words) and must not be prefixed with another "-d".
    "${HA_SBIN_DIR}/attrd_updater" ${OCF_RESKEY_delay} -S status \
        -n "$name" "$update_flag" "$value"
}
SysInfoStats() { local DISK_STATUS="green" UpdateStat arch "$(uname -m)" UpdateStat os "$(uname -s)-$(uname -r)" case $(uname -s) in "Darwin") mem=$(top -l 1 | grep Mem: | awk '{print $10}') mem_used=$(top -l 1 | grep Mem: | awk '{print $8}') mem=$(SysInfo_mem_units "$mem") mem_used=$(SysInfo_mem_units "$mem_used") mem_total=$(expr $mem_used + $mem) cpu_type=$(system_profiler SPHardwareDataType | awk -F': ' '/^CPU Type/ {print $2; exit}') cpu_speed=$(system_profiler SPHardwareDataType | awk -F': ' '/^CPU Speed/ {print $2; exit}') cpu_cores=$(system_profiler SPHardwareDataType | awk -F': ' '/^Number Of/ {print $2; exit}') cpu_load=$(uptime | awk -F 'load average: ' '{ print $2 }' | awk -F ', ' '{ print $2 }') ;; "FreeBSD") cpu_type=$(sysctl -in hw.model) cpu_speed=$(sysctl -in dev.cpu.0.freq) cpu_cores=$(sysctl -in hw.ncpu) cpu_load=$(sysctl -in vm.loadavg | awk '{ print $4 }') free_pages=$(sysctl -in vm.stats.vm.v_free_count) page_count=$(sysctl -in vm.stats.vm.v_page_count) page_size=$(sysctl -in vm.stats.vm.v_page_size) mem=$(expr $free_pages \* $page_size / 1024 / 1024)M mem_total=$(expr $page_count \* $page_size / 
1024 / 1024)M ;; "Linux") if [ -f /proc/cpuinfo ]; then cpu_type=$(awk -F': ' '/model name/ {print $2; exit}' /proc/cpuinfo) cpu_speed=$(awk -F': ' '/bogomips/ {print $2; exit}' /proc/cpuinfo) cpu_cores=$(grep "^processor" /proc/cpuinfo | wc -l) fi cpu_load=$(uptime | awk -F 'load average: ' '{ print $2 }' | awk -F ', ' '{ print $2 }') if [ -f /proc/meminfo ]; then # meminfo results are in kB mem=$(grep "SwapFree" /proc/meminfo | awk '{print $2"k"}') if [ -n "$mem" ]; then UpdateStat free_swap "$(SysInfo_mem_units "$mem")" fi mem=$(grep "Inactive" /proc/meminfo | awk '{print $2"k"}') mem_total=$(grep "MemTotal" /proc/meminfo | awk '{print $2"k"}') else mem=$(top -n 1 | grep Mem: | awk '{print $7}') fi ;; *) esac if [ -n "$cpu_type" ]; then UpdateStat cpu_info "$cpu_type" fi if [ -n "$cpu_speed" ]; then UpdateStat cpu_speed "$cpu_speed" fi if [ -n "$cpu_cores" ]; then UpdateStat cpu_cores "$cpu_cores" fi if [ -n "$cpu_load" ]; then UpdateStat cpu_load "$cpu_load" fi if [ -n "$mem" ]; then # Massage the memory values UpdateStat ram_total "$(SysInfo_mem_units "$mem_total")" UpdateStat ram_free "$(SysInfo_mem_units "$mem")" fi # Portability notes: # o tail: explicit "-n" not available in Solaris; instead simplify # 'tail -n ' to the equivalent 'tail -'. 
for disk in "/" ${OCF_RESKEY_disks}; do unset disk_free disk_label disk_free=$(df -h "${disk}" | tail -1 | awk '{print $4}') if [ -n "$disk_free" ]; then disk_label=$(echo $disk | sed -e 's#^/$#root#;s#^/*##;s#/#_#g') disk_free=$(SysInfo_hdd_units "$disk_free") UpdateStat "${disk_label}_free" $disk_free if [ -n "$MIN_FREE" ] && [ $disk_free -le $MIN_FREE ]; then DISK_STATUS="red" fi fi done UpdateStat "#health_disk" "$DISK_STATUS" } SysInfo_megabytes() { # Size in megabytes echo $1 | awk '{ n = $0; sub( /[0-9]+(.[0-9]+)?/, "" ); if ( $0 == "" ) { $0 = "G" }; # Do not change previous behavior `if ($0 == "G" || $0 == "") { n *= 1024 };` split( n, a, $0 ); n = a[1]; if ( /^[pP]i?[bB]?/ ) { n *= 1024 * 1024 * 1024 }; if ( /^[tT]i?[bB]?/ ) { n *= 1024 * 1024 }; if ( /^[gG]i?[bB]?/ ) { n *= 1024 }; if ( /^[mM]i?[bB]?/ ) { n *= 1 }; if ( /^[kK]i?[bB]?/ ) { n /= 1024 }; if ( /^[bB]i?/ ) { n /= 1024 * 1024 }; printf "%d\n", n }' # Intentionally round to an integer } SysInfo_mem_units() { mem="$1" if [ -z "$1" ]; then return fi mem=$(SysInfo_megabytes "$1") # Round to the next multiple of 50 r=$(($mem % 50)) if [ $r -ne 0 ]; then mem=$(($mem + 50 - $r)) fi echo $mem } SysInfo_hdd_units() { # Defauts to size in gigabytes case "$OCF_RESKEY_disk_unit" in [Pp]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024 / 1024));; [Tt]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024));; [Gg]) echo $(($(SysInfo_megabytes "$1") / 1024));; [Mm]) echo "$(SysInfo_megabytes "$1")" ;; [Kk]) echo $(($(SysInfo_megabytes "$1") * 1024));; [Bb]) echo $(($(SysInfo_megabytes "$1") * 1024 * 1024));; *) ocf_log err "Invalid value for disk_unit: $OCF_RESKEY_disk_unit" echo $(($(SysInfo_megabytes "$1") / 1024));; esac } SysInfo_usage() { cat < "$OCF_RESKEY_pidfile" SysInfoStats exit $OCF_SUCCESS } SysInfo_stop() { rm "$OCF_RESKEY_pidfile" exit $OCF_SUCCESS } SysInfo_monitor() { if [ -f "$OCF_RESKEY_pidfile" ]; then clone=$(cat "$OCF_RESKEY_pidfile") fi if [ -z "$clone" ]; then rm "$OCF_RESKEY_pidfile" exit 
$OCF_NOT_RUNNING elif [ "$clone" = "$OCF_RESKEY_clone" ]; then SysInfoStats exit $OCF_SUCCESS elif ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then SysInfoStats exit $OCF_SUCCESS fi exit $OCF_NOT_RUNNING } SysInfo_reload_agent() { # No action required :; }
# SysInfo_validate: check the min_disk_free parameter.
#
# Returns:
#   $OCF_SUCCESS         if min_disk_free is unset/empty, or is a number
#                        optionally followed by a size unit
#   $OCF_ERR_CONFIGURED  otherwise
SysInfo_validate() {
    local pat

    # min_disk_free is optional: an empty value means "no threshold", so
    # there is nothing to validate and the default configuration is valid.
    if [ -z "$OCF_RESKEY_min_disk_free" ]; then
        return $OCF_SUCCESS
    fi

    # By the time this runs, the top-level code has already appended
    # disk_unit to a bare integer, so a unit suffix is normally present.
    # The pattern is anchored at both ends (an unanchored match would accept
    # any string that merely contains a digit) and admits the forms that
    # SysInfo_megabytes parses: digits, an optional fraction, and an optional
    # unit such as K, M, G, T, P (optionally followed by "i" and/or "B") or B.
    pat='^[0-9]+(\.[0-9]+)?([KkMmGgTtPp]i?[Bb]?|[Bb])?$'
    if [[ ! $OCF_RESKEY_min_disk_free =~ $pat ]]; then
        ocf_log err "Invalid value for min_disk_free: $OCF_RESKEY_min_disk_free"
        return $OCF_ERR_CONFIGURED
    fi

    return $OCF_SUCCESS
}
if [ $# -ne 1 ]; then SysInfo_usage exit $OCF_ERR_ARGS fi if [ -n "${OCF_RESKEY_delay}" ]; then OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" else OCF_RESKEY_delay="-d 0" fi MIN_FREE="" if [ -n "$OCF_RESKEY_min_disk_free" ]; then ocf_is_decimal "$OCF_RESKEY_min_disk_free" && OCF_RESKEY_min_disk_free="$OCF_RESKEY_min_disk_free$OCF_RESKEY_disk_unit" MIN_FREE=$(SysInfo_hdd_units $OCF_RESKEY_min_disk_free) fi case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) SysInfo_start ;; stop) SysInfo_stop ;; monitor) SysInfo_monitor ;; reload-agent) SysInfo_reload_agent ;; validate-all) SysInfo_validate ;; usage|help) SysInfo_usage exit $OCF_SUCCESS ;; *) SysInfo_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? -# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/agents/ocf/attribute.in b/agents/ocf/attribute.in index ced807e3d3..2eb04c1f00 100755 --- a/agents/ocf/attribute.in +++ b/agents/ocf/attribute.in @@ -1,241 +1,241 @@ #!/bin/sh # # ocf:pacemaker:attribute resource agent # # Copyright 2016-2024 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # USAGE="Usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data} Expects to have a fully populated OCF RA-compliant environment set." 
# If the OCF helper funtions aren't available, we can still show metadata. : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} [ -r "${OCF_FUNCTIONS}" ] && . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} # Ensure certain variables are set and not empty : ${HA_VARRUN:="@runstatedir@"} : ${OCF_RESKEY_CRM_meta_globally_unique:="false"} : ${OCF_RESOURCE_INSTANCE:="RESOURCENAME"} DEFAULT_STATE_FILE="${HA_VARRUN%%/}/opa-${OCF_RESOURCE_INSTANCE}.state" if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then # Strip off any trailing clone marker (note + is not portable in sed) DEFAULT_STATE_FILE=$(echo "$DEFAULT_STATE_FILE" | sed s/:[0-9][0-9]*\.state/.state/) fi DEFAULT_ATTR_NAME="opa-${OCF_RESOURCE_INSTANCE}" DEFAULT_ACTIVE_VALUE="1" DEFAULT_INACTIVE_VALUE="0" : ${OCF_RESKEY_state:="$DEFAULT_STATE_FILE"} : ${OCF_RESKEY_name:="$DEFAULT_ATTR_NAME"} # If the user did not set a value, use the default. If the user explicitly set # a value to the empty string, use that (-z "${V+x}" tests whether $V was set). if [ -z "${OCF_RESKEY_active_value+x}" ]; then OCF_RESKEY_active_value="$DEFAULT_ACTIVE_VALUE" fi if [ -z "${OCF_RESKEY_inactive_value+x}" ]; then OCF_RESKEY_inactive_value="$DEFAULT_INACTIVE_VALUE" fi usage() { USAGE_RC=$1 cat < 1.1 This resource agent controls a node attribute for the node it's running on. It sets the attribute one way when started, and another way when stopped, according to the configuration parameters. 
Manages a node attribute Full path of a temporary file to store the resource state in State file Name of node attribute to manage Attribute name Value to use for node attribute when resource becomes active (empty string is discouraged, because monitor cannot distinguish it from a query error) Attribute value when active Value to use for node attribute when resource becomes inactive Attribute value when inactive END return $OCF_SUCCESS } validate() { # Host-specific checks if [ "$OCF_CHECK_LEVEL" = "10" ]; then VALIDATE_DIR=$(dirname "${OCF_RESKEY_state}") if [ ! -d "$VALIDATE_DIR" ]; then ocf_exit_reason "state file '$OCF_RESKEY_state' does not have a valid directory" return $OCF_ERR_PERM fi if [ ! -w "$VALIDATE_DIR" ] || [ ! -x "$VALIDATE_DIR" ]; then ocf_exit_reason "insufficient privileges on directory of state file '$OCF_RESKEY_state'" return $OCF_ERR_PERM fi fi if [ "$OCF_RESKEY_active_value" = "$OCF_RESKEY_inactive_value" ]; then ocf_exit_reason "active value '%s' must be different from inactive value '%s'" \ "$OCF_RESKEY_active_value" "$OCF_RESKEY_inactive_value" return $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } get_attribute() { GET_LINE=$(attrd_updater -n "$OCF_RESKEY_name" -Q 2>/dev/null) if [ $? -ne 0 ]; then echo "" else echo "$GET_LINE" | sed -e "s/.* value=\"\(.*\)\"$/\1/" fi } set_attribute() { attrd_updater -n "$OCF_RESKEY_name" -U "$1" 2>/dev/null # TODO if above call is async, loop until get_attribute returns expected value } check_attribute() { CHECK_VALUE=$(get_attribute) CHECK_REASON="" if [ ! 
-f "$OCF_RESKEY_state" ]; then if [ "$CHECK_VALUE" != "" ] && [ "$CHECK_VALUE" != "$OCF_RESKEY_inactive_value" ]; then CHECK_REASON="Node attribute $OCF_RESKEY_name='$CHECK_VALUE' differs from expected value '$OCF_RESKEY_inactive_value'" return $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING fi if [ "$CHECK_VALUE" != "$OCF_RESKEY_active_value" ]; then CHECK_REASON="Node attribute $OCF_RESKEY_name='$CHECK_VALUE' differs from expected value '$OCF_RESKEY_active_value'" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } monitor() { check_attribute MONITOR_RC=$? if [ $MONITOR_RC -eq $OCF_ERR_GENERIC ]; then ocf_exit_reason "$CHECK_REASON" fi return $MONITOR_RC } start() { check_attribute if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi touch "${OCF_RESKEY_state}" 2>/dev/null if [ $? -ne 0 ]; then ocf_exit_reason "Unable to manage state file $OCF_RESKEY_state" return $OCF_ERR_GENERIC fi set_attribute "${OCF_RESKEY_active_value}" if [ $? -ne 0 ]; then rm -f "${OCF_RESKEY_state}" ocf_exit_reason "Unable to set node attribute $OCF_RESKEY_name='$OCF_RESKEY_active_value'" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } stop() { check_attribute if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi rm -f ${OCF_RESKEY_state} set_attribute "${OCF_RESKEY_inactive_value}" if [ $? -ne 0 ]; then ocf_exit_reason "Unable to set node attribute $OCF_RESKEY_name='$OCF_RESKEY_inactive_value'" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data ;; start) start ;; stop) stop ;; monitor) monitor ;; # We don't do anything special for live migration, but we support it so that # other resources that live migrate can depend on this one. migrate_to) stop ;; migrate_from) start ;; reload) start ;; validate-all) validate ;; usage|help) usage $OCF_SUCCESS ;; *) usage $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
-# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/agents/ocf/controld.in b/agents/ocf/controld.in index c38c0d8416..2e7a206b69 100644 --- a/agents/ocf/controld.in +++ b/agents/ocf/controld.in @@ -1,299 +1,299 @@ #!/bin/sh # # ocf:pacemaker:controld resource agent # # Copyright 2008-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # Manages the DLM controld process # ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} # Explicitly list all environment variables used, to make static analysis happy : ${OCF_RESKEY_CRM_meta_globally_unique:="false"} : ${OCF_RESKEY_allow_stonith_disabled:="false"} : ${OCF_RESKEY_sctp:="false"} : ${OCF_RESOURCE_INSTANCE:=""} case "$OCF_RESOURCE_INSTANCE" in *[dD][lL][mM]*) : ${OCF_RESKEY_args=-s 0} : ${OCF_RESKEY_daemon:=dlm_controld} ;; *) : ${OCF_RESKEY_args=-s 0} : ${OCF_RESKEY_daemon:=dlm_controld} esac ####################################################################### if [ -e "$OCF_ROOT/resource.d/heartbeat/controld" ]; then ocf_log info "Using heartbeat controld agent" "$OCF_ROOT/resource.d/heartbeat/controld" "$1" exit $? fi meta_data() { cat < 1.1 This Resource Agent can control the dlm_controld services needed by cluster-aware file systems. It assumes that dlm_controld is in your default PATH. In most cases, it should be run as an anonymous clone. DLM Agent for cluster file systems Any additional options to start the dlm_controld service with DLM Options The daemon to start - supports dlm_controld The daemon to start Allow DLM start-up even if STONITH/fencing is disabled in the cluster. 
Setting this option to true will cause cluster malfunction and hangs on fail-over for DLM clients that require fencing (such as GFS2, OCFS2, and cLVM2). This option is advanced use only. Allow start-up even without STONITH/fencing END } ####################################################################### CONFIGFS_DIR="/sys/kernel/config" DLM_CONFIGFS_DIR="${CONFIGFS_DIR}/dlm" DLM_SYSFS_DIR="/sys/kernel/dlm" controld_usage() { cat <&1) if [ $? -eq 0 ]; then if [ -n "$CUL_TMP" ]; then ocf_log err "Uncontrolled lockspace exists, system must reboot. Executing suicide fencing" stonith_admin --reboot="$(crm_node -n)" --tag controld exit $OCF_ERR_GENERIC fi fi } controld_start() { controld_monitor; rc=$? case $rc in "$OCF_SUCCESS") return $OCF_SUCCESS;; "$OCF_NOT_RUNNING") ;; *) return $OCF_ERR_GENERIC;; esac # Ensure @runstatedir@/cluster exists [ -d "@runstatedir@/cluster" ] || mkdir "@runstatedir@/cluster" # Ensure configfs is mounted if [ ! -e "$CONFIGFS_DIR" ]; then modprobe configfs if [ ! -e "$CONFIGFS_DIR" ]; then ocf_log err "$CONFIGFS_DIR not available" return $OCF_ERR_INSTALLED fi fi mount -t configfs | grep " $CONFIGFS_DIR " >/dev/null 2>/dev/null if [ $? -ne 0 ]; then mount -t configfs none "$CONFIGFS_DIR" fi # Ensure DLM is available if [ ! -e "$DLM_CONFIGFS_DIR" ]; then modprobe dlm if [ ! -e "$DLM_CONFIGFS_DIR" ]; then ocf_log err "$DLM_CONFIGFS_DIR not available" return $OCF_ERR_INSTALLED fi fi if ! ocf_is_true "$OCF_RESKEY_allow_stonith_disabled" && \ ! ocf_is_true "$(crm_attribute --type=crm_config --name=stonith-enabled --query --quiet --default=true)"; then ocf_log err "The cluster property stonith-enabled may not be deactivated to use the DLM" return $OCF_ERR_CONFIGURED fi # If no-quorum-policy not set, or not set as freeze, give a warning crm_attribute --type=crm_config --name=no-quorum-policy --query|grep value=freeze >/dev/null 2>/dev/null if [ $? 
-ne 0 ]; then ocf_log warn "The DLM cluster best practice suggests to set the cluster property \"no-quorum-policy=freeze\"" fi "${OCF_RESKEY_daemon}" $OCF_RESKEY_args while true do sleep 1 controld_monitor; rc=$? case $rc in "$OCF_SUCCESS") CS_ADDR_LIST="$(cat "${DLM_CONFIGFS_DIR}"/cluster/comms/*/addr_list 2>/dev/null)" if [ $? -eq 0 ] && [ -n "$CS_ADDR_LIST" ]; then return $OCF_SUCCESS fi ;; "$OCF_NOT_RUNNING") return $OCF_NOT_RUNNING ;; *) return $OCF_ERR_GENERIC ;; esac ocf_log debug "Waiting for ${OCF_RESKEY_daemon} to be ready" done } controld_stop() { controld_monitor; rc=$? if [ $rc -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi killall -TERM "${OCF_RESKEY_daemon}"; rc=$? if [ $rc -ne 0 ]; then return $OCF_ERR_GENERIC fi rc=$OCF_SUCCESS while [ $rc -eq $OCF_SUCCESS ]; do controld_monitor; rc=$? sleep 1 done if [ $rc -eq $OCF_NOT_RUNNING ]; then rc=$OCF_SUCCESS fi return $rc } controld_monitor() { killall -0 ${OCF_RESKEY_daemon} >/dev/null 2>&1 ; CM_RC=$? case $CM_RC in 0) smw=$(dlm_tool status -v | grep "stateful_merge_wait=" | cut -d= -f2) if [ -n "$smw" ] && [ $smw -eq 1 ]; then ocf_log err "DLM status is: stateful_merge_wait" CM_RC=$OCF_ERR_GENERIC elif [ -z "$smw" ] && dlm_tool ls | grep -q "wait fencing" && \ ! stonith_admin -H '*' --output-as xml | grep -q "extended-status=\"pending\""; then ocf_log err "DLM status is: wait fencing" CM_RC=$OCF_ERR_GENERIC else CM_RC=$OCF_SUCCESS fi ;; 1) CM_RC=$OCF_NOT_RUNNING;; *) CM_RC=$OCF_ERR_GENERIC;; esac # if the dlm is not successfully running, but # dlm lockspace bits are left over, we self must fence. 
if [ $CM_RC -ne $OCF_SUCCESS ]; then check_uncontrolled_locks fi return $CM_RC } controld_validate() { case "${OCF_RESKEY_CRM_meta_globally_unique}" in [Tt][Rr][Uu][Ee] | [Oo][Nn] | [Yy][Ee][Ss] | [Yy] | 1) msg="The globally-unique meta attribute must not be enabled for" msg="$msg $OCF_RESOURCE_INSTANCE" ocf_log err "$msg" exit $OCF_ERR_CONFIGURED ;; esac # Host-specific checks if [ "$OCF_CHECK_LEVEL" = "10" ]; then check_binary killall check_binary "${OCF_RESKEY_daemon}" fi return $OCF_SUCCESS } case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) controld_validate; controld_start;; stop) controld_stop;; monitor) controld_validate; controld_monitor;; validate-all) controld_validate;; usage|help) controld_usage exit $OCF_SUCCESS ;; *) controld_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? exit $rc -# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/agents/ocf/ifspeed.in b/agents/ocf/ifspeed.in index 8c07c3d7ba..e5cc63ec65 100755 --- a/agents/ocf/ifspeed.in +++ b/agents/ocf/ifspeed.in @@ -1,557 +1,557 @@ #!@BASH_PATH@ # # ocf:pacemaker:ifspeed resource agent # # Copyright 2011-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # # # Record speed of a network interface as a node attribute, based on the sum of # speeds of its active (up, link detected, not blocked) underlying interfaces. # # Originally based on ocf:pacemaker:ping agent # : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} # If these aren't available, we can still show help, # which is all that is needed to build the man pages. [ -r "${OCF_FUNCTIONS}" ] && . "${OCF_FUNCTIONS}" [ -r "${OCF_FUNCTIONS_DIR}/findif.sh" ] && . 
"${OCF_FUNCTIONS_DIR}/findif.sh" : ${OCF_SUCCESS:=0} : ${__OCF_ACTION:=$1} FINDIF=findif # Defaults OCF_RESKEY_name_default="ifspeed" OCF_RESKEY_bridge_ports_default="detect" OCF_RESKEY_weight_base_default=1000 OCF_RESKEY_dampen_default=5 # Explicitly list all environment variables used, to make static analysis happy : ${OCF_RESKEY_name:=${OCF_RESKEY_name_default}} : ${OCF_RESKEY_bridge_ports:=${OCF_RESKEY_bridge_ports_default}} : ${OCF_RESKEY_weight_base:=${OCF_RESKEY_weight_base_default}} : ${OCF_RESKEY_dampen:=${OCF_RESKEY_dampen_default}} : ${OCF_RESKEY_iface:=""} : ${OCF_RESKEY_ip:=""} : ${OCF_RESKEY_debug:="false"} meta_data() { cat < 1.1 This agent's monitor action records the speed of a specified network interface as a node attribute. The attribute can be used in rules to prefer nodes based on network speeds. This agent can monitor physical interfaces, bonded interfaces, bridges, VLANs, or any combination thereof. For example: *) Bridge on top of one 10Gbps interface (eth2) and 802.3ad bonding (bond0) built on two 1Gbps interfaces (eth0 and eth1). *) Active-backup bonding built on top of one physical interface and one VLAN on another interface. For STP-enabled bridges, this agent tries to determine the network topology, and by default looks only on ports which are connected to an upstream switch. This can be overridden by 'bridge_ports' parameter. Active interfaces in this case are those in "forwarding" state. For balancing bonded interfaces, this agent uses 80% of the sum of the speeds of underlying "up" ports. For non-balancing bonded interfaces ("active-backup" and probably "broadcast"), only the speed of the currently active port is considered. Network interface speed monitor Name of the node attribute to set Attribute name If this is set, monitor this network interface. One of iface or ip must be set. Network interface If this is set instead of iface, monitor the interface that holds this IP address. 
The address may be specified in dotted-quad notation for IPv4 (for example, 192.168.1.1) or hexadecimal notation for IPv6 (for example, 2001:db8:DC28:0:0:FC57:D4C8:1FFF). One of iface or ip must be set. IPv4 or IPv6 address If set and iface is a bridge, consider these bridge ports (by default, all ports which have designated_bridge=root_id) Bridge ports Relative weight of 1Gbps in interface speed. Can be used to tune how big attribute value will be. Weight of 1Gbps The time to wait (dampening) for further changes to occur. Dampening interval Log more verbosely. Verbose logging END } usage() { cat < 1.1 Every time the monitor action is run, this resource agent records (in the CIB) the current number of nodes the host can connect to using the system fping (preferred) or ping tool. node connectivity PID file PID file The time to wait (dampening) further changes occur Dampening interval The name of the attributes to set. This is the name to be used in the constraints. Attribute name The number by which to multiply the number of connected ping nodes by Value multiplier A space separated list of ping nodes to count. Host list Number of ping attempts, per host, before declaring it dead no. of ping attempts How long, in seconds, to wait before declaring a ping lost ping timeout in seconds A catch all for any other options that need to be passed to ping. Extra Options Resource is failed if the score is less than failure_score. Default never fails. failure_score Use fping rather than ping, if found. If set to 0, fping will not be used even if present. Use fping if available Enables to use default attrd_updater verbose logging on every call. Verbose logging END } ####################################################################### ping_conditional_log() { level="$1"; shift if [ $OCF_RESKEY_debug -gt 0 ]; then ocf_log "$level" "$*" fi } ping_usage() { cat <&1); rc=$? 
active=$(echo "$fping_output" | grep "is alive" | wc -l) case $rc in 0) if [ $OCF_RESKEY_debug -gt 1 ]; then ping_conditional_log info "$fping_output" fi ;; 1) for h in $(echo "$fping_output" | grep "is unreachable" | awk '{print $1}'); do ping_conditional_log warn "$h is inactive: $fping_output" done ;; *) ocf_log err "Unexpected result for '$cmd' $rc: $(echo "$fping_output" | tr '\n' ';')" ;; esac return $active } ping_check() { active=0 for host in $OCF_RESKEY_host_list; do p_exe=ping case $(uname) in Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";; Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; FreeBSD) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; *) ocf_log err "Unknown host type: $(uname)"; exit $OCF_ERR_INSTALLED;; esac case "$host" in *:*) p_exe=ping6 esac ping_output=$($p_exe $p_args $OCF_RESKEY_options $host 2>&1); rc=$? case $rc in 0) active=$(expr $active + 1) if [ $OCF_RESKEY_debug -gt 1 ]; then ping_conditional_log info "$ping_output" fi ;; 1) ping_conditional_log warn "$host is inactive: $ping_output";; *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $ping_output";; esac done return $active } ping_update() { if use_fping; then fping_check active=$? else ping_check active=$? fi score=$(expr $active \* $OCF_RESKEY_multiplier) attrd_updater -n "$OCF_RESKEY_name" -B "$score" -d "$OCF_RESKEY_dampen" rc=$? case $rc in 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;; *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";; esac if [ $rc -ne 0 ]; then return $rc fi if [ -n "$OCF_RESKEY_failure_score" ] && [ "$score" -lt "$OCF_RESKEY_failure_score" ]; then ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)" return 1 fi return 0 } use_fping() { ocf_is_true "$OCF_RESKEY_use_fping" && have_binary fping; } # return values: # 4 IPv4 # 6 IPv6 # 0 indefinite (i.e. 
hostname) host_family() { case $1 in *[0-9].*[0-9].*[0-9].*[0-9]) return 4 ;; *:*) return 6 ;; *) return 0 ;; esac } # return values same as host_family plus # 99 ambiguous families hosts_family() { # For fping allow only same IP versions or hostnames family=0 for host in $OCF_RESKEY_host_list; do host_family "$host" f=$? if [ $family -ne 0 ] && [ $f -ne 0 ] && [ $f -ne $family ] ; then family=99 break fi [ $f -ne 0 ] && family=$f done return $family } integer=$(echo ${OCF_RESKEY_timeout} | $EGREP -o '[0-9]*') case "${OCF_RESKEY_timeout}" in *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=$(expr $integer / 1000);; *[0-9]m|*[0-9]min) OCF_RESKEY_timeout=$(expr $integer \* 60);; *[0-9]h|*[0-9]hr) OCF_RESKEY_timeout=$(expr $integer \* 60 \* 60);; *) OCF_RESKEY_timeout=$integer;; esac if [ -z "${OCF_RESKEY_timeout}" ]; then if [ -n "$OCF_RESKEY_host_list" ]; then host_count=$(echo $OCF_RESKEY_host_list | awk '{print NF}') OCF_RESKEY_timeout=$(expr $OCF_RESKEY_CRM_meta_timeout / $host_count / $OCF_RESKEY_attempts) OCF_RESKEY_timeout=$(expr $OCF_RESKEY_timeout / 1100) # Convert to seconds and finish 10% early else OCF_RESKEY_timeout=5 fi fi if [ ${OCF_RESKEY_timeout} -lt 1 ]; then OCF_RESKEY_timeout=5 elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then # ping actually complains if this value is too high, 5 minutes is plenty OCF_RESKEY_timeout=300 fi if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/ping-${OCF_RESKEY_name}"} else : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/ping-${OCF_RESOURCE_INSTANCE}"} fi # Check the debug option case "${OCF_RESKEY_debug}" in true|True|TRUE|1) OCF_RESKEY_debug=1;; false|False|FALSE|0) OCF_RESKEY_debug=0;; verbose|Verbose|VERBOSE|2) OCF_RESKEY_debug=2;; *) ocf_log warn "Value for 'debug' is incorrect. 
Please specify 'true', 'false', or 'verbose', not: ${OCF_RESKEY_debug}" OCF_RESKEY_debug=false ;; esac case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) ping_start;; stop) ping_stop;; monitor) ping_monitor;; validate-all) ping_validate;; reload-agent) ping_reload_agent;; usage|help) ping_usage exit $OCF_SUCCESS ;; *) ping_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? -# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/agents/ocf/remote.in b/agents/ocf/remote.in index 8541d0e18e..117402e473 100755 --- a/agents/ocf/remote.in +++ b/agents/ocf/remote.in @@ -1,106 +1,106 @@ #!/bin/sh # # ocf:pacemaker:remote OCF resource agent # # Copyright 2013-2025 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # This script provides metadata for Pacemaker's internal remote agent. # Outside of acting as a placeholder so the agent can be indexed, and # providing metadata, this script should never be invoked. The actual # functionality behind the remote connection lives within Pacemaker's # controller daemon. # : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} meta_data() { cat < 1.1 Pacemaker Remote connection Server location to connect to (IP address or resolvable host name) Remote hostname TCP port at which to contact Pacemaker Remote executor Remote port If this is a positive time interval, the cluster will attempt to reconnect to a remote node after an active connection has been lost at this interval. Otherwise, the cluster will attempt to reconnect immediately (after any fencing needed). 
reconnect interval END return $OCF_SUCCESS } remote_usage() { EXITSTATUS="$1" cat < This conforms to the OCF Resource Agent API version 1.1, and expects to have OCF-compliant environment variables provided. END return $EXITSTATUS } remote_unsupported() { ocf_log info "The ocf:pacemaker:remote agent should not be directly invoked except for meta-data action" return $OCF_ERR_GENERIC } case $__OCF_ACTION in meta-data) meta_data ;; start) remote_unsupported ;; stop) remote_unsupported ;; monitor) remote_unsupported ;; migrate_to) remote_unsupported ;; migrate_from) remote_unsupported ;; reload) remote_unsupported ;; reload-agent) remote_unsupported ;; validate-all) remote_unsupported ;; usage|help) remote_usage $OCF_SUCCESS ;; *) remote_usage $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc -# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/agents/stonith/fence_legacy.in b/agents/stonith/fence_legacy.in index 596facf8d8..e7c5cb2da6 100755 --- a/agents/stonith/fence_legacy.in +++ b/agents/stonith/fence_legacy.in @@ -1,271 +1,273 @@ #!@PYTHON@ __copyright__ = "Copyright 2018-2025 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import os import sys import argparse import subprocess # These imports allow running from a source checkout after running `make`. # Note that while this doesn't necessarily mean it will successfully run tests, # but being able to see --help output can be useful. 
if os.path.exists("@abs_top_srcdir@/python"): sys.path.insert(0, "@abs_top_srcdir@/python") if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@": sys.path.insert(0, "@abs_top_builddir@/python") from pacemaker.exitstatus import ExitStatus VERSION = "1.1.0" USAGE = """Helper that presents a Pacemaker-style interface for Linux-HA stonith plugins Should never be invoked by the user directly Usage: fence_legacy [options] Options: -h usage -t sub agent -n nodename -o Action: on | off | reset (default) | stat | hostlist -s stonith command -q quiet mode -V version""" META_DATA = """ This agent should never be invoked by the user directly. https://www.clusterlabs.org/ Fencing Action Physical plug number or name of virtual machine Display help and exit """ ACTIONS = [ "on", "off", "reset", "reboot", "stat", "status", "metadata", "monitor", "list", "hostlist", "poweroff", "poweron" ] def parse_cli_options(): """ Return parsed command-line options (as argparse namespace) """ # Don't add standard help option, so we can format it how we want parser = argparse.ArgumentParser(add_help=False) parser.add_argument("-t", metavar="SUBAGENT", dest="subagent", nargs=1, default="none", help="sub-agent") parser.add_argument("-n", metavar="NODE", dest="node", nargs=1, default="", help="name of target node") # The help text here is consistent with the original version, though # perhaps all actions should be listed. 
parser.add_argument("-o", metavar="ACTION", dest="action", nargs=1, choices=ACTIONS, default="reset", help="action: on | off | reset (default) | stat | hostlist") parser.add_argument("-s", metavar="COMMAND", dest="command", nargs=1, default="stonith", help="stonith command") parser.add_argument("-q", dest="quiet", action="store_true", help="quiet mode") parser.add_argument("-h", "--help", action="store_true", help="show usage and exit") parser.add_argument("-V", "--version", action="version", version=VERSION, help="show version and exit") return parser.parse_args() def parse_stdin_options(options): """ Update options namespace with options parsed from stdin """ nlines = 0 for line in sys.stdin: # Remove leading and trailing whitespace line = line.strip() # Skip blank lines and comments if line == "" or line[0] == "#": continue nlines = nlines + 1 # Parse option name and value (allow whitespace around equals sign) try: (name, value) = line.split("=", 1) name = name.rstrip() if name == "": raise ValueError except ValueError: print("parse error: illegal name in option %d" % nlines, file=sys.stderr) sys.exit(ExitStatus.INVALID_PARAM) value = value.lstrip() if name == "plugin": options.subagent = value elif name in [ "option", "action" ]: options.action = value elif name == "nodename": options.node = value os.environ[name] = value elif name == "stonith": options.command = value elif name != "agent": # agent is used by fenced os.environ[name] = value def normalize_options(options): """ Use string rather than list of one string """ if not hasattr(options.subagent, "strip"): options.subagent = options.subagent[0] if not hasattr(options.node, "strip"): options.node = options.node[0] if not hasattr(options.action, "strip"): options.action = options.action[0] if not hasattr(options.command, "strip"): options.command = options.command[0] def build_command(options): """ Return command to execute (as list of arguments) """ if options.action in [ "hostlist", "list" ]: extra_args 
= [ "-l" ] elif options.action in [ "monitor", "stat", "status" ]: extra_args = [ "-S" ] else: if options.node == "": if not options.quiet: print("failed: no plug number") sys.exit(ExitStatus.ERROR) extra_args = [ "-T", options.action, options.node ] return [ options.command, "-t", options.subagent, "-E" ] + extra_args def handle_local_options(options): """ Handle options that don't require the fence agent """ if options.help: print(USAGE) sys.exit(ExitStatus.OK) def remap_action(options): """ Pre-process requested action """ options.action = options.action.lower() if options.action == "metadata": print(META_DATA) sys.exit(ExitStatus.OK) elif options.action in [ "hostlist", "list" ]: options.quiet = True # Remap accepted aliases to their actual commands elif options.action == "reboot": options.action = "reset" elif options.action == "poweron": options.action = "on" elif options.action == "poweroff": options.action = "off" def execute_command(options, cmd): """ Execute command and return its exit status """ if not options.quiet: print("Performing: " + " ".join(cmd)) return subprocess.call(cmd) def handle_result(options, status): """ Process fence agent result """ if status == 0: message = "success" exitcode = ExitStatus.OK else: message = "failed" exitcode = ExitStatus.ERROR if not options.quiet: print("%s: %s %d" % (message, options.node, status)) sys.exit(exitcode) def main(): """ Execute an LHA-style fence agent """ options = parse_cli_options() handle_local_options(options) normalize_options(options) parse_stdin_options(options) remap_action(options) cmd = build_command(options) status = execute_command(options, cmd) handle_result(options, status) if __name__ == "__main__": main() + +# vim: set filetype=python: diff --git a/agents/stonith/fence_watchdog.in b/agents/stonith/fence_watchdog.in index f43ab879d4..40cee1999a 100755 --- a/agents/stonith/fence_watchdog.in +++ b/agents/stonith/fence_watchdog.in @@ -1,284 +1,286 @@ #!@PYTHON@ """Dummy watchdog fence agent 
for providing meta-data for the pacemaker internal agent """ __copyright__ = "Copyright 2012-2022 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import re import sys import atexit import getopt AGENT_VERSION = "1.0.0" SHORT_DESC = "Dummy watchdog fence agent" LONG_DESC = """fence_watchdog just provides meta-data - actual fencing is done by the pacemaker internal watchdog agent.""" ALL_OPT = { "version" : { "getopt" : "V", "longopt" : "version", "help" : "-V, --version Display version information and exit", "required" : "0", "shortdesc" : "Display version information and exit", "order" : 53 }, "help" : { "getopt" : "h", "longopt" : "help", "help" : "-h, --help Display this help and exit", "required" : "0", "shortdesc" : "Display help and exit", "order" : 54 }, "action" : { "getopt" : "o:", "longopt" : "action", "help" : "-o, --action=[action] Action: metadata", "required" : "1", "shortdesc" : "Fencing Action", "default" : "metadata", "order" : 1 }, "nodename" : { "getopt" : "N:", "longopt" : "nodename", "help" : "-N, --nodename Node name of fence target (ignored)", "required" : "0", "shortdesc" : "Ignored", "order" : 2 }, "plug" : { "getopt" : "n:", "longopt" : "plug", "help" : "-n, --plug=[id] Physical plug number on device (ignored)", "required" : "1", "shortdesc" : "Ignored", "order" : 4 } } def agent(): """ Return name this file was run as. """ return os.path.basename(sys.argv[0]) def fail_usage(message): """ Print a usage message and exit. """ sys.exit("%s\nPlease use '-h' for usage" % message) def show_docs(options): """ Handle informational options (display info and exit). 
""" device_opt = options["device_opt"] if "-h" in options: usage(device_opt) sys.exit(0) if "-o" in options and options["-o"].lower() == "metadata": metadata(device_opt, options) sys.exit(0) if "-V" in options: print(AGENT_VERSION) sys.exit(0) def sorted_options(avail_opt): """ Return a list of all options, in their internally specified order. """ sorted_list = [(key, ALL_OPT[key]) for key in avail_opt] sorted_list.sort(key=lambda x: x[1]["order"]) return sorted_list def usage(avail_opt): """ Print a usage message. """ print(LONG_DESC) print() print("Usage:") print("\t" + agent() + " [options]") print("Options:") for dummy, value in sorted_options(avail_opt): if len(value["help"]) != 0: print(" " + value["help"]) def metadata(avail_opt, options): """ Print agent metadata. """ print(""" %s """ % (agent(), SHORT_DESC, LONG_DESC)) for option, dummy in sorted_options(avail_opt): if "shortdesc" in ALL_OPT[option]: print(' ') default = "" default_name_arg = "-" + ALL_OPT[option]["getopt"][:-1] default_name_no_arg = "-" + ALL_OPT[option]["getopt"] if "default" in ALL_OPT[option]: default = 'default="%s"' % str(ALL_OPT[option]["default"]) elif default_name_arg in options: if options[default_name_arg]: try: default = 'default="%s"' % options[default_name_arg] except TypeError: ## @todo/@note: Currently there is no clean way how to handle lists ## we can create a string from it but we can't set it on command line default = 'default="%s"' % str(options[default_name_arg]) elif default_name_no_arg in options: default = 'default="true"' mixed = ALL_OPT[option]["help"] ## split it between option and help text res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed) if None != res: mixed = res.group(1) mixed = mixed.replace("<", "<").replace(">", ">") print(' ') if ALL_OPT[option]["getopt"].count(":") > 0: print(' ') else: print(' ') print(' ' + ALL_OPT[option]["shortdesc"] + '') print(' ') print(' \n ') print(' ') print(' ') print(' ') print(' ') print(' ') print(' 
') print(' ') print('') def option_longopt(option): """ Return the getopt-compatible long-option name of the given option. """ if ALL_OPT[option]["getopt"].endswith(":"): return ALL_OPT[option]["longopt"] + "=" else: return ALL_OPT[option]["longopt"] def opts_from_command_line(argv, avail_opt): """ Read options from command-line arguments. """ # Prepare list of options for getopt getopt_string = "" longopt_list = [] for k in avail_opt: if k in ALL_OPT: getopt_string += ALL_OPT[k]["getopt"] else: fail_usage("Parse error: unknown option '" + k + "'") if k in ALL_OPT and "longopt" in ALL_OPT[k]: longopt_list.append(option_longopt(k)) try: opt, dummy = getopt.gnu_getopt(argv, getopt_string, longopt_list) except getopt.GetoptError as error: fail_usage("Parse error: " + error.msg) # Transform longopt to short one which are used in fencing agents old_opt = opt opt = {} for old_option in dict(old_opt).keys(): if old_option.startswith("--"): for option in ALL_OPT.keys(): if "longopt" in ALL_OPT[option] and "--" + ALL_OPT[option]["longopt"] == old_option: opt["-" + ALL_OPT[option]["getopt"].rstrip(":")] = dict(old_opt)[old_option] else: opt[old_option] = dict(old_opt)[old_option] return opt def opts_from_stdin(avail_opt): """ Read options from standard input. """ opt = {} name = "" for line in sys.stdin.readlines(): line = line.strip() if line.startswith("#") or (len(line) == 0): continue (name, value) = (line + "=").split("=", 1) value = value[:-1] if name not in avail_opt: print("Parse error: Ignoring unknown option '%s'" % line, file=sys.stderr) continue if ALL_OPT[name]["getopt"].endswith(":"): opt["-"+ALL_OPT[name]["getopt"].rstrip(":")] = value elif value.lower() in ["1", "yes", "on", "true"]: opt["-"+ALL_OPT[name]["getopt"]] = "1" return opt def process_input(avail_opt): """ Set standard environment variables, and parse all options. 
""" # Set standard environment os.putenv("LANG", "C") os.putenv("LC_ALL", "C") # Read options from command line or standard input if len(sys.argv) > 1: return opts_from_command_line(sys.argv[1:], avail_opt) else: return opts_from_stdin(avail_opt) def atexit_handler(): """ Close stdout on exit. """ try: sys.stdout.close() os.close(1) except IOError: sys.exit("%s failed to close standard output" % agent()) def main(): """ Make it so! """ device_opt = ALL_OPT.keys() ## Defaults for fence agent atexit.register(atexit_handler) options = process_input(device_opt) options["device_opt"] = device_opt show_docs(options) print("Watchdog fencing may be initiated only by the cluster, not this agent.", file=sys.stderr) sys.exit(1) if __name__ == "__main__": main() + +# vim: set filetype=python: diff --git a/cts/benchmark/clubench.in b/cts/benchmark/clubench.in index d20e292df0..1ed27942b8 100644 --- a/cts/benchmark/clubench.in +++ b/cts/benchmark/clubench.in @@ -1,200 +1,202 @@ #!/bin/sh # # Copyright 2010-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. SSHOPTS="-l root -o PasswordAuthentication=no -o ConnectTimeout=5" msg() { echo "$@" >&2 } usage() { echo "usage: $0 " echo " dir: working directory (with the control file)" exit 0 } [ $# -eq 0 ] && usage WORKDIR=$1 test -d "$WORKDIR" || usage CTSCTRL=~/.cts CTRL=$WORKDIR/control CSV=$WORKDIR/bench.csv STATS=$WORKDIR/bench.stats test -f $CTRL && . $CTRL @datadir@/@PACKAGE@/tests/cts/cluster_test 500 || { msg "cluster_test failed" exit 1 } test -f $CTSCTRL || { msg no CTS control file $CTSCTRL exit 1 } . 
$CTSCTRL : ${CTS_logfacility:=local7} : ${CTS_stack:=corosync} : ${CTS_logfile:="@CRM_LOG_DIR@/ha-log-bench"} : ${CTS_adv:="--schema pacemaker-1.2 --clobber-cib -r"} : ${RUNS:=3} : ${CTSTESTS:="--benchmark"} : ${CTSDIR:="@datadir@/@PACKAGE@/tests/cts"} : ${CTS_node_list:=""} : ${CTS_boot:=""} : ${CTS_stonith:=""} : ${CTS_stonith_args:=""} [ -n "$CTS_node_list" ] || { msg no node list specified exit 1 } case "$CTS_stack" in corosync) CRM_REPORT_OPTS="--corosync";; *) msg "$CTS_stack: cluster stack not recognized"; exit 1;; esac CTSOPTS="--stack $CTS_stack --at-boot $CTS_boot $CTS_adv" CTSOPTS="$CTSOPTS --facility $CTS_logfacility --logfile $CTS_logfile" if [ "x$CTS_stonith" != "x" ]; then CTSOPTS="$CTSOPTS --stonith-type $CTS_stonith" [ "x$CTS_stonith_args" != "x" ] && CTSOPTS="$CTSOPTS --stonith-params \"$CTS_stonith_args\"" else CTSOPTS="$CTSOPTS --stonith 0" fi CTSOPTS="$CTSOPTS $CTSTESTS" fibonacci() { F_LIMIT=$1 F_N=2 F_N_PREV=1 while [ $F_N -le $F_LIMIT ]; do echo $F_N F_N_TMP=$F_N F_N=$((F_N+F_N_PREV)) F_N_PREV=$F_N_TMP done [ $F_N_PREV -ne $F_LIMIT ] && echo $F_LIMIT } [ "$SERIES" ] || SERIES=$(fibonacci "$(echo $CTS_node_list | wc -w)") get_nodes() { GN_C_NODES=$(echo $CTS_node_list | awk -v n="$1" ' { for( i=1; i<=NF; i++ ) node[cnt++]=$i } END{for( i=0; i "$RC_ODIR/ctsrun.out" 2>&1 & ctspid=$! tail -f "$RC_ODIR/ctsrun.out" & tailpid=$! 
wait $ctspid kill $tailpid >/dev/null 2>&1 } bench_re='CTS:.*runtime:' diginfo() { DI_CTS_DIR="$1" DI_S="$2" filter="$3" ( cd "$DI_CTS_DIR" || return for r in [0-9]*.tar.bz2; do tar xjf $r DI_D=$(basename "$r" .tar.bz2) for DI_V in $(grep "$bench_re" "$DI_D/ha-log.txt" | eval "$filter"); do DI_S="$DI_S,$DI_V" done rm -r "$DI_D" done echo $DI_S ) } printheader() { diginfo $1 "" "awk '{print \$(NF-2)}'" } printstats() { diginfo $1 "$clusize" "awk '{print \$(NF)}'" } printmedians() { PM_F="$1" PM_S="$clusize" PM_MIDDLE=$((RUNS/2 + 1)) set $(head -1 "$PM_F" | sed 's/,/ /g') PM_COLS=$# for PM_I in $(seq 2 $PM_COLS); do PM_V=$(awk -v i=$PM_I -F, '{print $i}' < $PM_F | sort -n | head -$PM_MIDDLE | tail -1) PM_S="$PM_S,$PM_V" done echo $PM_S } rm -f $CSV tmpf=`mktemp` test -f "$tmpf" || { msg "can't create temporary file" exit 1 } trap "rm -f $tmpf" 0 for clusize in $SERIES; do nodes=`get_nodes $clusize` outdir=$WORKDIR/$clusize rm -rf $outdir mkdir -p $outdir rm -f $tmpf node_cleanup for i in `seq $RUNS`; do true > $CTS_logfile mkdir -p $outdir/$i runcts $outdir/$i mkreports $outdir/$i printstats $outdir/$i >> $tmpf done [ -f "$CSV" ] || printheader $outdir/1 > $CSV printmedians $tmpf >> $CSV cat $tmpf >> $STATS msg "Statistics for $clusize-node cluster saved" done msg "Tests done for series $SERIES, output in $CSV and $STATS" + +# vim: set filetype=sh: diff --git a/cts/cluster_test.in b/cts/cluster_test.in index f5cb3e82b8..9dcc64612a 100755 --- a/cts/cluster_test.in +++ b/cts/cluster_test.in @@ -1,175 +1,177 @@ #!@BASH_PATH@ # # Copyright 2008-2020 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # if [ -e ~/.cts ]; then . 
~/.cts fi anyAsked=0 [ $# -lt 1 ] || CTS_numtests=$1 die() { echo "$@"; exit 1; } if [ -z "$CTS_asked_once" ]; then anyAsked=1 echo "This script should only be executed on the test exerciser." echo "The test exerciser will remotely execute the actions required by the" echo "tests and should not be part of the cluster itself." read -p "Is this host intended to be the test exerciser? (yN) " doUnderstand [ "$doUnderstand" = "y" ] \ || die "This script must be executed on the test exerciser" fi if [ -z "$CTS_node_list" ]; then anyAsked=1 read -p "Please list your cluster nodes (eg. node1 node2 node3): " CTS_node_list else echo "Beginning test of cluster: $CTS_node_list" fi if [ -z "$CTS_stack" ]; then anyAsked=1 read -p "Which cluster stack are you using? ([corosync]): " CTS_stack [ -n "$CTS_stack" ] || CTS_stack=corosync else echo "Using the $CTS_stack cluster stack" fi [ "${CTS_node_list}" = "${CTS_node_list/$HOSTNAME/}" ] \ || die "This script must be executed on the test exerciser, and the test exerciser cannot be part of the cluster" printf "+ Bootstrapping ssh... " if [ -z "$SSH_AUTH_SOCK" ]; then printf "\n + Initializing SSH " eval "$(ssh-agent)" echo " + Adding identities..." ssh-add rc=$? if [ $rc -ne 0 ]; then echo " -- No identities added" printf "\nThe ability to open key-based 'ssh' connections (as the user 'root') is required to use CTS.\n" read -p " - Do you want this program to help you create one? (yN) " auto_fix if [ "$auto_fix" = "y" ]; then ssh-keygen -t dsa ssh-add else die "Please run 'ssh-keygen -t dsa' to create a new key" fi fi else echo "OK" fi test_ok=1 printf "+ Testing ssh configuration... " for n in $CTS_node_list; do ssh -l root -o PasswordAuthentication=no -o ConnectTimeout=5 "$n" /bin/true rc=$? 
if [ $rc -ne 0 ]; then echo " - connection to $n failed" test_ok=0 fi done if [ $test_ok -eq 0 ]; then printf "\nThe ability to open key-based 'ssh' connections (as the user 'root') is required to use CTS.\n" read -p " - Do you want this program to help you with such a setup? (yN) " auto_fix if [ "$auto_fix" = "y" ]; then # XXX are we picking the most suitable identity? privKey=$(ssh-add -L | head -n1 | cut -d" " -f3) sshCopyIdOpts="-o User=root" [ -z "$privKey" ] || sshCopyIdOpts+=" -i \"${privKey}.pub\"" for n in $CTS_node_list; do eval "ssh-copy-id $sshCopyIdOpts \"${n}\"" \ || die "Attempt to 'ssh-copy-id $sshCopyIdOpts \"$n\"' failed" done else die "Please install one of your SSH public keys to root's account on all cluster nodes" fi fi echo "OK" if [ -z "$CTS_logfile" ]; then anyAsked=1 read -p " + Where does/should syslog store logs from remote hosts? (/var/log/messages) " CTS_logfile [ -n "$CTS_logfile" ] || CTS_logfile=/var/log/messages fi [ -e "$CTS_logfile" ] || die "$CTS_logfile doesn't exist" if [ -z "$CTS_logfacility" ]; then anyAsked=1 read -p " + Which log facility does the cluster use? (daemon) " CTS_logfacility [ -n "$CTS_logfacility" ] || CTS_logfacility=daemon fi if [ -z "$CTS_boot" ]; then read -p "+ Is the cluster software started automatically when a node boots? [yN] " CTS_boot if [ -z "$CTS_boot" ]; then CTS_boot=0 else case $CTS_boot in 1|y|Y) CTS_boot=1;; *) CTS_boot=0;; esac fi fi if [ -z "$CTS_numtests" ]; then read -p "+ How many test iterations should be performed? (500) " CTS_numtests [ -n "$CTS_numtests" ] || CTS_numtests=500 fi if [ -z "$CTS_asked_once" ]; then anyAsked=1 read -p "+ What type of STONITH agent do you use? (none) " CTS_stonith [ -z "$CTS_stonith" ] \ || read -p "+ List any STONITH agent parameters (eq. device_host=switch.power.com): " CTS_stonith_args [ -n "$CTS_adv" ] \ || read -p "+ (Advanced) Any extra CTS parameters? (none) " CTS_adv fi [ $anyAsked -eq 0 ] \ || read -p "+ Save values to ~/.cts for next time? 
(yN) " doSave if [ "$doSave" = "y" ]; then cat > ~/.cts <<-EOF # CTS Test data CTS_stack="$CTS_stack" CTS_node_list="$CTS_node_list" CTS_logfile="$CTS_logfile" CTS_logport="$CTS_logport" CTS_logfacility="$CTS_logfacility" CTS_asked_once=1 CTS_adv="$CTS_adv" CTS_stonith="$CTS_stonith" CTS_stonith_args="$CTS_stonith_args" CTS_boot="$CTS_boot" EOF fi cts_extra="" if [ -n "$CTS_stonith" ]; then cts_extra="$cts_extra --stonith-type $CTS_stonith" [ -z "$CTS_stonith_args" ] \ || cts_extra="$cts_extra --stonith-params \"$CTS_stonith_args\"" else cts_extra="$cts_extra --stonith 0" echo " - Testing a cluster without STONITH is like a blunt pencil... pointless" fi printf "\nAll set to go for %d iterations!\n" "$CTS_numtests" [ $anyAsked -ne 0 ] \ || echo "+ To use a different configuration, remove ~/.cts and re-run cts (or edit it manually)." echo Now paste the following command into this shell: echo "@PYTHON@ `dirname "$0"`/cts-lab -L \"$CTS_logfile\" --syslog-facility \"$CTS_logfacility\" --no-unsafe-tests --stack \"$CTS_stack\" $CTS_adv --at-boot \"$CTS_boot\" $cts_extra \"$CTS_numtests\" --nodes \"$CTS_node_list\"" + +# vim: set filetype=sh: diff --git a/cts/cts-attrd.in b/cts/cts-attrd.in index 94eef777a3..948d843240 100644 --- a/cts/cts-attrd.in +++ b/cts/cts-attrd.in @@ -1,414 +1,416 @@ #!@PYTHON@ """Regression tests for Pacemaker's attribute daemon.""" # pylint doesn't like the module name "cts-attrd" which is an invalid complaint for this file # but probably something we want to continue warning about elsewhere # pylint: disable=invalid-name # pacemaker imports need to come after we modify sys.path, which pylint will complain about. 
# pylint: disable=wrong-import-position __copyright__ = "Copyright 2023-2025 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import argparse import os import subprocess import sys import tempfile # These imports allow running from a source checkout after running `make`. # Note that while this doesn't necessarily mean it will successfully run tests, # but being able to see --help output can be useful. if os.path.exists("@abs_top_srcdir@/python"): sys.path.insert(0, "@abs_top_srcdir@/python") # pylint: disable=comparison-of-constants,comparison-with-itself,condition-evals-to-constant if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@": sys.path.insert(0, "@abs_top_builddir@/python") from pacemaker.buildoptions import BuildOptions from pacemaker.exitstatus import ExitStatus from pacemaker._cts.corosync import Corosync from pacemaker._cts.process import killall, exit_if_proc_running from pacemaker._cts.test import Test, Tests TEST_DIR = sys.path[0] def update_path(): """Set the PATH environment variable appropriately for the tests.""" new_path = os.environ['PATH'] if os.path.exists(f"{TEST_DIR}/cts-attrd.in"): # pylint: disable=protected-access print(f"Running tests from the source tree: {BuildOptions._BUILD_DIR} ({TEST_DIR})") # For pacemaker-attrd new_path = f"{BuildOptions._BUILD_DIR}/daemons/attrd:{new_path}" else: print(f"Running tests from the install tree: {BuildOptions.DAEMON_DIR} (not {TEST_DIR})") # For pacemaker-attrd new_path = f"{BuildOptions.DAEMON_DIR}:{new_path}" print(f'Using PATH="{new_path}"') os.environ['PATH'] = new_path class AttributeTest(Test): """Executor for a single test.""" def __init__(self, name, description, **kwargs): """ Create a new AttributeTest instance. Arguments: name -- A unique name for this test. This can be used on the command line to specify that only a specific test should be executed. 
description -- A meaningful description for the test. """ Test.__init__(self, name, description, **kwargs) self._daemon_location = "pacemaker-attrd" self._enable_corosync = True def _kill_daemons(self): killall([self._daemon_location]) def _start_daemons(self): if self.verbose: print(f"Starting {self._daemon_location}") cmd = [self._daemon_location, "-s", "-l", self.logpath] # pylint: disable=consider-using-with self._daemon_process = subprocess.Popen(cmd) class AttributeTests(Tests): """Collection of all attribute regression tests.""" def __init__(self, **kwargs): """Create a new AttributeTests instance.""" Tests.__init__(self, **kwargs) self._corosync = Corosync(self.verbose, self.logdir, "cts-attrd") def new_test(self, name, description): """Create a named test.""" test = AttributeTest(name, description, verbose=self.verbose, logdir=self.logdir) self._tests.append(test) return test def setup_environment(self, use_corosync): """Prepare the host before executing any tests.""" if use_corosync: self._corosync.start(kill_first=True) def cleanup_environment(self, use_corosync): """Clean up the host after executing desired tests.""" if use_corosync: self._corosync.stop() def build_basic_tests(self): """Add basic tests - setting, querying, updating, and deleting attributes.""" test = self.new_test("set_attr_1", "Set and query an attribute") test.add_cmd("attrd_updater", args="--name AAA -U 111 --output-as=xml") test.add_cmd("attrd_updater", args="--name AAA -Q --output-as=xml", stdout_match='name="AAA" value="111"') test.add_cmd("attrd_updater", args="--name AAA -Q", stdout_match='name="AAA" host="[^"]+" value="111"', validate=False) test.add_log_pattern(r"Setting AAA\[.*\] in instance_attributes: \(unset\) -> 111", regex=True) # Setting the delay on an attribute that doesn't exist fails, but the failure is # not passed back to attrd_updater. 
test = self.new_test("set_attr_2", "Set an attribute's delay") test.add_cmd("attrd_updater", args="--name AAA -Y -d 5 --output-as=xml") test.add_log_pattern(r"Processed update-delay request from client .*: Error \(Attribute AAA does not exist\)", regex=True) test = self.new_test("set_attr_3", "Set and query an attribute's delay and value") test.add_cmd("attrd_updater", args="--name AAA -B 111 -d 5 --output-as=xml") test.add_cmd("attrd_updater", args="--name AAA -Q --output-as=xml", stdout_match='name="AAA" value="111"') test.add_cmd("attrd_updater", args="--name AAA -Q", stdout_match='name="AAA" host="[^"]+" value="111"', validate=False) test.add_log_pattern(r"Setting AAA\[.*\] in instance_attributes: \(unset\) -> 111 \| from .* with 5s write delay", regex=True) test = self.new_test("set_attr_4", "Update an attribute that does not exist with a delay") test.add_cmd("attrd_updater", args="--name BBB -U 999 -d 10 --output-as=xml") test.add_cmd("attrd_updater", args="--name BBB -Q --output-as=xml", stdout_match='name="BBB" value="999"') test.add_cmd("attrd_updater", args="--name BBB -Q", stdout_match='name="BBB" host="[^"]+" value="999"', validate=False) test.add_log_pattern(r"Setting BBB\[.*\] in instance_attributes: \(unset\) -> 999 \| from .* with 10s write delay", regex=True) test = self.new_test("update_attr_1", "Update an attribute that already exists") test.add_cmd("attrd_updater", args="--name BBB -U 222 --output-as=xml") test.add_cmd("attrd_updater", args="--name BBB -U 333 --output-as=xml") test.add_cmd("attrd_updater", args="--name BBB -Q --output-as=xml", stdout_match='name="BBB" value="333"') test.add_cmd("attrd_updater", args="--name BBB -Q", stdout_match='name="BBB" host="[^"]+" value="333"', validate=False) test.add_log_pattern(r"Setting BBB\[.*\] in instance_attributes: \(unset\) -> 222", regex=True) test.add_log_pattern(r"Setting BBB\[.*\] in instance_attributes: 222 -> 333", regex=True) test = self.new_test("update_attr_2", "Update an attribute using 
a delay other than its default") test.add_cmd("attrd_updater", args="--name BBB -U 777 -d 10 --output-as=xml") test.add_cmd("attrd_updater", args="--name BBB -U 888 -d 7 --output-as=xml") test.add_log_pattern(r"Setting BBB\[.*\] in instance_attributes: 777 -> 888 \| from .* with 10s write delay", regex=True) test = self.new_test("update_attr_delay_1", "Update the delay of an attribute that already exists") test.add_cmd("attrd_updater", args="--name BBB -U 222 --output-as=xml") test.add_cmd("attrd_updater", args="--name BBB -Y -d 5 --output-as=xml") test.add_log_pattern(r"Setting BBB\[.*\] in instance_attributes: \(unset\) -> 222", regex=True) test.add_log_pattern("Update attribute BBB delay to 5000ms (5)") test = self.new_test("update_attr_delay_2", "Update the delay and value of an attribute that already exists") test.add_cmd("attrd_updater", args="--name BBB -U 222 --output-as=xml") test.add_cmd("attrd_updater", args="--name BBB -B 333 -d 5 --output-as=xml") test.add_log_pattern(r"Setting BBB\[.*\] in instance_attributes: \(unset\) -> 222", regex=True) test.add_log_pattern("Update attribute BBB delay to 5000ms (5)") test.add_log_pattern(r"Setting BBB\[.*\] in instance_attributes: 222 -> 333", regex=True) test = self.new_test("missing_attr_1", "Query an attribute that does not exist") test.add_cmd("attrd_updater", args="--name NOSUCH --output-as=xml", expected_exitcode=ExitStatus.CONFIG) test = self.new_test("delete_attr_1", "Delete an existing attribute") test.add_cmd("attrd_updater", args="--name CCC -U 444 --output-as=xml") test.add_cmd("attrd_updater", args="--name CCC -D --output-as=xml") test.add_log_pattern(r"Setting CCC\[.*\] in instance_attributes: \(unset\) -> 444", regex=True) test.add_log_pattern(r"Setting CCC\[.*\] in instance_attributes: 444 -> \(unset\)", regex=True) test = self.new_test("missing_attr_2", "Delete an attribute that does not exist") test.add_cmd("attrd_updater", args="--name NOSUCH2 -D --output-as=xml") test = 
self.new_test("attr_in_set_1", "Set and query an attribute in a specific set") test.add_cmd("attrd_updater", args="--name DDD -U 555 --set=foo --output-as=xml") test.add_cmd("attrd_updater", args="--name DDD -Q --output-as=xml", stdout_match='name="DDD" value="555"') test.add_cmd("attrd_updater", args="--name DDD -Q", stdout_match='name="DDD" host="[^"]+" value="555"', validate=False) test.add_log_pattern("Processed 1 private change for DDD (set foo)") def build_multiple_query_tests(self): """Add tests that set and query an attribute across multiple nodes.""" # NOTE: These tests make use of the fact that nothing in attrd actually # cares about whether a node exists when you set or query an attribute. # It just keeps creating new hash tables for each node you ask it about. test = self.new_test("multi_query_1", "Query an attribute set across multiple nodes") test.add_cmd("attrd_updater", args="--name AAA -U 111 --node cluster1 --output-as=xml") test.add_cmd("attrd_updater", args="--name AAA -U 222 --node cluster2 --output-as=xml") test.add_cmd("attrd_updater", args="--name AAA -QA --output-as=xml", stdout_match=r'\n.*') test.add_cmd("attrd_updater", args="--name AAA -QA", stdout_match='name="AAA" host="cluster1" value="111"\nname="AAA" host="cluster2" value="222"', validate=False) test.add_cmd("attrd_updater", args="--name AAA -Q --node=cluster1 --output-as=xml", stdout_match='') test.add_cmd("attrd_updater", args="--name AAA -Q --node=cluster1", stdout_match='name="AAA" host="cluster1" value="111"', validate=False) test.add_cmd("attrd_updater", args="--name AAA -Q --node=cluster2 --output-as=xml", stdout_match='') test.add_cmd("attrd_updater", args="--name AAA -Q --node=cluster2", stdout_match='name="AAA" host="cluster2" value="222"', validate=False) test.add_cmd("attrd_updater", args="--name AAA -QA --output-as=xml", stdout_match=r'\n.*', env={"OCF_RESKEY_CRM_meta_on_node": "cluster1"}) test.add_cmd("attrd_updater", args="--name AAA -QA", stdout_match='name="AAA" 
host="cluster1" value="111"\nname="AAA" host="cluster2" value="222"', validate=False, env={"OCF_RESKEY_CRM_meta_on_node": "cluster1"}) test.add_cmd("attrd_updater", args="--name AAA -Q --output-as=xml", stdout_match='', env={"OCF_RESKEY_CRM_meta_on_node": "cluster1"}) test.add_cmd("attrd_updater", args="--name AAA -Q", stdout_match='name="AAA" host="cluster1" value="111"', validate=False, env={"OCF_RESKEY_CRM_meta_on_node": "cluster1"}) test.add_cmd("attrd_updater", args="--name AAA -Q --node=cluster2 --output-as=xml", stdout_match='', env={"OCF_RESKEY_CRM_meta_on_node": "cluster1"}) test.add_cmd("attrd_updater", args="--name AAA -Q --node=cluster2", stdout_match='name="AAA" host="cluster2" value="222"', validate=False, env={"OCF_RESKEY_CRM_meta_on_node": "cluster1"}) def build_regex_tests(self): """Add tests that use regexes.""" test = self.new_test("regex_update_1", "Update attributes using a regex") test.add_cmd("attrd_updater", args="--name AAA -U 111 --output-as=xml") test.add_cmd("attrd_updater", args="--name ABB -U 222 --output-as=xml") test.add_cmd("attrd_updater", args="-P 'A.*' -U 333 --output-as=xml") test.add_cmd("attrd_updater", args="--name AAA -Q --output-as=xml", stdout_match='name="AAA" value="333"') test.add_cmd("attrd_updater", args="--name ABB -Q --output-as=xml", stdout_match='name="ABB" value="333"') test.add_cmd("attrd_updater", args="--name AAA -Q", stdout_match='name="AAA" host="[^"]+" value="333"', validate=False) test.add_cmd("attrd_updater", args="--name ABB -Q", stdout_match='name="ABB" host="[^"]+" value="333"', validate=False) test.add_log_pattern(r"Setting AAA\[.*\] in instance_attributes: \(unset\) -> 111", regex=True) test.add_log_pattern(r"Setting ABB\[.*\] in instance_attributes: \(unset\) -> 222", regex=True) test.add_log_pattern(r"Setting ABB\[.*\] in instance_attributes: 222 -> 333", regex=True) test.add_log_pattern(r"Setting AAA\[.*\] in instance_attributes: 111 -> 333", regex=True) test = self.new_test("regex_delete_1", 
"Delete attributes using a regex") test.add_cmd("attrd_updater", args="--name XAX -U 444 --output-as=xml") test.add_cmd("attrd_updater", args="--name XBX -U 555 --output-as=xml") test.add_cmd("attrd_updater", args="-P 'X[A|B]X' -D --output-as=xml") test.add_log_pattern(r"Setting XAX\[.*\] in instance_attributes: \(unset\) -> 444", regex=True) test.add_log_pattern(r"Setting XBX\[.*\] in instance_attributes: \(unset\) -> 555", regex=True) test.add_log_pattern(r"Setting XBX\[.*\] in instance_attributes: 555 -> \(unset\)", regex=True) test.add_log_pattern(r"Setting XAX\[.*\] in instance_attributes: 444 -> \(unset\)", regex=True) def build_utilization_tests(self): """Add tests that involve utilization attributes.""" test = self.new_test("utilization_1", "Set and query a utilization attribute") test.add_cmd("attrd_updater", args="--name AAA -U ABC -z --output-as=xml") test.add_cmd("attrd_updater", args="--name AAA -Q --output-as=xml", stdout_match='name="AAA" value="ABC"') test.add_cmd("attrd_updater", args="--name AAA -Q", stdout_match='name="AAA" host="[^"]+" value="ABC"', validate=False) test.add_log_pattern(r"Setting AAA\[.*\] in utilization: \(unset\) -> ABC", regex=True) def build_sync_point_tests(self): """Add tests that involve sync points.""" test = self.new_test("local_sync_point", "Wait for a local sync point") test.add_cmd("attrd_updater", args="--name AAA -U 123 --wait=local --output-as=xml") test.add_cmd("attrd_updater", args="--name AAA -Q --output-as=xml", stdout_match='name="AAA" value="123"') test.add_cmd("attrd_updater", args="--name AAA -Q", stdout_match='name="AAA" host="[^"]+" value="123"', validate=False) test.add_log_pattern(r"Alerting client .* for reached local sync point", regex=True) test = self.new_test("cluster_sync_point", "Wait for a cluster-wide sync point") test.add_cmd("attrd_updater", args="--name BBB -U 456 --wait=cluster --output-as=xml") test.add_cmd("attrd_updater", args="--name BBB -Q --output-as=xml", stdout_match='name="BBB" 
value="456"') test.add_cmd("attrd_updater", args="--name BBB -Q", stdout_match='name="BBB" host="[^"]+" value="456"', validate=False) test.add_log_pattern(r"Alerting client .* for reached cluster sync point", regex=True) def build_options(): """Handle command line arguments.""" parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description="Run pacemaker-attrd regression tests", epilog="Example: Run only the test 'start_stop'\n" f"\t {sys.argv[0]} --run-only start_stop\n\n" "Example: Run only the tests with the string 'systemd' present in them\n" f"\t {sys.argv[0]} --run-only-pattern systemd") parser.add_argument("-l", "--list-tests", action="store_true", help="Print out all registered tests") parser.add_argument("-p", "--run-only-pattern", metavar='PATTERN', help="Run only tests matching the given pattern") parser.add_argument("-r", "--run-only", metavar='TEST', help="Run a specific test") parser.add_argument("-V", "--verbose", action="store_true", help="Verbose output") args = parser.parse_args() return args def main(): """Run attrd regression tests as specified by arguments.""" update_path() # Ensure all command output is in portable locale for comparison os.environ['LC_ALL'] = "C" opts = build_options() exit_if_proc_running("pacemaker-attrd") # Create a temporary directory for log files (the directory and its # contents will automatically be erased when done) with tempfile.TemporaryDirectory(prefix="cts-attrd-") as logdir: tests = AttributeTests(verbose=opts.verbose, logdir=logdir) tests.build_basic_tests() tests.build_multiple_query_tests() tests.build_regex_tests() tests.build_utilization_tests() tests.build_sync_point_tests() if opts.list_tests: tests.print_list() sys.exit(ExitStatus.OK) print("Starting ...") try: tests.setup_environment(True) except TimeoutError: print("corosync did not start in time, exiting") sys.exit(ExitStatus.TIMEOUT) if opts.run_only_pattern: tests.run_tests_matching(opts.run_only_pattern) 
tests.print_results() elif opts.run_only: tests.run_single(opts.run_only) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_environment(True) tests.exit() if __name__ == "__main__": main() + +# vim: set filetype=python: diff --git a/cts/cts-cli.in b/cts/cts-cli.in index e85a986a44..b2d2218f77 100644 --- a/cts/cts-cli.in +++ b/cts/cts-cli.in @@ -1,3374 +1,3374 @@ #!@PYTHON@ """Regression tests for Pacemaker's command line tools.""" # pylint doesn't like the module name "cts-cli" which is an invalid complaint for this file # but probably something we want to continue warning about elsewhere # pylint: disable=invalid-name # pacemaker imports need to come after we modify sys.path, which pylint will complain about. # pylint: disable=wrong-import-position # We know this is a very long file. # pylint: disable=too-many-lines __copyright__ = "Copyright 2024-2025 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import argparse from contextlib import contextmanager from datetime import datetime, timedelta import fileinput from functools import partial from gettext import ngettext from multiprocessing import Pool, cpu_count import os import pathlib import re from shutil import copyfile import signal from string import Formatter import subprocess import sys from tempfile import NamedTemporaryFile, TemporaryDirectory, mkstemp import types # These imports allow running from a source checkout after running `make`. 
if os.path.exists("@abs_top_srcdir@/python"): sys.path.insert(0, "@abs_top_srcdir@/python") # pylint: disable=comparison-of-constants,comparison-with-itself,condition-evals-to-constant if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@": sys.path.insert(0, "@abs_top_builddir@/python") from pacemaker._cts.errors import XmlValidationError from pacemaker._cts.validate import validate from pacemaker.buildoptions import BuildOptions from pacemaker.exitstatus import ExitStatus # Individual tool tests are split out, but can also be accessed as a group with "tools" tools_tests = ["cibadmin", "crm_attribute", "crm_standby", "crm_resource", "crm_ticket", "crmadmin", "crm_shadow", "crm_verify", "crm_simulate", "crm_diff"] # The default list of tests to run, in the order they should be run default_tests = ["access_render", "daemons", "dates", "error_codes"] + tools_tests + \ ["crm_mon", "acls", "validity", "upgrade", "rules", "feature_set"] other_tests = ["agents"] # The directory containing this program test_home = os.path.dirname(os.path.realpath(__file__)) # Where test data is stored cts_cli_data = f"{test_home}/cli" # The name of the shadow CIB SHADOW_NAME = "cts-cli" # Arguments to pass to valgrind VALGRIND_ARGS = ["-q", "--gen-suppressions=all", "--show-reachable=no", "--leak-check=full", "--trace-children=no", "--time-stamp=yes", "--num-callers=20", f"--suppressions={test_home}/valgrind-pcmk.suppressions"] class PluralFormatter(Formatter): """ Special string formatting class for selecting singular vs. plurals. 
class PluralFormatter(Formatter):
    """
    Special string formatting class for selecting singular vs. plurals.

    Use like so:
    fmt = PluralFormatter()
    print(fmt.format("{0} {0:plural,test,tests} succeeded", n_tests))

    The format spec is "plural,<singular>[,<plural>]".  When the plural form
    is omitted, it is the singular form with "s" appended.
    """

    def format_field(self, value, format_spec):
        """
        Convert a value to a formatted representation.

        A format_spec beginning with "plural," yields the singular or plural
        word, chosen by ngettext() based on value.  Any other format_spec is
        passed through to Formatter.format_field().
        """
        if not format_spec.startswith("plural,"):
            return super().format_field(value, format_spec)

        # The startswith() check above guarantees at least two elements:
        # the "plural" tag itself and the singular form.
        eles = format_spec.split(',')
        singular = eles[1]
        plural = singular + "s" if len(eles) == 2 else eles[2]
        return ngettext(singular, plural, value)


def cleanup_shadow_dir():
    """
    Remove any previously created shadow CIB directory.

    Raises subprocess.CalledProcessError if crm_shadow exits with an error.
    """
    subprocess.run(["crm_shadow", "--force", "--delete", SHADOW_NAME],
                   stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                   check=True)


def copy_existing_cib(existing):
    """
    Generate a CIB by copying an existing one to a temporary location.

    This is suitable for use with the cib_gen= parameter to the TestGroup class.

    Returns the path of the new copy.  If the copy fails, the temporary file
    is removed again before the error is re-raised.
    """
    (fp, new) = mkstemp(prefix="cts-cli.cib.xml.")
    os.close(fp)

    try:
        copyfile(existing, new)
    except Exception:
        # mkstemp() already created an empty placeholder; don't leave one
        # behind for every failed attempt.
        try:
            os.unlink(new)
        except OSError:
            pass
        raise

    return new


def current_cib():
    """
    Return the complete current CIB.

    The result is the output of "cibadmin -Q" as a str.  Raises
    subprocess.CalledProcessError if cibadmin exits with an error.
    """
    # NOTE(review): environ() is defined elsewhere in this file; presumably it
    # sets CIB_user only for the duration of the with block -- confirm.
    with environ({"CIB_user": "root"}):
        return subprocess.check_output(["cibadmin", "-Q"], encoding="utf-8")


def make_test_group(desc, cmd, **kwargs):
    """
    Create a TestGroup that replicates the same test for multiple classes.

    The given description, cmd, and kwargs will be passed as arguments to each
    Test subclass.  The resulting objects will then be added to a TestGroup
    and returned.

    The main purpose of this function is to be able to run the same test for
    both text and XML formats without having to duplicate everything.
    """
    # Split only once.  Each class gets the same words with its own
    # "--output-as=" option inserted after the command name.  Note that
    # re-joining on single spaces collapses any runs of whitespace in cmd.
    words = cmd.split()
    tests = [c(desc, " ".join(words[:1] + [c.format_args] + words[1:]), **kwargs)
             for c in [Test, ValidatingTest]]
    return TestGroup(tests)


def create_shadow_cib(shadow_dir, create_empty=True, validate_with=None, valgrind=False):
    """
    Create a shadow CIB file.

    As a side effect, CIB_shadow_dir and CIB_shadow are set in os.environ and
    stay set after this function returns.

    Keyword arguments:
    shadow_dir    -- The value to export as CIB_shadow_dir
    create_empty  -- If True, the shadow CIB will be empty.  Otherwise, the
                     shadow CIB will be a copy of the currently active
                     cluster configuration.
    validate_with -- If not None, the schema version to validate the CIB
                     against
    valgrind      -- If True, run the create operation under valgrind

    Raises subprocess.CalledProcessError if any command fails.
    """
    args = ["crm_shadow", "--batch", "--force"]

    if create_empty:
        args += ["--create-empty", SHADOW_NAME]
    else:
        args += ["--create", SHADOW_NAME]

    if validate_with is not None:
        args += ["--validate-with", validate_with]

    if valgrind:
        args = ["valgrind"] + VALGRIND_ARGS + args

    # Exported before running anything so the child processes inherit them
    os.environ["CIB_shadow_dir"] = shadow_dir
    os.environ["CIB_shadow"] = SHADOW_NAME

    subprocess.run(args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                   check=True)
    delete_shadow_resource_defaults()


def delete_shadow_resource_defaults():
    """
    Clear out the rsc_defaults section from a shadow CIB file.

    Raises subprocess.CalledProcessError if cibadmin exits with an error.
    """
    # A newly created empty CIB might or might not have a rsc_defaults section
    # depending on whether the --with-resource-stickiness-default configure
    # option was used. To ensure regression tests behave the same either way,
    # delete any rsc_defaults after creating or erasing a CIB.
    #
    # The XML must name the element to delete; an empty --xml-text gives
    # cibadmin nothing to match.
    subprocess.run(["cibadmin", "--delete", "--xml-text", "<rsc_defaults/>"],
                   stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                   check=True)

    # The above command might or might not bump the CIB version, so reset it
    # to ensure future changes result in the same version for comparison.
    reset_shadow_cib_version()


def reset_shadow_cib_version():
    """
    Reset the version numbers in a shadow CIB file to fixed values.

    The file returned by shadow_path() is rewritten in place with epoch="1",
    num_updates="0", and admin_epoch="0".
    """
    with fileinput.input(files=[shadow_path()], inplace=True) as f:
        for line in f:
            # Order matters: the epoch pattern also matches the tail of
            # admin_epoch="N", which the last substitution then sets to 0.
            line = re.sub('epoch="[0-9]*"', 'epoch="1"', line)
            line = re.sub('num_updates="[0-9]*"', 'num_updates="0"', line)
            line = re.sub('admin_epoch="[0-9]*"', 'admin_epoch="0"', line)
            # With inplace=True, standard output is redirected into the file
            print(line, end='')


def run_cmd_list(cmds):
    """
    Run one or more shell commands.

    cmds can be:
    * A string
    * A Python callable (function, functools.partial, bound method, ...)
    * A list of the above

    Strings are run through the shell; callables are called with no arguments.

    Raises subprocess.CalledProcessError on error.
    """
    if cmds is None:
        return

    # Use callable() rather than types.FunctionType so that partials and
    # methods are invoked instead of being handed to the shell.
    if isinstance(cmds, str) or callable(cmds):
        cmds = [cmds]

    for c in cmds:
        if callable(c):
            c()
        else:
            subprocess.run(c, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                           shell=True, universal_newlines=True, check=True)
""" if cmds is None: return if isinstance(cmds, (str, types.FunctionType)): cmds = [cmds] for c in cmds: if isinstance(c, types.FunctionType): c() else: subprocess.run(c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, universal_newlines=True, check=True) def sanitize_output(s): """ Replace content in the output expected to change between test runs. This is stuff like version numbers, timestamps, source line numbers, build options, system names and messages, etc. """ # A list of tuples of regular expressions and their replacements. replacements = [ (r'Created new pacemaker-.* configuration', r'Created new pacemaker configuration'), (r'Device not configured', r'No such device or address'), (r'^Entity: line [0-9]+: ', r''), (r'(Injecting attribute last-failure-ping#monitor_10000=)[0-9]*', r'\1'), (r'Last change: .*', r'Last change:'), (r'Last updated: .*', r'Last updated:'), (r'^Migration will take effect until: .*', r'Migration will take effect until:'), (r'(\* Possible values.*: .*)\(default: [^)]*\)', r'\1(default: )'), (r"""-X '.*'""", r"""-X '...'"""), (r' api-version="[^"]*"', r' api-version="X"'), (r'\(apply_upgrade@.*\.c:[0-9]+\)', r'apply_upgrade'), (r'\(invert_action@.*\.c:[0-9]+\)', r'invert_action'), (r'\(pcmk__update_schema@.*\.c:[0-9]+\)', r'pcmk__update_schema'), (r'( """ # Create a test CIB that has ACL roles basic_tests = [ Test("Configure some ACLs", "cibadmin -M -o acls -p", update_cib=True, stdin=acl_cib), Test("Enable ACLs", "crm_attribute -n enable-acl -v true", update_cib=True), # Run cibadmin --show-access on the test CIB as an ACL-restricted user Test("An instance of ACLs render (into color)", "cibadmin --force --show-access=color -Q --user tony"), Test("An instance of ACLs render (into namespacing)", "cibadmin --force --show-access=namespace -Q --user tony"), Test("An instance of ACLs render (into text)", "cibadmin --force --show-access=text -Q --user tony"), ] return [ ShadowTestGroup(basic_tests), ] class 
class DaemonsRegressionTest(RegressionTest):
    """A class for testing command line options of pacemaker daemons."""

    @property
    def name(self):
        """Return the name of this regression test."""
        return "daemons"

    @property
    def tests(self):
        """A list of Test instances to be run as part of this regression test."""
        # (description, command) for each daemon's metadata request
        metadata_cmds = [
            ("Get CIB manager metadata", "pacemaker-based metadata"),
            ("Get controller metadata", "pacemaker-controld metadata"),
            ("Get fencer metadata", "pacemaker-fenced metadata"),
            ("Get scheduler metadata", "pacemaker-schedulerd metadata"),
        ]
        return [Test(desc, cmd) for (desc, cmd) in metadata_cmds]


class DatesRegressionTest(RegressionTest):
    """A class for testing handling of ISO8601 dates."""

    @property
    def name(self):
        """Return the name of this regression test."""
        return "dates"

    @property
    def tests(self):
        """
        A list of Test instances to be run as part of this regression test.

        The list is, in order: one rejection test per invalid period, a set of
        fixed date tests, four week-date tests per selected year, and a final
        set of fixed date arithmetic tests.
        """
        invalid_periods = [
            "",
            "2019-01-01 00:00:00Z",              # Start with no end
            "2019-01-01 00:00:00Z/",             # Start with only a trailing slash
            "PT2S/P1M",                          # Two durations
            "2019-13-01 00:00:00Z/P1M",          # Out-of-range month
            "20191077T15/P1M",                   # Out-of-range day
            "2019-10-01T25:00:00Z/P1M",          # Out-of-range hour
            "2019-10-01T24:00:01Z/P1M",          # Hour 24 with anything but :00:00
            "PT5H/20191001T007000Z",             # Out-of-range minute
            "2019-10-01 00:00:80Z/P1M",          # Out-of-range second
            "2019-10-01 00:00:10 +25:00/P1M",    # Out-of-range offset hour
            "20191001T000010 -00:61/P1M",        # Out-of-range offset minute
            "P1Y/2019-02-29 00:00:00Z",          # Feb. 29 in non-leap-year
            "2019-01-01 00:00:00Z/P",            # Duration with no values
            "P1Z/2019-02-20 00:00:00Z",          # Invalid duration unit
            "P1YM/2019-02-20 00:00:00Z",         # No number for duration unit
        ]

        # Ensure invalid period specifications are rejected
        invalid_period_tests = [
            Test(f"Invalid period - [{spec}]", f"iso8601 -p '{spec}'",
                 expected_rc=ExitStatus.INVALID_PARAM)
            for spec in invalid_periods
        ]

        # For each year, parse day 7 and then day 1 of week 1, each followed
        # by its round-trip check
        year_tests = []
        for year in ["06", "07", "08", "09", "10", "11", "12", "13", "14",
                     "15", "16", "17", "18", "40"]:
            for weekday in ("7", "1"):
                date = f"20{year}-W01-{weekday}"
                year_tests.append(Test(date, f"iso8601 -d '{date} 00Z'"))
                year_tests.append(Test(f"{date} - round-trip",
                                       f"iso8601 -d '{date} 00Z' -W -E '{date} 00:00:00Z'"))

        all_tests = list(invalid_period_tests)

        all_tests.extend([
            make_test_group("'2005-040/2005-043' period",
                            "iso8601 -p '2005-040/2005-043'"),
            Test("2014-01-01 00:30:00 - 1 Hour",
                 "iso8601 -d '2014-01-01 00:30:00Z' -D P-1H -E '2013-12-31 23:30:00Z'"),
            Test("Valid date - Feb 29 in leap year",
                 "iso8601 -d '2020-02-29 00:00:00Z' -E '2020-02-29 00:00:00Z'"),
            Test("Valid date - using 'T' and offset",
                 "iso8601 -d '20191201T131211 -05:00' -E '2019-12-01 18:12:11Z'"),
            Test("24:00:00 equivalent to 00:00:00 of next day",
                 "iso8601 -d '2019-12-31 24:00:00Z' -E '2020-01-01 00:00:00Z'"),
        ])

        all_tests.extend(year_tests)

        all_tests.extend([
            make_test_group("2009-W53-07",
                            "iso8601 -d '2009-W53-7 00:00:00Z' -W -E '2009-W53-7 00:00:00Z'"),
            Test("epoch + 2 Years 5 Months 6 Minutes",
                 "iso8601 -d 'epoch' -D P2Y5MT6M -E '1972-06-01 00:06:00Z'"),
            Test("2009-01-31 + 1 Month",
                 "iso8601 -d '20090131T000000Z' -D P1M -E '2009-02-28 00:00:00Z'"),
            Test("2009-01-31 + 2 Months",
                 "iso8601 -d '2009-01-31 00:00:00Z' -D P2M -E '2009-03-31 00:00:00Z'"),
            Test("2009-01-31 + 3 Months",
                 "iso8601 -d '2009-01-31 00:00:00Z' -D P3M -E '2009-04-30 00:00:00Z'"),
            make_test_group("2009-03-31 - 1 Month",
                            "iso8601 -d '2009-03-31 01:00:00 +01:00' -D P-1M -E '2009-02-28 00:00:00Z'"),
            make_test_group("2038-01-01 + 3 Months",
                            "iso8601 -d '2038-01-01 00:00:00Z' -D P3M -E '2038-04-01 00:00:00Z'"),
        ])

        return all_tests
-D P-1M -E '2009-02-28 00:00:00Z'"), make_test_group("2038-01-01 + 3 Months", "iso8601 -d '2038-01-01 00:00:00Z' -D P3M -E '2038-04-01 00:00:00Z'"), ] class ErrorCodeRegressionTest(RegressionTest): """A class for testing error code reporting.""" @property def name(self): """Return the name of this regression test.""" return "error_codes" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" # Legacy return codes # # Don't test unknown legacy code. FreeBSD includes a colon in strerror(), # while other distros do not. legacy_tests = [ make_test_group("Get legacy return code", "crm_error 201"), make_test_group("Get legacy return code (with name)", "crm_error -n 201"), make_test_group("Get multiple legacy return codes", "crm_error 201 202"), make_test_group("Get multiple legacy return codes (with names)", "crm_error -n 201 202"), # We can only rely on our custom codes, so we'll spot-check codes 201-209 Test("List legacy return codes (spot check)", "crm_error -l | grep 20[1-9]"), ValidatingTest("List legacy return codes (spot check)", "crm_error -l --output-as=xml | grep -Ev '&1 | sed -e 's/Digest:.*/Digest:/'"), Test("Require --force for CIB erasure", "cibadmin -E", expected_rc=ExitStatus.UNSAFE, update_cib=True), Test("Allow CIB erasure with --force", "cibadmin -E --force"), # Verify the output after erasure Test("Query CIB", "cibadmin -Q", setup=delete_shadow_resource_defaults, update_cib=True), ] # Add some stuff to the empty CIB so we know that erasing it did something. 
basic_tests_setup = [ """cibadmin -C -o nodes --xml-text ''""", """cibadmin -C -o crm_config --xml-text ''""", """cibadmin -C -o resources --xml-text ''""" ] return [ ShadowTestGroup(basic_tests, setup=basic_tests_setup), ] class CrmAttributeRegressionTest(RegressionTest): """A class for testing crm_attribute.""" @property def name(self): """Return the name of this regression test.""" return "crm_attribute" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" options_tests = [ make_test_group("List all available options (invalid type)", "crm_attribute --list-options=asdf", expected_rc=ExitStatus.USAGE), make_test_group("List non-advanced cluster options", "crm_attribute --list-options=cluster"), make_test_group("List all available cluster options", "crm_attribute --list-options=cluster --all"), Test("Return usage error if both -p and OCF_RESOURCE_INSTANCE are empty strings", "crm_attribute -N cluster01 -p '' -G", expected_rc=ExitStatus.USAGE), ] value_update_tests = [ Test("Query the value of an attribute that does not exist", "crm_attribute -n ABCD --query --quiet", expected_rc=ExitStatus.NOSUCH), Test("Configure something before erasing", "crm_attribute -n test_attr -v 5", update_cib=True), Test("Test '++' XML attribute update syntax", """cibadmin -M --score --xml-text=''""", update_cib=True), Test("Test '+=' XML attribute update syntax", """cibadmin -M --score --xml-text=''""", update_cib=True), make_test_group("Test '++' nvpair value update syntax", "crm_attribute -n test_attr -v 'value++' --score", update_cib=True), make_test_group("Test '+=' nvpair value update syntax", "crm_attribute -n test_attr -v 'value+=2' --score", update_cib=True), Test("Test '++' XML attribute update syntax (--score not set)", """cibadmin -M --xml-text=''""", update_cib=True), Test("Test '+=' XML attribute update syntax (--score not set)", """cibadmin -M --xml-text=''""", update_cib=True), make_test_group("Test '++' nvpair value update 
syntax (--score not set)", "crm_attribute -n test_attr -v 'value++'", update_cib=True), make_test_group("Test '+=' nvpair value update syntax (--score not set)", "crm_attribute -n test_attr -v 'value+=2'", update_cib=True), ] query_set_tests = [ Test("Set cluster option", "crm_attribute -n cluster-delay -v 60s", update_cib=True), Test("Query new cluster option", "cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay"), Test("Set no-quorum policy", "crm_attribute -n no-quorum-policy -v ignore", update_cib=True), Test("Delete nvpair", """cibadmin -D -o crm_config --xml-text ''""", update_cib=True), Test("Create operation should fail", """cibadmin -C -o crm_config --xml-text ''""", expected_rc=ExitStatus.EXISTS, update_cib=True), Test("Modify cluster options section", """cibadmin -M -o crm_config --xml-text ''""", update_cib=True), Test("Query updated cluster option", "cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay", update_cib=True), Test("Set duplicate cluster option", "crm_attribute -n cluster-delay -v 40s -s duplicate", update_cib=True), Test("Setting multiply defined cluster option should fail", "crm_attribute -n cluster-delay -v 30s", expected_rc=ExitStatus.MULTIPLE, update_cib=True), Test("Set cluster option with -s", "crm_attribute -n cluster-delay -v 30s -s duplicate", update_cib=True), Test("Delete cluster option with -i", "crm_attribute -n cluster-delay -D -i cib-bootstrap-options-cluster-delay", update_cib=True), Test("Create node1 and bring it online", "crm_simulate --live-check --in-place --node-up=node1", update_cib=True), Test("Create node attribute", "crm_attribute -n ram -v 1024M -N node1 -t nodes", update_cib=True), Test("Query new node attribute", "cibadmin -Q -o nodes | grep node1-ram", update_cib=True), Test("Create second node attribute", "crm_attribute -n rattr -v XYZ -N node1 -t nodes", update_cib=True), Test("Query node attributes by pattern", "crm_attribute -t nodes -P 'ra.*' -N node1 --query"), 
Test("Update node attributes by pattern", "crm_attribute -t nodes -P 'rat.*' -N node1 -v 10", update_cib=True), Test("Delete node attributes by pattern", "crm_attribute -t nodes -P 'rat.*' -N node1 -D", update_cib=True), Test("Set a transient (fail-count) node attribute", "crm_attribute -n fail-count-foo -v 3 -N node1 -t status", update_cib=True), Test("Query a fail count", "crm_failcount --query -r foo -N node1", update_cib=True), Test("Show node attributes with crm_simulate", "crm_simulate --live-check --show-attrs"), Test("Set a second transient node attribute", "crm_attribute -n fail-count-bar -v 5 -N node1 -t status", update_cib=True), Test("Query transient node attributes by pattern", "crm_attribute -t status -P fail-count -N node1 --query"), Test("Update transient node attributes by pattern", "crm_attribute -t status -P fail-count -N node1 -v 10", update_cib=True), Test("Delete transient node attributes by pattern", "crm_attribute -t status -P fail-count -N node1 -D", update_cib=True), Test("crm_attribute given invalid delete usage", "crm_attribute -t nodes -N node1 -D", expected_rc=ExitStatus.USAGE), Test("Set a utilization node attribute", "crm_attribute -n cpu -v 1 -N node1 -z", update_cib=True), Test("Query utilization node attribute", "crm_attribute --query -n cpu -N node1 -z"), # This update will fail because it has version numbers Test("Replace operation should fail", """cibadmin -Q | sed -e 's/epoch="[^"]*"/epoch="1"/' | cibadmin -R -p""", expected_rc=ExitStatus.OLD), ] promotable_tests = [ make_test_group("Query a nonexistent promotable score attribute", "crm_attribute -N cluster01 -p promotable-rsc -G", expected_rc=ExitStatus.NOSUCH), make_test_group("Delete a nonexistent promotable score attribute", "crm_attribute -N cluster01 -p promotable-rsc -D"), make_test_group("Query after deleting a nonexistent promotable score attribute", "crm_attribute -N cluster01 -p promotable-rsc -G", expected_rc=ExitStatus.NOSUCH), make_test_group("Update a 
nonexistent promotable score attribute", "crm_attribute -N cluster01 -p promotable-rsc -v 1"), make_test_group("Query after updating a nonexistent promotable score attribute", "crm_attribute -N cluster01 -p promotable-rsc -G"), make_test_group("Update an existing promotable score attribute", "crm_attribute -N cluster01 -p promotable-rsc -v 5"), make_test_group("Query after updating an existing promotable score attribute", "crm_attribute -N cluster01 -p promotable-rsc -G"), make_test_group("Delete an existing promotable score attribute", "crm_attribute -N cluster01 -p promotable-rsc -D"), make_test_group("Query after deleting an existing promotable score attribute", "crm_attribute -N cluster01 -p promotable-rsc -G", expected_rc=ExitStatus.NOSUCH), ] # Test for an issue with legacy command line parsing when the resource is # specified in the environment (CLBZ#5509) ocf_rsc_instance_tests = [ make_test_group("Update a promotable score attribute to -INFINITY", "crm_attribute -N cluster01 -p -v -INFINITY", env={"OCF_RESOURCE_INSTANCE": "promotable-rsc"}), make_test_group("Query after updating a promotable score attribute to -INFINITY", "crm_attribute -N cluster01 -p -G", env={"OCF_RESOURCE_INSTANCE": "promotable-rsc"}), Test("Try OCF_RESOURCE_INSTANCE if -p is specified with an empty string", "crm_attribute -N cluster01 -p '' -G", env={"OCF_RESOURCE_INSTANCE": "promotable-rsc"}), ] return options_tests + [ ShadowTestGroup(value_update_tests), ShadowTestGroup(query_set_tests), TestGroup(promotable_tests + ocf_rsc_instance_tests, env={"OCF_RESOURCE_INSTANCE": "promotable-rsc"}, cib_gen=partial(copy_existing_cib, f"{cts_cli_data}/crm_mon.xml")), ] class CrmStandbyRegressionTest(RegressionTest): """A class for testing crm_standby.""" @property def name(self): """Return the name of this regression test.""" return "crm_standby" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" basic_tests = [ Test("Default standby value", 
"crm_standby -N node1 -G"), Test("Set standby status", "crm_standby -N node1 -v true", update_cib=True), Test("Query standby value", "crm_standby -N node1 -G"), Test("Delete standby value", "crm_standby -N node1 -D", update_cib=True), ] return [ ShadowTestGroup(basic_tests, setup="""cibadmin -C -o nodes --xml-text ''"""), ] class CrmResourceRegressionTest(RegressionTest): """A class for testing crm_resource.""" @property def name(self): """Return the name of this regression test.""" return "crm_resource" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" options_tests = [ Test("crm_resource run with extra arguments", "crm_resource foo bar", expected_rc=ExitStatus.USAGE), Test("List all available resource options (invalid type)", "crm_resource --list-options=asdf", expected_rc=ExitStatus.USAGE), Test("List all available resource options (invalid type)", "crm_resource --list-options=asdf --output-as=xml", expected_rc=ExitStatus.USAGE), make_test_group("List non-advanced primitive meta-attributes", "crm_resource --list-options=primitive"), make_test_group("List all available primitive meta-attributes", "crm_resource --list-options=primitive --all"), make_test_group("List non-advanced fencing parameters", "crm_resource --list-options=fencing"), make_test_group("List all available fencing parameters", "crm_resource --list-options=fencing --all"), ] basic_tests = [ Test("Create a resource", """cibadmin -C -o resources --xml-text ''""", update_cib=True), Test("crm_resource given both -r and resource config", "crm_resource -r xyz --class ocf --provider pacemaker --agent Dummy", expected_rc=ExitStatus.USAGE), Test("crm_resource given resource config with invalid action", "crm_resource --class ocf --provider pacemaker --agent Dummy -D", expected_rc=ExitStatus.USAGE), Test("Create a resource meta attribute", "crm_resource -r dummy --meta -p is-managed -v false", update_cib=True), Test("Query a resource meta attribute", 
"crm_resource -r dummy --meta -g is-managed", update_cib=True), Test("Remove a resource meta attribute", "crm_resource -r dummy --meta -d is-managed", update_cib=True), ValidatingTest("Create another resource meta attribute", "crm_resource -r dummy --meta -p target-role -v Stopped --output-as=xml"), ValidatingTest("Show why a resource is not running", "crm_resource -Y -r dummy --output-as=xml"), ValidatingTest("Remove another resource meta attribute", "crm_resource -r dummy --meta -d target-role --output-as=xml"), ValidatingTest("Get a non-existent attribute from a resource element", "crm_resource -r dummy --get-parameter nonexistent --element --output-as=xml"), make_test_group("Get a non-existent attribute from a resource element", "crm_resource -r dummy --get-parameter nonexistent --element", update_cib=True), Test("Get an existent attribute from a resource element", "crm_resource -r dummy --get-parameter class --element", update_cib=True), ValidatingTest("Set a non-existent attribute for a resource element", "crm_resource -r dummy --set-parameter=description -v test_description --element --output-as=xml", update_cib=True), ValidatingTest("Set an existent attribute for a resource element", "crm_resource -r dummy --set-parameter=description -v test_description --element --output-as=xml", update_cib=True), ValidatingTest("Delete an existent attribute for a resource element", "crm_resource -r dummy -d description --element --output-as=xml", update_cib=True), ValidatingTest("Delete a non-existent attribute for a resource element", "crm_resource -r dummy -d description --element --output-as=xml", update_cib=True), Test("Set a non-existent attribute for a resource element", "crm_resource -r dummy --set-parameter=description -v test_description --element", update_cib=True), Test("Set an existent attribute for a resource element", "crm_resource -r dummy --set-parameter=description -v test_description --element", update_cib=True), Test("Delete an existent attribute for a 
resource element", "crm_resource -r dummy -d description --element", update_cib=True), Test("Delete a non-existent attribute for a resource element", "crm_resource -r dummy -d description --element", update_cib=True), Test("Create a resource attribute", "crm_resource -r dummy -p delay -v 10s", update_cib=True), make_test_group("List the configured resources", "crm_resource -L", update_cib=True), Test("Implicitly list the configured resources", "crm_resource"), Test("List IDs of instantiated resources", "crm_resource -l"), make_test_group("Show XML configuration of resource", "crm_resource -q -r dummy"), Test("Require a destination when migrating a resource that is stopped", "crm_resource -r dummy -M", update_cib=True, expected_rc=ExitStatus.USAGE), Test("Don't support migration to non-existent locations", "crm_resource -r dummy -M -N i.do.not.exist", update_cib=True, expected_rc=ExitStatus.NOSUCH), Test("Create a fencing resource", """cibadmin -C -o resources --xml-text ''""", update_cib=True), Test("Bring resources online", "crm_simulate --live-check --in-place", update_cib=True), Test("Try to move a resource to its existing location", "crm_resource -r dummy --move --node node1", update_cib=True, expected_rc=ExitStatus.EXISTS), Test("Try to move a resource that doesn't exist", "crm_resource -r xyz --move --node node1", expected_rc=ExitStatus.NOSUCH), Test("Move a resource from its existing location", "crm_resource -r dummy --move", update_cib=True), Test("Clear out constraints generated by --move", "crm_resource -r dummy --clear", update_cib=True), Test("Ban a resource on unknown node", "crm_resource -r dummy -B -N host1", expected_rc=ExitStatus.NOSUCH), Test("Create two more nodes and bring them online", "crm_simulate --live-check --in-place --node-up=node2 --node-up=node3", update_cib=True), Test("Ban dummy from node1", "crm_resource -r dummy -B -N node1", update_cib=True), Test("Show where a resource is running", "crm_resource -r dummy -W"), Test("Show 
constraints on a resource", "crm_resource -a -r dummy"), ValidatingTest("Ban dummy from node2", "crm_resource -r dummy -B -N node2 --output-as=xml", update_cib=True), Test("Relocate resources due to ban", "crm_simulate --live-check --in-place -S", update_cib=True), ValidatingTest("Move dummy to node1", "crm_resource -r dummy -M -N node1 --output-as=xml", update_cib=True), Test("Clear implicit constraints for dummy on node2", "crm_resource -r dummy -U -N node2", update_cib=True), Test("Drop the status section", "cibadmin -R -o status --xml-text ''"), Test("Create a clone", """cibadmin -C -o resources --xml-text ''"""), Test("Create a resource meta attribute", "crm_resource -r test-primitive --meta -p is-managed -v false", update_cib=True), Test("Create a resource meta attribute in the primitive", "crm_resource -r test-primitive --meta -p is-managed -v false --force", update_cib=True), Test("Update resource meta attribute with duplicates", "crm_resource -r test-clone --meta -p is-managed -v true", update_cib=True), Test("Update resource meta attribute with duplicates (force clone)", "crm_resource -r test-clone --meta -p is-managed -v true --force", update_cib=True), Test("Update child resource meta attribute with duplicates", "crm_resource -r test-primitive --meta -p is-managed -v false", update_cib=True), Test("Delete resource meta attribute with duplicates", "crm_resource -r test-clone --meta -d is-managed", update_cib=True), Test("Delete resource meta attribute in parent", "crm_resource -r test-primitive --meta -d is-managed", update_cib=True), Test("Create a resource meta attribute in the primitive", "crm_resource -r test-primitive --meta -p is-managed -v false --force", update_cib=True), Test("Update existing resource meta attribute", "crm_resource -r test-clone --meta -p is-managed -v true", update_cib=True), Test("Create a resource meta attribute in the parent", "crm_resource -r test-clone --meta -p is-managed -v true --force", update_cib=True), Test("Delete 
resource parent meta attribute (force)", "crm_resource -r test-clone --meta -d is-managed --force", update_cib=True), # Restore meta-attributes before running this test Test("Delete resource child meta attribute", "crm_resource -r test-primitive --meta -d is-managed", setup=["crm_resource -r test-primitive --meta -p is-managed -v true --force", "crm_resource -r test-clone --meta -p is-managed -v true --force"], update_cib=True), Test("Create the dummy-group resource group", """cibadmin -C -o resources --xml-text '""" """""" """""" """'""", update_cib=True), Test("Create a resource meta attribute in dummy1", "crm_resource -r dummy1 --meta -p is-managed -v true", update_cib=True), Test("Create a resource meta attribute in dummy-group", "crm_resource -r dummy-group --meta -p is-managed -v false", update_cib=True), Test("Delete the dummy-group resource group", "cibadmin -D -o resources --xml-text ''", update_cib=True), Test("Specify a lifetime when moving a resource", "crm_resource -r dummy --move --node node2 --lifetime=PT1H", update_cib=True), Test("Try to move a resource previously moved with a lifetime", "crm_resource -r dummy --move --node node1", update_cib=True), Test("Ban dummy from node1 for a short time", "crm_resource -r dummy -B -N node1 --lifetime=PT1S", update_cib=True), Test("Remove expired constraints", "sleep 2 && crm_resource --clear --expired", update_cib=True), # Clear has already been tested elsewhere, but we need to get rid of the # constraints so testing delete works. It won't delete if there's still # a reference to the resource somewhere. 
Test("Clear all implicit constraints for dummy", "crm_resource -r dummy -U", update_cib=True), Test("Set a node health strategy", "crm_attribute -n node-health-strategy -v migrate-on-red", update_cib=True), Test("Set a node health attribute", "crm_attribute -N node3 -n '#health-cts-cli' -v red", update_cib=True), ValidatingTest("Show why a resource is not running on an unhealthy node", "crm_resource -N node3 -Y -r dummy --output-as=xml"), Test("Delete a resource", "crm_resource -D -r dummy -t primitive", update_cib=True), ] constraint_tests = [] for rsc in ["prim1", "prim2", "prim3", "prim4", "prim5", "prim6", "prim7", "prim8", "prim9", "prim10", "prim11", "prim12", "prim13", "group", "clone"]: constraint_tests.extend([ make_test_group(f"Check locations and constraints for {rsc}", f"crm_resource -a -r {rsc}"), make_test_group(f"Recursively check locations and constraints for {rsc}", f"crm_resource -A -r {rsc}"), ]) constraint_tests.extend([ Test("Check locations and constraints for group member (referring to group)", "crm_resource -a -r gr2"), Test("Check locations and constraints for group member (without referring to group)", "crm_resource -a -r gr2 --force"), ]) colocation_tests = [ ValidatingTest("Set a meta-attribute for primitive and resources colocated with it", "crm_resource -r prim5 --meta --set-parameter=target-role -v Stopped --recursive --output-as=xml"), Test("Set a meta-attribute for group and resource colocated with it", "crm_resource -r group --meta --set-parameter=target-role -v Stopped --recursive"), ValidatingTest("Set a meta-attribute for clone and resource colocated with it", "crm_resource -r clone --meta --set-parameter=target-role -v Stopped --recursive --output-as=xml"), ] digest_tests = [ ValidatingTest("Show resource digests", "crm_resource --digests -r rsc1 -N node1 --output-as=xml"), Test("Show resource digests with overrides", "crm_resource --digests -r rsc1 -N node1 --output-as=xml CRM_meta_interval=10000 CRM_meta_timeout=20000"), 
make_test_group("Show resource operations", "crm_resource --list-operations"), ] basic2_tests = [ make_test_group("List a promotable clone resource", "crm_resource --locate -r promotable-clone"), make_test_group("List the primitive of a promotable clone resource", "crm_resource --locate -r promotable-rsc"), make_test_group("List a single instance of a promotable clone resource", "crm_resource --locate -r promotable-rsc:0"), make_test_group("List another instance of a promotable clone resource", "crm_resource --locate -r promotable-rsc:1"), Test("Try to move an instance of a cloned resource", "crm_resource -r promotable-rsc:0 --move --node cluster01", expected_rc=ExitStatus.INVALID_PARAM), ] basic_tests_setup = [ "crm_attribute -n no-quorum-policy -v ignore", "crm_simulate --live-check --in-place --node-up=node1" ] return options_tests + [ ShadowTestGroup(basic_tests, setup=basic_tests_setup), TestGroup(constraint_tests, env={"CIB_file": f"{cts_cli_data}/constraints.xml"}), TestGroup(colocation_tests, cib_gen=partial(copy_existing_cib, f"{cts_cli_data}/constraints.xml")), TestGroup(digest_tests, env={"CIB_file": f"{cts_cli_data}/crm_resource_digests.xml"}), TestGroup(basic2_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml"}), ValidatingTest("Check that CIB_file=\"-\" works - crm_resource", "crm_resource --digests -r rsc1 -N node1 --output-as=xml", env={"CIB_file": "-"}, stdin=pathlib.Path(f"{cts_cli_data}/crm_resource_digests.xml")), ] class CrmTicketRegressionTest(RegressionTest): """A class for testing crm_ticket.""" @property def name(self): """Return the name of this regression test.""" return "crm_ticket" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" basic_tests = [ Test("Default ticket granted state", "crm_ticket -t ticketA -G granted -d false"), Test("Set ticket granted state", "crm_ticket -t ticketA -r --force", update_cib=True), make_test_group("List ticket IDs", "crm_ticket -w"), 
make_test_group("Query ticket state", "crm_ticket -t ticketA -q"), make_test_group("Query ticket granted state", "crm_ticket -t ticketA -G granted"), Test("Delete ticket granted state", "crm_ticket -t ticketA -D granted --force", update_cib=True), Test("Make a ticket standby", "crm_ticket -t ticketA -s", update_cib=True), Test("Query ticket standby state", "crm_ticket -t ticketA -G standby"), Test("Activate a ticket", "crm_ticket -t ticketA -a", update_cib=True), make_test_group("List ticket details", "crm_ticket -L -t ticketA"), Test("Add a second ticket", "crm_ticket -t ticketB -G granted -d false", update_cib=True), Test("Set second ticket granted state", "crm_ticket -t ticketB -r --force", update_cib=True), make_test_group("List tickets", "crm_ticket -l"), Test("Delete second ticket", """cibadmin --delete --xml-text ''""", update_cib=True), Test("Delete ticket standby state", "crm_ticket -t ticketA -D standby", update_cib=True), Test("Add a constraint to a ticket", """cibadmin -C -o constraints --xml-text ''""", update_cib=True), make_test_group("Query ticket constraints", "crm_ticket -t ticketA -c"), Test("Delete ticket constraint", """cibadmin --delete --xml-text ''""", update_cib=True), ] basic_tests_setup = [ """cibadmin -C -o crm_config --xml-text ''""", """cibadmin -C -o resources --xml-text ''""" ] return [ ShadowTestGroup(basic_tests, setup=basic_tests_setup), ] class CrmadminRegressionTest(RegressionTest): """A class for testing crmadmin.""" @property def name(self): """Return the name of this regression test.""" return "crmadmin" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" basic_tests = [ make_test_group("List all nodes", "crmadmin -N"), make_test_group("Minimally list all nodes", "crmadmin -N -q"), Test("List all nodes as bash exports", "crmadmin -N -B"), make_test_group("List cluster nodes", "crmadmin -N cluster"), make_test_group("List guest nodes", "crmadmin -N guest"), make_test_group("List 
remote nodes", "crmadmin -N remote"), make_test_group("List cluster,remote nodes", "crmadmin -N cluster,remote"), make_test_group("List guest,remote nodes", "crmadmin -N guest,remote"), ] return [ TestGroup(basic_tests, env={"CIB_file": f"{cts_cli_data}/crmadmin-cluster-remote-guest-nodes.xml"}), Test("Check that CIB_file=\"-\" works", "crmadmin -N", env={"CIB_file": "-"}, stdin=pathlib.Path(f"{cts_cli_data}/crmadmin-cluster-remote-guest-nodes.xml")), ] class CrmShadowRegressionTest(RegressionTest): """A class for testing crm_shadow.""" @property def name(self): """Return the name of this regression test.""" return "crm_shadow" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" no_instance_tests = [ make_test_group("Get active shadow instance (no active instance)", "crm_shadow --which", expected_rc=ExitStatus.NOSUCH), make_test_group("Get active shadow instance's file name (no active instance)", "crm_shadow --file", expected_rc=ExitStatus.NOSUCH), make_test_group("Get active shadow instance's contents (no active instance)", "crm_shadow --display", expected_rc=ExitStatus.NOSUCH), make_test_group("Get active shadow instance's diff (no active instance)", "crm_shadow --diff", expected_rc=ExitStatus.NOSUCH), ] # Create new shadow instance based on active CIB # Don't use create_shadow_cib() here; test explicitly new_instance_tests = [ make_test_group("Create copied shadow instance", f"crm_shadow --create {SHADOW_NAME} --batch", setup=f"crm_shadow --delete {SHADOW_NAME} --force"), # Query shadow instance based on active CIB make_test_group("Get active shadow instance (copied)", "crm_shadow --which"), make_test_group("Get active shadow instance's file name (copied)", "crm_shadow --file"), make_test_group("Get active shadow instance's contents (copied)", "crm_shadow --display"), make_test_group("Get active shadow instance's diff (copied)", "crm_shadow --diff"), ] # Make some changes to the shadow file modify_cib = """export 
CIB_file=$(crm_shadow --file) && """ \ """cibadmin --modify --xml-text '' && """ \ """cibadmin --delete --xml-text '' && """ \ """cibadmin --create -o resources --xml-text '' && """ \ """cibadmin --create -o status --xml-text ''""" more_tests = [ # We can't use make_test_group() here because we only want to run # the modify_cib setup code once, and make_test_group will pass all # kwargs to every instance it creates. Test("Get active shadow instance's diff (after changes)", "crm_shadow --diff", setup=modify_cib, expected_rc=ExitStatus.ERROR), ValidatingTest("Get active shadow instance's diff (after changes)", "crm_shadow --diff --output-as=xml", expected_rc=ExitStatus.ERROR), TestGroup([ # Commit the modified shadow CIB to a temp active CIB file Test("Commit shadow instance", f"crm_shadow --commit {SHADOW_NAME}", expected_rc=ExitStatus.USAGE), Test("Commit shadow instance (force)", f"crm_shadow --commit {SHADOW_NAME} --force"), Test("Get active shadow instance's diff (after commit)", "crm_shadow --diff", expected_rc=ExitStatus.ERROR), Test("Commit shadow instance (force) (all)", f"crm_shadow --commit {SHADOW_NAME} --force --all"), Test("Get active shadow instance's diff (after commit all)", "crm_shadow --diff", expected_rc=ExitStatus.ERROR), ], cib_gen=partial(copy_existing_cib, f"{cts_cli_data}/crm_mon.xml")), TestGroup([ # Repeat sequence with XML output ValidatingTest("Commit shadow instance", f"crm_shadow --commit {SHADOW_NAME} --output-as=xml", expected_rc=ExitStatus.USAGE), ValidatingTest("Commit shadow instance (force)", f"crm_shadow --commit {SHADOW_NAME} --force --output-as=xml"), ValidatingTest("Get active shadow instance's diff (after commit)", "crm_shadow --diff --output-as=xml", expected_rc=ExitStatus.ERROR), ValidatingTest("Commit shadow instance (force) (all)", f"crm_shadow --commit {SHADOW_NAME} --force --all --output-as=xml"), ValidatingTest("Get active shadow instance's diff (after commit all)", "crm_shadow --diff --output-as=xml", 
expected_rc=ExitStatus.ERROR), # Commit an inactive shadow instance with no active instance make_test_group("Commit shadow instance (no active instance)", f"crm_shadow --commit {SHADOW_NAME}", env={"CIB_shadow": None}, expected_rc=ExitStatus.USAGE), make_test_group("Commit shadow instance (no active instance) (force)", f"crm_shadow --commit {SHADOW_NAME} --force", env={"CIB_shadow": None}), # Commit an inactive shadow instance with an active instance make_test_group("Commit shadow instance (mismatch)", f"crm_shadow --commit {SHADOW_NAME}", env={"CIB_shadow": "nonexistent_shadow"}, expected_rc=ExitStatus.USAGE), make_test_group("Commit shadow instance (mismatch) (force)", f"crm_shadow --commit {SHADOW_NAME} --force", env={"CIB_shadow": "nonexistent_shadow"}), # Commit an active shadow instance whose shadow file is missing make_test_group("Commit shadow instance (nonexistent shadow file)", "crm_shadow --commit nonexistent_shadow", env={"CIB_shadow": "nonexistent_shadow"}, expected_rc=ExitStatus.USAGE), make_test_group("Commit shadow instance (nonexistent shadow file) (force)", "crm_shadow --commit nonexistent_shadow --force", env={"CIB_shadow": "nonexistent_shadow"}, expected_rc=ExitStatus.NOSUCH), make_test_group("Get active shadow instance's diff (nonexistent shadow file)", "crm_shadow --diff", env={"CIB_shadow": "nonexistent_shadow"}, expected_rc=ExitStatus.NOSUCH), # Commit an active shadow instance when the CIB file is missing make_test_group("Commit shadow instance (nonexistent CIB file)", f"crm_shadow --commit {SHADOW_NAME}", env={"CIB_file": f"{cts_cli_data}/nonexistent_cib.xml"}, expected_rc=ExitStatus.USAGE), make_test_group("Commit shadow instance (nonexistent CIB file) (force)", f"crm_shadow --commit {SHADOW_NAME} --force", env={"CIB_file": f"{cts_cli_data}/nonexistent_cib.xml"}, expected_rc=ExitStatus.NOSUCH), make_test_group("Get active shadow instance's diff (nonexistent CIB file)", "crm_shadow --diff", env={"CIB_file": 
f"{cts_cli_data}/nonexistent_cib.xml"}, expected_rc=ExitStatus.NOSUCH), ], cib_gen=partial(copy_existing_cib, f"{cts_cli_data}/crm_mon.xml")), ] delete_1_tests = [ # Delete an active shadow instance Test("Delete shadow instance", f"crm_shadow --delete {SHADOW_NAME}", expected_rc=ExitStatus.USAGE), Test("Delete shadow instance (force)", f"crm_shadow --delete {SHADOW_NAME} --force"), ShadowTestGroup([ ValidatingTest("Delete shadow instance", f"crm_shadow --delete {SHADOW_NAME} --output-as=xml", expected_rc=ExitStatus.USAGE), ValidatingTest("Delete shadow instance (force)", f"crm_shadow --delete {SHADOW_NAME} --force --output-as=xml"), ]) ] delete_2_tests = [ # Delete an inactive shadow instance with no active instance Test("Delete shadow instance (no active instance)", f"crm_shadow --delete {SHADOW_NAME}", expected_rc=ExitStatus.USAGE), Test("Delete shadow instance (no active instance) (force)", f"crm_shadow --delete {SHADOW_NAME} --force"), ] delete_3_tests = [ ValidatingTest("Delete shadow instance (no active instance)", f"crm_shadow --delete {SHADOW_NAME} --output-as=xml", expected_rc=ExitStatus.USAGE), ValidatingTest("Delete shadow instance (no active instance) (force)", f"crm_shadow --delete {SHADOW_NAME} --force --output-as=xml"), ] delete_4_tests = [ # Delete an inactive shadow instance with an active instance Test("Delete shadow instance (mismatch)", f"crm_shadow --delete {SHADOW_NAME}", expected_rc=ExitStatus.USAGE), Test("Delete shadow instance (mismatch) (force)", f"crm_shadow --delete {SHADOW_NAME} --force"), ] delete_5_tests = [ ValidatingTest("Delete shadow instance (mismatch)", f"crm_shadow --delete {SHADOW_NAME} --output-as=xml", expected_rc=ExitStatus.USAGE), ValidatingTest("Delete shadow instance (mismatch) (force)", f"crm_shadow --delete {SHADOW_NAME} --force --output-as=xml"), # Delete an active shadow instance whose shadow file is missing Test("Delete shadow instance (nonexistent shadow file)", "crm_shadow --delete nonexistent_shadow", 
expected_rc=ExitStatus.USAGE), Test("Delete shadow instance (nonexistent shadow file) (force)", "crm_shadow --delete nonexistent_shadow --force"), ValidatingTest("Delete shadow instance (nonexistent shadow file)", "crm_shadow --delete nonexistent_shadow --output-as=xml", expected_rc=ExitStatus.USAGE), ValidatingTest("Delete shadow instance (nonexistent shadow file) (force)", "crm_shadow --delete nonexistent_shadow --force --output-as=xml"), ] delete_6_tests = [ # Delete an active shadow instance when the CIB file is missing Test("Delete shadow instance (nonexistent CIB file)", f"crm_shadow --delete {SHADOW_NAME}", expected_rc=ExitStatus.USAGE), Test("Delete shadow instance (nonexistent CIB file) (force)", f"crm_shadow --delete {SHADOW_NAME} --force"), ] delete_7_tests = [ ValidatingTest("Delete shadow instance (nonexistent CIB file)", f"crm_shadow --delete {SHADOW_NAME} --output-as=xml", expected_rc=ExitStatus.USAGE), ValidatingTest("Delete shadow instance (nonexistent CIB file) (force)", f"crm_shadow --delete {SHADOW_NAME} --force --output-as=xml"), ] create_1_tests = [ # Create new shadow instance based on active CIB with no instance active make_test_group("Create copied shadow instance (no active instance)", f"crm_shadow --create {SHADOW_NAME} --batch", setup=f"crm_shadow --delete {SHADOW_NAME} --force", env={"CIB_shadow": None}), # Create new shadow instance based on active CIB with other instance active make_test_group("Create copied shadow instance (mismatch)", f"crm_shadow --create {SHADOW_NAME} --batch", setup=f"crm_shadow --delete {SHADOW_NAME} --force", env={"CIB_shadow": "nonexistent_shadow"}), # Create new shadow instance based on CIB (shadow file already exists) make_test_group("Create copied shadow instance (file already exists)", f"crm_shadow --create {SHADOW_NAME} --batch", expected_rc=ExitStatus.CANTCREAT), make_test_group("Create copied shadow instance (file already exists) (force)", f"crm_shadow --create {SHADOW_NAME} --batch --force"), # Create 
new shadow instance based on active CIB when the CIB file is missing make_test_group("Create copied shadow instance (nonexistent CIB file) (force)", f"crm_shadow --create {SHADOW_NAME} --batch --force", expected_rc=ExitStatus.NOSUCH, setup=f"crm_shadow --delete {SHADOW_NAME} --force", env={"CIB_file": f"{cts_cli_data}/nonexistent_cib.xml"}), ] create_2_tests = [ # Create new empty shadow instance make_test_group("Create empty shadow instance", f"crm_shadow --create-empty {SHADOW_NAME} --batch", setup=f"crm_shadow --delete {SHADOW_NAME} --force"), # Create empty shadow instance with no active instance make_test_group("Create empty shadow instance (no active instance)", f"crm_shadow --create-empty {SHADOW_NAME} --batch", setup=f"crm_shadow --delete {SHADOW_NAME} --force", env={"CIB_shadow": None}), # Create empty shadow instance with other instance active make_test_group("Create empty shadow instance (mismatch)", f"crm_shadow --create-empty {SHADOW_NAME} --batch", setup=f"crm_shadow --delete {SHADOW_NAME} --force", env={"CIB_shadow": "nonexistent_shadow"}), # Create empty shadow instance when the CIB file is missing make_test_group("Create empty shadow instance (nonexistent CIB file)", f"crm_shadow --create-empty {SHADOW_NAME} --batch", setup=f"crm_shadow --delete {SHADOW_NAME} --force", env={"CIB_file": f"{cts_cli_data}/nonexistent_cib.xml"}), # Create empty shadow instance (shadow file already exists) make_test_group("Create empty shadow instance (file already exists)", f"crm_shadow --create-empty {SHADOW_NAME} --batch", expected_rc=ExitStatus.CANTCREAT), make_test_group("Create empty shadow instance (file already exists) (force)", f"crm_shadow --create-empty {SHADOW_NAME} --batch --force"), # Query shadow instance with an empty CIB. # --which and --file queries were done earlier. 
TestGroup([ make_test_group("Get active shadow instance's contents (empty CIB)", "crm_shadow --display"), make_test_group("Get active shadow instance's diff (empty CIB)", "crm_shadow --diff", expected_rc=ExitStatus.ERROR), ], setup=delete_shadow_resource_defaults), ] reset_1_tests = [ Test("Resetting active shadow instance to active CIB requires force", f"crm_shadow --reset {SHADOW_NAME} --batch", expected_rc=ExitStatus.USAGE), Test("Reset active shadow instance to active CIB", f"crm_shadow --reset {SHADOW_NAME} --batch --force"), Test("Active shadow instance no different from active CIB after reset", "crm_shadow --diff"), Test("Active shadow instance differs from active CIB after change", "crm_shadow --diff", setup="crm_attribute -n admin_epoch -v 99", expected_rc=ExitStatus.ERROR), ValidatingTest("Reset active shadow instance to active CIB", f"crm_shadow --reset {SHADOW_NAME} --batch --force --output-as=xml"), ValidatingTest("Active shadow instance no different from active CIB after reset", "crm_shadow --diff --output-as=xml"), ValidatingTest("Active shadow instance differs from active CIB after change", "crm_shadow --diff --output-as=xml", setup="crm_attribute -n admin_epoch -v 199", expected_rc=ExitStatus.ERROR), make_test_group("Reset shadow instance to active CIB with nonexistent shadow file", f"crm_shadow --reset {SHADOW_NAME} --batch --force", setup=f"crm_shadow --delete {SHADOW_NAME} --force"), Test("Active shadow instance no different from active CIB after force-reset", "crm_shadow --diff"), ] reset_2_tests = [ make_test_group("Reset inactive shadow instance (none active) to active CIB", f"crm_shadow --reset {SHADOW_NAME} --force --batch"), ] reset_3_tests = [ make_test_group("Reset inactive shadow instance while another instance active", f"crm_shadow --reset {SHADOW_NAME} --batch --force"), ] reset_4_tests = [ make_test_group("Reset shadow instance with nonexistent CIB", f"crm_shadow --reset {SHADOW_NAME} --batch --force", expected_rc=ExitStatus.NOSUCH), 
] # Switch shadow instances switch_tests = [ make_test_group("Switch to new shadow instance", f"crm_shadow --switch {SHADOW_NAME} --batch"), TestGroup([ make_test_group("Switch to nonexistent shadow instance", f"crm_shadow --switch {SHADOW_NAME} --batch", expected_rc=ExitStatus.NOSUCH), make_test_group("Switch to nonexistent shadow instance (force)", f"crm_shadow --switch {SHADOW_NAME} --batch --force", expected_rc=ExitStatus.NOSUCH), ], setup=f"crm_shadow --delete {SHADOW_NAME} --force"), ] return no_instance_tests + [ ShadowTestGroup(new_instance_tests + more_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml"}, create=False), ShadowTestGroup(delete_1_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml"}), ShadowTestGroup(delete_2_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml", "CIB_shadow": None}), ShadowTestGroup(delete_3_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml", "CIB_shadow": None}), ShadowTestGroup(delete_4_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml", "CIB_shadow": "nonexistent_shadow"}), ShadowTestGroup(delete_5_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml", "CIB_shadow": "nonexistent_shadow"}), ShadowTestGroup(delete_6_tests, env={"CIB_file": f"{cts_cli_data}/nonexistent_cib.xml"}), ShadowTestGroup(delete_7_tests, env={"CIB_file": f"{cts_cli_data}/nonexistent_cib.xml"}), ShadowTestGroup(create_1_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml"}, create=False), ShadowTestGroup(create_2_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml"}, create=False), ShadowTestGroup(reset_1_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml"}), ShadowTestGroup(reset_2_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml", "CIB_shadow": None}), ShadowTestGroup(reset_3_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml", "CIB_shadow": "nonexistent_shadow"}), ShadowTestGroup(reset_4_tests, env={"CIB_file": f"{cts_cli_data}/nonexistent_cib.xml"}), ShadowTestGroup(switch_tests, env={"CIB_shadow": "nonexistent_shadow"}, 
create_empty=True), ] class CrmVerifyRegressionTest(RegressionTest): """A class for testing crm_verify.""" @property def name(self): """Return the name of this regression test.""" return "crm_verify" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" invalid_tests = [ make_test_group("Verify a file-specified invalid configuration", f"crm_verify --xml-file {cts_cli_data}/crm_verify_invalid_bz.xml", expected_rc=ExitStatus.CONFIG), make_test_group("Verify a file-specified invalid configuration (verbose)", f"crm_verify --xml-file {cts_cli_data}/crm_verify_invalid_bz.xml --verbose", expected_rc=ExitStatus.CONFIG), make_test_group("Verify a file-specified invalid configuration (quiet)", f"crm_verify --xml-file {cts_cli_data}/crm_verify_invalid_bz.xml --quiet", expected_rc=ExitStatus.CONFIG), ValidatingTest("Verify another file-specified invalid configuration", f"crm_verify --xml-file {cts_cli_data}/crm_verify_invalid_no_stonith.xml --output-as=xml", expected_rc=ExitStatus.CONFIG), ] with open(f"{test_home}/cli/crm_mon.xml", encoding="utf-8") as f: cib_contents = f.read() valid_tests = [ ValidatingTest("Verify a file-specified valid configuration", f"crm_verify --xml-file {cts_cli_data}/crm_mon.xml --output-as=xml"), ValidatingTest("Verify a piped-in valid configuration", "crm_verify -p --output-as=xml", stdin=pathlib.Path(f"{cts_cli_data}/crm_mon.xml")), ValidatingTest("Verbosely verify a file-specified valid configuration", f"crm_verify --xml-file {cts_cli_data}/crm_mon.xml --output-as=xml --verbose"), ValidatingTest("Verbosely verify a piped-in valid configuration", "crm_verify -p --output-as=xml --verbose", stdin=pathlib.Path(f"{cts_cli_data}/crm_mon.xml")), ValidatingTest("Verify a string-supplied valid configuration", f"crm_verify -X '{cib_contents}' --output-as=xml"), ValidatingTest("Verbosely verify a string-supplied valid configuration", f"crm_verify -X '{cib_contents}' --output-as=xml --verbose"), ] return 
invalid_tests + valid_tests class CrmSimulateRegressionTest(RegressionTest): """A class for testing crm_simulate.""" @property def name(self): """Return the name of this regression test.""" return "crm_simulate" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" good_cib = """ """ bad_cib = good_cib.replace("start", "break") bad_version_cib = good_cib.replace("pacemaker-1.2", "pacemaker-9999.0") recoverable_cib = good_cib.replace("", "") no_version_cib = good_cib.replace('validate-with="pacemaker-1.2" ', "") no_version_bad_cib = bad_version_cib.replace('epoch="3"', 'epoch="30"').replace("start", "break") basic_tests = [ Test("Show allocation scores with crm_simulate", f"crm_simulate -x {cts_cli_data}/crm_mon.xml --show-scores --output-as=xml"), Test("Show utilization with crm_simulate", f"crm_simulate -x {cts_cli_data}/crm_mon.xml --show-utilization"), Test("Simulate injecting a failure", f"crm_simulate -x {cts_cli_data}/crm_mon.xml -S -i ping_monitor_10000@cluster02=1"), Test("Simulate bringing a node down", f"crm_simulate -x {cts_cli_data}/crm_mon.xml -S --node-down=cluster01"), Test("Simulate a node failing", f"crm_simulate -x {cts_cli_data}/crm_mon.xml -S --node-fail=cluster02"), Test("Run crm_simulate with invalid CIB (enum violation)", "crm_simulate -p -S", stdin=bad_cib, env={"PCMK_trace_functions": "apply_upgrade,pcmk__update_schema"}, expected_rc=ExitStatus.CONFIG), Test("Run crm_simulate with invalid CIB (unrecognized validate-with)", "crm_simulate -p -S", stdin=bad_version_cib, env={"PCMK_trace_functions": "apply_upgrade,pcmk__update_schema"}, expected_rc=ExitStatus.CONFIG), Test("Run crm_simulate with invalid, but possibly recoverable CIB (valid with X.Y+1)", "crm_simulate -p -S", stdin=recoverable_cib, env={"PCMK_trace_functions": "apply_upgrade,pcmk__update_schema"}), Test("Run crm_simulate with valid CIB, but without validate-with attribute", "crm_simulate -p -S", stdin=no_version_cib, 
env={"PCMK_trace_functions": "apply_upgrade,pcmk__update_schema"}, expected_rc=ExitStatus.CONFIG), Test("Run crm_simulate with invalid CIB, also without validate-with attribute", "crm_simulate -p -S", stdin=no_version_bad_cib, env={"PCMK_trace_functions": "apply_upgrade,pcmk__update_schema"}, expected_rc=ExitStatus.CONFIG), ] return [ ShadowTestGroup(basic_tests, create=False, env={"CIB_shadow": None}), ] class CrmDiffRegressionTest(RegressionTest): """A class for testing crm_diff.""" @property def name(self): """Return the name of this regression test.""" return "crm_diff" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" return [ Test("Create an XML patchset", f"crm_diff -o {cts_cli_data}/crm_diff_old.xml -n {cts_cli_data}/crm_diff_new.xml", expected_rc=ExitStatus.ERROR) ] class CrmMonRegressionTest(RegressionTest): """A class for testing crm_mon.""" @property def name(self): """Return the name of this regression test.""" return "crm_mon" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" basic_tests = [ make_test_group("Basic output", "crm_mon -1"), make_test_group("Output without node section", "crm_mon -1 --exclude=nodes"), # The next test doesn't need to be performed for other output formats. It's # really just a test to make sure that blank lines are correct. 
Test("Output with only the node section", "crm_mon -1 --exclude=all --include=nodes"), # XML includes everything already so there's no need for a complete test Test("Complete text output", "crm_mon -1 --include=all"), # XML includes detailed output already Test("Complete text output with detail", "crm_mon -1R --include=all"), Test("Complete brief text output", "crm_mon -1 --include=all --brief"), Test("Complete text output grouped by node", "crm_mon -1 --include=all --group-by-node"), # XML does not have a brief output option Test("Complete brief text output grouped by node", "crm_mon -1 --include=all --group-by-node --brief"), ValidatingTest("Output grouped by node", "crm_mon --output-as=xml --group-by-node"), make_test_group("Complete output filtered by node", "crm_mon -1 --include=all --node=cluster01"), make_test_group("Complete output filtered by tag", "crm_mon -1 --include=all --node=even-nodes"), make_test_group("Complete output filtered by resource tag", "crm_mon -1 --include=all --resource=fencing-rscs"), make_test_group("Output filtered by node that doesn't exist", "crm_mon -1 --node=blah"), Test("Basic text output with inactive resources", "crm_mon -1 -r"), # XML already includes inactive resources Test("Basic text output with inactive resources, filtered by node", "crm_mon -1 -r --node=cluster02"), make_test_group("Complete output filtered by primitive resource", "crm_mon -1 --include=all --resource=Fencing"), make_test_group("Complete output filtered by group resource", "crm_mon -1 --include=all --resource=exim-group"), Test("Complete text output filtered by group resource member", "crm_mon -1 --include=all --resource=Public-IP"), ValidatingTest("Output filtered by group resource member", "crm_mon --output-as=xml --resource=Email"), make_test_group("Complete output filtered by clone resource", "crm_mon -1 --include=all --resource=ping-clone"), make_test_group("Complete output filtered by clone resource instance", "crm_mon -1 --include=all 
--resource=ping"), Test("Complete text output filtered by exact clone resource instance", "crm_mon -1 --include=all --show-detail --resource=ping:0"), ValidatingTest("Output filtered by exact clone resource instance", "crm_mon --output-as=xml --resource=ping:1"), make_test_group("Output filtered by resource that doesn't exist", "crm_mon -1 --resource=blah"), Test("Basic text output with inactive resources, filtered by tag", "crm_mon -1 -r --resource=inactive-rscs"), Test("Basic text output with inactive resources, filtered by bundle resource", "crm_mon -1 -r --resource=httpd-bundle"), ValidatingTest("Output filtered by inactive bundle resource", "crm_mon --output-as=xml --resource=httpd-bundle"), Test("Basic text output with inactive resources, filtered by bundled IP address resource", "crm_mon -1 -r --resource=httpd-bundle-ip-192.168.122.131"), ValidatingTest("Output filtered by bundled IP address resource", "crm_mon --output-as=xml --resource=httpd-bundle-ip-192.168.122.132"), Test("Basic text output with inactive resources, filtered by bundled container", "crm_mon -1 -r --resource=httpd-bundle-docker-1"), ValidatingTest("Output filtered by bundled container", "crm_mon --output-as=xml --resource=httpd-bundle-docker-2"), Test("Basic text output with inactive resources, filtered by bundle connection", "crm_mon -1 -r --resource=httpd-bundle-0"), ValidatingTest("Output filtered by bundle connection", "crm_mon --output-as=xml --resource=httpd-bundle-0"), Test("Basic text output with inactive resources, filtered by bundled primitive resource", "crm_mon -1 -r --resource=httpd"), ValidatingTest("Output filtered by bundled primitive resource", "crm_mon --output-as=xml --resource=httpd"), Test("Complete text output, filtered by clone name in cloned group", "crm_mon -1 --include=all --show-detail --resource=mysql-clone-group"), ValidatingTest("Output, filtered by clone name in cloned group", "crm_mon --output-as=xml --resource=mysql-clone-group"), Test("Complete text 
output, filtered by group name in cloned group", "crm_mon -1 --include=all --show-detail --resource=mysql-group"), ValidatingTest("Output, filtered by group name in cloned group", "crm_mon --output-as=xml --resource=mysql-group"), Test("Complete text output, filtered by exact group instance name in cloned group", "crm_mon -1 --include=all --show-detail --resource=mysql-group:1"), ValidatingTest("Output, filtered by exact group instance name in cloned group", "crm_mon --output-as=xml --resource=mysql-group:1"), Test("Complete text output, filtered by primitive name in cloned group", "crm_mon -1 --include=all --show-detail --resource=mysql-proxy"), ValidatingTest("Output, filtered by primitive name in cloned group", "crm_mon --output-as=xml --resource=mysql-proxy"), Test("Complete text output, filtered by exact primitive instance name in cloned group", "crm_mon -1 --include=all --show-detail --resource=mysql-proxy:1"), ValidatingTest("Output, filtered by exact primitive instance name in cloned group", "crm_mon --output-as=xml --resource=mysql-proxy:1"), ] partial_tests = [ Test("Output of partially active resources", "crm_mon -1 --show-detail"), ValidatingTest("Output of partially active resources", "crm_mon --output-as=xml"), Test("Output of partially active resources, with inactive resources", "crm_mon -1 -r --show-detail"), # XML already includes inactive resources Test("Complete brief text output, with inactive resources", "crm_mon -1 -r --include=all --brief --show-detail"), # XML does not have a brief output option Test("Text output of partially active group", "crm_mon -1 --resource=partially-active-group"), Test("Text output of partially active group, with inactive resources", "crm_mon -1 --resource=partially-active-group -r"), Test("Text output of active member of partially active group", "crm_mon -1 --resource=dummy-1"), Test("Text output of inactive member of partially active group", "crm_mon -1 --resource=dummy-2 --show-detail"), Test("Complete brief text 
output grouped by node, with inactive resources", "crm_mon -1 -r --include=all --group-by-node --brief --show-detail"), Test("Text output of partially active resources, with inactive resources, filtered by node", "crm_mon -1 -r --node=cluster01"), ValidatingTest("Output of partially active resources, filtered by node", "crm_mon --output-as=xml --node=cluster01"), ] unmanaged_tests = [ make_test_group("Output of active unmanaged resource on offline node", "crm_mon -1"), Test("Brief text output of active unmanaged resource on offline node", "crm_mon -1 --brief"), Test("Brief text output of active unmanaged resource on offline node, grouped by node", "crm_mon -1 --brief --group-by-node"), ] maint1_tests = [ make_test_group("Output of all resources with maintenance-mode enabled", "crm_mon -1 -r", setup="crm_attribute -n maintenance-mode -v true", teardown="crm_attribute -n maintenance-mode -v false"), make_test_group("Output of all resources with maintenance enabled for a node", "crm_mon -1 -r", setup="crm_attribute -n maintenance -N cluster02 -v true", teardown="crm_attribute -n maintenance -N cluster02 -v false"), ] maint2_tests = [ # The fence resource is excluded, for comparison make_test_group("Output of all resources with maintenance meta attribute true", "crm_mon -1 -r"), ] t180_tests = [ Test("Text output of guest node's container on different node from its remote resource", "crm_mon -1"), Test("Complete text output of guest node's container on different node from its remote resource", "crm_mon -1 --show-detail"), ] return [ TestGroup(basic_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon.xml"}), Test("Check that CIB_file=\"-\" works", "crm_mon -1", env={"CIB_file": "-"}, stdin=pathlib.Path(f"{cts_cli_data}/crm_mon.xml")), TestGroup(partial_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon-partial.xml"}), TestGroup(unmanaged_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon-unmanaged.xml"}), TestGroup(maint1_tests, cib_gen=partial(copy_existing_cib, 
f"{cts_cli_data}/crm_mon.xml")), TestGroup(maint2_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon-rsc-maint.xml"}), TestGroup(t180_tests, env={"CIB_file": f"{cts_cli_data}/crm_mon-T180.xml"}), ] class AclsRegressionTest(RegressionTest): """A class for testing access control lists.""" @property def name(self): """Return the name of this regression test.""" return "acls" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" acl_cib = """ """ basic_tests = [ Test("Configure some ACLs", "cibadmin -M -o acls -p", update_cib=True, stdin=acl_cib), Test("Enable ACLs", "crm_attribute -n enable-acl -v true", update_cib=True), Test("Set cluster option", "crm_attribute -n no-quorum-policy -v ignore", update_cib=True), Test("New ACL role", """cibadmin --create -o acls --xml-text ''""", update_cib=True), Test("New ACL target", """cibadmin --create -o acls --xml-text ''""", update_cib=True), Test("Another ACL role", """cibadmin --create -o acls --xml-text ''""", update_cib=True), Test("Another ACL target", """cibadmin --create -o acls --xml-text ''""", update_cib=True), Test("Updated ACL", """cibadmin --replace -o acls --xml-text ''""", update_cib=True), ] no_acl_tests = [ Test("unknownguy: Query configuration", "cibadmin -Q", expected_rc=ExitStatus.INSUFFICIENT_PRIV), Test("unknownguy: Set enable-acl", "crm_attribute -n enable-acl -v false", expected_rc=ExitStatus.INSUFFICIENT_PRIV), Test("unknownguy: Set stonith-enabled", "crm_attribute -n stonith-enabled -v false", expected_rc=ExitStatus.INSUFFICIENT_PRIV), Test("unknownguy: Create a resource", """cibadmin -C -o resources --xml-text ''""", expected_rc=ExitStatus.INSUFFICIENT_PRIV), ] deny_cib_tests = [ Test("l33t-haxor: Query configuration", "cibadmin -Q", expected_rc=ExitStatus.INSUFFICIENT_PRIV), Test("l33t-haxor: Set enable-acl", "crm_attribute -n enable-acl -v false", expected_rc=ExitStatus.INSUFFICIENT_PRIV), Test("l33t-haxor: Set stonith-enabled", "crm_attribute -n 
stonith-enabled -v false", expected_rc=ExitStatus.INSUFFICIENT_PRIV), Test("l33t-haxor: Create a resource", """cibadmin -C -o resources --xml-text ''""", expected_rc=ExitStatus.INSUFFICIENT_PRIV), ] observer_tests = [ Test("niceguy: Query configuration", "cibadmin -Q"), Test("niceguy: Set enable-acl", "crm_attribute -n enable-acl -v false", expected_rc=ExitStatus.INSUFFICIENT_PRIV), Test("niceguy: Set stonith-enabled", "crm_attribute -n stonith-enabled -v false", update_cib=True), Test("niceguy: Create a resource", """cibadmin -C -o resources --xml-text ''""", expected_rc=ExitStatus.INSUFFICIENT_PRIV), Test("root: Query configuration", "cibadmin -Q", env={"CIB_user": "root"}), Test("root: Set stonith-enabled", "crm_attribute -n stonith-enabled -v true", update_cib=True, env={"CIB_user": "root"}), Test("root: Create a resource", """cibadmin -C -o resources --xml-text ''""", update_cib=True, env={"CIB_user": "root"}), # For use with later tests Test("root: Create another resource (with description)", """cibadmin -C -o resources --xml-text ''""", update_cib=True, env={"CIB_user": "root"}), ] deny_cib_2_tests = [ Test("l33t-haxor: Create a resource meta attribute", "crm_resource -r dummy --meta -p target-role -v Stopped", expected_rc=ExitStatus.INSUFFICIENT_PRIV), Test("l33t-haxor: Query a resource meta attribute", "crm_resource -r dummy --meta -g target-role", expected_rc=ExitStatus.INSUFFICIENT_PRIV), Test("l33t-haxor: Remove a resource meta attribute", "crm_resource -r dummy --meta -d target-role", expected_rc=ExitStatus.INSUFFICIENT_PRIV), ] observer_2_tests = [ Test("niceguy: Create a resource meta attribute", "crm_resource -r dummy --meta -p target-role -v Stopped", update_cib=True), Test("niceguy: Query a resource meta attribute", "crm_resource -r dummy --meta -g target-role", update_cib=True), Test("niceguy: Remove a resource meta attribute", "crm_resource -r dummy --meta -d target-role", update_cib=True), Test("niceguy: Create a resource meta attribute", 
"crm_resource -r dummy --meta -p target-role -v Started", update_cib=True), ] read_meta_tests = [ Test("badidea: Query configuration - implied deny", "cibadmin -Q"), ] deny_cib_3_tests = [ Test("betteridea: Query configuration - explicit deny", "cibadmin -Q"), ] replace_tests = [ TestGroup([ AclTest("niceguy: Replace - remove acls", "cibadmin --replace -p", setup="cibadmin --delete --xml-text ''", expected_rc=ExitStatus.INSUFFICIENT_PRIV), AclTest("niceguy: Replace - create resource", "cibadmin --replace -p", setup="""cibadmin -C -o resources --xml-text ''""", expected_rc=ExitStatus.INSUFFICIENT_PRIV), AclTest("niceguy: Replace - modify attribute (deny)", "cibadmin --replace -p", setup="crm_attribute -n enable-acl -v false", expected_rc=ExitStatus.INSUFFICIENT_PRIV), AclTest("niceguy: Replace - delete attribute (deny)", "cibadmin --replace -p", setup="""cibadmin --replace --xml-text ''""", expected_rc=ExitStatus.INSUFFICIENT_PRIV), AclTest("niceguy: Replace - create attribute (deny)", "cibadmin --replace -p", setup="""cibadmin --modify --xml-text ''""", expected_rc=ExitStatus.INSUFFICIENT_PRIV), ], env={"CIB_user": "niceguy"}), # admin role TestGroup([ AclTest("bob: Replace - create attribute (direct allow)", "cibadmin --replace -o resources -p", setup="""cibadmin --modify --xml-text ''"""), AclTest("bob: Replace - modify attribute (direct allow)", "cibadmin --replace -o resources -p", setup="""cibadmin --modify --xml-text ''"""), AclTest("bob: Replace - delete attribute (direct allow)", "cibadmin --replace -o resources -p", setup="""cibadmin --replace -o resources --xml-text ''"""), ], env={"CIB_user": "bob"}), # super_user role TestGroup([ AclTest("joe: Replace - create attribute (inherited allow)", "cibadmin --replace -o resources -p", setup="""cibadmin --modify --xml-text ''"""), AclTest("joe: Replace - modify attribute (inherited allow)", "cibadmin --replace -o resources -p", setup="""cibadmin --modify --xml-text ''"""), AclTest("joe: Replace - delete 
attribute (inherited allow)", "cibadmin --replace -o resources -p", setup="""cibadmin --replace -o resources --xml-text ''"""), ], env={"CIB_user": "joe"}), # rsc_writer role TestGroup([ AclTest("mike: Replace - create attribute (allow overrides deny)", "cibadmin --replace -o resources -p", setup="""cibadmin --modify --xml-text ''"""), AclTest("mike: Replace - modify attribute (allow overrides deny)", "cibadmin --replace -o resources -p", setup="""cibadmin --modify --xml-text ''"""), AclTest("mike: Replace - delete attribute (allow overrides deny)", "cibadmin --replace -o resources -p", setup="""cibadmin --replace -o resources --xml-text ''"""), # Create an additional resource for deny-overrides-allow testing AclTest("mike: Create another resource", """cibadmin -C -o resources --xml-text ''""", update_cib=True), ], env={"CIB_user": "mike"}), # rsc_denied role TestGroup([ AclTest("chris: Replace - create attribute (deny overrides allow)", "cibadmin --replace -o resources -p", setup="""cibadmin --modify --xml-text ''""", expected_rc=ExitStatus.INSUFFICIENT_PRIV), AclTest("chris: Replace - modify attribute (deny overrides allow)", "cibadmin --replace -o resources -p", setup="""cibadmin --modify --xml-text ''""", expected_rc=ExitStatus.INSUFFICIENT_PRIV), AclTest("chris: Replace - delete attribute (deny overrides allow)", "cibadmin --replace -o resources -p", setup="""cibadmin --replace -o resources --xml-text ''""", expected_rc=ExitStatus.INSUFFICIENT_PRIV), ], env={"CIB_user": "chris"}), ] loop_tests = [ # no ACL TestGroup(no_acl_tests, env={"CIB_user": "unknownguy"}), # deny /cib permission TestGroup(deny_cib_tests, env={"CIB_user": "l33t-haxor"}), # observer role TestGroup(observer_tests, env={"CIB_user": "niceguy"}), # deny /cib permission TestGroup(deny_cib_2_tests, env={"CIB_user": "l33t-haxor"}), # observer role TestGroup(observer_2_tests, env={"CIB_user": "niceguy"}), # read //meta_attributes TestGroup(read_meta_tests, env={"CIB_user": "badidea"}), # deny 
/cib, read //meta_attributes TestGroup(deny_cib_3_tests, env={"CIB_user": "betteridea"}), ] + replace_tests return [ ShadowTestGroup(basic_tests + [ TestGroup(loop_tests, env={"PCMK_trace_functions": "pcmk__check_acl,pcmk__apply_creation_acl"})]), ] class ValidityRegressionTest(RegressionTest): """A class for testing CIB validity.""" @property def name(self): """Return the name of this regression test.""" return "validity" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" basic_tests = [ # sanitize_output() strips out validate-with, so there's no point in # outputting the CIB after tests that modify it Test("Try to set unrecognized validate-with", "cibadmin -M --xml-text ''", expected_rc=ExitStatus.CONFIG), Test("Try to remove validate-with attribute", "cibadmin -R -p", stdin=StdinCmd("""cibadmin -Q | sed 's#validate-with="[^"]*"##'"""), expected_rc=ExitStatus.CONFIG), Test("Try to use rsc_order first-action value disallowed by schema", "cibadmin -M -o constraints --xml-text ''", expected_rc=ExitStatus.CONFIG, update_cib=True), Test("Try to use configuration legal only with schema after configured one", "cibadmin -C -o configuration --xml-text ''", expected_rc=ExitStatus.CONFIG, update_cib=True), Test("Disable schema validation", "cibadmin -M --xml-text ''", expected_rc=ExitStatus.OK), Test("Set invalid rsc_order first-action value (schema validation disabled)", "cibadmin -M -o constraints --xml-text ''", expected_rc=ExitStatus.OK, update_cib=True), Test("Run crm_simulate with invalid rsc_order first-action " "(schema validation disabled)", "crm_simulate -SL", expected_rc=ExitStatus.OK), ] basic_tests_setup = [ """cibadmin -C -o resources --xml-text ''""", """cibadmin -C -o resources --xml-text ''""", """cibadmin -C -o constraints --xml-text ''""", ] return [ ShadowTestGroup(basic_tests, validate_with="pacemaker-1.2", setup=basic_tests_setup, env={"PCMK_trace_functions": 
"apply_upgrade,pcmk__update_schema,invert_action"}), ] class UpgradeRegressionTest(RegressionTest): """A class for testing upgrading the CIB.""" @property def name(self): """Return the name of this regression test.""" return "upgrade" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" resource_cib = """ """ basic_tests = [ Test("Set stonith-enabled=false", "crm_attribute -n stonith-enabled -v false", update_cib=True), Test("Configure the initial resource", "cibadmin -M -o resources -p", update_cib=True, stdin=resource_cib), Test("Upgrade to latest CIB schema (trigger 2.10.xsl + the wrapping)", "cibadmin --upgrade --force -V -V", update_cib=True), Test("Query a resource instance attribute (shall survive)", "crm_resource -r mySmartFuse -g requires", update_cib=True), ] return [ ShadowTestGroup(basic_tests, validate_with="pacemaker-2.10", env={"PCMK_trace_functions": "apply_upgrade,pcmk__update_schema"}) ] class RulesRegressionTest(RegressionTest): """A class for testing support for CIB rules.""" @property def name(self): """Return the name of this regression test.""" return "rules" @property def tests(self): """A list of Test instances to be run as part of this regression test.""" tomorrow = datetime.now() + timedelta(days=1) rule_cib = f""" """ usage_tests = [ make_test_group("crm_rule given no arguments", "crm_rule", expected_rc=ExitStatus.USAGE), make_test_group("crm_rule given no rule to check", "crm_rule -c", expected_rc=ExitStatus.USAGE), make_test_group("crm_rule given invalid input XML", "crm_rule -c -r blahblah -X invalidxml", expected_rc=ExitStatus.DATAERR), make_test_group("crm_rule given invalid input XML on stdin", "crm_rule -c -r blahblah -X -", stdin=StdinCmd("echo invalidxml"), expected_rc=ExitStatus.DATAERR), ] basic_tests = [ make_test_group("Try to check a rule that doesn't exist", "crm_rule -c -r blahblah", expected_rc=ExitStatus.NOSUCH), make_test_group("Try to check a rule that has too many 
class FeatureSetRegressionTest(RegressionTest):
    """Regression test for how version-specific feature sets are reported."""

    @property
    def name(self):
        """Return the name of this regression test."""
        return "feature_set"

    @property
    def tests(self):
        """Return the test groups to be run as part of this regression test."""
        # The same two crm_mon invocations are used before and after the
        # feature set is faked, so name them once.
        text_cmd = "crm_mon -1 --show-detail"
        xml_cmd = "crm_mon --output-as=xml"

        # Import the test CIB, then look at it while all versions agree
        consistent_tests = [
            Test("Import the test CIB",
                 f"cibadmin --replace --xml-file {cts_cli_data}/crm_mon-feature_set.xml",
                 update_cib=True),
            Test("Complete text output, no mixed status", text_cmd),
            ValidatingTest("Output, no mixed status", xml_cmd),
        ]

        # Modify the CIB to fake that the cluster has mixed versions, then
        # look at it again
        mixed_tests = [
            Test("Fake inconsistent feature set",
                 "crm_attribute --node=cluster02 --name=#feature-set --update=3.15.0 --lifetime=reboot",
                 update_cib=True),
            Test("Complete text output, mixed status", text_cmd),
            ValidatingTest("Output, mixed status", xml_cmd),
        ]

        return [ShadowTestGroup(consistent_tests + mixed_tests)]
def path_prepend(p):
    """Add another directory to the front of $PATH.

    Arguments:
    p -- Directory that should be searched before everything already in $PATH.

    If $PATH is unset or empty, it is set to just p.  Joining p with an empty
    value would leave a trailing separator, i.e. an empty $PATH entry, which
    is interpreted as the current working directory.
    """
    old = os.environ.get("PATH", "")

    if old:
        os.environ["PATH"] = f"{p}{os.pathsep}{old}"
    else:
        os.environ["PATH"] = f"{p}"
def results(regs, save, verbose):
    """Write out and diff each regression test's output.

    Arguments:
    regs    -- The regression test objects that have already been run.
    save    -- If True, copy each test's results file over its expected
               output file under test_home.
    verbose -- If True, print a "Results" heading first; also passed through
               to each test's diff() method.

    Returns the number of regression tests whose output is not identical to
    what was expected.
    """
    if verbose:
        print("\n\nResults")
        sys.stdout.flush()

    mismatches = 0

    for reg in regs:
        reg.write()

        if save:
            expected_path = f"{test_home}/cli/regression.{reg.name}.exp"
            copyfile(reg.results_file, expected_path)

        reg.diff(verbose)

        if reg.identical:
            continue

        mismatches += 1

    return mismatches
def main():
    """Run command line regression tests as specified by arguments.

    Parses the command line, prepares the environment and $PATH, runs every
    regression class whose name was selected, prints the results and summary,
    and exits the process with the status returned by summary().
    """
    opts = build_options()

    setup_environment(opts.valgrind)
    setup_path(opts.path)

    # Work out which test names were requested on the command line.  If none
    # were given, this defaults to default_tests.
    #
    # "tools" is accepted by --run-only as shorthand for everything in
    # tools_tests.  Expand it wherever it appears in the list: it used to be
    # expanded only when it was the sole value given, so combining it with
    # another --run-only option left the literal string "tools" in the list,
    # where it is compared against regression test names as-is.
    wanted = []

    for requested in opts.run_only or default_tests:
        if requested == "tools":
            wanted.extend(tools_tests)
        else:
            wanted.append(requested)

    # Filter the list of all regression test classes to include only those
    # that were requested.
    regs = []

    for cls in regression_classes:
        obj = cls()

        if obj.name in wanted:
            regs.append(obj)

    # Never ask for fewer than one worker process, whatever --jobs was set to
    regs = run_regression_tests(regs, max(1, opts.jobs), valgrind=opts.valgrind)
    output_differs = results(regs, opts.save, opts.verbose)
    rc = summary(regs, output_differs, opts.verbose)

    sys.exit(rc)
def update_path():
    # pylint: disable=protected-access
    """Put the directories holding the test binaries at the front of $PATH.

    If cts-exec.in exists in TEST_DIR, the tests are being run from a source
    tree and the relevant build tree directories are used.  Otherwise, the
    installed daemon directory is used.
    """
    current_path = os.environ['PATH']
    in_source_tree = os.path.exists(f"{TEST_DIR}/cts-exec.in")

    if not in_source_tree:
        print(f"Running tests from the install tree: {BuildOptions.DAEMON_DIR} (not {TEST_DIR})")

        # For cts-exec-helper, cts-support, pacemaker-execd, pacemaker-fenced,
        # and pacemaker-remoted
        search_dirs = [f"{BuildOptions.DAEMON_DIR}"]

    else:
        print(f"Running tests from the source tree: {BuildOptions._BUILD_DIR} ({TEST_DIR})")

        # Listed in the order they end up in $PATH (first entry is searched first)
        search_dirs = [
            # For cts-support
            f"{BuildOptions._BUILD_DIR}/cts/support",
            # For pacemaker-fenced
            f"{BuildOptions._BUILD_DIR}/daemons/fenced",
            # For crm_resource
            f"{BuildOptions._BUILD_DIR}/tools",
            # For pacemaker-execd, cts-exec-helper, and pacemaker-remoted
            f"{BuildOptions._BUILD_DIR}/daemons/execd",
        ]

    new_path = ":".join(search_dirs + [current_path])

    print(f'Using PATH="{new_path}"')
    os.environ['PATH'] = new_path
This can be used on the command line to specify that only a specific test should be executed. description -- A meaningful description for the test. Keyword arguments: tls -- Enable pacemaker-remoted. """ Test.__init__(self, name, description, **kwargs) self.tls = kwargs.get("tls", False) # If we are going to run the stonith resource tests, we will need to # launch and track Corosync and pacemaker-fenced. self._corosync = None self._fencer = None self._is_stonith_test = "stonith" in self.name if self.tls: self._daemon_location = "pacemaker-remoted" else: self._daemon_location = "pacemaker-execd" if self._is_stonith_test: self._corosync = Corosync(self.verbose, self.logdir, "cts-exec") self._test_tool_location = "cts-exec-helper" def _kill_daemons(self): killall([ "corosync", "pacemaker-fenced", "lt-pacemaker-fenced", "pacemaker-execd", "lt-pacemaker-execd", "cts-exec-helper", "lt-cts-exec-helper", "pacemaker-remoted", ]) def _start_daemons(self): if self._corosync: self._corosync.start(kill_first=True) # pylint: disable=consider-using-with self._fencer = subprocess.Popen(["pacemaker-fenced", "-s"]) cmd = [self._daemon_location, "-l", self.logpath] if self.verbose: cmd += ["-V"] # pylint: disable=consider-using-with self._daemon_process = subprocess.Popen(cmd) def clean_environment(self): """Clean up the host after running a test.""" if self._daemon_process: self._daemon_process.terminate() self._daemon_process.wait() if self.verbose: print("Daemon Output Start") with open(self.logpath, "rt", errors="replace", encoding="utf-8") as logfile: for line in logfile: print(line.strip()) print("Daemon Output End") if self._corosync: self._fencer.terminate() self._fencer.wait() self._corosync.stop() self._daemon_process = None self._fencer = None self._corosync = None def add_cmd(self, cmd=None, **kwargs): """Add a cts-exec-helper command to be executed as part of this test.""" if cmd is None: cmd = self._test_tool_location if cmd == self._test_tool_location: if self.verbose: 
kwargs["args"] += " -V " if self.tls: kwargs["args"] += " -S " kwargs["validate"] = False kwargs["check_rng"] = False kwargs["check_stderr"] = False Test.add_cmd(self, cmd, **kwargs) def run(self): """Execute this test.""" if self.tls and self._is_stonith_test: self._result_txt = f"SKIPPED - '{self.name}' - disabled when testing pacemaker_remote" print(self._result_txt) return Test.run(self) class ExecTests(Tests): """Collection of all pacemaker-execd regression tests.""" def __init__(self, **kwargs): """ Create a new ExecTests instance. Keyword arguments: tls -- Enable pacemaker-remoted. """ Tests.__init__(self, **kwargs) self.tls = kwargs.get("tls", False) self._action_timeout = "-t 9000" self._installed_files = [] self._rsc_classes = self._setup_rsc_classes() print(f"Testing resource classes {self._rsc_classes!r}") if "lsb" in self._rsc_classes: service_agent = "LSBDummy" elif "systemd" in self._rsc_classes: service_agent = "pacemaker-cts-dummyd@3" else: service_agent = "unsupported" self._common_cmds = { "ocf_reg_line": f'-c register_rsc -r ocf_test_rsc {self._action_timeout} -C ocf -P pacemaker -T Dummy', "ocf_reg_event": '-l "NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:Done"', "ocf_unreg_line": f'-c unregister_rsc -r ocf_test_rsc {self._action_timeout} ', "ocf_unreg_event": '-l "NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:Done"', "ocf_start_line": f'-c exec -r ocf_test_rsc -a start {self._action_timeout} ', "ocf_start_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:Done" ', "ocf_stop_line": f'-c exec -r ocf_test_rsc -a stop {self._action_timeout} ', "ocf_stop_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:Done" ', "ocf_monitor_line": f'-c exec -r ocf_test_rsc -a monitor -i 2s {self._action_timeout} ', "ocf_monitor_event": f'-l "NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc 
action:monitor rc:ok op_status:Done" {self._action_timeout} ', "ocf_cancel_line": f'-c cancel -r ocf_test_rsc -a monitor -i 2s {self._action_timeout} ', "ocf_cancel_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled" ', "systemd_reg_line": f'-c register_rsc -r systemd_test_rsc {self._action_timeout} -C systemd -T pacemaker-cts-dummyd@3', "systemd_reg_event": '-l "NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:Done"', "systemd_unreg_line": f'-c unregister_rsc -r systemd_test_rsc {self._action_timeout} ', "systemd_unreg_event": '-l "NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:Done"', "systemd_start_line": f'-c exec -r systemd_test_rsc -a start {self._action_timeout} ', "systemd_start_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:Done" ', "systemd_stop_line": f'-c exec -r systemd_test_rsc -a stop {self._action_timeout} ', "systemd_stop_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:Done" ', "systemd_monitor_line": f'-c exec -r systemd_test_rsc -a monitor -i 2s {self._action_timeout} ', "systemd_monitor_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Done" -t 15000 ', "systemd_cancel_line": f'-c cancel -r systemd_test_rsc -a monitor -i 2s {self._action_timeout} ', "systemd_cancel_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled" ', "service_reg_line": f"-c register_rsc -r service_test_rsc {self._action_timeout} -C service -T {service_agent}", "service_reg_event": '-l "NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:Done"', "service_unreg_line": f'-c unregister_rsc -r service_test_rsc {self._action_timeout} ', "service_unreg_event": '-l "NEW_EVENT event_type:unregister 
rsc_id:service_test_rsc action:none rc:ok op_status:Done"', "service_start_line": f'-c exec -r service_test_rsc -a start {self._action_timeout} ', "service_start_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:Done" ', "service_stop_line": f'-c exec -r service_test_rsc -a stop {self._action_timeout} ', "service_stop_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:Done" ', "service_monitor_line": f'-c exec -r service_test_rsc -a monitor -i 2s {self._action_timeout} ', "service_monitor_event": f'-l "NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Done" {self._action_timeout} ', "service_cancel_line": f'-c cancel -r service_test_rsc -a monitor -i 2s {self._action_timeout} ', "service_cancel_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled" ', "lsb_reg_line": f'-c register_rsc -r lsb_test_rsc {self._action_timeout} -C lsb -T LSBDummy', "lsb_reg_event": '-l "NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:Done" ', "lsb_unreg_line": f'-c unregister_rsc -r lsb_test_rsc {self._action_timeout} ', "lsb_unreg_event": '-l "NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:Done"', "lsb_start_line": f'-c exec -r lsb_test_rsc -a start {self._action_timeout} ', "lsb_start_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:Done" ', "lsb_stop_line": f'-c exec -r lsb_test_rsc -a stop {self._action_timeout} ', "lsb_stop_event": '-l "NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:Done" ', "lsb_monitor_line": f'-c exec -r lsb_test_rsc -a status -i 2s {self._action_timeout} ', "lsb_monitor_event": f'-l "NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Done" {self._action_timeout} ', "lsb_cancel_line": f'-c 
def _setup_rsc_classes(self):
    """Determine which resource classes are supported.

    Asks ``crm_resource --list-standards`` for the available standards and
    then drops the ones this run cannot exercise: "stonith" when
    ``self.tls`` is set, and "systemd" when the Python systemd bindings
    cannot be imported.

    Returns:
        The list produced by stdout_from_command(), minus its final entry
        and minus any class removed above.
    """
    classes = stdout_from_command(["crm_resource", "--list-standards"])

    # Strip trailing empty line
    classes = classes[:-1]

    # list.remove() raises ValueError for a missing item, so only remove
    # "stonith" when it was actually reported.
    if self.tls and "stonith" in classes:
        classes.remove("stonith")

    if "systemd" in classes:
        try:
            # This code doesn't need this import, but pacemaker-cts-dummyd
            # does, so ensure the dependency is available rather than cause
            # all systemd tests to fail.
            # pylint: disable=import-outside-toplevel,unused-import
            import systemd.daemon
        except ImportError:
            print("Python systemd bindings not found.")
            print("The tests for systemd class are not going to be run.")
            classes.remove("systemd")

    return classes


def new_test(self, name, description):
    """Create a named test.

    Builds an ExecTest that inherits this object's verbose, tls, timeout,
    force_wait and logdir settings, appends it to ``self._tests`` and
    returns it so the caller can add commands to it.

    Args:
        name: Identifier of the test.
        description: Human-readable summary of the test.

    Returns:
        The newly created ExecTest.
    """
    test = ExecTest(name, description, verbose=self.verbose, tls=self.tls,
                    timeout=self.timeout, force_wait=self.force_wait,
                    logdir=self.logdir)
    self._tests.append(test)
    return test


def setup_environment(self):
    """Prepare the host before executing any tests.

    Steps, in order: stop pacemaker_remote (only when remote support is
    built in), run cleanup_environment(), create a random authkey if TLS is
    requested and none exists, install the Dummy/Stateful/ping agents when
    running from a build directory, and finally run ``cts-support install``.

    Every file created here is recorded in ``self._installed_files`` so that
    cleanup_environment() can remove it again.
    """
    if BuildOptions.REMOTE_ENABLED:
        # @TODO Use systemctl when available
        try:
            subprocess.call(["service", "pacemaker_remote", "stop"])
        except OSError as exc:
            # Stopping the service is best effort: the previous shell-based
            # call also carried on when the command could not be run.
            print(f"Could not run 'service pacemaker_remote stop': {exc}")

    self.cleanup_environment()

    # @TODO Support the option of using specified existing certificates
    authkey = f"{BuildOptions.PACEMAKER_CONFIG_DIR}/authkey"
    if self.tls and not os.path.isfile(authkey):
        print(f"Installing {authkey} ...")

        # No shell is involved, so a configuration directory whose path
        # contains spaces or shell metacharacters is handled correctly.
        os.makedirs(BuildOptions.PACEMAKER_CONFIG_DIR, exist_ok=True)

        # Same payload as "dd if=/dev/urandom bs=4096 count=1"
        with open(authkey, "wb") as key_file:
            key_file.write(os.urandom(4096))

        self._installed_files.append(authkey)

    # If we're in build directory, install agents if not already installed
    # pylint: disable=protected-access
    if os.path.exists(f"{BuildOptions._BUILD_DIR}/cts/cts-exec.in"):

        if not os.path.exists(f"{BuildOptions.OCF_RA_INSTALL_DIR}/pacemaker"):
            # @TODO remember which components were created and remove them
            os.makedirs(f"{BuildOptions.OCF_RA_INSTALL_DIR}/pacemaker", 0o755)

        for agent in ["Dummy", "Stateful", "ping"]:
            agent_source = f"{BuildOptions._BUILD_DIR}/extra/resources/{agent}"
            agent_dest = f"{BuildOptions.OCF_RA_INSTALL_DIR}/pacemaker/{agent}"
            if not os.path.exists(agent_dest):
                print(f"Installing {agent_dest} ...")
                shutil.copyfile(agent_source, agent_dest)
                os.chmod(agent_dest, EXECMODE)
                self._installed_files.append(agent_dest)

    subprocess.call(["cts-support", "install"])


def cleanup_environment(self):
    """Clean up the host after executing desired tests.

    Removes every file recorded in ``self._installed_files``, empties that
    list, and then runs ``cts-support uninstall``.
    """
    for installed_file in self._installed_files:
        print(f"Removing {installed_file} ...")
        try:
            os.remove(installed_file)
        except FileNotFoundError:
            # Already gone: nothing left to undo for this entry, and the
            # remaining files plus "cts-support uninstall" must still run.
            pass

    # Forget the removed files so a later cleanup does not retry them
    self._installed_files.clear()

    subprocess.call(["cts-support", "uninstall"])


def _build_cmd_str(self, rsc, ty):
    """Construct a command string for the given resource and type.

    Joins the ``<rsc>_<ty>_line`` and ``<rsc>_<ty>_event`` entries of
    ``self._common_cmds`` with a single space.

    Args:
        rsc: Resource class prefix used in the command table keys.
        ty: Operation name used in the command table keys.

    Returns:
        The combined command-line argument string.
    """
    cmds = self._common_cmds
    return f"{cmds[f'{rsc}_{ty}_line']} {cmds[f'{rsc}_{ty}_event']}"
expected_exitcode=ExitStatus.TIMEOUT) # If this happens the monitor did not actually cancel correctly test.add_cmd(args=common_cmds[f"{rsc}_monitor_event"], expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args=self._build_cmd_str(rsc, "stop")) test.add_cmd(args=self._build_cmd_str(rsc, "unreg")) # monitor duplicate test for rsc in self._rsc_classes: test = self.new_test(f"generic_monitor_duplicate_{rsc}", f"Test creation and canceling of duplicate monitors for {rsc} standard") test.add_cmd(args=self._build_cmd_str(rsc, "reg")) test.add_cmd(args=self._build_cmd_str(rsc, "start")) test.add_cmd(args=self._build_cmd_str(rsc, "monitor")) # If this fails, that means the monitor may not be getting rescheduled test.add_cmd(args=common_cmds[f"{rsc}_monitor_event"]) # If this fails, that means the monitor may not be getting rescheduled test.add_cmd(args=common_cmds[f"{rsc}_monitor_event"]) # Add the duplicate monitors test.add_cmd(args=self._build_cmd_str(rsc, "monitor")) test.add_cmd(args=self._build_cmd_str(rsc, "monitor")) test.add_cmd(args=self._build_cmd_str(rsc, "monitor")) test.add_cmd(args=self._build_cmd_str(rsc, "monitor")) # verify we still get update events # If this fails, that means the monitor may not be getting rescheduled test.add_cmd(args=common_cmds[f"{rsc}_monitor_event"]) # cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates test.add_cmd(args=self._build_cmd_str(rsc, "cancel")) # If this happens the monitor did not actually cancel correctly test.add_cmd(args=common_cmds[f"{rsc}_monitor_event"], expected_exitcode=ExitStatus.TIMEOUT) # If this happens the monitor did not actually cancel correctly test.add_cmd(args=common_cmds[f"{rsc}_monitor_event"], expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args=self._build_cmd_str(rsc, "stop")) test.add_cmd(args=self._build_cmd_str(rsc, "unreg")) # stop implies cancel test for rsc in self._rsc_classes: test = 
def build_multi_rsc_tests(self):
    """Register complex tests that involve managing multiple resources of different types.

    A single test is created. For every class in ``self._rsc_classes`` it
    registers, starts and monitors the class's resource, waits for one more
    monitor event per class, and then cancels, stops and unregisters them.
    Each stage is queued for all classes before the next stage begins.
    """
    # do not use service and systemd at the same time, it is the same resource.

    # register start monitor stop unregister resources of each type at the same time
    test = self.new_test("multi_rsc_start_stop_all_including_stonith",
                         "Start, monitor, and stop resources of multiple types and classes")
    events = self._common_cmds

    for stage in ("reg", "start", "monitor"):
        for rsc in self._rsc_classes:
            test.add_cmd(args=self._build_cmd_str(rsc, stage))

    for rsc in self._rsc_classes:
        # If this fails, that means the monitor is not being rescheduled
        test.add_cmd(args=events[f"{rsc}_monitor_event"])

    for stage in ("cancel", "stop", "unreg"):
        for rsc in self._rsc_classes:
            test.add_cmd(args=self._build_cmd_str(rsc, stage))
start failure.") test.add_cmd(args=f'-c register_rsc -r test_rsc -C stonith -P pacemaker -T fence_dummy {self._action_timeout} ' '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:Done"') # -t must be less than self._action_timeout test.add_cmd(args='-c exec -r test_rsc -a start -k monitor_delay -v 30 -t 1000 -w') test.add_cmd(args='-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:Error occurred op_status:Timed out" ' f'{self._action_timeout}') test.add_cmd(args=f'-c exec -r test_rsc -a stop {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:Done" ') test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done" ') # stonith component fail test = self.new_test("stonith_component_fail", "Kill stonith component after pacemaker-execd connects") test.add_cmd(args=self._build_cmd_str("stonith", "reg")) test.add_cmd(args=self._build_cmd_str("stonith", "start")) test.add_cmd(args='-c exec -r stonith_test_rsc -a monitor -i 600s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Done" ' f'{self._action_timeout}') test.add_cmd(args='-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:Error occurred op_status:error" -t 15000', kill="killall -9 -q pacemaker-fenced lt-pacemaker-fenced") test.add_cmd(args=self._build_cmd_str("stonith", "unreg")) # monitor fail for ocf resources test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") test.add_cmd(args=f'-c register_rsc -r test_rsc -C ocf -P pacemaker -T Dummy {self._action_timeout} ' '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start 
rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a monitor -i 1s {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done"') test.add_cmd(args='-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" ' f'{self._action_timeout}') test.add_cmd(args='-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" ' f'{self._action_timeout}') test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Done" {self._action_timeout} ', kill=f"rm -f {BuildOptions.LOCAL_STATE_DIR}/run/Dummy-test_rsc.state") test.add_cmd(args=f'-c cancel -r test_rsc -a monitor -i 1s {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled" ') test.add_cmd(args='-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Done" ' f'{self._action_timeout}', expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args='-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" ' f'{self._action_timeout}', expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done" ') # verify notify changes only for monitor operation test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd(args=f'-c register_rsc -r test_rsc -C ocf -P pacemaker -T Dummy {self._action_timeout} ' '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a start 
{self._action_timeout} -o ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a monitor -i 1s {self._action_timeout} ' ' -o -l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" ') test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" {self._action_timeout}', expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Done" {self._action_timeout}', kill=f"rm -f {BuildOptions.LOCAL_STATE_DIR}/run/Dummy-test_rsc.state") test.add_cmd(args=f'-c cancel -r test_rsc -a monitor -i 1s {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled" ') test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Done" {self._action_timeout}', expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" {self._action_timeout}', expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done"') # monitor fail for systemd resource if "systemd" in self._rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd(args=f'-c register_rsc -r test_rsc -C systemd -T pacemaker-cts-dummyd@3 {self._action_timeout} ' '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc 
-a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a monitor -i 1s {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" ') test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" {self._action_timeout}') test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" {self._action_timeout}') test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Done" {self._action_timeout}', kill="pkill -9 -f pacemaker-cts-dummyd") test.add_cmd(args=f'-c cancel -r test_rsc -a monitor -i 1s {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled" ') test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Done" {self._action_timeout}', expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" {self._action_timeout}', expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done" ') # Cancel non-existent operation on a resource test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd(args=f'-c register_rsc -r test_rsc -C ocf -P pacemaker -T Dummy {self._action_timeout} ' '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok 
op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a monitor -i 1s {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" ') test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" {self._action_timeout}') # interval is wrong, should fail test.add_cmd(args=f'-c cancel -r test_rsc -a monitor -i 2s {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled" ', expected_exitcode=ExitStatus.ERROR) # action name is wrong, should fail test.add_cmd(args=f'-c cancel -r test_rsc -a stop -i 1s {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled" ', expected_exitcode=ExitStatus.ERROR) test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done" ') # Attempt to invoke non-existent rsc id test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_cmd(args=f'-c exec -r test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:error op_status:Done" ', expected_exitcode=ExitStatus.ERROR) test.add_cmd(args=f'-c exec -r test_rsc -a stop {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:Done" ', expected_exitcode=ExitStatus.ERROR) test.add_cmd(args=f'-c exec -r test_rsc -a monitor -i 6s {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" ', expected_exitcode=ExitStatus.ERROR) test.add_cmd(args=f'-c cancel -r 
test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled" ', expected_exitcode=ExitStatus.ERROR) test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done" ') # Register and start a resource that doesn't exist, systemd if "systemd" in self._rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd(args=f'-c register_rsc -r test_rsc -C systemd -T this_is_fake1234 {self._action_timeout} ' '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed" ') test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done" ') # Register and start a resource that doesn't exist, ocf test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd(args=f'-c register_rsc -r test_rsc -C ocf -P pacemaker -T this_is_fake1234 {self._action_timeout} ' '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed" ') test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done" ') # Register ocf with non-existent provider test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected 
def build_stress_tests(self):
    """Register stress tests.

    Three tests are created:

    * ``ocf_stress``: registers, starts and monitors 25 OCF Dummy
      resources, then stops and unregisters them.
    * ``systemd_stress``: the same cycle against pacemaker-cts-dummyd@3
      units, only when "systemd" is in ``self._rsc_classes``.
    * ``rsc_inflight_collision``: one resource with nine recurring monitors
      at slightly different intervals, each sleeping during execution.
    """
    def add_load_cycle(test, count, timeout, agent_args):
        """Queue register/start/monitor for ``count`` resources, then stop/unregister."""
        for i in range(count):
            test.add_cmd(args=f'-c register_rsc -r rsc_{i} {timeout} {agent_args} '
                              f'-l "NEW_EVENT event_type:register rsc_id:rsc_{i} action:none rc:ok op_status:Done"')
            test.add_cmd(args=f'-c exec -r rsc_{i} -a start {timeout} '
                              f'-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_{i} action:start rc:ok op_status:Done"')
            test.add_cmd(args=f'-c exec -r rsc_{i} -a monitor {timeout} -i 1s '
                              f'-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_{i} action:monitor rc:ok op_status:Done"')

        # Stops are queued only after every resource is up and monitored
        for i in range(count):
            test.add_cmd(args=f'-c exec -r rsc_{i} -a stop {timeout} '
                              f'-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_{i} action:stop rc:ok op_status:Done"')
            test.add_cmd(args=f'-c unregister_rsc -r rsc_{i} {timeout} '
                              f'-l "NEW_EVENT event_type:unregister rsc_id:rsc_{i} action:none rc:ok op_status:Done"')

    timeout = "-t 20000"
    iterations = 25

    test = self.new_test("ocf_stress", "Verify OCF agent handling works under load")
    add_load_cycle(test, iterations, timeout, "-C ocf -P heartbeat -T Dummy")

    if "systemd" in self._rsc_classes:
        test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load")
        add_load_cycle(test, iterations, timeout, "-C systemd -T pacemaker-cts-dummyd@3")

    iterations = 9
    timeout = "-t 30000"

    # Verify recurring op in-flight collision is handled in series properly
    test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.")
    test.add_cmd(args='-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy '
                      f'-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:Done" {self._action_timeout}')
    test.add_cmd(args=f'-c exec -r test_rsc -a start {timeout} -k op_sleep -v 1 '
                      '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Done"')

    # Intervals 1000ms..1008ms give nine distinct recurring monitors
    for i in range(iterations):
        test.add_cmd(args=f'-c exec -r test_rsc -a monitor {timeout} -i 100{i}ms -k op_sleep -v 2 '
                          '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done"')

    test.add_cmd(args=f'-c exec -r test_rsc -a stop {timeout} '
                      '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:Done"')
    test.add_cmd(args=f'-c unregister_rsc -r test_rsc {timeout} '
                      '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done"')
{self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:Done" ') test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done" ') # start delay, but cancel before it gets a chance to start test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd(args='-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy ' f'-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:Done" {self._action_timeout}') test.add_cmd(args='-c exec -r test_rsc -s 5000 -a start -w -t 4000') test.add_cmd(args=f'-c cancel -r test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled" ') test.add_cmd(args='-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Done" -t 5000', expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done" ') # Register a bunch of resources, verify we can get info on them test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self._rsc_classes: test.add_cmd(args=f'-c register_rsc -r rsc1 -C systemd -T pacemaker-cts-dummyd@3 {self._action_timeout}') test.add_cmd(args='-c get_rsc_info -r rsc1 ') test.add_cmd(args=f'-c unregister_rsc -r rsc1 {self._action_timeout}') test.add_cmd(args='-c get_rsc_info -r rsc1 ', expected_exitcode=ExitStatus.ERROR) test.add_cmd(args=f'-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker {self._action_timeout}') test.add_cmd(args='-c get_rsc_info -r rsc2 ') test.add_cmd(args=f'-c unregister_rsc -r rsc2 {self._action_timeout}') test.add_cmd(args='-c get_rsc_info -r rsc2 ', 
expected_exitcode=ExitStatus.ERROR) # Register duplicate, verify only one entry exists and can still be removed test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd(args=f'-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker {self._action_timeout}') test.add_cmd(args="-c get_rsc_info -r rsc2 ", stdout_match="id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd(args=f'-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker {self._action_timeout}') test.add_cmd(args="-c get_rsc_info -r rsc2 ", stdout_match="id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd(args=f'-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker {self._action_timeout}') test.add_cmd(args="-c get_rsc_info -r rsc2 ", stdout_match="id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd(args=f'-c unregister_rsc -r rsc2 {self._action_timeout}') test.add_cmd(args='-c get_rsc_info -r rsc2 ', expected_exitcode=ExitStatus.ERROR) # verify the option to only send notification to the original client test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd(args=f'-c register_rsc -r test_rsc -C ocf -P pacemaker -T Dummy {self._action_timeout} ' '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a start {self._action_timeout} ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Done" ') test.add_cmd(args=f'-c exec -r test_rsc -a monitor -i 1s {self._action_timeout} -n ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done"') # this will fail because the monitor notifications should only go to the original caller, which no longer exists. 
test.add_cmd(args=f'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:Done" {self._action_timeout}', expected_exitcode=ExitStatus.TIMEOUT) test.add_cmd(args='-c cancel -r test_rsc -a monitor -i 1s -t 6000 ') test.add_cmd(args=f'-c unregister_rsc -r test_rsc {self._action_timeout} ' '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:Done" ') # get metadata test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") test.add_cmd(args="-c metadata -C ocf -P pacemaker -T Dummy", stdout_match="resource-agent name=\"Dummy\"") test.add_cmd(args="-c metadata -C ocf -P pacemaker -T Stateful") test.add_cmd(args="-c metadata -P pacemaker -T Stateful", expected_exitcode=ExitStatus.ERROR) test.add_cmd(args="-c metadata -C ocf -P pacemaker -T fake_agent", expected_exitcode=ExitStatus.ERROR) # get stonith metadata test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") test.add_cmd(args="-c metadata -C stonith -P pacemaker -T fence_dummy", stdout_match="resource-agent name=\"fence_dummy\"") # get lsb metadata if "lsb" in self._rsc_classes: test = self.new_test("get_lsb_metadata", "Retrieve metadata for an LSB resource") test.add_cmd(args="-c metadata -C lsb -T LSBDummy", stdout_match="resource-agent name='LSBDummy'") # get metadata if "systemd" in self._rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd(args="-c metadata -C systemd -T pacemaker-cts-dummyd@", stdout_match="resource-agent name=\"pacemaker-cts-dummyd@\"") # get ocf providers test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd(args="-c list_ocf_providers ", stdout_match="pacemaker") test.add_cmd(args="-c list_ocf_providers -T ping", stdout_match="pacemaker") # Verify agents only exist in their lists test = self.new_test("verify_agent_lists", "Verify the agent 
def build_options():
    """Handle command line arguments.

    Returns:
        The argparse namespace for ``sys.argv``. It always carries a
        ``pacemaker_remote`` attribute: the value of -R/--pacemaker-remote
        when remote support is built in, otherwise False.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
                                     description="Run pacemaker-execd regression tests",
                                     epilog="Example: Run only the test 'start_stop'\n"
                                            f"\t {sys.argv[0]} --run-only start_stop\n\n"
                                            "Example: Run only the tests with the string 'systemd' present in them\n"
                                            f"\t {sys.argv[0]} --run-only-pattern systemd")
    parser.add_argument("-l", "--list-tests", action="store_true",
                        help="Print out all registered tests")
    parser.add_argument("-p", "--run-only-pattern", metavar='PATTERN',
                        help="Run only tests matching the given pattern")
    parser.add_argument("-r", "--run-only", metavar='TEST',
                        help="Run a specific test")
    parser.add_argument("-t", "--timeout", type=float, default=2,
                        help="Up to how many seconds each test case waits for the daemon to "
                             "be initialized. Defaults to 2. The value 0 means no limit.")
    parser.add_argument("-w", "--force-wait", action="store_true",
                        help="Each test case waits the default/specified --timeout for the "
                             "daemon without tracking the log")
    if BuildOptions.REMOTE_ENABLED:
        parser.add_argument("-R", "--pacemaker-remote", action="store_true",
                            help="Test pacemaker-remoted binary instead of pacemaker-execd")
    else:
        # main() reads opts.pacemaker_remote unconditionally, so the
        # attribute must exist even when the option is not offered.
        parser.set_defaults(pacemaker_remote=False)
    parser.add_argument("-V", "--verbose", action="store_true",
                        help="Verbose output")

    return parser.parse_args()
tests.run_single(opts.run_only) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_environment() tests.exit() if __name__ == "__main__": main() + +# vim: set filetype=python: diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in index 30fb39f298..c6563ea313 100644 --- a/cts/cts-fencing.in +++ b/cts/cts-fencing.in @@ -1,925 +1,927 @@ #!@PYTHON@ """Regression tests for Pacemaker's fencer.""" # pylint doesn't like the module name "cts-fencing" which is an invalid complaint for this file # but probably something we want to continue warning about elsewhere # pylint: disable=invalid-name # pacemaker imports need to come after we modify sys.path, which pylint will complain about. # pylint: disable=wrong-import-position __copyright__ = "Copyright 2012-2025 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import argparse import os import sys import subprocess import tempfile # These imports allow running from a source checkout after running `make`. # Note that while this doesn't necessarily mean it will successfully run tests, # but being able to see --help output can be useful. 
if os.path.exists("@abs_top_srcdir@/python"):
    sys.path.insert(0, "@abs_top_srcdir@/python")

# pylint: disable=comparison-of-constants,comparison-with-itself,condition-evals-to-constant
if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@":
    sys.path.insert(0, "@abs_top_builddir@/python")

from pacemaker.buildoptions import BuildOptions
from pacemaker.exitstatus import ExitStatus
from pacemaker._cts.corosync import Corosync, localname
from pacemaker._cts.process import killall, exit_if_proc_running
from pacemaker._cts.test import Test, Tests

TEST_DIR = sys.path[0]


def update_path():
    """
    Prepend the directories holding the test binaries to PATH.

    When cts-fencing.in sits next to this script, the build-tree directories
    are used; otherwise the installed daemon directory is used. The resulting
    value is printed and stored back into os.environ['PATH'].
    """
    search_path = os.environ['PATH']

    if os.path.exists(f"{TEST_DIR}/cts-fencing.in"):
        # pylint: disable=protected-access
        build_dir = BuildOptions._BUILD_DIR
        print(f"Running tests from the source tree: {build_dir} ({TEST_DIR})")

        # Each entry is prepended in turn, so the last one listed ends up
        # first in PATH:
        #   daemons/fenced -- pacemaker-fenced and cts-fence-helper
        #   tools          -- stonith_admin
        #   cts/support    -- cts-support
        for subdir in ("daemons/fenced", "tools", "cts/support"):
            search_path = f"{build_dir}/{subdir}:{search_path}"
    else:
        daemon_dir = BuildOptions.DAEMON_DIR
        print(f"Running tests from the install tree: {daemon_dir} (not {TEST_DIR})")
        # For pacemaker-fenced, cts-fence-helper, and cts-support
        search_path = f"{daemon_dir}:{search_path}"

    print(f'Using PATH="{search_path}"')
    os.environ['PATH'] = search_path


class FenceTest(Test):
    """A single fencing test, run against a pacemaker-fenced started with --stand-alone."""

    def __init__(self, name, description, **kwargs):
        """
        Create a new FenceTest instance.

        Arguments:
        name        -- A unique name for this test.  This can be used on the
                       command line to specify that only a specific test should
                       be executed.
        description -- A meaningful description for the test.

        Any further keyword arguments are handed to the Test base class as-is.
        """
        super().__init__(name, description, **kwargs)
        self._daemon_location = "pacemaker-fenced"

    def _kill_daemons(self):
        """Kill pacemakerd and pacemaker-fenced."""
        daemons = ["pacemakerd", "pacemaker-fenced"]
        killall(daemons)

    def _start_daemons(self):
        """Launch pacemaker-fenced logging to self.logpath, adding -V when verbose."""
        daemon_cmd = ["pacemaker-fenced", "--stand-alone", "--logfile", self.logpath]
        if self.verbose:
            daemon_cmd.append("-V")

        print("Starting " + " ".join(daemon_cmd))
        # pylint: disable=consider-using-with
        self._daemon_process = subprocess.Popen(daemon_cmd)


class FenceTests(Tests):
    """Collection of all fencing regression tests."""

    def __init__(self, **kwargs):
        """Create a new FenceTests instance; keyword arguments go to the Tests base class."""
        super().__init__(**kwargs)
        self._corosync = Corosync(self.verbose, self.logdir, "cts-fencing")

    def new_test(self, name, description):
        """Build a FenceTest carrying this collection's settings, register it, and return it."""
        created = FenceTest(name, description,
                            verbose=self.verbose,
                            timeout=self.timeout,
                            force_wait=self.force_wait,
                            logdir=self.logdir)
        self._tests.append(created)
        return created

    def build_api_sanity_tests(self):
        """Register tests to verify basic API usage."""
        verbose_arg = "-V" if self.verbose else ""

        # (test name, description, cts-fence-helper mode flag)
        helper_runs = (
            ("low_level_api_test", "Sanity-test client API", "-t"),
            ("low_level_api_mainloop_test", "Sanity-test client API using mainloop", "-m"),
        )
        for test_name, summary, mode_flag in helper_runs:
            test = self.new_test(test_name, summary)
            test.add_cmd("cts-fence-helper", args=f"{mode_flag} {verbose_arg}", validate=False)

    def build_custom_timeout_tests(self):
        """Register tests to verify custom timeout usage."""
        # custom timeout without topology
        test = self.new_test("custom_timeout_1",
                             "Verify per device timeouts work as expected without using topology")
        test.add_cmd('stonith_admin',
                     args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"')
        test.add_cmd('stonith_admin',
                     args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3 -o pcmk_off_timeout=1')
        test.add_cmd('stonith_admin',
                     args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 -o pcmk_off_timeout=4')
test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") # timeout is 5+1+4 = 10 test.add_log_pattern("Total timeout set to 12s") # custom timeout _WITH_ topology test = self.new_test("custom_timeout_2", "Verify per device timeouts work as expected _WITH_ topology") test.add_cmd('stonith_admin', args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3 -o pcmk_off_timeout=1000ms') test.add_cmd('stonith_admin', args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 -o pcmk_off_timeout=4000s') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") # timeout is 5+1+4000 = 4006 test.add_log_pattern("Total timeout set to 4807s") def build_fence_merge_tests(self): """Register tests to verify when fence operations should be merged.""" # Simple test that overlapping fencing operations get merged test = self.new_test("custom_merge_single", "Verify overlapping identical fencing operations are merged, no fencing levels used") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") # one merger will happen test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") # the 
pattern below signifies that both the original and duplicate operation completed test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") # Test that multiple mergers occur test = self.new_test("custom_merge_multiple", "Verify multiple overlapping identical fencing operations are merged") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o delay=2 -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") # 4 mergers should occur test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") # the pattern below signifies that both the original and duplicate operation completed test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") # Test that multiple mergers occur with topologies used test = 
self.new_test("custom_merge_with_topology", "Verify multiple overlapping identical fencing operations are merged with fencing levels") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") # 4 mergers should occur test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") # the pattern below signifies that both the original and duplicate operation completed test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") def build_fence_no_merge_tests(self): """Register tests to verify when fence operations should not 
be merged.""" test = self.new_test("custom_no_merge", "Verify differing fencing operations are not merged") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 node2") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3 node2") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 node2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -F node2 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client", negative=True) def build_standalone_tests(self): """Register a grab bag of tests.""" # test what happens when all devices timeout test = self.new_test("fence_multi_device_failure", "Verify that all devices timeout, a fencing failure is returned") test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false3 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 2", expected_exitcode=ExitStatus.TIMEOUT) test.add_log_pattern("Total timeout set to 7s") test.add_log_pattern("targeting node3 using false1 returned ") test.add_log_pattern("targeting node3 using false2 returned ") test.add_log_pattern("targeting node3 using false3 returned ") # test what happens when multiple devices can fence a node, but the 
first device fails test = self.new_test("fence_device_failure_rollover", "Verify that when one fence device fails for a node, the others are tried") test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") test.add_log_pattern("Total timeout set to 18s") # test what happens when we try to use a missing fence-agent test = self.new_test("fence_missing_agent", "Verify proper error-handling when using a non-existent fence-agent") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_missing -o mode=pass -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node2") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5", expected_exitcode=ExitStatus.NOSUCH) test.add_cmd("stonith_admin", args="--output-as=xml -F node2 -t 5") # simple topology test for one device test = self.new_test("topology_simple", "Verify all fencing devices at a level are used") test.add_cmd("stonith_admin", args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") test.add_log_pattern("Total timeout set to 6s") test.add_log_pattern("targeting node3 using true returned 0") # add topology, delete topology, verify fencing still works test = self.new_test("topology_add_remove", "Verify fencing occurrs after all topology levels are removed") test.add_cmd("stonith_admin", args='--output-as=xml -R true -a fence_dummy -o 
mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true") test.add_cmd("stonith_admin", args="--output-as=xml -d node3 -i 1") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") test.add_log_pattern("Total timeout set to 6s") test.add_log_pattern("targeting node3 using true returned 0") # test what happens when the first fencing level has multiple devices test = self.new_test("topology_device_fails", "Verify if one device in a level fails, the other is tried") test.add_cmd("stonith_admin", args='--output-as=xml -R false -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 20") test.add_log_pattern("Total timeout set to 48s") test.add_log_pattern("targeting node3 using false returned 1") test.add_log_pattern("targeting node3 using true returned 0") # test what happens when the first fencing level fails test = self.new_test("topology_multi_level_fails", "Verify if one level fails, the next leve is tried") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 
node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 3") test.add_log_pattern("Total timeout set to 21s") test.add_log_pattern("targeting node3 using false1 returned 1") test.add_log_pattern("targeting node3 using false2 returned 1") test.add_log_pattern("targeting node3 using true3 returned 0") test.add_log_pattern("targeting node3 using true4 returned 0") # test what happens when the first fencing level had devices that no one has registered test = self.new_test("topology_missing_devices", "Verify topology can continue with missing devices") test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") 
test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") # Test what happens if multiple fencing levels are defined, and then the first one is removed test = self.new_test("topology_level_removal", "Verify level removal works") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") # Now remove level 2, verify none of the devices in level two are hit test.add_cmd("stonith_admin", args="--output-as=xml -d node3 -i 2") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 20") test.add_log_pattern("Total timeout set to 96s") test.add_log_pattern("targeting node3 using false1 returned 1") test.add_log_pattern("targeting node3 using 
false2 returned ", negative=True) test.add_log_pattern("targeting node3 using true3 returned 0") test.add_log_pattern("targeting node3 using true4 returned 0") # Test targeting a topology level by node name pattern test = self.new_test("topology_level_pattern", "Verify targeting topology by node name pattern works") test.add_cmd("stonith_admin", args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r '@node.*' -i 1 -v true") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") test.add_log_pattern("targeting node3 using true returned 0") # test allowing commas and semicolons as delimiters in pcmk_host_list test = self.new_test("host_list_delimiters", "Verify commas and semicolons can be used as pcmk_host_list delimiters") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1,node2,node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=pcmk1;pcmk2;pcmk3"') test.add_cmd("stonith_admin", args="stonith_admin --output-as=xml -F node2 -t 5") test.add_cmd("stonith_admin", args="stonith_admin --output-as=xml -F pcmk3 -t 5") test.add_log_pattern("targeting node2 using true1 returned 0") test.add_log_pattern("targeting pcmk3 using true2 returned 0") # test the stonith builds the correct list of devices that can fence a node test = self.new_test("list_devices", "Verify list of devices that can fence a node is correct") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -l node1 
-V", stdout_match="true2", stdout_no_match="true1") test.add_cmd("stonith_admin", args="--output-as=xml -l node1 -V", stdout_match="true3", stdout_no_match="true1") # simple test of device monitor test = self.new_test("monitor", "Verify device is reachable") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node3"') test.add_cmd("stonith_admin", args="--output-as=xml -Q true1") test.add_cmd("stonith_admin", args="--output-as=xml -Q false1") test.add_cmd("stonith_admin", args="--output-as=xml -Q true2", expected_exitcode=ExitStatus.NOSUCH) # Verify monitor occurs for duration of timeout period on failure test = self.new_test("monitor_timeout", "Verify monitor uses duration of timeout period given") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=fail -o monitor_mode=fail -o pcmk_host_list=node3') test.add_cmd("stonith_admin", args="--output-as=xml -Q true1 -t 5", expected_exitcode=ExitStatus.ERROR) test.add_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries test = self.new_test("monitor_timeout_max_retries", "Verify monitor retries until max retry value or timeout is hit") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=fail -o monitor_mode=fail -o pcmk_host_list=node3') test.add_cmd("stonith_admin", args="--output-as=xml -Q true1 -t 15", expected_exitcode=ExitStatus.ERROR) test.add_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times") # simple register test test = self.new_test("register", "Verify devices can be registered and un-registered") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') test.add_cmd("stonith_admin", args="--output-as=xml -Q 
true1") test.add_cmd("stonith_admin", args="--output-as=xml -D true1") test.add_cmd("stonith_admin", args="--output-as=xml -Q true1", expected_exitcode=ExitStatus.NOSUCH) # simple reboot test test = self.new_test("reboot", "Verify devices can be rebooted") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') test.add_cmd("stonith_admin", args="--output-as=xml -B node3 -t 5") test.add_cmd("stonith_admin", args="--output-as=xml -D true1") test.add_cmd("stonith_admin", args="--output-as=xml -Q true1", expected_exitcode=ExitStatus.NOSUCH) # test fencing history test = self.new_test("fence_history", "Verify last fencing operation is returned") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5 -V") test.add_cmd("stonith_admin", args="--output-as=xml -H node3", stdout_match='action="off" target="node3" .* status="success"') # simple test of dynamic list query test = self.new_test("dynamic_list_query", "Verify dynamic list of fencing devices can be retrieved") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -l fake_port_1", stdout_match='count="3"') # fence using dynamic list query test = self.new_test("fence_dynamic_list_query", "Verify dynamic list of fencing devices can be retrieved") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o 
mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -F fake_port_1 -t 5 -V") # simple test of query using status action test = self.new_test("status_query", "Verify dynamic list of fencing devices can be retrieved") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_check=status') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_check=status') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o pcmk_host_check=status') test.add_cmd("stonith_admin", args="--output-as=xml -l fake_port_1", stdout_match='count="3"') # test what happens when no reboot action is advertised test = self.new_test("no_reboot_support", "Verify reboot action defaults to off when no reboot action is advertised by agent") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy_no_reboot -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -B node1 -t 5 -V") test.add_log_pattern("does not support reboot") test.add_log_pattern("using true1 returned 0") # make sure reboot is used when reboot action is advertised test = self.new_test("with_reboot_support", "Verify reboot action can be used when metadata advertises it") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -B node1 -t 5 -V") test.add_log_pattern("does not advertise support for 'reboot', performing 'off'", negative=True) test.add_log_pattern("using true1 returned 0") # make sure all fencing delays are applied correctly and taken into account by fencing timeouts with topology test = self.new_test("topology_delays", "Verify all fencing delays 
are applied correctly and taken into account by fencing timeouts with topology") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1') test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1') # Resulting "random" delay will always be 1 since (rand() % (delay_max - delay_base)) is always 0 here test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1 -o pcmk_delay_max=2') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 --delay 1") # Total fencing timeout takes all fencing delays into account test.add_log_pattern("Total timeout set to 582s") # Fencing timeout for the first device takes the requested fencing delay # and pcmk_delay_base into account test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using true1 .*146s.*", regex=True) # Requested fencing delay is applied only for the first device in the # first level, with the static delay from pcmk_delay_base added test.add_log_pattern("Delaying 'off' action targeting node3 using true1 for 2s | timeout=120s requested_delay=1s base=1s max=1s") # Fencing timeout no longer takes the requested fencing delay into account for further devices test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using false1 .*145s.*", regex=True) # Requested fencing delay 
is no longer applied for further devices test.add_log_pattern("Delaying 'off' action targeting node3 using false1 for 1s | timeout=120s requested_delay=0s base=1s max=1s") # Fencing timeout takes pcmk_delay_max into account test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using true2 .*146s.*", regex=True) test.add_log_pattern("Delaying 'off' action targeting node3 using true2 for 1s | timeout=120s requested_delay=0s base=1s max=2s") test.add_log_pattern("Delaying 'off' action targeting node3 using true3", negative=True) def build_unfence_tests(self): """Register tests that verify unfencing.""" our_uname = localname() # verify unfencing using automatic unfencing test = self.new_test("unfence_required_1", "Verify require unfencing on all devices when automatic=true in agent's metadata") test.add_cmd('stonith_admin', args=f'--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list={our_uname}"') test.add_cmd('stonith_admin', args=f'--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list={our_uname}"') test.add_cmd("stonith_admin", args=f"--output-as=xml -U {our_uname} -t 3") # both devices should be executed test.add_log_pattern("using true1 returned 0") test.add_log_pattern("using true2 returned 0") # verify unfencing using automatic unfencing fails if any of the required agents fail test = self.new_test("unfence_required_2", "Verify require unfencing on all devices when automatic=true in agent's metadata") test.add_cmd('stonith_admin', args=f'--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list={our_uname}"') test.add_cmd('stonith_admin', args=f'--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=fail -o "pcmk_host_list={our_uname}"') test.add_cmd("stonith_admin", args=f"--output-as=xml -U {our_uname} -t 6", expected_exitcode=ExitStatus.ERROR) # verify unfencing using automatic devices with topology test = 
self.new_test("unfence_required_3", "Verify require unfencing on all devices even when at different topology levels") test.add_cmd('stonith_admin', args=f'--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list={our_uname} node3"') test.add_cmd('stonith_admin', args=f'--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list={our_uname} node3"') test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 1 -v true1") test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 2 -v true2") test.add_cmd("stonith_admin", args=f"--output-as=xml -U {our_uname} -t 3") test.add_log_pattern("using true1 returned 0") test.add_log_pattern("using true2 returned 0") # verify unfencing using automatic devices with topology test = self.new_test("unfence_required_4", "Verify all required devices are executed even with topology levels fail") test.add_cmd('stonith_admin', args=f'--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list={our_uname} node3"') test.add_cmd('stonith_admin', args=f'--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list={our_uname} node3"') test.add_cmd('stonith_admin', args=f'--output-as=xml -R true3 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list={our_uname} node3"') test.add_cmd('stonith_admin', args=f'--output-as=xml -R true4 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list={our_uname} node3"') test.add_cmd('stonith_admin', args=f'--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list={our_uname} node3"') test.add_cmd('stonith_admin', args=f'--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list={our_uname} node3"') test.add_cmd('stonith_admin', args=f'--output-as=xml -R false3 -a fence_dummy -o mode=fail -o "pcmk_host_list={our_uname} node3"') test.add_cmd('stonith_admin', args=f'--output-as=xml -R false4 -a fence_dummy -o mode=fail -o 
"pcmk_host_list={our_uname} node3"') test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 1 -v true1") test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 1 -v false1") test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 2 -v false2") test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 2 -v true2") test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 2 -v false3") test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 2 -v true3") test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 3 -v false4") test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 4 -v true4") test.add_cmd("stonith_admin", args=f"--output-as=xml -U {our_uname} -t 3") test.add_log_pattern("using true1 returned 0") test.add_log_pattern("using true2 returned 0") test.add_log_pattern("using true3 returned 0") test.add_log_pattern("using true4 returned 0") def build_unfence_on_target_tests(self): """Register tests that verify unfencing that runs on the target.""" our_uname = localname() # verify unfencing using on_target device test = self.new_test("unfence_on_target_1", "Verify unfencing with on_target = true") test.add_cmd("stonith_admin", args=f'--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list={our_uname}"') test.add_cmd("stonith_admin", args=f"--output-as=xml -U {our_uname} -t 3") test.add_log_pattern("(on) to be executed on target") # verify failure of unfencing using on_target device test = self.new_test("unfence_on_target_2", "Verify failure unfencing with on_target = true") test.add_cmd("stonith_admin", args=f'--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list={our_uname} node_fake_1234"') test.add_cmd("stonith_admin", args="--output-as=xml -U node_fake_1234 -t 3", expected_exitcode=ExitStatus.NOSUCH) test.add_log_pattern("(on) to be executed on target") # verify unfencing using on_target device with topology 
test = self.new_test("unfence_on_target_3", "Verify unfencing with on_target = true using topology") test.add_cmd("stonith_admin", args=f'--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list={our_uname} node3"') test.add_cmd("stonith_admin", args=f'--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list={our_uname} node3"') test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 1 -v true1") test.add_cmd("stonith_admin", args=f"--output-as=xml -r {our_uname} -i 2 -v true2") test.add_cmd("stonith_admin", args=f"--output-as=xml -U {our_uname} -t 3") test.add_log_pattern("(on) to be executed on target") # verify unfencing using on_target device with topology fails when target node doesn't exist test = self.new_test("unfence_on_target_4", "Verify unfencing failure with on_target = true using topology") test.add_cmd("stonith_admin", args=f'--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list={our_uname} node_fake"') test.add_cmd("stonith_admin", args=f'--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list={our_uname} node_fake"') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 2 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -U node_fake -t 3", expected_exitcode=ExitStatus.NOSUCH) test.add_log_pattern("(on) to be executed on target") def build_remap_tests(self): """Register tests that verify remapping of reboots to off-on.""" test = self.new_test("remap_simple", "Verify sequential topology reboot is remapped to all-off-then-all-on") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake ' '-o pcmk_off_timeout=1 -o pcmk_reboot_timeout=10') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake ' '-o pcmk_off_timeout=2 -o pcmk_reboot_timeout=20') 
test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_log_pattern("Total timeout set to 3s for peer's fencing targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") # fence_dummy sets "on" as an on_target action test.add_log_pattern("Ignoring true1 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Ignoring true2 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("remap_simple_off", "Verify sequential topology reboot skips 'on' if " "pcmk_reboot_action=off or agent doesn't support " "'on'") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass " "-o pcmk_host_list=node_fake -o pcmk_off_timeout=1 " "-o pcmk_reboot_timeout=10 -o pcmk_reboot_action=off") test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy_no_on " "-o mode=pass -o pcmk_host_list=node_fake " "-o pcmk_off_timeout=2 -o pcmk_reboot_timeout=20") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_log_pattern("Total timeout set to 3s for peer's fencing targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action 
targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") # "on" should be skipped test.add_log_pattern("Not turning node_fake back on using " "true1 because the device is configured " "to stay off") test.add_log_pattern("Not turning node_fake back on using true2" " because the agent doesn't support 'on'") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("remap_automatic", "Verify remapped topology reboot skips automatic 'on'") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy_auto_unfence ' '-o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy_auto_unfence ' '-o "mode=pass" -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test.add_log_pattern("perform 'on' action targeting node_fake using", negative=True) test.add_log_pattern("'on' failure", negative=True) test = self.new_test("remap_complex_1", "Verify remapped topology reboot in second level works if non-remapped first level fails") test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') 
test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 2 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("perform 'reboot' action targeting node_fake using false1") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") test.add_log_pattern("Ignoring true1 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Ignoring true2 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("remap_complex_2", "Verify remapped topology reboot failure in second level proceeds to third level") test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 2 -v true1 -v false2 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 3 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("perform 'reboot' action 
targeting node_fake using false1") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using false2") test.add_log_pattern("Attempted to execute agent fence_dummy (off) the maximum number of times") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test.add_log_pattern("perform 'reboot' action targeting node_fake using true2") test.add_log_pattern("node_fake with true3", negative=True) def build_query_tests(self): """Run stonith_admin --metadata for the fence_dummy agent and check command output.""" test = self.new_test("get_metadata", "Run stonith_admin --metadata for the fence_dummy agent") test.add_cmd("stonith_admin", args="--output-as=xml -a fence_dummy --metadata", stdout_match=' 1.""" if n == 1: return "" return "S" if __name__ == '__main__': environment = CtsLab(sys.argv[1:]) iters = environment["iterations"] tests = [] # Set the signal handler signal.signal(15, sig_handler) signal.signal(10, sig_handler) # Create the Cluster Manager object cm = None if environment["Stack"] == "corosync 2+": cm = Corosync2() else: LogFactory().log(f"Unknown stack: {environment['stack']}") sys.exit(1) if environment["TruncateLog"]: if environment["OutputFile"] is None: LogFactory().log("Ignoring truncate request because no output file specified") else: LogFactory().log(f"Truncating {environment['OutputFile']}") with open(environment["OutputFile"], "w", encoding="utf-8") as outputfile: outputfile.truncate(0) audits = audit_list(cm) if environment["ListTests"]: tests = test_list(cm, audits) LogFactory().log(f"Total {len(tests)} tests") for test in tests: LogFactory().log(test.name) sys.exit(0) elif len(environment["tests"]) == 0: tests = test_list(cm, audits) else: chosen = environment["tests"] for test_case in chosen: match = None for test in test_list(cm, audits): if test.name == test_case: 
match = test if not match: LogFactory().log("--choose: No applicable/valid tests chosen") sys.exit(1) else: tests.append(match) # Scenario selection if environment["scenario"] == "all-once": iters = len(tests) scenario = AllOnce(cm, [BootCluster(cm, environment)], audits, tests) elif environment["scenario"] == "sequence": scenario = Sequence(cm, [BootCluster(cm, environment)], audits, tests) elif environment["scenario"] == "boot": scenario = Boot(cm, [LeaveBooted(cm, environment)], audits, []) else: scenario = RandomTests(cm, [BootCluster(cm, environment)], audits, tests) LogFactory().log(f">>>>>>>>>>>>>>>> BEGINNING {iters!r} TEST{plural_s(iters)}") LogFactory().log(f"Stack: {environment['Stack']} ({environment['Name']})") LogFactory().log(f"Schema: {environment['Schema']}") LogFactory().log(f"Scenario: {scenario.__doc__}") LogFactory().log(f"CTS Exerciser: {environment['cts-exerciser']}") LogFactory().log(f"CTS Logfile: {environment['OutputFile']}") LogFactory().log(f"Random Seed: {environment['RandSeed']}") if "syslogd" in environment: LogFactory().log(f"Syslog variant: {environment['syslogd'].strip()}") LogFactory().log(f"System log files: {environment['LogFileName']}") if "IPBase" in environment: LogFactory().log(f"Base IP for resources: {environment['IPBase']}") LogFactory().log(f"Cluster starts at boot: {environment['at-boot']}") environment.dump() rc = environment.run(scenario, iters) sys.exit(rc) -# vim: set filetype=python expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=120: +# vim: set filetype=python: diff --git a/cts/cts-regression.in b/cts/cts-regression.in index 3f1119d3ed..1e49f4648d 100644 --- a/cts/cts-regression.in +++ b/cts/cts-regression.in @@ -1,300 +1,302 @@ #!@PYTHON@ """Convenience wrapper for running Pacemaker regression tests. Usage: cts-regression [-h] [-V] [-v] [COMPONENT ...] 
""" # pylint doesn't like the module name "cts-regression" which is an invalid complaint for this file # but probably something we want to continue warning about elsewhere # pylint: disable=invalid-name # pacemaker imports need to come after we modify sys.path, which pylint will complain about. # pylint: disable=wrong-import-position __copyright__ = 'Copyright 2012-2025 the Pacemaker project contributors' __license__ = 'GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY' import argparse import os import subprocess import sys import textwrap # These imports allow running from a source checkout after running `make`. # Note that while this doesn't necessarily mean it will successfully run tests, # but being able to see --help output can be useful. if os.path.exists("@abs_top_srcdir@/python"): sys.path.insert(0, "@abs_top_srcdir@/python") # pylint: disable=comparison-of-constants,comparison-with-itself,condition-evals-to-constant if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@": sys.path.insert(0, "@abs_top_builddir@/python") from pacemaker.buildoptions import BuildOptions from pacemaker.exitstatus import ExitStatus class Component(): """A class for running regression tests on a component. "Component" refers to a Pacemaker component, such as the scheduler. :attribute name: The name of the component. :type name: str :attribute description: The description of the component. :type description: str :attribute requires_root: Whether the component's tests must be run as root. :type requires_root: bool :attribute supports_valgrind: Whether the component's tests support running under valgrind. :type supports_valgrind: bool :attribute cmd: The command to run the component's tests, along with any required options. :type cmd: list[str] :method run([verbose=False], [valgrind=False]): Run the component's regression tests and return the result. 
""" def __init__(self, name, description, test_home, requires_root=False, supports_valgrind=False): """Create a new :class:`Component` instance. :param name: The name of the component. :type name: str :param description: The description of the component. :type description: str :param test_home: The directory where the component's tests reside. :type test_home: str :param requires_root: Whether the component's tests must be run as root. :type requires_root: bool :param supports_valgrind: Whether the component's tests support running under valgrind. :type supports_valgrind: bool """ self.name = name self.description = description self.requires_root = requires_root self.supports_valgrind = supports_valgrind if self.name == 'pacemaker_remote': self.cmd = [os.path.join(test_home, 'cts-exec'), '-R'] else: self.cmd = [os.path.join(test_home, f"cts-{self.name}")] def run(self, verbose=False, valgrind=False): """Run the component's regression tests and return the result. :param verbose: Whether to increase test output verbosity. :type verbose: bool :param valgrind: Whether to run the test under valgrind. :type valgrind: bool :return: The exit code from the component's test suite. :rtype: :class:`ExitStatus` """ print(f"Executing the {self.name} regression tests") print('=' * 60) cmd = self.cmd if self.requires_root and os.geteuid() != 0: print('Enter the sudo password if prompted') cmd = ['sudo'] + self.cmd if verbose: cmd.append('--verbose') if self.supports_valgrind and valgrind: cmd.append('--valgrind') try: rc = ExitStatus(subprocess.call(cmd)) except OSError as err: error_print(f"Failed to execute {self.name} tests: {err}") rc = ExitStatus.NOT_INSTALLED print('=' * 60 + '\n\n') return rc class ComponentsArgAction(argparse.Action): """A class to handle `components` arguments. This class handles special cases and cleans up the `components` list. Specifically, it does the following: * Enforce a default value of ['cli', 'scheduler']. 
* Replace the 'all' alias with the components that it represents. * Get rid of duplicates. The main motivation is that when the `choices` argument of :meth:`parser.add_argument()` is specified, the `default` argument must contain exactly one value (not `None` and not a list). We want our default to be a list of components, namely `cli` and `scheduler`. """ def __call__(self, parser, namespace, values, option_string=None): """Process `components` arguments.""" all_components = ['attrd', 'cli', 'exec', 'fencing', 'scheduler'] default_components = ['cli', 'scheduler'] if not values: setattr(namespace, self.dest, default_components) return # If no argument is specified, the default gets passed as a # string 'default' instead of as a list ['default']. Probably # a bug in argparse. The below gives us a list. if not isinstance(values, list): values = [values] components = set(values) # If 'all', is found, replace it with the components it represents. try: components.remove('all') components.update(set(all_components)) except KeyError: pass # Same for 'default' try: components.remove('default') components.update(set(default_components)) except KeyError: pass setattr(namespace, self.dest, sorted(list(components))) def error_print(msg): """Print an error message. :param msg: Message to print. :type msg: str """ print(f" * ERROR: {msg}") def run_components(components, verbose=False, valgrind=False): """Run components' regression tests and report results for each. :param components: A list of names of components for which to run tests. :type components: list[:class:`Component`] :return: :attr:`ExitStatus.OK` if all tests were successful, :attr:`ExitStatus.ERROR` otherwise. 
:rtype: :class:`ExitStatus` """ failed = [] for comp in components: rc = comp.run(verbose, valgrind) if rc != ExitStatus.OK: error_print(f"{comp.name} regression tests failed ({rc})") failed.append(comp.name) if failed: print('Failed regression tests:', end='') for comp in failed: print(f" {comp}", end='') print() return ExitStatus.ERROR return ExitStatus.OK def main(): """Run Pacemaker regression tests as specified by arguments.""" try: test_home = os.path.dirname(os.readlink(sys.argv[0])) except OSError: test_home = os.path.dirname(sys.argv[0]) # Available components components = { 'attrd': Component( 'attrd', 'Attribute manager', test_home, requires_root=True, supports_valgrind=False, ), 'cli': Component( 'cli', 'Command-line tools', test_home, requires_root=False, supports_valgrind=True, ), 'exec': Component( 'exec', 'Local resource agent executor', test_home, requires_root=True, supports_valgrind=False, ), 'fencing': Component( 'fencing', 'Fencer', test_home, requires_root=True, supports_valgrind=False, ), 'scheduler': Component( 'scheduler', 'Action scheduler', test_home, requires_root=False, supports_valgrind=True, ), } if BuildOptions.REMOTE_ENABLED: components['pacemaker_remote'] = Component( 'pacemaker_remote', 'Resource agent executor in remote mode', test_home, requires_root=True, supports_valgrind=False, ) # Build up program description description = textwrap.dedent('''\ Run Pacemaker regression tests. 
Available components (default components are 'cli scheduler'): ''') for name, comp in sorted(components.items()): description += f"\n {name:<20} {comp.description}" description += f'\n {"all":<20} Synonym for "cli exec fencing scheduler"' # Parse the arguments parser = argparse.ArgumentParser( description=description, formatter_class=argparse.RawDescriptionHelpFormatter, ) choices = sorted(components.keys()) + ['all', 'default'] parser.add_argument('-V', '--verbose', action='store_true', help='Increase test verbosity') parser.add_argument('-v', '--valgrind', action='store_true', help='Run test commands under valgrind') parser.add_argument('components', nargs='*', choices=choices, default='default', action=ComponentsArgAction, metavar='COMPONENT', help="One of the components to test, or 'all'") args = parser.parse_args() # Run the tests selected = [components[x] for x in args.components] rc = run_components(selected, args.verbose, args.valgrind) sys.exit(rc) if __name__ == '__main__': main() + +# vim: set filetype=python: diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in index 86dba9edf7..0f4f8e4e31 100644 --- a/cts/cts-scheduler.in +++ b/cts/cts-scheduler.in @@ -1,1737 +1,1737 @@ #!@PYTHON@ """Regression tests for Pacemaker's scheduler.""" # pylint doesn't like the module name "cts-scheduler" which is an invalid complaint for this file # but probably something we want to continue warning about elsewhere # pylint: disable=invalid-name # pacemaker imports need to come after we modify sys.path, which pylint will complain about. # pylint: disable=wrong-import-position __copyright__ = "Copyright 2004-2025 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import re import sys import stat import shlex import shutil import argparse import subprocess import platform import tempfile # These imports allow running from a source checkout after running `make`. 
# Note that while this doesn't necessarily mean it will successfully run tests, # but being able to see --help output can be useful. if os.path.exists("@abs_top_srcdir@/python"): sys.path.insert(0, "@abs_top_srcdir@/python") # pylint: disable=comparison-of-constants,comparison-with-itself,condition-evals-to-constant if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@": sys.path.insert(0, "@abs_top_builddir@/python") from pacemaker.buildoptions import BuildOptions from pacemaker.exitstatus import ExitStatus class SchedulerTest: """A single scheduler test.""" def __init__(self, name, desc, args=None): """ Create a new SchedulerTest instance. Arguments: name -- A unique name for this test. desc -- A meaningful description for the test. args -- Additional arguments to pass when running this test """ self.name = name self.desc = desc if args is None: self.args = [] else: self.args = args class SchedulerTestGroup: """Collection of scheduler regression tests.""" def __init__(self, tests): """ Create a new SchedulerTestGroup instance. Arguments: tests -- A list of SchedulerTest instances to be executed as part of this group. """ self.tests = tests # Each entry in TESTS is a group of tests, where each test consists of a # test base name, test description, and additional test arguments. # Test groups will be separated by newlines in output. 
TESTS = [ SchedulerTestGroup([ SchedulerTest("simple1", "Offline"), SchedulerTest("simple2", "Start"), SchedulerTest("simple3", "Start 2"), SchedulerTest("simple4", "Start Failed"), SchedulerTest("simple6", "Stop Start"), SchedulerTest("simple7", "Shutdown"), SchedulerTest("simple11", "Priority (ne)"), SchedulerTest("simple12", "Priority (eq)"), SchedulerTest("simple8", "Stickiness"), ]), SchedulerTestGroup([ SchedulerTest("group1", "Group"), SchedulerTest("group2", "Group + Native"), SchedulerTest("group3", "Group + Group"), SchedulerTest("group4", "Group + Native (nothing)"), SchedulerTest("group5", "Group + Native (move)"), SchedulerTest("group6", "Group + Group (move)"), SchedulerTest("group7", "Group colocation"), SchedulerTest("group13", "Group colocation (cant run)"), SchedulerTest("group8", "Group anti-colocation"), SchedulerTest("group9", "Group recovery"), SchedulerTest("group10", "Group partial recovery"), SchedulerTest("group11", "Group target_role"), SchedulerTest("group14", "Group stop (graph terminated)"), SchedulerTest("group15", "Negative group colocation"), SchedulerTest("bug-1573", "Partial stop of a group with two children"), SchedulerTest("bug-1718", "Mandatory group ordering - Stop group_FUN"), SchedulerTest("failed-sticky-group", "Move group on last member failure despite infinite stickiness"), SchedulerTest("failed-sticky-anticolocated-group", "Move group on last member failure despite infinite stickiness and optional anti-colocation"), SchedulerTest("bug-lf-2619", "Move group on clone failure"), SchedulerTest("group-fail", "Ensure stop order is preserved for partially active groups"), SchedulerTest("group-unmanaged", "No need to restart r115 because r114 is unmanaged"), SchedulerTest("group-unmanaged-stopped", "Make sure r115 is stopped when r114 fails"), SchedulerTest("partial-unmanaged-group", "New member in partially unmanaged group"), SchedulerTest("group-dependents", "Account for the location preferences of things colocated with a 
group"), SchedulerTest("group-stop-ordering", "Ensure blocked group member stop does not force other member stops"), SchedulerTest("colocate-unmanaged-group", "Respect mandatory colocations even if earlier group member is unmanaged"), SchedulerTest("coloc-with-inner-group-member", "Consider explicit colocations with inner group members"), SchedulerTest("banned-group-inner-constraints", "Group banned from current node, inner member constrained"), ]), SchedulerTestGroup([ SchedulerTest("rsc_dep1", "Must not"), SchedulerTest("rsc_dep3", "Must"), SchedulerTest("rsc_dep5", "Must not 3"), SchedulerTest("rsc_dep7", "Must 3"), SchedulerTest("rsc_dep10", "Must (but cant)"), SchedulerTest("rsc_dep2", "Must (running)"), SchedulerTest("rsc_dep8", "Must (running : alt)"), SchedulerTest("rsc_dep4", "Must (running + move)"), SchedulerTest("asymmetric", "Asymmetric - require explicit location constraints"), ]), SchedulerTestGroup([ SchedulerTest("orphan-0", "Orphan ignore"), SchedulerTest("orphan-1", "Orphan stop"), SchedulerTest("orphan-2", "Orphan stop, remove failcount"), ]), SchedulerTestGroup([ SchedulerTest("params-0", "Params: No change"), SchedulerTest("params-1", "Params: Changed"), SchedulerTest("params-2", "Params: Resource definition"), SchedulerTest("params-3", "Params: Restart instead of reload if start pending"), SchedulerTest("params-4", "Params: Reload"), SchedulerTest("params-5", "Params: Restart based on probe digest"), SchedulerTest("novell-251689", "Resource definition change + target_role=stopped"), SchedulerTest("bug-lf-2106", "Restart all anonymous clone instances after config change"), SchedulerTest("params-6", "Params: Detect reload in previously migrated resource"), SchedulerTest("nvpair-id-ref", "Support id-ref in nvpair with optional name"), SchedulerTest("not-reschedule-unneeded-monitor", "Do not reschedule unneeded monitors while resource definitions have changed"), SchedulerTest("reload-becomes-restart", "Cancel reload if restart becomes required"), 
SchedulerTest("restart-with-extra-op-params", "Restart if with extra operation parameters upon changes of any"), ]), SchedulerTestGroup([ SchedulerTest("target-0", "Target Role : baseline"), SchedulerTest("target-1", "Target Role : promoted"), SchedulerTest("target-2", "Target Role : invalid"), ]), SchedulerTestGroup([ SchedulerTest("base-score", "Set a node's default score for all nodes"), ]), SchedulerTestGroup([ SchedulerTest("date-1", "Dates", ["-t", "2005-020"]), SchedulerTest("date-2", "Date Spec - Pass", ["-t", "2005-020T12:30"]), SchedulerTest("date-3", "Date Spec - Fail", ["-t", "2005-020T11:30"]), SchedulerTest("origin", "Timing of recurring operations", ["-t", "2014-05-07 00:28:00"]), SchedulerTest("probe-0", "Probe (anon clone)"), SchedulerTest("probe-1", "Pending Probe"), SchedulerTest("probe-2", "Correctly re-probe cloned groups"), SchedulerTest("probe-3", "Probe (pending node)"), SchedulerTest("probe-4", "Probe (pending node + stopped resource)"), SchedulerTest("probe-pending-node", "Probe (pending node + unmanaged resource)"), SchedulerTest("failed-probe-primitive", "Maskable vs. unmaskable probe failures on primitive resources"), SchedulerTest("failed-probe-clone", "Maskable vs. 
unmaskable probe failures on cloned resources"), SchedulerTest("expired-failed-probe-primitive", "Maskable, expired probe failure on primitive resources"), SchedulerTest("standby", "Standby"), SchedulerTest("comments", "Comments"), ]), SchedulerTestGroup([ SchedulerTest("one-or-more-0", "Everything starts"), SchedulerTest("one-or-more-1", "Nothing starts because of A"), SchedulerTest("one-or-more-2", "D can start because of C"), SchedulerTest("one-or-more-3", "D cannot start because of B and C"), SchedulerTest("one-or-more-4", "D cannot start because of target-role"), SchedulerTest("one-or-more-5", "Start A and F even though C and D are stopped"), SchedulerTest("one-or-more-6", "Leave A running even though B is stopped"), SchedulerTest("one-or-more-7", "Leave A running even though C is stopped"), SchedulerTest("bug-5140-require-all-false", "Allow basegrp:0 to stop"), SchedulerTest("clone-require-all-1", "clone B starts node 3 and 4"), SchedulerTest("clone-require-all-2", "clone B remains stopped everywhere"), SchedulerTest("clone-require-all-3", "clone B stops everywhere because A stops everywhere"), SchedulerTest("clone-require-all-4", "clone B remains on node 3 and 4 with only one instance of A remaining"), SchedulerTest("clone-require-all-5", "clone B starts on node 1 3 and 4"), SchedulerTest("clone-require-all-6", "clone B remains active after shutting down instances of A"), SchedulerTest("clone-require-all-7", "clone A and B both start at the same time. 
all instances of A start before B"), SchedulerTest("clone-require-all-no-interleave-1", "C starts everywhere after A and B"), SchedulerTest("clone-require-all-no-interleave-2", "C starts on nodes 1, 2, and 4 with only one active instance of B"), SchedulerTest("clone-require-all-no-interleave-3", "C remains active when instance of B is stopped on one node and started on another"), SchedulerTest("one-or-more-unrunnable-instances", "Avoid dependencies on instances that won't ever be started"), ]), SchedulerTestGroup([ SchedulerTest("location-date-rules-1", "Use location constraints with ineffective date-based rules"), SchedulerTest("location-date-rules-2", "Use location constraints with effective date-based rules"), SchedulerTest("nvpair-date-rules-1", "Use nvpair blocks with a variety of date-based rules"), SchedulerTest("value-source", "Use location constraints with node attribute expressions using value-source"), SchedulerTest("rule-dbl-as-auto-number-match", "Floating-point rule values default to number comparison: match"), SchedulerTest("rule-dbl-as-auto-number-no-match", "Floating-point rule values default to number comparison: no match"), SchedulerTest("rule-dbl-as-integer-match", "Floating-point rule values set to integer comparison: match"), SchedulerTest("rule-dbl-as-integer-no-match", "Floating-point rule values set to integer comparison: no match"), SchedulerTest("rule-dbl-as-number-match", "Floating-point rule values set to number comparison: match"), SchedulerTest("rule-dbl-as-number-no-match", "Floating-point rule values set to number comparison: no match"), SchedulerTest("rule-dbl-parse-fail-default-str-match", "Floating-point rule values fail to parse, default to string comparison: match"), SchedulerTest("rule-dbl-parse-fail-default-str-no-match", "Floating-point rule values fail to parse, default to string comparison: no match"), SchedulerTest("rule-int-as-auto-integer-match", "Integer rule values default to integer comparison: match"), 
SchedulerTest("rule-int-as-auto-integer-no-match", "Integer rule values default to integer comparison: no match"), SchedulerTest("rule-int-as-integer-match", "Integer rule values set to integer comparison: match"), SchedulerTest("rule-int-as-integer-no-match", "Integer rule values set to integer comparison: no match"), SchedulerTest("rule-int-as-number-match", "Integer rule values set to number comparison: match"), SchedulerTest("rule-int-as-number-no-match", "Integer rule values set to number comparison: no match"), SchedulerTest("rule-int-parse-fail-default-str-match", "Integer rule values fail to parse, default to string comparison: match"), SchedulerTest("rule-int-parse-fail-default-str-no-match", "Integer rule values fail to parse, default to string comparison: no match"), ]), SchedulerTestGroup([ SchedulerTest("order1", "Order start 1"), SchedulerTest("order2", "Order start 2"), SchedulerTest("order3", "Order stop"), SchedulerTest("order4", "Order (multiple)"), SchedulerTest("order5", "Order (move)"), SchedulerTest("order6", "Order (move w/ restart)"), SchedulerTest("order7", "Order (mandatory)"), SchedulerTest("order-optional", "Order (score=0)"), SchedulerTest("order-required", "Order (score=INFINITY)"), SchedulerTest("bug-lf-2171", "Prevent group start when clone is stopped"), SchedulerTest("order-clone", "Clone ordering should be able to prevent startup of dependent clones"), SchedulerTest("order-sets", "Ordering for resource sets"), SchedulerTest("order-serialize", "Serialize resources without inhibiting migration"), SchedulerTest("order-serialize-set", "Serialize a set of resources without inhibiting migration"), SchedulerTest("clone-order-primitive", "Order clone start after a primitive"), SchedulerTest("clone-order-16instances", "Verify ordering of 16 cloned resources"), SchedulerTest("order-optional-keyword", "Order (optional keyword)"), SchedulerTest("order-mandatory", "Order (mandatory keyword)"), SchedulerTest("bug-lf-2493", "Don't imply 
colocation requirements when applying ordering constraints with clones"), SchedulerTest("ordered-set-basic-startup", "Constraint set with default order settings"), SchedulerTest("ordered-set-natural", "Allow natural set ordering"), SchedulerTest("order-wrong-kind", "Order (error)"), ]), SchedulerTestGroup([ SchedulerTest("coloc-loop", "Colocation - loop"), SchedulerTest("coloc-many-one", "Colocation - many-to-one"), SchedulerTest("coloc-list", "Colocation - many-to-one with list"), SchedulerTest("coloc-group", "Colocation - groups"), SchedulerTest("coloc-unpromoted-anti", "Anti-colocation with unpromoted shouldn't prevent promoted colocation"), SchedulerTest("coloc-attr", "Colocation based on node attributes"), SchedulerTest("coloc-negative-group", "Negative colocation with a group"), SchedulerTest("coloc-intra-set", "Intra-set colocation"), SchedulerTest("bug-lf-2435", "Colocation sets with a negative score"), SchedulerTest("coloc-clone-stays-active", "Ensure clones don't get stopped/demoted because a dependent must stop"), SchedulerTest("coloc_fp_logic", "Verify floating point calculations in colocation are working"), SchedulerTest("colo_promoted_w_native", "cl#5070 - Verify promotion order is affected when colocating promoted with primitive"), SchedulerTest("colo_unpromoted_w_native", "cl#5070 - Verify promotion order is affected when colocating unpromoted with primitive"), SchedulerTest("anti-colocation-order", "cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node"), SchedulerTest("anti-colocation-promoted", "Organize order of actions for promoted resources in anti-colocations"), SchedulerTest("anti-colocation-unpromoted", "Organize order of actions for unpromoted resources in anti-colocations"), SchedulerTest("group-anticolocation", "Group with failed last member anti-colocated with another group"), SchedulerTest("group-anticolocation-2", "Group with failed last member anti-colocated with another sticky group"), 
SchedulerTest("group-anticolocation-3", "Group with failed last member mandatorily anti-colocated with another group"), SchedulerTest("group-anticolocation-4", "Group with failed last member anti-colocated without influence with another group"), SchedulerTest("group-anticolocation-5", "Group with failed last member anti-colocated with another group (third node allowed)"), SchedulerTest("group-colocation-failure", "Group with sole member failed, colocated with another group"), SchedulerTest("enforce-colo1", "Always enforce B with A INFINITY"), SchedulerTest("complex_enforce_colo", "Always enforce B with A INFINITY. (make sure heat-engine stops)"), SchedulerTest("coloc-dependee-should-stay", "Stickiness outweighs group colocation"), SchedulerTest("coloc-dependee-should-move", "Group colocation outweighs stickiness"), SchedulerTest("colocation-influence", "Respect colocation influence"), SchedulerTest("colocation-priority-group", "Apply group colocations in order of primary priority"), SchedulerTest("colocation-vs-stickiness", "Group stickiness outweighs anti-colocation score"), SchedulerTest("promoted-with-blocked", "Promoted role colocated with a resource with blocked start"), SchedulerTest("primitive-with-group-with-clone", "Consider group dependent when colocating with clone"), SchedulerTest("primitive-with-group-with-promoted", "Consider group dependent when colocating with promoted role"), SchedulerTest("primitive-with-unrunnable-group", "Block primitive colocated with group that can't start"), SchedulerTest("coloc-cloned-group-promoted-dependent1", "Cloned group promoted role with primitive (mandatory)"), SchedulerTest("coloc-cloned-group-promoted-dependent2", "Cloned group promoted role with primitive (optional)"), SchedulerTest("coloc-optional-promoted-dependent-moves-1", "Colocation score less than promotion score difference: move"), SchedulerTest("coloc-optional-promoted-dependent-moves-2", "Colocation score greater than promotion score difference: move"), 
SchedulerTest("coloc-optional-promoted-dependent-stays-1", "Colocation score greater than promotion score difference: stay"), SchedulerTest("coloc-optional-promoted-dependent-stays-2", "Colocation score less than promotion score difference: stay"), ]), SchedulerTestGroup([ SchedulerTest("rsc-sets-seq-true", "Resource Sets - sequential=false"), SchedulerTest("rsc-sets-seq-false", "Resource Sets - sequential=true"), SchedulerTest("rsc-sets-clone", "Resource Sets - Clone"), SchedulerTest("rsc-sets-promoted", "Resource Sets - Promoted"), SchedulerTest("rsc-sets-clone-1", "Resource Sets - Clone (lf#2404)"), ]), SchedulerTestGroup([ SchedulerTest("attrs1", "string: eq (and)"), SchedulerTest("attrs2", "string: lt / gt (and)"), SchedulerTest("attrs3", "string: ne (or)"), SchedulerTest("attrs4", "string: exists"), SchedulerTest("attrs5", "string: not_exists"), SchedulerTest("attrs6", "is_dc: true"), SchedulerTest("attrs7", "is_dc: false"), SchedulerTest("attrs8", "score_attribute"), SchedulerTest("per-node-attrs", "Per node resource parameters"), ]), SchedulerTestGroup([ SchedulerTest("mon-rsc-1", "Schedule Monitor - start"), SchedulerTest("mon-rsc-2", "Schedule Monitor - move"), SchedulerTest("mon-rsc-3", "Schedule Monitor - pending start"), SchedulerTest("mon-rsc-4", "Schedule Monitor - move/pending start"), ]), SchedulerTestGroup([ SchedulerTest("rec-rsc-0", "Resource Recover - no start"), SchedulerTest("rec-rsc-1", "Resource Recover - start"), SchedulerTest("rec-rsc-2", "Resource Recover - monitor"), SchedulerTest("rec-rsc-3", "Resource Recover - stop - ignore"), SchedulerTest("rec-rsc-4", "Resource Recover - stop - block"), SchedulerTest("rec-rsc-5", "Resource Recover - stop - fence"), SchedulerTest("rec-rsc-6", "Resource Recover - multiple - restart"), SchedulerTest("rec-rsc-7", "Resource Recover - multiple - stop"), SchedulerTest("rec-rsc-8", "Resource Recover - multiple - block"), SchedulerTest("rec-rsc-9", "Resource Recover - group/group"), 
SchedulerTest("stop-unexpected", "Recover multiply active group with stop_unexpected"), SchedulerTest("stop-unexpected-2", "Resource multiply active primitve with stop_unexpected"), SchedulerTest("monitor-recovery", "on-fail=block + resource recovery detected by recurring monitor"), SchedulerTest("stop-failure-no-quorum", "Stop failure without quorum"), SchedulerTest("stop-failure-no-fencing", "Stop failure without fencing available"), SchedulerTest("stop-failure-with-fencing", "Stop failure with fencing available"), SchedulerTest("multiple-active-block-group", "Support of multiple-active=block for resource groups"), SchedulerTest("multiple-monitor-one-failed", "Consider resource failed if any of the configured monitor operations failed"), ]), SchedulerTestGroup([ SchedulerTest("quorum-1", "No quorum - ignore"), SchedulerTest("quorum-2", "No quorum - freeze"), SchedulerTest("quorum-3", "No quorum - stop"), SchedulerTest("quorum-4", "No quorum - start anyway"), SchedulerTest("quorum-5", "No quorum - start anyway (group)"), SchedulerTest("quorum-6", "No quorum - start anyway (clone)"), SchedulerTest("bug-cl-5212", "No promotion with no-quorum-policy=freeze"), SchedulerTest("suicide-needed-inquorate", "no-quorum-policy=suicide: suicide necessary"), SchedulerTest("suicide-not-needed-initial-quorum", "no-quorum-policy=suicide: suicide not necessary at initial quorum"), SchedulerTest("suicide-not-needed-never-quorate", "no-quorum-policy=suicide: suicide not necessary if never quorate"), SchedulerTest("suicide-not-needed-quorate", "no-quorum-policy=suicide: suicide necessary if quorate"), ]), SchedulerTestGroup([ SchedulerTest("rec-node-1", "Node Recover - Startup - no fence"), SchedulerTest("rec-node-2", "Node Recover - Startup - fence"), SchedulerTest("rec-node-3", "Node Recover - HA down - no fence"), SchedulerTest("rec-node-4", "Node Recover - HA down - fence"), SchedulerTest("rec-node-5", "Node Recover - CRM down - no fence"), SchedulerTest("rec-node-6", "Node 
Recover - CRM down - fence"), SchedulerTest("rec-node-7", "Node Recover - no quorum - ignore"), SchedulerTest("rec-node-8", "Node Recover - no quorum - freeze"), SchedulerTest("rec-node-9", "Node Recover - no quorum - stop"), SchedulerTest("rec-node-10", "Node Recover - no quorum - stop w/fence"), SchedulerTest("rec-node-11", "Node Recover - CRM down w/ group - fence"), SchedulerTest("rec-node-12", "Node Recover - nothing active - fence"), SchedulerTest("rec-node-13", "Node Recover - failed resource + shutdown - fence"), SchedulerTest("rec-node-15", "Node Recover - unknown lrm section"), SchedulerTest("rec-node-14", "Serialize all stonith's"), ]), SchedulerTestGroup([ SchedulerTest("multi1", "Multiple Active (stop/start)"), ]), SchedulerTestGroup([ SchedulerTest("migrate-begin", "Normal migration"), SchedulerTest("migrate-success", "Completed migration"), SchedulerTest("migrate-partial-1", "Completed migration, missing stop on source"), SchedulerTest("migrate-partial-2", "Successful migrate_to only"), SchedulerTest("migrate-partial-3", "Successful migrate_to only, target down"), SchedulerTest("migrate-partial-4", "Migrate from the correct host after migrate_to+migrate_from"), SchedulerTest("bug-5186-partial-migrate", "Handle partial migration when src node loses membership"), SchedulerTest("migrate-fail-2", "Failed migrate_from"), SchedulerTest("migrate-fail-3", "Failed migrate_from + stop on source"), SchedulerTest("migrate-fail-4", "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target"), SchedulerTest("migrate-fail-5", "Failed migrate_from + stop on source and target"), SchedulerTest("migrate-fail-6", "Failed migrate_to"), SchedulerTest("migrate-fail-7", "Failed migrate_to + stop on source"), SchedulerTest("migrate-fail-8", "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target"), SchedulerTest("migrate-fail-9", "Failed migrate_to + stop on source and target"), SchedulerTest("migration-ping-pong", "Old 
migrate_to failure + successful migrate_from on same node"), SchedulerTest("migrate-stop", "Migration in a stopping stack"), SchedulerTest("migrate-start", "Migration in a starting stack"), SchedulerTest("migrate-stop_start", "Migration in a restarting stack"), SchedulerTest("migrate-stop-complex", "Migration in a complex stopping stack"), SchedulerTest("migrate-start-complex", "Migration in a complex starting stack"), SchedulerTest("migrate-stop-start-complex", "Migration in a complex moving stack"), SchedulerTest("migrate-shutdown", "Order the post-migration 'stop' before node shutdown"), SchedulerTest("migrate-1", "Migrate (migrate)"), SchedulerTest("migrate-2", "Migrate (stable)"), SchedulerTest("migrate-3", "Migrate (failed migrate_to)"), SchedulerTest("migrate-4", "Migrate (failed migrate_from)"), SchedulerTest("novell-252693", "Migration in a stopping stack"), SchedulerTest("novell-252693-2", "Migration in a starting stack"), SchedulerTest("novell-252693-3", "Non-Migration in a starting and stopping stack"), SchedulerTest("bug-1820", "Migration in a group"), SchedulerTest("bug-1820-1", "Non-migration in a group"), SchedulerTest("migrate-5", "Primitive migration with a clone"), SchedulerTest("migrate-fencing", "Migration after Fencing"), SchedulerTest("migrate-both-vms", "Migrate two VMs that have no colocation"), SchedulerTest("migration-behind-migrating-remote", "Migrate resource behind migrating remote connection"), SchedulerTest("1-a-then-bm-move-b", "Advanced migrate logic. A then B. migrate B"), SchedulerTest("2-am-then-b-move-a", "Advanced migrate logic, A then B, migrate A without stopping B"), SchedulerTest("3-am-then-bm-both-migrate", "Advanced migrate logic. A then B. migrate both"), SchedulerTest("4-am-then-bm-b-not-migratable", "Advanced migrate logic, A then B, B not migratable"), SchedulerTest("5-am-then-bm-a-not-migratable", "Advanced migrate logic. A then B. 
move both, a not migratable"), SchedulerTest("6-migrate-group", "Advanced migrate logic, migrate a group"), SchedulerTest("7-migrate-group-one-unmigratable", "Advanced migrate logic, migrate group mixed with allow-migrate true/false"), SchedulerTest("8-am-then-bm-a-migrating-b-stopping", "Advanced migrate logic, A then B, A migrating, B stopping"), SchedulerTest("9-am-then-bm-b-migrating-a-stopping", "Advanced migrate logic, A then B, B migrate, A stopping"), SchedulerTest("10-a-then-bm-b-move-a-clone", "Advanced migrate logic, A clone then B, migrate B while stopping A"), SchedulerTest("11-a-then-bm-b-move-a-clone-starting", "Advanced migrate logic, A clone then B, B moving while A is start/stopping"), SchedulerTest("a-promote-then-b-migrate", "A promote then B start. migrate B"), SchedulerTest("a-demote-then-b-migrate", "A demote then B stop. migrate B"), SchedulerTest("probe-target-of-failed-migrate_to-1", "Failed migrate_to, target rejoins"), SchedulerTest("probe-target-of-failed-migrate_to-2", "Failed migrate_to, target rejoined and probed"), SchedulerTest("partial-live-migration-multiple-active", "Prevent running on multiple nodes due to partial live migration"), SchedulerTest("migration-intermediary-cleaned", "Probe live-migration intermediary with no history"), SchedulerTest("bug-lf-2422", "Dependency on partially active group - stop ocfs:*"), ]), SchedulerTestGroup([ SchedulerTest("clone-anon-probe-1", "Probe the correct (anonymous) clone instance for each node"), SchedulerTest("clone-anon-probe-2", "Avoid needless re-probing of anonymous clones"), SchedulerTest("clone-anon-failcount", "Merge failcounts for anonymous clones"), SchedulerTest("force-anon-clone-max", "Update clone-max properly when forcing a clone to be anonymous"), SchedulerTest("anon-instance-pending", "Assign anonymous clone instance numbers properly when action pending"), SchedulerTest("inc0", "Incarnation start"), SchedulerTest("inc1", "Incarnation start order"), SchedulerTest("inc2", 
"Incarnation silent restart, stop, move"), SchedulerTest("inc3", "Inter-incarnation ordering, silent restart, stop, move"), SchedulerTest("inc4", "Inter-incarnation ordering, silent restart, stop, move (ordered)"), SchedulerTest("inc5", "Inter-incarnation ordering, silent restart, stop, move (restart 1)"), SchedulerTest("inc6", "Inter-incarnation ordering, silent restart, stop, move (restart 2)"), SchedulerTest("inc7", "Clone colocation"), SchedulerTest("inc8", "Clone anti-colocation"), SchedulerTest("inc9", "Non-unique clone"), SchedulerTest("inc10", "Non-unique clone (stop)"), SchedulerTest("inc11", "Primitive colocation with clones"), SchedulerTest("inc12", "Clone shutdown"), SchedulerTest("cloned-group", "Make sure only the correct number of cloned groups are started"), SchedulerTest("cloned-group-stop", "Ensure stopping qpidd also stops glance and cinder"), SchedulerTest("clone-no-shuffle", "Don't prioritize allocation of instances that must be moved"), SchedulerTest("clone-recover-no-shuffle-1", "Don't shuffle instances when starting a new primitive instance"), SchedulerTest("clone-recover-no-shuffle-2", "Don't shuffle instances when starting a new group instance"), SchedulerTest("clone-recover-no-shuffle-3", "Don't shuffle instances when starting a new bundle instance"), SchedulerTest("clone-recover-no-shuffle-4", "Don't shuffle instances when starting a new primitive instance with location preference"), SchedulerTest("clone-recover-no-shuffle-5", "Don't shuffle instances when starting a new group instance with location preference"), SchedulerTest("clone-recover-no-shuffle-6", "Don't shuffle instances when starting a new bundle instance with location preference"), SchedulerTest("clone-recover-no-shuffle-7", "Don't shuffle instances when starting a new primitive instance that will be promoted"), SchedulerTest("clone-recover-no-shuffle-8", "Don't shuffle instances when starting a new group instance that will be promoted"), 
SchedulerTest("clone-recover-no-shuffle-9", "Don't shuffle instances when starting a new bundle instance that will be promoted"), SchedulerTest("clone-recover-no-shuffle-10", "Don't shuffle instances when starting a new primitive instance that won't be promoted"), SchedulerTest("clone-recover-no-shuffle-11", "Don't shuffle instances when starting a new group instance that won't be promoted"), SchedulerTest("clone-recover-no-shuffle-12", "Don't shuffle instances when starting a new bundle instance that won't be promoted"), SchedulerTest("clone-max-zero", "Orphan processing with clone-max=0"), SchedulerTest("clone-anon-dup", "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node"), SchedulerTest("bug-lf-2160", "Don't shuffle clones due to colocation"), SchedulerTest("bug-lf-2213", "clone-node-max enforcement for cloned groups"), SchedulerTest("bug-lf-2153", "Clone ordering constraints"), SchedulerTest("bug-lf-2361", "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable"), SchedulerTest("bug-lf-2317", "Avoid needless restart of primitive depending on a clone"), SchedulerTest("bug-lf-2453", "Enforce mandatory clone ordering without colocation"), SchedulerTest("bug-lf-2508", "Correctly reconstruct the status of anonymous cloned groups"), SchedulerTest("bug-lf-2544", "Balanced clone placement"), SchedulerTest("bug-lf-2445", "Redistribute clones with node-max > 1 and stickiness = 0"), SchedulerTest("bug-lf-2574", "Avoid clone shuffle"), SchedulerTest("bug-lf-2581", "Avoid group restart due to unrelated clone (re)start"), SchedulerTest("bug-cl-5168", "Don't shuffle clones"), SchedulerTest("bug-cl-5170", "Prevent clone from starting with on-fail=block"), SchedulerTest("clone-fail-block-colocation", "Move colocated group when failed clone has on-fail=block"), SchedulerTest("clone-interleave-1", "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)"), 
SchedulerTest("clone-interleave-2", "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)"), SchedulerTest("clone-interleave-3", "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)"), SchedulerTest("rebalance-unique-clones", "Rebalance unique clone instances with no stickiness"), SchedulerTest("clone-requires-quorum-recovery", "Clone with requires=quorum on failed node needing recovery"), SchedulerTest("clone-requires-quorum", "Clone with requires=quorum with presumed-inactive instance on failed node"), ]), SchedulerTestGroup([ SchedulerTest("cloned_start_one", "order first clone then clone... first clone_min=2"), SchedulerTest("cloned_start_two", "order first clone then clone... first clone_min=2"), SchedulerTest("cloned_stop_one", "order first clone then clone... first clone_min=2"), SchedulerTest("cloned_stop_two", "order first clone then clone... first clone_min=2"), SchedulerTest("clone_min_interleave_start_one", "order first clone then clone... first clone_min=2 and then has interleave=true"), SchedulerTest("clone_min_interleave_start_two", "order first clone then clone... first clone_min=2 and then has interleave=true"), SchedulerTest("clone_min_interleave_stop_one", "order first clone then clone... first clone_min=2 and then has interleave=true"), SchedulerTest("clone_min_interleave_stop_two", "order first clone then clone... first clone_min=2 and then has interleave=true"), SchedulerTest("clone_min_start_one", "order first clone then primitive... first clone_min=2"), SchedulerTest("clone_min_start_two", "order first clone then primitive... first clone_min=2"), SchedulerTest("clone_min_stop_all", "order first clone then primitive... first clone_min=2"), SchedulerTest("clone_min_stop_one", "order first clone then primitive... first clone_min=2"), SchedulerTest("clone_min_stop_two", "order first clone then primitive... 
first clone_min=2"), ]), SchedulerTestGroup([ SchedulerTest("unfence-startup", "Clean unfencing"), SchedulerTest("unfence-definition", "Unfencing when the agent changes"), SchedulerTest("unfence-parameters", "Unfencing when the agent parameters changes"), SchedulerTest("unfence-device", "Unfencing when a cluster has only fence devices"), ]), SchedulerTestGroup([ SchedulerTest("promoted-0", "Stopped -> Unpromoted"), SchedulerTest("promoted-1", "Stopped -> Promote"), SchedulerTest("promoted-2", "Stopped -> Promote : notify"), SchedulerTest("promoted-3", "Stopped -> Promote : promoted location"), SchedulerTest("promoted-4", "Started -> Promote : promoted location"), SchedulerTest("promoted-5", "Promoted -> Promoted"), SchedulerTest("promoted-6", "Promoted -> Promoted (2)"), SchedulerTest("promoted-7", "Promoted -> Fenced"), SchedulerTest("promoted-8", "Promoted -> Fenced -> Moved"), SchedulerTest("promoted-9", "Stopped + Promotable + No quorum"), SchedulerTest("promoted-10", "Stopped -> Promotable : notify with monitor"), SchedulerTest("promoted-11", "Stopped -> Promote : colocation"), SchedulerTest("novell-239082", "Demote/Promote ordering"), SchedulerTest("novell-239087", "Stable promoted placement"), SchedulerTest("promoted-12", "Promotion based solely on rsc_location constraints"), SchedulerTest("promoted-13", "Include preferences of colocated resources when placing promoted"), SchedulerTest("promoted-demote", "Ordering when actions depends on demoting an unpromoted resource"), SchedulerTest("promoted-ordering", "Prevent resources from starting that need a promoted"), SchedulerTest("bug-1765", "Verify promoted-with-promoted colocation does not stop unpromoted instances"), SchedulerTest("promoted-group", "Promotion of cloned groups"), SchedulerTest("bug-lf-1852", "Don't shuffle promotable instances unnecessarily"), SchedulerTest("promoted-failed-demote", "Don't retry failed demote actions"), SchedulerTest("promoted-failed-demote-2", "Don't retry failed demote 
actions (notify=false)"), SchedulerTest("promoted-depend", "Ensure resources that depend on promoted instance don't get allocated until that does"), SchedulerTest("promoted-reattach", "Re-attach to a running promoted"), SchedulerTest("promoted-allow-start", "Don't include promoted score if it would prevent allocation"), SchedulerTest("promoted-colocation", "Allow promoted instances placemaker to be influenced by colocation constraints"), SchedulerTest("promoted-pseudo", "Make sure promote/demote pseudo actions are created correctly"), SchedulerTest("promoted-role", "Prevent target-role from promoting more than promoted-max instances"), SchedulerTest("bug-lf-2358", "Anti-colocation of promoted instances"), SchedulerTest("promoted-promotion-constraint", "Mandatory promoted colocation constraints"), SchedulerTest("unmanaged-promoted", "Ensure role is preserved for unmanaged resources"), SchedulerTest("promoted-unmanaged-monitor", "Start correct monitor for unmanaged promoted instances"), SchedulerTest("promoted-demote-2", "Demote does not clear past failure"), SchedulerTest("promoted-move", "Move promoted based on failure of colocated group"), SchedulerTest("promoted-probed-score", "Observe the promotion score of probed resources"), SchedulerTest("colocation_constraint_stops_promoted", "cl#5054 - Ensure promoted is demoted when stopped by colocation constraint"), SchedulerTest("colocation_constraint_stops_unpromoted", "cl#5054 - Ensure unpromoted is not demoted when stopped by colocation constraint"), SchedulerTest("order_constraint_stops_promoted", "cl#5054 - Ensure promoted is demoted when stopped by order constraint"), SchedulerTest("order_constraint_stops_unpromoted", "cl#5054 - Ensure unpromoted is not demoted when stopped by order constraint"), SchedulerTest("promoted_monitor_restart", "cl#5072 - Ensure promoted monitor operation will start after promotion"), SchedulerTest("bug-rh-880249", "Handle replacement of an m/s resource with a primitive"), 
SchedulerTest("bug-5143-ms-shuffle", "Prevent promoted instance shuffling due to promotion score"), SchedulerTest("promoted-demote-block", "Block promotion if demote fails with on-fail=block"), SchedulerTest("promoted-dependent-ban", "Don't stop instances from being active because a dependent is banned from that host"), SchedulerTest("promoted-stop", "Stop instances due to location constraint with role=Started"), SchedulerTest("promoted-partially-demoted-group", "Allow partially demoted group to finish demoting"), SchedulerTest("bug-cl-5213", "Ensure role colocation with -INFINITY is enforced"), SchedulerTest("bug-cl-5219", "Allow unrelated resources with a common colocation target to remain promoted"), SchedulerTest("promoted-asymmetrical-order", "Fix the behaviors of multi-state resources with asymmetrical ordering"), SchedulerTest("promoted-notify", "Promotion with notifications"), SchedulerTest("promoted-score-startup", "Use permanent promoted scores without LRM history"), SchedulerTest("failed-demote-recovery", "Recover resource in unpromoted role after demote fails"), SchedulerTest("failed-demote-recovery-promoted", "Recover resource in promoted role after demote fails"), SchedulerTest("on_fail_demote1", "Recovery with on-fail=\"demote\" on healthy cluster, remote, guest, and bundle nodes"), SchedulerTest("on_fail_demote2", "Recovery with on-fail=\"demote\" with promotion on different node"), SchedulerTest("on_fail_demote3", "Recovery with on-fail=\"demote\" with no promotion"), SchedulerTest("on_fail_demote4", "Recovery with on-fail=\"demote\" on failed cluster, remote, guest, and bundle nodes"), SchedulerTest("no_quorum_demote", "Promotable demotion and primitive stop with no-quorum-policy=\"demote\""), SchedulerTest("no-promote-on-unrunnable-guest", "Don't select bundle instance for promotion when container can't run"), SchedulerTest("leftover-pending-monitor", "Prevent a leftover pending monitor from causing unexpected stop of other instances"), ]), 
SchedulerTestGroup([ SchedulerTest("history-1", "Correctly parse stateful-1 resource state"), ]), SchedulerTestGroup([ SchedulerTest("managed-0", "Managed (reference)"), SchedulerTest("managed-1", "Not managed - down"), SchedulerTest("managed-2", "Not managed - up"), SchedulerTest("bug-5028", "Shutdown should block if anything depends on an unmanaged resource"), SchedulerTest("bug-5028-detach", "Ensure detach still works"), SchedulerTest("bug-5028-bottom", "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack"), SchedulerTest("unmanaged-stop-1", "cl#5155 - Block the stop of resources if any depending resource is unmanaged"), SchedulerTest("unmanaged-stop-2", "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged"), SchedulerTest("unmanaged-stop-3", "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged"), SchedulerTest("unmanaged-stop-4", "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged"), SchedulerTest("unmanaged-block-restart", "Block restart of resources if any dependent resource in a group is unmanaged"), ]), SchedulerTestGroup([ SchedulerTest("interleave-0", "Interleave (reference)"), SchedulerTest("interleave-1", "coloc - not interleaved"), SchedulerTest("interleave-2", "coloc - interleaved"), SchedulerTest("interleave-3", "coloc - interleaved (2)"), SchedulerTest("interleave-pseudo-stop", "Interleaved clone during stonith"), SchedulerTest("interleave-stop", "Interleaved clone during stop"), SchedulerTest("interleave-restart", "Interleaved clone during dependency restart"), ]), SchedulerTestGroup([ SchedulerTest("notify-0", "Notify reference"), SchedulerTest("notify-1", "Notify simple"), SchedulerTest("notify-2", "Notify simple, confirm"), SchedulerTest("notify-3", "Notify move, confirm"), SchedulerTest("novell-239079", "Notification priority"), SchedulerTest("notifs-for-unrunnable", "Don't schedule 
notifications for an unrunnable action"), SchedulerTest("route-remote-notify", "Route remote notify actions through correct cluster node"), SchedulerTest("notify-behind-stopping-remote", "Don't schedule notifications behind stopped remote"), ]), SchedulerTestGroup([ SchedulerTest("594", "OSDL #594 - Unrunnable actions scheduled in transition"), SchedulerTest("662", "OSDL #662 - Two resources start on one node when incarnation_node_max = 1"), SchedulerTest("696", "OSDL #696 - CRM starts stonith RA without monitor"), SchedulerTest("726", "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop"), SchedulerTest("735", "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3"), SchedulerTest("764", "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1"), SchedulerTest("797", "OSDL #797 - Assert triggered: task_id_i > max_call_id"), SchedulerTest("829", "OSDL #829"), SchedulerTest("994", "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted"), SchedulerTest("994-2", "OSDL #994 - with a dependent resource"), SchedulerTest("1360", "OSDL #1360 - Clone stickiness"), SchedulerTest("1484", "OSDL #1484 - on_fail=stop"), SchedulerTest("1494", "OSDL #1494 - Clone stability"), SchedulerTest("unrunnable-1", "Unrunnable"), SchedulerTest("unrunnable-2", "Unrunnable 2"), SchedulerTest("stonith-0", "Stonith loop - 1"), SchedulerTest("stonith-1", "Stonith loop - 2"), SchedulerTest("stonith-2", "Stonith loop - 3"), SchedulerTest("stonith-3", "Stonith startup"), SchedulerTest("stonith-4", "Stonith node state"), SchedulerTest("dc-fence-ordering", "DC needs fencing while other nodes are shutting down"), SchedulerTest("bug-1572-1", "Recovery of groups depending on promotable role"), SchedulerTest("bug-1572-2", "Recovery of groups depending on promotable role when promoted is not re-promoted"), SchedulerTest("bug-1685", "Depends-on-promoted ordering"), SchedulerTest("bug-1822", "Don't promote partially 
active groups"), SchedulerTest("bug-pm-11", "New resource added to a m/s group"), SchedulerTest("bug-pm-12", "Recover only the failed portion of a cloned group"), SchedulerTest("bug-n-387749", "Don't shuffle clone instances"), SchedulerTest("bug-n-385265", "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped"), SchedulerTest("bug-n-385265-2", "Ensure groups are migrated instead of remaining partially active on the current node"), SchedulerTest("bug-lf-1920", "Correctly handle probes that find active resources"), SchedulerTest("bnc-515172", "Location constraint with multiple expressions"), SchedulerTest("colocate-primitive-with-clone", "Optional colocation with a clone"), SchedulerTest("use-after-free-merge", "Use-after-free in native_merge_weights"), SchedulerTest("bug-lf-2551", "STONITH ordering for stop"), SchedulerTest("bug-lf-2606", "Stonith implies demote"), SchedulerTest("bug-lf-2474", "Ensure resource op timeout takes precedence over op_defaults"), SchedulerTest("bug-suse-707150", "Prevent vm-01 from starting due to colocation/ordering"), SchedulerTest("bug-5014-A-start-B-start", "Verify when A starts B starts using symmetrical=false"), SchedulerTest("bug-5014-A-stop-B-started", "Verify when A stops B does not stop if it has already started using symmetric=false"), SchedulerTest("bug-5014-A-stopped-B-stopped", "Verify when A is stopped and B has not started, B does not start before A using symmetric=false"), SchedulerTest("bug-5014-CthenAthenB-C-stopped", "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts"), SchedulerTest("bug-5014-CLONE-A-start-B-start", "Verify when A starts B starts using clone resources with symmetric=false"), SchedulerTest("bug-5014-CLONE-A-stop-B-started", "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false"), SchedulerTest("bug-5014-GROUP-A-start-B-start", "Verify when A starts B 
starts when using group resources with symmetric=false"), SchedulerTest("bug-5014-GROUP-A-stopped-B-started", "Verify when A stops B does not stop if it has already started using group resources with symmetric=false"), SchedulerTest("bug-5014-GROUP-A-stopped-B-stopped", "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false"), SchedulerTest("bug-5014-ordered-set-symmetrical-false", "Verify ordered sets work with symmetrical=false"), SchedulerTest("bug-5014-ordered-set-symmetrical-true", "Verify ordered sets work with symmetrical=true"), SchedulerTest("clbz5007-promotable-colocation", "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources"), SchedulerTest("bug-5038", "Prevent restart of anonymous clones when clone-max decreases"), SchedulerTest("bug-5025-1", "Automatically clean up failcount after resource config change with reload"), SchedulerTest("bug-5025-2", "Make sure clear failcount action isn't set when config does not change"), SchedulerTest("bug-5025-3", "Automatically clean up failcount after resource config change with restart"), SchedulerTest("bug-5025-4", "Clear failcount when last failure is a start op and rsc attributes changed"), SchedulerTest("failcount", "Ensure failcounts are correctly expired"), SchedulerTest("failcount-block", "Ensure failcounts are not expired when on-fail=block is present"), SchedulerTest("per-op-failcount", "Ensure per-operation failcount is handled and not passed to fence agent"), SchedulerTest("on-fail-ignore", "Ensure on-fail=ignore works even beyond migration-threshold"), SchedulerTest("monitor-onfail-restart", "bug-5058 - Monitor failure with on-fail set to restart"), SchedulerTest("monitor-onfail-stop", "bug-5058 - Monitor failure wiht on-fail set to stop"), SchedulerTest("bug-5059", "No need to restart p_stateful1:*"), SchedulerTest("bug-5069-op-enabled", "Test on-fail=ignore with failure when monitor is enabled"), 
SchedulerTest("bug-5069-op-disabled", "Test on-fail-ignore with failure when monitor is disabled"), SchedulerTest("obsolete-lrm-resource", "cl#5115 - Do not use obsolete lrm_resource sections"), SchedulerTest("expire-non-blocked-failure", "Ignore failure-timeout only if the failed operation has on-fail=block"), SchedulerTest("asymmetrical-order-move", "Respect asymmetrical ordering when trying to move resources"), SchedulerTest("asymmetrical-order-restart", "Respect asymmetrical ordering when restarting dependent resource"), SchedulerTest("start-then-stop-with-unfence", "Avoid graph loop with start-then-stop constraint plus unfencing"), SchedulerTest("order-expired-failure", "Order failcount cleanup after remote fencing"), SchedulerTest("expired-stop-1", "Expired stop failure should not block resource"), SchedulerTest("ignore_stonith_rsc_order1", "cl#5056- Ignore order constraint between stonith and non-stonith rsc"), SchedulerTest("ignore_stonith_rsc_order2", "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith"), SchedulerTest("ignore_stonith_rsc_order3", "cl#5056- Ignore order constraint, stonith clone and mixed group"), SchedulerTest("ignore_stonith_rsc_order4", "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group"), SchedulerTest("honor_stonith_rsc_order1", "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)"), SchedulerTest("honor_stonith_rsc_order2", "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)"), SchedulerTest("honor_stonith_rsc_order3", "cl#5056- Honor order constraint, stonith clones with nested pure stonith group"), SchedulerTest("honor_stonith_rsc_order4", "cl#5056- Honor order constraint, between two native stonith rscs"), SchedulerTest("multiply-active-stonith", "Multiply active stonith"), SchedulerTest("probe-timeout", "cl#5099 - Default probe timeout"), SchedulerTest("order-first-probes", "cl#5301 - respect order 
constraints when relevant resources are being probed"), SchedulerTest("concurrent-fencing", "Allow performing fencing operations in parallel"), SchedulerTest("priority-fencing-delay", "Delay fencing targeting the more significant node"), SchedulerTest("pending-node-no-uname", "Do not fence a pending node that doesn't have an uname in node state yet"), SchedulerTest("node-pending-timeout", "Fence a pending node that has reached `node-pending-timeout`"), ]), SchedulerTestGroup([ SchedulerTest("systemhealth1", "System Health () #1"), SchedulerTest("systemhealth2", "System Health () #2"), SchedulerTest("systemhealth3", "System Health () #3"), SchedulerTest("systemhealthn1", "System Health (None) #1"), SchedulerTest("systemhealthn2", "System Health (None) #2"), SchedulerTest("systemhealthn3", "System Health (None) #3"), SchedulerTest("systemhealthm1", "System Health (Migrate On Red) #1"), SchedulerTest("systemhealthm2", "System Health (Migrate On Red) #2"), SchedulerTest("systemhealthm3", "System Health (Migrate On Red) #3"), SchedulerTest("systemhealtho1", "System Health (Only Green) #1"), SchedulerTest("systemhealtho2", "System Health (Only Green) #2"), SchedulerTest("systemhealtho3", "System Health (Only Green) #3"), SchedulerTest("systemhealthp1", "System Health (Progessive) #1"), SchedulerTest("systemhealthp2", "System Health (Progessive) #2"), SchedulerTest("systemhealthp3", "System Health (Progessive) #3"), SchedulerTest("allow-unhealthy-nodes", "System Health (migrate-on-red + allow-unhealth-nodes)"), ]), SchedulerTestGroup([ SchedulerTest("utilization", "Placement Strategy - utilization"), SchedulerTest("minimal", "Placement Strategy - minimal"), SchedulerTest("balanced", "Placement Strategy - balanced"), ]), SchedulerTestGroup([ SchedulerTest("placement-stickiness", "Optimized Placement Strategy - stickiness"), SchedulerTest("placement-priority", "Optimized Placement Strategy - priority"), SchedulerTest("placement-location", "Optimized Placement Strategy - 
location"), SchedulerTest("placement-capacity", "Optimized Placement Strategy - capacity"), ]), SchedulerTestGroup([ SchedulerTest("utilization-order1", "Utilization Order - Simple"), SchedulerTest("utilization-order2", "Utilization Order - Complex"), SchedulerTest("utilization-order3", "Utilization Order - Migrate"), SchedulerTest("utilization-order4", "Utilization Order - Live Migration (bnc#695440)"), SchedulerTest("utilization-complex", "Utilization with complex relationships"), SchedulerTest("utilization-shuffle", "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3"), SchedulerTest("load-stopped-loop", "Avoid transition loop due to load_stopped (cl#5044)"), SchedulerTest("load-stopped-loop-2", "cl#5235 - Prevent graph loops that can be introduced by load_stopped -> migrate_to ordering"), ]), SchedulerTestGroup([ SchedulerTest("colocated-utilization-primitive-1", "Colocated Utilization - Primitive"), SchedulerTest("colocated-utilization-primitive-2", "Colocated Utilization - Choose the most capable node"), SchedulerTest("colocated-utilization-group", "Colocated Utilization - Group"), SchedulerTest("colocated-utilization-clone", "Colocated Utilization - Clone"), SchedulerTest("utilization-check-allowed-nodes", "Only check the capacities of the nodes that can run the resource"), ]), SchedulerTestGroup([ SchedulerTest("node-maintenance-1", "cl#5128 - Node maintenance"), SchedulerTest("node-maintenance-2", "cl#5128 - Node maintenance (coming out of maintenance mode)"), SchedulerTest("shutdown-maintenance-node", "Do not fence a maintenance node if it shuts down cleanly"), SchedulerTest("rsc-maintenance", "Per-resource maintenance"), ]), SchedulerTestGroup([ SchedulerTest("not-installed-agent", "The resource agent is missing"), SchedulerTest("not-installed-tools", "Something the resource agent needs is missing"), ]), SchedulerTestGroup([ SchedulerTest("stopped-monitor-00", "Stopped Monitor - initial start"), 
SchedulerTest("stopped-monitor-01", "Stopped Monitor - failed started"), SchedulerTest("stopped-monitor-02", "Stopped Monitor - started multi-up"), SchedulerTest("stopped-monitor-03", "Stopped Monitor - stop started"), SchedulerTest("stopped-monitor-04", "Stopped Monitor - failed stop"), SchedulerTest("stopped-monitor-05", "Stopped Monitor - start unmanaged"), SchedulerTest("stopped-monitor-06", "Stopped Monitor - unmanaged multi-up"), SchedulerTest("stopped-monitor-07", "Stopped Monitor - start unmanaged multi-up"), SchedulerTest("stopped-monitor-08", "Stopped Monitor - migrate"), SchedulerTest("stopped-monitor-09", "Stopped Monitor - unmanage started"), SchedulerTest("stopped-monitor-10", "Stopped Monitor - unmanaged started multi-up"), SchedulerTest("stopped-monitor-11", "Stopped Monitor - stop unmanaged started"), SchedulerTest("stopped-monitor-12", "Stopped Monitor - unmanaged started multi-up (target-role=Stopped)"), SchedulerTest("stopped-monitor-20", "Stopped Monitor - initial stop"), SchedulerTest("stopped-monitor-21", "Stopped Monitor - stopped single-up"), SchedulerTest("stopped-monitor-22", "Stopped Monitor - stopped multi-up"), SchedulerTest("stopped-monitor-23", "Stopped Monitor - start stopped"), SchedulerTest("stopped-monitor-24", "Stopped Monitor - unmanage stopped"), SchedulerTest("stopped-monitor-25", "Stopped Monitor - unmanaged stopped multi-up"), SchedulerTest("stopped-monitor-26", "Stopped Monitor - start unmanaged stopped"), SchedulerTest("stopped-monitor-27", "Stopped Monitor - unmanaged stopped multi-up (target-role=Started)"), SchedulerTest("stopped-monitor-30", "Stopped Monitor - new node started"), SchedulerTest("stopped-monitor-31", "Stopped Monitor - new node stopped"), ]), SchedulerTestGroup([ # This is a combo test to check: # - probe timeout defaults to the minimum-interval monitor's # - duplicate recurring operations are ignored # - if timeout spec is bad, the default timeout is used # - failure is blocked with on-fail=block even 
if ISO8601 interval is specified # - started/stopped role monitors are started/stopped on right nodes SchedulerTest("intervals", "Recurring monitor interval handling"), ]), SchedulerTestGroup([ SchedulerTest("ticket-primitive-1", "Ticket - Primitive (loss-policy=stop, initial)"), SchedulerTest("ticket-primitive-2", "Ticket - Primitive (loss-policy=stop, granted)"), SchedulerTest("ticket-primitive-3", "Ticket - Primitive (loss-policy-stop, revoked)"), SchedulerTest("ticket-primitive-4", "Ticket - Primitive (loss-policy=demote, initial)"), SchedulerTest("ticket-primitive-5", "Ticket - Primitive (loss-policy=demote, granted)"), SchedulerTest("ticket-primitive-6", "Ticket - Primitive (loss-policy=demote, revoked)"), SchedulerTest("ticket-primitive-7", "Ticket - Primitive (loss-policy=fence, initial)"), SchedulerTest("ticket-primitive-8", "Ticket - Primitive (loss-policy=fence, granted)"), SchedulerTest("ticket-primitive-9", "Ticket - Primitive (loss-policy=fence, revoked)"), SchedulerTest("ticket-primitive-10", "Ticket - Primitive (loss-policy=freeze, initial)"), SchedulerTest("ticket-primitive-11", "Ticket - Primitive (loss-policy=freeze, granted)"), SchedulerTest("ticket-primitive-12", "Ticket - Primitive (loss-policy=freeze, revoked)"), SchedulerTest("ticket-primitive-13", "Ticket - Primitive (loss-policy=stop, standby, granted)"), SchedulerTest("ticket-primitive-14", "Ticket - Primitive (loss-policy=stop, granted, standby)"), SchedulerTest("ticket-primitive-15", "Ticket - Primitive (loss-policy=stop, standby, revoked)"), SchedulerTest("ticket-primitive-16", "Ticket - Primitive (loss-policy=demote, standby, granted)"), SchedulerTest("ticket-primitive-17", "Ticket - Primitive (loss-policy=demote, granted, standby)"), SchedulerTest("ticket-primitive-18", "Ticket - Primitive (loss-policy=demote, standby, revoked)"), SchedulerTest("ticket-primitive-19", "Ticket - Primitive (loss-policy=fence, standby, granted)"), SchedulerTest("ticket-primitive-20", "Ticket - Primitive 
(loss-policy=fence, granted, standby)"), SchedulerTest("ticket-primitive-21", "Ticket - Primitive (loss-policy=fence, standby, revoked)"), SchedulerTest("ticket-primitive-22", "Ticket - Primitive (loss-policy=freeze, standby, granted)"), SchedulerTest("ticket-primitive-23", "Ticket - Primitive (loss-policy=freeze, granted, standby)"), SchedulerTest("ticket-primitive-24", "Ticket - Primitive (loss-policy=freeze, standby, revoked)"), ]), SchedulerTestGroup([ SchedulerTest("ticket-group-1", "Ticket - Group (loss-policy=stop, initial)"), SchedulerTest("ticket-group-2", "Ticket - Group (loss-policy=stop, granted)"), SchedulerTest("ticket-group-3", "Ticket - Group (loss-policy-stop, revoked)"), SchedulerTest("ticket-group-4", "Ticket - Group (loss-policy=demote, initial)"), SchedulerTest("ticket-group-5", "Ticket - Group (loss-policy=demote, granted)"), SchedulerTest("ticket-group-6", "Ticket - Group (loss-policy=demote, revoked)"), SchedulerTest("ticket-group-7", "Ticket - Group (loss-policy=fence, initial)"), SchedulerTest("ticket-group-8", "Ticket - Group (loss-policy=fence, granted)"), SchedulerTest("ticket-group-9", "Ticket - Group (loss-policy=fence, revoked)"), SchedulerTest("ticket-group-10", "Ticket - Group (loss-policy=freeze, initial)"), SchedulerTest("ticket-group-11", "Ticket - Group (loss-policy=freeze, granted)"), SchedulerTest("ticket-group-12", "Ticket - Group (loss-policy=freeze, revoked)"), SchedulerTest("ticket-group-13", "Ticket - Group (loss-policy=stop, standby, granted)"), SchedulerTest("ticket-group-14", "Ticket - Group (loss-policy=stop, granted, standby)"), SchedulerTest("ticket-group-15", "Ticket - Group (loss-policy=stop, standby, revoked)"), SchedulerTest("ticket-group-16", "Ticket - Group (loss-policy=demote, standby, granted)"), SchedulerTest("ticket-group-17", "Ticket - Group (loss-policy=demote, granted, standby)"), SchedulerTest("ticket-group-18", "Ticket - Group (loss-policy=demote, standby, revoked)"), SchedulerTest("ticket-group-19", 
"Ticket - Group (loss-policy=fence, standby, granted)"), SchedulerTest("ticket-group-20", "Ticket - Group (loss-policy=fence, granted, standby)"), SchedulerTest("ticket-group-21", "Ticket - Group (loss-policy=fence, standby, revoked)"), SchedulerTest("ticket-group-22", "Ticket - Group (loss-policy=freeze, standby, granted)"), SchedulerTest("ticket-group-23", "Ticket - Group (loss-policy=freeze, granted, standby)"), SchedulerTest("ticket-group-24", "Ticket - Group (loss-policy=freeze, standby, revoked)"), ]), SchedulerTestGroup([ SchedulerTest("ticket-clone-1", "Ticket - Clone (loss-policy=stop, initial)"), SchedulerTest("ticket-clone-2", "Ticket - Clone (loss-policy=stop, granted)"), SchedulerTest("ticket-clone-3", "Ticket - Clone (loss-policy-stop, revoked)"), SchedulerTest("ticket-clone-4", "Ticket - Clone (loss-policy=demote, initial)"), SchedulerTest("ticket-clone-5", "Ticket - Clone (loss-policy=demote, granted)"), SchedulerTest("ticket-clone-6", "Ticket - Clone (loss-policy=demote, revoked)"), SchedulerTest("ticket-clone-7", "Ticket - Clone (loss-policy=fence, initial)"), SchedulerTest("ticket-clone-8", "Ticket - Clone (loss-policy=fence, granted)"), SchedulerTest("ticket-clone-9", "Ticket - Clone (loss-policy=fence, revoked)"), SchedulerTest("ticket-clone-10", "Ticket - Clone (loss-policy=freeze, initial)"), SchedulerTest("ticket-clone-11", "Ticket - Clone (loss-policy=freeze, granted)"), SchedulerTest("ticket-clone-12", "Ticket - Clone (loss-policy=freeze, revoked)"), SchedulerTest("ticket-clone-13", "Ticket - Clone (loss-policy=stop, standby, granted)"), SchedulerTest("ticket-clone-14", "Ticket - Clone (loss-policy=stop, granted, standby)"), SchedulerTest("ticket-clone-15", "Ticket - Clone (loss-policy=stop, standby, revoked)"), SchedulerTest("ticket-clone-16", "Ticket - Clone (loss-policy=demote, standby, granted)"), SchedulerTest("ticket-clone-17", "Ticket - Clone (loss-policy=demote, granted, standby)"), SchedulerTest("ticket-clone-18", "Ticket - Clone 
(loss-policy=demote, standby, revoked)"), SchedulerTest("ticket-clone-19", "Ticket - Clone (loss-policy=fence, standby, granted)"), SchedulerTest("ticket-clone-20", "Ticket - Clone (loss-policy=fence, granted, standby)"), SchedulerTest("ticket-clone-21", "Ticket - Clone (loss-policy=fence, standby, revoked)"), SchedulerTest("ticket-clone-22", "Ticket - Clone (loss-policy=freeze, standby, granted)"), SchedulerTest("ticket-clone-23", "Ticket - Clone (loss-policy=freeze, granted, standby)"), SchedulerTest("ticket-clone-24", "Ticket - Clone (loss-policy=freeze, standby, revoked)"), ]), SchedulerTestGroup([ SchedulerTest("ticket-promoted-1", "Ticket - Promoted (loss-policy=stop, initial)"), SchedulerTest("ticket-promoted-2", "Ticket - Promoted (loss-policy=stop, granted)"), SchedulerTest("ticket-promoted-3", "Ticket - Promoted (loss-policy-stop, revoked)"), SchedulerTest("ticket-promoted-4", "Ticket - Promoted (loss-policy=demote, initial)"), SchedulerTest("ticket-promoted-5", "Ticket - Promoted (loss-policy=demote, granted)"), SchedulerTest("ticket-promoted-6", "Ticket - Promoted (loss-policy=demote, revoked)"), SchedulerTest("ticket-promoted-7", "Ticket - Promoted (loss-policy=fence, initial)"), SchedulerTest("ticket-promoted-8", "Ticket - Promoted (loss-policy=fence, granted)"), SchedulerTest("ticket-promoted-9", "Ticket - Promoted (loss-policy=fence, revoked)"), SchedulerTest("ticket-promoted-10", "Ticket - Promoted (loss-policy=freeze, initial)"), SchedulerTest("ticket-promoted-11", "Ticket - Promoted (loss-policy=freeze, granted)"), SchedulerTest("ticket-promoted-12", "Ticket - Promoted (loss-policy=freeze, revoked)"), SchedulerTest("ticket-promoted-13", "Ticket - Promoted (loss-policy=stop, standby, granted)"), SchedulerTest("ticket-promoted-14", "Ticket - Promoted (loss-policy=stop, granted, standby)"), SchedulerTest("ticket-promoted-15", "Ticket - Promoted (loss-policy=stop, standby, revoked)"), SchedulerTest("ticket-promoted-16", "Ticket - Promoted 
(loss-policy=demote, standby, granted)"), SchedulerTest("ticket-promoted-17", "Ticket - Promoted (loss-policy=demote, granted, standby)"), SchedulerTest("ticket-promoted-18", "Ticket - Promoted (loss-policy=demote, standby, revoked)"), SchedulerTest("ticket-promoted-19", "Ticket - Promoted (loss-policy=fence, standby, granted)"), SchedulerTest("ticket-promoted-20", "Ticket - Promoted (loss-policy=fence, granted, standby)"), SchedulerTest("ticket-promoted-21", "Ticket - Promoted (loss-policy=fence, standby, revoked)"), SchedulerTest("ticket-promoted-22", "Ticket - Promoted (loss-policy=freeze, standby, granted)"), SchedulerTest("ticket-promoted-23", "Ticket - Promoted (loss-policy=freeze, granted, standby)"), SchedulerTest("ticket-promoted-24", "Ticket - Promoted (loss-policy=freeze, standby, revoked)"), ]), SchedulerTestGroup([ SchedulerTest("ticket-rsc-sets-1", "Ticket - Resource sets (1 ticket, initial)"), SchedulerTest("ticket-rsc-sets-2", "Ticket - Resource sets (1 ticket, granted)"), SchedulerTest("ticket-rsc-sets-3", "Ticket - Resource sets (1 ticket, revoked)"), SchedulerTest("ticket-rsc-sets-4", "Ticket - Resource sets (2 tickets, initial)"), SchedulerTest("ticket-rsc-sets-5", "Ticket - Resource sets (2 tickets, granted)"), SchedulerTest("ticket-rsc-sets-6", "Ticket - Resource sets (2 tickets, granted)"), SchedulerTest("ticket-rsc-sets-7", "Ticket - Resource sets (2 tickets, revoked)"), SchedulerTest("ticket-rsc-sets-8", "Ticket - Resource sets (1 ticket, standby, granted)"), SchedulerTest("ticket-rsc-sets-9", "Ticket - Resource sets (1 ticket, granted, standby)"), SchedulerTest("ticket-rsc-sets-10", "Ticket - Resource sets (1 ticket, standby, revoked)"), SchedulerTest("ticket-rsc-sets-11", "Ticket - Resource sets (2 tickets, standby, granted)"), SchedulerTest("ticket-rsc-sets-12", "Ticket - Resource sets (2 tickets, standby, granted)"), SchedulerTest("ticket-rsc-sets-13", "Ticket - Resource sets (2 tickets, granted, standby)"), 
SchedulerTest("ticket-rsc-sets-14", "Ticket - Resource sets (2 tickets, standby, revoked)"), SchedulerTest("cluster-specific-params", "Cluster-specific instance attributes based on rules"), SchedulerTest("site-specific-params", "Site-specific instance attributes based on rules"), ]), SchedulerTestGroup([ SchedulerTest("template-1", "Template - 1"), SchedulerTest("template-2", "Template - 2"), SchedulerTest("template-3", "Template - 3 (merge operations)"), SchedulerTest("template-coloc-1", "Template - Colocation 1"), SchedulerTest("template-coloc-2", "Template - Colocation 2"), SchedulerTest("template-coloc-3", "Template - Colocation 3"), SchedulerTest("template-order-1", "Template - Order 1"), SchedulerTest("template-order-2", "Template - Order 2"), SchedulerTest("template-order-3", "Template - Order 3"), SchedulerTest("template-ticket", "Template - Ticket"), SchedulerTest("template-rsc-sets-1", "Template - Resource Sets 1"), SchedulerTest("template-rsc-sets-2", "Template - Resource Sets 2"), SchedulerTest("template-rsc-sets-3", "Template - Resource Sets 3"), SchedulerTest("template-rsc-sets-4", "Template - Resource Sets 4"), SchedulerTest("template-clone-primitive", "Cloned primitive from template"), SchedulerTest("template-clone-group", "Cloned group from template"), SchedulerTest("location-sets-templates", "Resource sets and templates - Location"), SchedulerTest("tags-coloc-order-1", "Tags - Colocation and Order (Simple)"), SchedulerTest("tags-coloc-order-2", "Tags - Colocation and Order (Resource Sets with Templates)"), SchedulerTest("tags-location", "Tags - Location"), SchedulerTest("tags-ticket", "Tags - Ticket"), ]), SchedulerTestGroup([ SchedulerTest("container-1", "Container - initial"), SchedulerTest("container-2", "Container - monitor failed"), SchedulerTest("container-3", "Container - stop failed"), SchedulerTest("container-4", "Container - reached migration-threshold"), SchedulerTest("container-group-1", "Container in group - initial"), 
SchedulerTest("container-group-2", "Container in group - monitor failed"), SchedulerTest("container-group-3", "Container in group - stop failed"), SchedulerTest("container-group-4", "Container in group - reached migration-threshold"), SchedulerTest("container-is-remote-node", "Place resource within container when container is remote-node"), SchedulerTest("bug-rh-1097457", "Kill user defined container/contents ordering"), SchedulerTest("bug-cl-5247", "Graph loop when recovering m/s resource in a container"), SchedulerTest("bundle-order-startup", "Bundle startup ordering"), SchedulerTest("bundle-order-partial-start", "Bundle startup ordering when some dependencies are already running"), SchedulerTest("bundle-order-partial-start-2", "Bundle startup ordering when some dependencies and the container are already running"), SchedulerTest("bundle-order-stop", "Bundle stop ordering"), SchedulerTest("bundle-order-partial-stop", "Bundle startup ordering when some dependencies are already stopped"), SchedulerTest("bundle-order-stop-on-remote", "Stop nested resource after bringing up the connection"), SchedulerTest("bundle-order-startup-clone", "Prevent startup because bundle isn't promoted"), SchedulerTest("bundle-order-startup-clone-2", "Bundle startup with clones"), SchedulerTest("bundle-order-stop-clone", "Stop bundle because clone is stopping"), SchedulerTest("bundle-interleave-start", "Interleave bundle starts"), SchedulerTest("bundle-interleave-promote", "Interleave bundle promotes"), SchedulerTest("bundle-nested-colocation", "Colocation of nested connection resources"), SchedulerTest("bundle-order-fencing", "Order pseudo bundle fencing after parent node fencing if both are happening"), SchedulerTest("bundle-probe-order-1", "order 1"), SchedulerTest("bundle-probe-order-2", "order 2"), SchedulerTest("bundle-probe-order-3", "order 3"), SchedulerTest("bundle-probe-remotes", "Ensure remotes get probed too"), SchedulerTest("bundle-replicas-change", "Change bundle from 1 
replica to multiple"), SchedulerTest("bundle-connection-with-container", "Don't move a container due to connection preferences"), SchedulerTest("nested-remote-recovery", "Recover bundle's container hosted on remote node"), SchedulerTest("bundle-promoted-location-1", "Promotable bundle, positive location"), SchedulerTest("bundle-promoted-location-2", "Promotable bundle, negative location"), SchedulerTest("bundle-promoted-location-3", "Promotable bundle, positive location for promoted role"), SchedulerTest("bundle-promoted-location-4", "Promotable bundle, negative location for promoted role"), SchedulerTest("bundle-promoted-location-5", "Promotable bundle, positive location for unpromoted role"), SchedulerTest("bundle-promoted-location-6", "Promotable bundle, negative location for unpromoted role"), SchedulerTest("bundle-promoted-colocation-1", "Primary promoted bundle, dependent primitive (mandatory coloc)"), SchedulerTest("bundle-promoted-colocation-2", "Primary promoted bundle, dependent primitive (optional coloc)"), SchedulerTest("bundle-promoted-colocation-3", "Dependent promoted bundle, primary primitive (mandatory coloc)"), SchedulerTest("bundle-promoted-colocation-4", "Dependent promoted bundle, primary primitive (optional coloc)"), SchedulerTest("bundle-promoted-colocation-5", "Primary and dependent promoted bundle instances (mandatory coloc)"), SchedulerTest("bundle-promoted-colocation-6", "Primary and dependent promoted bundle instances (optional coloc)"), SchedulerTest("bundle-promoted-anticolocation-1", "Primary promoted bundle, dependent primitive (mandatory anti)"), SchedulerTest("bundle-promoted-anticolocation-2", "Primary promoted bundle, dependent primitive (optional anti)"), SchedulerTest("bundle-promoted-anticolocation-3", "Dependent promoted bundle, primary primitive (mandatory anti)"), SchedulerTest("bundle-promoted-anticolocation-4", "Dependent promoted bundle, primary primitive (optional anti)"), 
SchedulerTest("bundle-promoted-anticolocation-5", "Primary and dependent promoted bundle instances (mandatory anti)"), SchedulerTest("bundle-promoted-anticolocation-6", "Primary and dependent promoted bundle instances (optional anti)"), ]), SchedulerTestGroup([ SchedulerTest("whitebox-fail1", "Fail whitebox container rsc"), SchedulerTest("whitebox-fail2", "Fail cluster connection to guest node"), SchedulerTest("whitebox-fail3", "Failed containers should not run nested on remote nodes"), SchedulerTest("whitebox-start", "Start whitebox container with resources assigned to it"), SchedulerTest("whitebox-stop", "Stop whitebox container with resources assigned to it"), SchedulerTest("whitebox-move", "Move whitebox container with resources assigned to it"), SchedulerTest("whitebox-asymmetric", "Verify connection rsc opts-in based on container resource"), SchedulerTest("whitebox-ms-ordering", "Verify promote/demote can not occur before connection is established"), SchedulerTest("whitebox-ms-ordering-move", "Stop/Start cycle within a moving container"), SchedulerTest("whitebox-orphaned", "Properly shutdown orphaned whitebox container"), SchedulerTest("whitebox-orphan-ms", "Properly tear down orphan ms resources on remote-nodes"), SchedulerTest("whitebox-unexpectedly-running", "Recover container nodes the cluster did not start"), SchedulerTest("whitebox-migrate1", "Migrate both container and connection resource"), SchedulerTest("whitebox-imply-stop-on-fence", "imply stop action on container node rsc when host node is fenced"), SchedulerTest("whitebox-nested-group", "Verify guest remote-node works nested in a group"), SchedulerTest("guest-node-host-dies", "Verify guest node is recovered if host goes away"), SchedulerTest("guest-node-cleanup", "Order guest node connection recovery after container probe"), SchedulerTest("guest-host-not-fenceable", "Actions on guest node are unrunnable if host is unclean and cannot be fenced"), ]), SchedulerTestGroup([ 
SchedulerTest("remote-startup-probes", "Baremetal remote-node startup probes"), SchedulerTest("remote-startup", "Startup a newly discovered remote-nodes with no status"), SchedulerTest("remote-fence-unclean", "Fence unclean baremetal remote-node"), SchedulerTest("remote-fence-unclean2", "Fence baremetal remote-node after cluster node fails and connection can not be recovered"), SchedulerTest("remote-fence-unclean-3", "Probe failed remote nodes (triggers fencing)"), SchedulerTest("remote-move", "Move remote-node connection resource"), SchedulerTest("remote-disable", "Disable a baremetal remote-node"), SchedulerTest("remote-probe-disable", "Probe then stop a baremetal remote-node"), SchedulerTest("remote-orphaned", "Properly shutdown orphaned connection resource"), SchedulerTest("remote-orphaned2", "verify we can handle orphaned remote connections with active resources on the remote"), SchedulerTest("remote-recover", "Recover connection resource after cluster-node fails"), SchedulerTest("remote-stale-node-entry", "Make sure we properly handle leftover remote-node entries in the node section"), SchedulerTest("remote-partial-migrate", "Make sure partial migrations are handled before ops on the remote node"), SchedulerTest("remote-partial-migrate2", "Make sure partial migration target is prefered for remote connection"), SchedulerTest("remote-recover-fail", "Make sure start failure causes fencing if rsc are active on remote"), SchedulerTest("remote-start-fail", "Make sure a start failure does not result in fencing if no active resources are on remote"), SchedulerTest("remote-unclean2", "Make monitor failure always results in fencing, even if no rsc are active on remote"), SchedulerTest("remote-fence-before-reconnect", "Fence before clearing recurring monitor failure"), SchedulerTest("remote-recovery", "Recover remote connections before attempting demotion"), SchedulerTest("remote-recover-connection", "Optimistically recovery of only the connection"), 
SchedulerTest("remote-recover-all", "Fencing when the connection has no home"), SchedulerTest("remote-recover-no-resources", "Fencing when the connection has no home and no active resources"), SchedulerTest("remote-recover-unknown", "Fencing when the connection has no home and the remote has no operation history"), SchedulerTest("remote-reconnect-delay", "Waiting for remote reconnect interval to expire"), SchedulerTest("remote-connection-unrecoverable", "Remote connection host must be fenced, with connection unrecoverable"), SchedulerTest("remote-connection-shutdown", "Remote connection shutdown"), SchedulerTest("cancel-behind-moving-remote", "Route recurring monitor cancellations through original node of a moving remote connection"), ]), SchedulerTestGroup([ SchedulerTest("resource-discovery", "Exercises resource-discovery location constraint option"), SchedulerTest("rsc-discovery-per-node", "Disable resource discovery per node"), SchedulerTest("shutdown-lock", "Ensure shutdown lock works properly"), SchedulerTest("shutdown-lock-expiration", "Ensure shutdown lock expiration works properly"), ]), SchedulerTestGroup([ SchedulerTest("op-defaults", "Test op_defaults conditional expressions"), SchedulerTest("op-defaults-2", "Test op_defaults AND'ed conditional expressions"), SchedulerTest("op-defaults-3", "Test op_defaults precedence"), SchedulerTest("rsc-defaults", "Test rsc_defaults conditional expressions"), SchedulerTest("rsc-defaults-2", "Test rsc_defaults conditional expressions without type"), ]), SchedulerTestGroup([ SchedulerTest("stop-all-resources", "Test stop-all-resources=true"), ]), SchedulerTestGroup([ SchedulerTest("ocf_degraded-remap-ocf_ok", "Test degraded remapped to OK"), SchedulerTest("ocf_degraded_promoted-remap-ocf_ok", "Test degraded promoted remapped to OK"), ]), ] TESTS_64BIT = [ SchedulerTestGroup([ SchedulerTest("year-2038", "Check handling of timestamps beyond 2038-01-19 03:14:08 UTC"), ]), ] def is_executable(path): """Check whether a file 
def normalize(filename):
    """Strip volatile attribute text from a file, rewriting it in place.

    Every ``crm_feature_set="..."`` and ``batch-limit="..."`` occurrence is
    deleted from each line so that otherwise-equal files compare as equal.
    Nothing happens if ``filename`` is not an existing regular file.

    Arguments:
    filename -- path of the file to rewrite
    """
    # The compiled expressions are cached on the function object itself, so
    # they are built once on first use (even if the file turns out not to exist)
    try:
        regexes = normalize.patterns
    except AttributeError:
        regexes = normalize.patterns = [
            re.compile(r'crm_feature_set="[^"]*"'),
            re.compile(r'batch-limit="[0-9]*"')
        ]

    if os.path.isfile(filename):
        with io.open(filename, "rt", encoding="utf-8") as src:
            content = src.readlines()

        cleaned = []
        for text in content:
            for regex in regexes:
                text = regex.sub("", text)
            cleaned.append(text)

        with io.open(filename, "wt", encoding="utf-8") as dst:
            dst.writelines(cleaned)
'--io-dir', metavar='PATH', help='Specify path to regression test data directory') parser.add_argument('-o', '--out-dir', metavar='PATH', help='Specify where intermediate and output files should go') parser.add_argument('-v', '--valgrind', action='store_true', help='Run all commands under valgrind') parser.add_argument('--valgrind-dhat', action='store_true', help='Run all commands under valgrind with heap analyzer') parser.add_argument('--valgrind-skip-output', action='store_true', help='If running under valgrind, do not display output') parser.add_argument('--testcmd-options', metavar='OPTIONS', default='', help='Additional options for command under test') # argparse can't handle "everything after --run TEST", so grab that self.single_test_args = [] narg = 0 for arg in argv: narg += 1 if arg == '--run': (argv, self.single_test_args) = (argv[:narg + 1], argv[narg + 1:]) break self.args = parser.parse_args(argv[1:]) def _error(self, s): """Print an error message.""" print(f" * ERROR: {s}") def _failed(self, s): """Print a failure message.""" print(f" * FAILED: {s}") def _get_valgrind_cmd(self): """Return command arguments needed (or not) to run valgrind.""" if self.args.valgrind: os.environ['G_SLICE'] = "always-malloc" return [ "valgrind", "-q", "--gen-suppressions=all", "--time-stamp=yes", "--trace-children=no", "--show-reachable=no", "--leak-check=full", "--num-callers=20", f"--suppressions={self.test_home}/valgrind-pcmk.suppressions" ] if self.args.valgrind_dhat: os.environ['G_SLICE'] = "always-malloc" return [ "valgrind", "--tool=exp-dhat", "--time-stamp=yes", "--trace-children=no", "--show-top-n=100", "--num-callers=4" ] return [] def _get_simulator_cmd(self): """Locate the simulation binary.""" if self.args.binary is None: # pylint: disable=protected-access self.args.binary = f"{BuildOptions._BUILD_DIR}/tools/crm_simulate" if not is_executable(self.args.binary): self.args.binary = f"{BuildOptions.SBIN_DIR}/crm_simulate" if not is_executable(self.args.binary): 
# @TODO it would be more pythonic to raise an exception self._error(f"Test binary {self.args.binary} not found") sys.exit(ExitStatus.NOT_INSTALLED) return [self.args.binary] + shlex.split(self.args.testcmd_options) def set_schema_env(self): """Ensure schema directory environment variable is set, if possible.""" try: return os.environ['PCMK_schema_directory'] except KeyError: # pylint: disable=protected-access for d in [os.path.join(BuildOptions._BUILD_DIR, "xml"), BuildOptions.SCHEMA_DIR]: if not os.path.isdir(d): continue os.environ['PCMK_schema_directory'] = d return d return None def __init__(self, argv=sys.argv): """Create a new CtsScheduler instance.""" # Ensure all command output is in portable locale for comparison os.environ['LC_ALL'] = "C" self._parse_args(argv) # Where this executable lives self.test_home = os.path.dirname(os.path.realpath(argv[0])) # Where test data resides if self.args.io_dir is None: self.args.io_dir = os.path.join(self.test_home, "scheduler") self.xml_input_dir = os.path.join(self.args.io_dir, "xml") self.expected_dir = os.path.join(self.args.io_dir, "exp") self.dot_expected_dir = os.path.join(self.args.io_dir, "dot") self.scores_dir = os.path.join(self.args.io_dir, "scores") self.summary_dir = os.path.join(self.args.io_dir, "summary") self.stderr_expected_dir = os.path.join(self.args.io_dir, "stderr") # Create a temporary directory to store diff file self.failed_dir = tempfile.mkdtemp(prefix='cts-scheduler_') # Where to store generated files if self.args.out_dir is None: self.args.out_dir = self.args.io_dir self.failed_filename = os.path.join(self.failed_dir, "test-output.diff") else: self.failed_filename = os.path.join(self.args.out_dir, "test-output.diff") os.environ['CIB_shadow_dir'] = self.args.out_dir self.failed_file = None self.outfile_out_dir = os.path.join(self.args.out_dir, "out") self.dot_out_dir = os.path.join(self.args.out_dir, "dot") self.scores_out_dir = os.path.join(self.args.out_dir, "scores") self.summary_out_dir = 
os.path.join(self.args.out_dir, "summary") self.stderr_out_dir = os.path.join(self.args.out_dir, "stderr") self.valgrind_out_dir = os.path.join(self.args.out_dir, "valgrind") # Single test mode (if requested) try: # User can give test base name or file name of a test input self.args.run = os.path.splitext(os.path.basename(self.args.run))[0] except (AttributeError, TypeError): pass # --run was not specified self.set_schema_env() # Arguments needed (or not) to run commands self.valgrind_args = self._get_valgrind_cmd() self.simulate_args = self._get_simulator_cmd() # Test counters self.num_failed = 0 self.num_tests = 0 # Ensure that the main output directory exists # We don't want to create it with os.makedirs below if not os.path.isdir(self.args.out_dir): self._error("Output directory missing; can't create output files") sys.exit(ExitStatus.CANTCREAT) # Create output subdirectories if they don't exist try: os.makedirs(self.outfile_out_dir, 0o755, True) os.makedirs(self.dot_out_dir, 0o755, True) os.makedirs(self.scores_out_dir, 0o755, True) os.makedirs(self.summary_out_dir, 0o755, True) os.makedirs(self.stderr_out_dir, 0o755, True) if self.valgrind_args: os.makedirs(self.valgrind_out_dir, 0o755, True) except OSError as ex: self._error(f"Unable to create output subdirectory: {ex}") remove_files([ self.outfile_out_dir, self.dot_out_dir, self.scores_out_dir, self.summary_out_dir, self.stderr_out_dir, ]) sys.exit(ExitStatus.CANTCREAT) def _compare_files(self, filename1, filename2): """Add any file differences to failed results.""" if diff(filename1, filename2, stdout=subprocess.DEVNULL) != 0: diff(filename1, filename2, stdout=self.failed_file, stderr=subprocess.DEVNULL) self.failed_file.write("\n") return True return False def _file_missing(self, path): """Return True if path does not exist or is empty.""" return not os.path.isfile(path) or os.path.getsize(path) == 0 def run_one(self, test_name, test_desc, test_args): """Run one scheduler test.""" # pylint: 
disable=too-many-locals print(f" Test {f'{test_name}:':41} {test_desc}") did_fail = False self.num_tests += 1 # Test inputs input_filename = os.path.join(self.xml_input_dir, f"{test_name}.xml") expected_filename = os.path.join(self.expected_dir, f"{test_name}.exp") dot_expected_filename = os.path.join(self.dot_expected_dir, f"{test_name}.dot") scores_filename = os.path.join(self.scores_dir, f"{test_name}.scores") summary_filename = os.path.join(self.summary_dir, f"{test_name}.summary") stderr_expected_filename = os.path.join(self.stderr_expected_dir, f"{test_name}.stderr") # (Intermediate) test outputs output_filename = os.path.join(self.outfile_out_dir, f"{test_name}.out") dot_output_filename = os.path.join(self.dot_out_dir, f"{test_name}.dot.pe") score_output_filename = os.path.join(self.scores_out_dir, f"{test_name}.scores.pe") summary_output_filename = os.path.join(self.summary_out_dir, f"{test_name}.summary.pe") stderr_output_filename = os.path.join(self.stderr_out_dir, f"{test_name}.stderr.pe") valgrind_output_filename = os.path.join(self.valgrind_out_dir, f"{test_name}.valgrind") # Common arguments for running test test_cmd = [] if self.valgrind_args: test_cmd = self.valgrind_args + [f"--log-file={valgrind_output_filename}"] test_cmd += self.simulate_args # @TODO It would be more pythonic to raise exceptions for errors, # then perhaps it would be nice to make a single-test class # Ensure necessary test inputs exist if not os.path.isfile(input_filename): self._error("No input") self.num_failed += 1 return ExitStatus.NOINPUT if not self.args.update and not os.path.isfile(expected_filename): self._error("no stored output") return ExitStatus.NOINPUT # Run simulation to generate summary output test_cmd_full = test_cmd + ['-x', input_filename, '-S'] + test_args if self.args.run: # Single test mode print(" ".join(test_cmd_full)) with io.open(summary_output_filename, "wt", encoding="utf-8") as f: subprocess.run(test_cmd_full, stdout=f, stderr=subprocess.STDOUT, 
env=os.environ, check=False) if self.args.run: cat(summary_output_filename) # Re-run simulation to generate dot, graph, and scores test_cmd_full = test_cmd + ['-x', input_filename, '-D', dot_output_filename, '-G', output_filename, '-sSQ'] + test_args with io.open(stderr_output_filename, "wt", encoding="utf-8") as f_stderr, \ io.open(score_output_filename, "wt", encoding="utf-8") as f_score: rc = subprocess.call(test_cmd_full, stdout=f_score, stderr=f_stderr, env=os.environ) # Check for test command failure if rc != ExitStatus.OK: self._failed(f"Test returned: {rc}") did_fail = True print(" ".join(test_cmd_full)) # Check for valgrind errors if self.valgrind_args and not self.args.valgrind_skip_output: if os.path.getsize(valgrind_output_filename) > 0: self._failed("Valgrind reported errors") did_fail = True cat(valgrind_output_filename) else: remove_files([valgrind_output_filename]) # Check for core dump if os.path.isfile("core"): self._failed(f"Core-file detected: core.{test_name}") did_fail = True os.rename("core", f"{self.test_home}/core.{test_name}") # Check any stderr output if os.path.isfile(stderr_expected_filename): if self._compare_files(stderr_expected_filename, stderr_output_filename): self._failed("stderr changed") did_fail = True elif os.path.getsize(stderr_output_filename) > 0: self._failed("Output was written to stderr") did_fail = True cat(stderr_output_filename) remove_files([stderr_output_filename]) # Check whether output graph exists, and normalize it if self._file_missing(output_filename): self._error("No graph produced") did_fail = True self.num_failed += 1 remove_files([output_filename]) return ExitStatus.ERROR normalize(output_filename) # Check whether dot output exists, and sort it if self._file_missing(dot_output_filename): self._error("No dot-file summary produced") did_fail = True self.num_failed += 1 remove_files([dot_output_filename, output_filename]) return ExitStatus.ERROR with io.open(dot_output_filename, "rt", encoding="utf-8") as f: 
first_line = f.readline() # "digraph" line with opening brace lines = f.readlines() last_line = lines[-1] # closing brace del lines[-1] lines = sorted(set(lines)) # unique sort with io.open(dot_output_filename, "wt", encoding="utf-8") as f: f.write(first_line) f.writelines(lines) f.write(last_line) # Check whether score output exists, and sort it if self._file_missing(score_output_filename): self._error("No allocation scores produced") did_fail = True self.num_failed += 1 remove_files([score_output_filename, output_filename]) return ExitStatus.ERROR sort_file(score_output_filename) if self.args.update: shutil.copyfile(output_filename, expected_filename) shutil.copyfile(dot_output_filename, dot_expected_filename) shutil.copyfile(score_output_filename, scores_filename) shutil.copyfile(summary_output_filename, summary_filename) print(" Updated expected outputs") if self._compare_files(summary_filename, summary_output_filename): self._failed("summary changed") did_fail = True if self._compare_files(dot_expected_filename, dot_output_filename): self._failed("dot-file summary changed") did_fail = True else: remove_files([dot_output_filename]) if self._compare_files(expected_filename, output_filename): self._failed("xml-file changed") did_fail = True if self._compare_files(scores_filename, score_output_filename): self._failed("scores-file changed") did_fail = True remove_files([output_filename, dot_output_filename, score_output_filename, summary_output_filename]) if did_fail: self.num_failed += 1 return ExitStatus.ERROR return ExitStatus.OK def run_all(self): """Run all defined tests.""" if platform.architecture()[0] == "64bit": TESTS.extend(TESTS_64BIT) for group in TESTS: for test in group.tests: self.run_one(test.name, test.desc, test.args) print() def _print_summary(self): """Print a summary of parameters for this test run.""" print(f"Test home is:\t{self.test_home}") print(f"Test binary is:\t{self.args.binary}") if 'PCMK_schema_directory' in os.environ: print(f"Schema 
home is:\t{os.environ['PCMK_schema_directory']}") if self.valgrind_args: print("Activating memory testing with valgrind") print() def _test_results(self): """Report test results.""" if self.num_failed == 0: shutil.rmtree(self.failed_dir) return ExitStatus.OK if self._file_missing(self.failed_filename): self._error(f"{self.num_failed} (of {self.num_tests}) tests failed (no diff results)") if os.path.isfile(self.failed_filename): shutil.rmtree(self.failed_dir) elif self.args.verbose: self._error(f"Results of {self.num_failed} failed tests (out of {self.num_tests}):") cat(self.failed_filename) else: self._error(f"Results of {self.num_failed} failed tests (out of {self.num_tests}) " f"are in {self.failed_filename}") self._error("Use -V to display them after running the tests") return ExitStatus.ERROR def find_test(self, name): """Return the SchedulerTest object with the given name.""" if platform.architecture()[0] == "64bit": TESTS.extend(TESTS_64BIT) for group in TESTS: for test in group.tests: if test.name == name: return test return None def run(self): """Run test(s) as specified.""" # Check for pre-existing core so we don't think it's from us if os.path.exists("core"): self._failed(f"Can't run with core already present in {self.test_home}") return ExitStatus.OSFILE self._print_summary() # Zero out the error log # pylint: disable=consider-using-with self.failed_file = io.open(self.failed_filename, "wt", encoding="utf-8") if self.args.run is None: print(f"Performing the following tests from {self.args.io_dir}") print() self.run_all() print() self.failed_file.close() rc = self._test_results() else: # Find the test we were asked to run test = self.find_test(self.args.run) if test is None: print(f"No test named {self.args.run}") return ExitStatus.INVALID_PARAM # If no arguments were given on the command line, default to the ones # contained in the test if self.single_test_args: args = self.single_test_args else: args = test.args rc = self.run_one(test.name, test.desc, 
# Print a one-line result for a test suite on stdout.
#
# $1: how many errors were seen ("0" means success; anything else, including
#     free-form strings such as "at least 2^3", is reported as failure)
# $2: what was tested
# $3: optional prefix, printed followed by ": " when non-empty
emit_result() {
    local howmany=${1:?}
    local subject=${2:?}
    local prefix=${3-}

    if [ -n "$prefix" ]; then
        prefix="$prefix: "
    fi

    if [ "$howmany" = "0" ]; then
        printf "%s%s finished OK\n" "$prefix" "$subject"
    else
        # Pass the count as an argument, never as part of the format string,
        # so that a '%' or backslash in it is printed literally
        printf "%s%s encountered %s errors\n" "$prefix" "$subject" "$howmany"
    fi
}
If this fails, # libxml2 logs a "module error: failed to open ..." message. # # This appears to be fixed in libxml2 v2.13 with commit ecb4c9fb. sed "/module error/d" "$1" > "$1.new" mv -- "$1.new" "$1" } test_runner_upgrade_one() { local source=${1:?} local input=${2:?} local transform=${3:?} local mode=${4:?} # extra modes wrt. "referential" outcome, see below local transform_num="${transform##*-}" transform_num="${transform_num%.xsl}" local source_dir=$(dirname "$source") local ref_dir="${source_dir/%xml/ref}" local ref_err_dir="${source_dir/%xml/ref.err}" local source_basename=$(basename "$source") local ref_basename="${source_basename%.*}.ref-$transform_num" local ref_err_basename="${source_basename%.*}.ref.err-$transform_num" local ref="$ref_dir/$ref_basename" local ref_err="$ref_err_dir/$ref_err_basename" local target="${ref/.ref/.up}" local target_err="${ref_err/.ref.err/.up.err}" local proc_rc=0 local diff_rc=0 local answer="" if ! [ "$((mode & (1 << 0)))" -ne 0 ] && ! [ -f "$ref_err" ]; then ref_err="/dev/null" fi $XSLT_PROCESSOR "$transform" "$input" > "$target" 2> "$target_err" \ || proc_rc=$? cleanup_module_error "$target_err" if [ "$proc_rc" -ne 0 ]; then echo "$target_err" return "$proc_rc" fi if [ "$mode" -ne 0 ]; then if [ "$((mode & (1 << 0)))" -ne 0 ]; then cp -a "$target" "$ref" cp -a "$target_err" "$ref_err" fi if [ "$((mode & (1 << 1)))" -ne 0 ]; then { $DIFF "$input" "$ref" && printf '\n(files match)\n'; } \ | $DIFF_PAGER >&2 if [ $? -ne 0 ]; then printf "\npager failure\n" >&2 return 1 fi printf '\nIs comparison OK? ' >&2 if read answer &2; return 1;; esac else return 1 fi fi elif [ -f "$ref" ] && [ -e "$ref_err" ]; then _output=$(cat "$ref") echo "$_output" | $DIFF - "$target" >&2 || diff_rc=$? if [ "$diff_rc" -eq 0 ]; then $DIFF "$ref_err" "$target_err" >&2 || diff_rc=$? 
# Validate a file against a RELAX NG schema using $RNG_VALIDATOR.
#
# The validator is first run with stderr discarded; only if that run fails is
# it run a second time with stderr intact so the diagnostics become visible.
#
# $1: schema file
# $2: file to validate
# Returns 0 if the first run succeeds, otherwise the status of the second run.
test_runner_validate() {
    local rng_file=${1:?}
    local xml_file=${2:?}

    # Quiet attempt; success means there is nothing to report
    $RNG_VALIDATOR "$rng_file" "$xml_file" 2>/dev/null && return 0

    # Noisy attempt, whose exit status becomes ours
    $RNG_VALIDATOR "$rng_file" "$xml_file"
}
# Run (or clean up after, or explain) the 2.10 -> 3.0 upgrade test suite.
#
# Arguments are passed through to test_runner, except:
#   -C  only clean up byproducts of the selected tests (test_cleaner)
#   -X  only show explanatory details about the upgrade (test_explanation)
#
# stdin: granular test specification(s), one per line, if any. Each is used
#        as a file name prefix (a trailing ".xml" and/or "*" is ignored).
#        With no specifications, every *.xml file in the suite is selected.
test2to3() {
    local spec=""
    local -a pattern=()

    # Collect the filter as an array of find arguments. Keeping each
    # "<spec>*.xml" as a single quoted word means the shell never expands it
    # against the current directory and never splits it on whitespace.
    while read spec; do
        spec=${spec%.xml}
        spec=${spec%\*}
        pattern+=(-name "${spec}*.xml" -o)
    done

    if [ "${#pattern[@]}" -gt 0 ]; then
        # Drop the trailing -o, then group the alternatives
        unset "pattern[$((${#pattern[@]} - 1))]"
        pattern=('(' "${pattern[@]}" ')')
    fi

    # The ${var+...} form keeps an empty array from tripping "set -u" on
    # older bash versions
    find "$suites_dir/test-2/xml" -name xml -o -type d -prune \
        -o -name '*.xml' ${pattern[@]+"${pattern[@]}"} -print \
        | env LC_ALL=C sort \
        | { case " $* " in
            *\ -C\ *) test_cleaner;;
            *\ -X\ *) test_explanation -o=2.10;;
            *) test_runner -o=2.10 -t=3.0 "$@" || return $?;;
        esac; }
}
drives a test selection test_suite() { local pass="" local select="" local select_full="" local spec="" local _test="" local global_ret=0 local ret=0 local test_spec="" local test_specs="" local test_full="" while [ $# -gt 0 ]; do case "$1" in -) printf '%s\n' 'waiting for tests specified at stdin...'; while read spec; do select="${spec}@$1" done;; -*) pass="$pass $1";; *) select_full="${select_full}@$1" select="${select}@${1%%/*}";; esac shift done # select contains a '@'-delimited list of test suite names from CLI select="${select}@" # select_full contains a '@'-delimited list of test names select_full="${select_full}@" for _test in ${tests}; do while true; do case "$select" in *@${_test}@*) # A known test suite _test was found in the list of # requested test suites select. Strip it out of select. # # The purpose of this seems to be to prevent the later # select_full loop from selecting specific tests from this # suite, if the user also requested the entire suite. test_specs="${select%%@${_test}@*}@${select#*@${_test}@}" if [ "$test_specs" = "@" ]; then select= # nothing left else select="$test_specs" fi continue ;; @) case "$_test" in test*) break;; esac # filter ;; esac if [ -n "$test_specs" ]; then break fi continue 2 # move on to matching with next local test done test_specs= while true; do case "$select_full" in *@${_test}/*) # A test was requested from a known test suite. This does # not mean the requested test actually exists, but rather # that it was requested as the form "/...". 
# Print the usage message on stdout.
#
# NOTE: big letters are dedicated for per-test-set behaviour,
#       small ones for generic/global behaviour
#
# The format string has exactly one %s per message line below; keep the two
# in sync when adding or removing an option, otherwise printf pads the output
# with an empty trailing line (too few arguments) or repeats the whole format
# (too many).
usage() {
    printf \
        '%s\n%s\n %s\n %s\n %s\n %s\n %s\n %s\n %s\n' \
        "usage: $0 [-{C,D,G,X}]* \\" \
        " [-|{${tests## }}*]" \
        "- when no suites (arguments) provided, \"test*\" ones get used" \
        "- with '-' suite specification the actual ones grabbed on stdin" \
        "- use '-C' to only cleanup ephemeral byproducts" \
        "- use '-D' to review originals vs. \"referential\" outcomes" \
        "- use '-G' to generate \"referential\" outcomes" \
        "- use '-X' to show explanatory details about the upgrade" \
        "- test specification can be granular, e.g. 'test2to3/022'"
}
if [ "$bailout" -eq 0 ]; then test_suite -C $pass >/dev/null || true fi if [ "$ret" = 0 ]; then emit_result "$ret" "Overall suite" else emit_result "at least 2^$((ret - 1))" "Overall suite" fi return "$ret" } main "$@" + +# vim: set filetype=sh: diff --git a/cts/cts.in b/cts/cts.in index 24339aac73..20dcb1554f 100755 --- a/cts/cts.in +++ b/cts/cts.in @@ -1,404 +1,406 @@ #!@BASH_PATH@ # # Copyright 2012-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # # e.g. /etc/sysconfig or /etc/default CONFIG_DIR=@CONFIGDIR@ cts_root=`dirname $0` logfile=0 summary=0 verbose=0 watch=0 saved=0 tests="" install=0 clean=0 kill=0 run=0 boot=0 target=rhel-7 cmd="" trace="" custom_log="" patterns="-e CTS:" function sed_in_place_remotely() { cluster-helper -g $cluster_name -- cp -p "\"$1\"" "\"$1.sed\"" \&\& sed -e "\"$2\"" "\"$1\"" \> "\"$1.sed\"" \&\& mv "\"$1.sed\"" "\"$1\"" } helpmsg=$(cat </dev/null if [ $? != 0 ]; then echo $0 needs the cluster-helper script to be in your path exit 1 fi which cluster-clean &>/dev/null if [ $? 
!= 0 ]; then echo $0 needs the cluster-clean script to be in your path exit 1 fi if [ "x$cluster_name" = x ] || [ "x$cluster_name" = xpick ]; then clusters=`ls -1 ~/.dsh/group/[a-z]+[0-9] | sed s/.*group.// | tr '\n' ' ' ` echo "custom) interactively define a cluster" for i in $clusters; do echo "$i) `cluster-helper --list short -g $i`" done read -p "Choose a cluster [custom]: " cluster_name echo fi if [ -z $cluster_name ]; then cluster_name=custom fi case $cluster_name in custom) read -p "Cluster name: " cluster_name read -p "Cluster hosts: " cluster_hosts read -p "Cluster log file: " cluster_log cluster-helper add -g "$cluster_name" -w "$cluster_hosts" ;; *) cluster_hosts=`cluster-helper --list short -g $cluster_name` cluster_log=~/cluster-$cluster_name.log ;; esac # NOTES ABOUT THESE AWESOME REGULAR EXPRESSIONS: # # * We can't assume GNU sed. Unfortunately, + and * are GNU extensions. Thus, # we have to use {1,} for + and {0,} for *. # * You don't need to add an extra set of escaped quotes around the sed expression # arguments here - sed_in_place_remotely will do that for you. # * Only literal quotes need the triple backslashes. All other special characters # are fine with just a single one. # * sed needs a LOT of characters escaped - \, {, }, (, ), and | at least. if [ x$cmd != x ]; then config="${CONFIG_DIR}/pacemaker" case $cmd in trace-ls|tls) cluster-helper -g $cluster_name -- grep "^[[:space:]]*PCMK_trace_functions" $config ;; trace-add|tadd) echo "Adding $trace to PCMK_trace_functions" # Note that this only works if there's already a PCMK_trace_functions line. # If there isn't one, create it with trace-set first. # # Match optional whitespace; then PCMK_trace_functions; then an equals # surrounded by optional whitespace; then an optional quote; then whatever # else (presumably, this is the list of previously traced functions with # an optional trailing quote). 
Replace the entire line with # PCMK_trace_functions=, sed_in_place_remotely "$config" "s/^[ \t]\{0,\}PCMK_trace_functions[ \t]\{0,\}=[ \t]\{0,\}\(\\\"\{0,1\}\)\(.\{1,\}\)/PCMK_trace_functions=\1$trace,\2/" ;; trace-rm|trm) echo "Removing $trace from PCMK_trace_functions" # A bunch of simple regexes are easier to follow than one giant one. # Look for $trace in the following places on any line containing # PCMK_trace_functions near the beginning: # # (1) At the start of a list - # Match one of a leading quote, or an equals followed by optional # whitespace; then $trace; then a comma. Replace $trace with whatever # came before it. # (2) In the middle of a list - # Match a comma; then $trace; then a comma. Replace $trace with a # single comma. # (3) At the end of a list - # Match a comma; then $trace; then one of a quote, whitespace, or # the EOL. Replace $trace with whatever came after it. # (4) All by itself - # Match one of a leading quote, whitespace, or equals followed by # optional whitespace; then $trace; then one of a trailing quote, # whitespace, or the EOL. Replace $trace with whatever came before # and after it. sed_in_place_remotely "$config" "/^[ \t]\{0,\}PCMK_trace_functions/ { \ s/\(\\\"\|=\|[ \t]\{1,\}\)$trace,/\1/ ; \ s/,$trace,/,/ ; \ s/,$trace\(\\\"\|[ \t]\{1,\}\|$\)/\1/ ; \ s/\(\\\"\|[ \t]\{1,\}\|=[ \t]\{0,\}\)$trace\(\\\"\|[ \t]\{1,\}\|$\)/\1\2/ }" ;; trace-set|tset) echo "Setting PCMK_trace_functions to '$trace'" # Do this in two separate sed commands: # # (1) Unconditionally remove any existing PCMK_trace_functions= lines. # (2) Add a new line with $trace after the example line, which therefore # must exist. Note that GNU sed would support "a PCMK_trace_functions=$trace", # but that's an extension. For all other seds, we have to put the # command and the text on separate lines. 
sed_in_place_remotely "$config" "/^[ \t]*PCMK_trace_functions/ d ; /^# Example: PCMK_trace_functions/ a\\\ PCMK_trace_functions=\\\"$trace\\\"" ;; esac exit 0 fi if [ $run = 1 ]; then install=1 clean=1 fi if [ $clean = 1 ]; then rm -f $cluster_log cluster-clean -g $cluster_name --kill elif [ $kill = 1 ]; then cluster-clean -g $cluster_name --kill-only exit 0 fi if [ $install = 1 ]; then cluster-helper -g $cluster_name -- yum install -y pacemaker pacemaker-debuginfo pacemaker-cts libqb libqb-debuginfo fi if [ $boot = 1 ]; then $cts_root/cts-lab -r -c -g $cluster_name --boot rc=$? if [ $rc = 0 ]; then echo "The cluster is ready..." fi exit $rc elif [ $run = 1 ]; then $cts_root/cts-lab -r -c -g $cluster_name 500 "$@" exit $? elif [ $clean = 1 ]; then exit 0 fi screen -ls | grep cts-$cluster_name &>/dev/null active=$? if [ ! -z $custom_log ]; then cluster_log=$custom_log fi if [ "x$tests" != x ] && [ "x$tests" != "x " ]; then for t in $tests; do echo "crm_report --cts-log $cluster_log -d -T $t" crm_report --cts-log $cluster_log -d -T $t done elif [ $logfile = 1 ]; then echo $cluster_log elif [ $summary = 1 ]; then files=$cluster_log if [ $saved = 1 ]; then files=`ls -1tr ~/CTS-*/cluster-log.txt` fi for f in $files; do echo $f case $verbose in 0) cat -n $f | grep $patterns | grep -v "CTS: debug:" ;; 1) cat -n $f | grep $patterns | grep -v "CTS:.* cmd:" ;; *) cat -n $f | grep $patterns ;; esac echo "" done elif [ $watch = 1 ]; then case $verbose in 0) tail -F $cluster_log | grep $patterns | grep -v "CTS: debug:" ;; 1) tail -F $cluster_log | grep $patterns | grep -v "CTS:.* cmd:" ;; *) tail -F $cluster_log | grep $patterns ;; esac elif [ $active = 0 ]; then screen -x cts-$cluster_name else touch $cluster_log export cluster_name cluster_hosts cluster_log screen -S cts-$cluster_name bash fi + +# vim: set filetype=sh: diff --git a/cts/support/LSBDummy.in b/cts/support/LSBDummy.in index 19f99207c2..ea34ea1e86 100644 --- a/cts/support/LSBDummy.in +++ b/cts/support/LSBDummy.in 
@@ -1,85 +1,87 @@ #!/bin/sh # # # Dummy LSB RA. Does nothing but touch and remove a state file # # Copyright 2006-2024 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. ####################################################################### # Initialization: desc="Dummy LSB service" . @PCMK_OCF_ROOT@/resource.d/heartbeat/.ocf-directories : ${HA_VARRUN=/tmp} # Backup in case .ocf-directories doesn't exist ####################################################################### success() { printf "[ OK ]\r" } failure() { printf "[FAILED]\r" } dummy_usage() { cat <= offset: logfile.seek(offset) else: print("%sFile truncated from %d to %d" % (prefix, offset, newsize)) if (newsize * 1.05) < offset: logfile.seek(0) # Don't block when we reach EOF fcntl.fcntl(logfile.fileno(), fcntl.F_SETFL, os.O_NONBLOCK) count = 0 while True: if logfile.tell() >= newsize: break if limit and count >= limit: break line = logfile.readline() if not line: break print(line.strip()) count += 1 print("%sLast read: %d, limit=%d, count=%d" % (prefix, logfile.tell(), limit, count)) def build_options(): """Handle command line arguments.""" # Create the top-level parser parser = argparse.ArgumentParser(description="Support tool for CTS") subparsers = parser.add_subparsers(dest="subparser_name") # Create the parser for the "install" command subparsers.add_parser("install", help="Install support files") # Create the parser for the "uninstall" command subparsers.add_parser("uninstall", help="Remove support files") # Create the parser for the "watch" command watch_parser = subparsers.add_parser("watch", help="Remote log watcher") watch_parser.add_argument("-f", "--filename", default="/var/log/messages", help="File to watch") watch_parser.add_argument("-l", "--limit", type=int, default=0, help="Maximum number of lines to read") 
watch_parser.add_argument("-o", "--offset", default=0, help="Which line number to start reading from") watch_parser.add_argument("-p", "--prefix", default="", help="String to add to the beginning of each line") args = parser.parse_args() return args if __name__ == "__main__": opts = build_options() if os.geteuid() != 0: print("This command must be run as root") sys.exit(ExitStatus.ERROR) # If the install directory doesn't exist, assume we're in a build directory. data_dir = "%s/pacemaker/tests/cts" % BuildOptions.DATA_DIR if not os.path.exists(data_dir): data_dir = "%s/pacemaker/tests/cts" % BuildOptions._BUILD_DIR if opts.subparser_name == "install": cmd_install(data_dir) if opts.subparser_name == "uninstall": cmd_uninstall() if opts.subparser_name == "watch": cmd_watch(opts.filename, opts.limit, opts.offset, opts.prefix) -# vim: set filetype=python expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=120: +# vim: set filetype=python: diff --git a/cts/support/fence_dummy.in b/cts/support/fence_dummy.in index 842fe47504..c30dc2aafb 100644 --- a/cts/support/fence_dummy.in +++ b/cts/support/fence_dummy.in @@ -1,509 +1,511 @@ #!@PYTHON@ """Dummy fence agent for testing.""" __copyright__ = "Copyright 2012-2025 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import re import sys import time import random import atexit import getopt import contextlib AGENT_VERSION = "4.1.0" OCF_VERSION = "1.0" SHORT_DESC = "Dummy fence agent" LONG_DESC = """fence_dummy is a fake fencing agent which reports success based on its mode (pass|fail|random) without doing anything.""" # Short options used: difhmnoqsvBDHMRUV ALL_OPT = { "quiet": { "getopt": "q", "help": "", "order": 50 }, "verbose": { "getopt": "v", "longopt": "verbose", "help": "-v, --verbose Verbose mode", "required": "0", "shortdesc": "Verbose mode", "order": 51 }, "debug": { "getopt": "D:", "longopt": "debug-file", "help": 
"-D, --debug-file=[debugfile] Debugging to output file", "required": "0", "shortdesc": "Write debug information to given file", "order": 52 }, "version": { "getopt": "V", "longopt": "version", "help": "-V, --version Display version information and exit", "required": "0", "shortdesc": "Display version information and exit", "order": 53 }, "help": { "getopt": "h", "longopt": "help", "help": "-h, --help Display this help and exit", "required": "0", "shortdesc": "Display help and exit", "order": 54 }, "action": { "getopt": "o:", "longopt": "action", "help": "-o, --action=[action] Action: validate-all, status, list, reboot (default), off or on", "required": "1", "shortdesc": "Fencing Action", "default": "reboot", "order": 1 }, "nodename": { "getopt": "N:", "longopt": "nodename", "help": "-N, --nodename Node name of fence target (ignored)", "required": "0", "shortdesc": "The node name of fence target (ignored)", "order": 2 }, "mode": { "getopt": "M:", "longopt": "mode", "required": "0", "help": "-M, --mode=(pass|fail|random) Exit status to return for non-monitor operations", "shortdesc": "Whether fence operations should always pass, always fail, or fail at random", "order": 3 }, "monitor_mode": { "getopt": "m:", "longopt": "monitor_mode", "help": "-m, --monitor_mode=(pass|fail|random) Exit status to return for monitor operations", "required": "0", "shortdesc": "Whether monitor operations should always pass, always fail, or fail at random", "order": 3 }, "random_sleep_range": { "getopt": "R:", "required": "0", "longopt": "random_sleep_range", "help": "-R, --random_sleep_range=[seconds] Sleep between 1 and [seconds] before returning", "shortdesc": "Wait randomly between 1 and [seconds]", "order": 3 }, "mock_dynamic_hosts": { "getopt": "H:", "longopt": "mock_dynamic_hosts", "help": "-H, --mock_dynamic_hosts=[list] What to return when dynamically queried for possible targets", "required": "0", "shortdesc": "A list of hosts we can fence", "order": 3 }, "delay": { "getopt": 
"f:", "longopt": "delay", "help": "-f, --delay [seconds] Wait X seconds before fencing is started", "required": "0", "shortdesc": "Wait X seconds before fencing is started", "default": "0", "order": 3 }, "monitor_delay": { "getopt": "d:", "longopt": "monitor_delay", "help": "-d, --monitor_delay [seconds] Wait X seconds before monitor completes", "required": "0", "shortdesc": "Wait X seconds before monitor completes", "default": "0", "order": 3 }, "off_delay": { "getopt": "F:", "longopt": "off_delay", "help": "-F, --off_delay [seconds] Wait additional X seconds before off action", "required": "0", "shortdesc": "Wait additional X seconds before off action", "default": "0", "order": 3 }, "plug": { "getopt": "n:", "longopt": "plug", "help": "-n, --plug=[id] Physical plug number on device (ignored)", "required": "1", "shortdesc": "Ignored", "order": 4 }, "port": { "getopt": "n:", "longopt": "plug", "help": "-n, --plug=[id] Physical plug number on device (ignored)", "required": "1", "shortdesc": "Ignored", "order": 4 }, "switch": { "getopt": "s:", "longopt": "switch", "help": "-s, --switch=[id] Physical switch number on device (ignored)", "required": "0", "shortdesc": "Ignored", "order": 4 }, "uuid": { "getopt": "U:", "longopt": "uuid", "help": "-U, --uuid UUID of the VM to fence (ignored)", "required": "0", "shortdesc": "Ignored", "order": 4 } } def agent(): """Return name this file was run as.""" return os.path.basename(sys.argv[0]) def fail_usage(message): """Print a usage message and exit.""" sys.exit("%s\nPlease use '-h' for usage" % message) def show_docs(options, auto_unfence, no_reboot, no_on): """Handle informational options (display info and exit).""" device_opt = options["device_opt"] if "-h" in options: usage(device_opt) sys.exit(0) if "-o" in options and options["-o"].lower() == "metadata": f = "%s.fail" % __file__ if not os.path.exists(f): metadata(device_opt, options, auto_unfence, no_reboot, no_on) else: os.remove(f) sys.exit(0) if "-V" in options: 
print(AGENT_VERSION) sys.exit(0) def sorted_options(avail_opt): """Return a list of all options, in their internally specified order.""" sorted_list = [(key, ALL_OPT[key]) for key in avail_opt] sorted_list.sort(key=lambda x: x[1]["order"]) return sorted_list def usage(avail_opt): """Print a usage message.""" print("Usage:") print("\t%s [options]" % agent()) print("Options:") for (_, value) in sorted_options(avail_opt): if len(value["help"]) != 0: print(" %s" % value["help"]) def metadata(avail_opt, options, auto_unfence, no_reboot, no_on): """Print agent metadata.""" # This log is just for testing handling of stderr output print("asked for fence_dummy metadata", file=sys.stderr) print(""" %s %s """ % (agent(), SHORT_DESC, AGENT_VERSION, OCF_VERSION, LONG_DESC)) for (option, _) in sorted_options(avail_opt): if "shortdesc" not in ALL_OPT[option]: continue print(' ' % (option, ALL_OPT[option]["required"])) default = "" default_name_arg = "-%s" % ALL_OPT[option]["getopt"][:-1] default_name_no_arg = "-%s" % ALL_OPT[option]["getopt"] if "default" in ALL_OPT[option]: default = 'default="%s"' % ALL_OPT[option]["default"] elif options.get(default_name_arg) is not None: try: default = 'default="%s"' % options[default_name_arg] except TypeError: # @todo/@note: Currently there is no clean way how to handle lists # we can create a string from it but we can't set it on command line default = 'default="%s"' % str(options[default_name_arg]) elif default_name_no_arg in options: default = 'default="true"' mixed = ALL_OPT[option]["help"] # split it between option and help text res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed) if res is not None: mixed = res.group(1) mixed = mixed.replace("<", "<").replace(">", ">") print(' ' % mixed) if ALL_OPT[option]["getopt"].count(":") > 0: print(' ' % default) else: print(' ' % default) print(' %s' % ALL_OPT[option]["shortdesc"]) print(' ') print(' \n ') if not no_on: if auto_unfence: attr_name = 'automatic' else: attr_name 
= 'on_target' print(' ' % attr_name) print(' ') if not no_reboot: print(' ') print(' ') print(' ') print(' ') print(' ') print(' ') print('') def option_longopt(option): """Return the getopt-compatible long-option name of the given option.""" if ALL_OPT[option]["getopt"].endswith(":"): return ALL_OPT[option]["longopt"] + "=" return ALL_OPT[option]["longopt"] def opts_from_command_line(argv, avail_opt): """Read options from command-line arguments.""" # Prepare list of options for getopt getopt_string = "" longopt_list = [] for k in avail_opt: if k in ALL_OPT: getopt_string += ALL_OPT[k]["getopt"] else: fail_usage("Parse error: unknown option '%s'" % k) if k in ALL_OPT and "longopt" in ALL_OPT[k]: longopt_list.append(option_longopt(k)) try: (opt, _) = getopt.gnu_getopt(argv, getopt_string, longopt_list) except getopt.GetoptError as error: fail_usage("Parse error: %s" % error.msg) # Transform longopt to short one which are used in fencing agents old_opt = opt opt = {} for old_option in dict(old_opt): if old_option.startswith("--"): for rec in ALL_OPT.values(): if rec.get("longopt") is None: continue long = "--%s" % rec["longopt"] if long == old_option: short = "-%s" % rec["getopt"][0] opt[short] = dict(old_opt)[old_option] else: opt[old_option] = dict(old_opt)[old_option] # Compatibility Layer (with what? probably not needed for fence_dummy) new_opt = dict(opt) if "-T" in new_opt: new_opt["-o"] = "status" if "-n" in new_opt: new_opt["-m"] = new_opt["-n"] opt = new_opt return opt def opts_from_stdin(avail_opt): """Read options from standard input.""" opt = {} name = "" for line in sys.stdin.readlines(): line = line.strip() if line.startswith("#") or (len(line) == 0): continue (name, value) = (line + "=").split("=", 1) value = value[:-1] # Compatibility Layer (with what? 
probably not needed for fence_dummy) if name == "option": name = "action" if name not in avail_opt: print("Parse error: Ignoring unknown option '%s'" % line, file=sys.stderr) continue if ALL_OPT[name]["getopt"].endswith(":"): short = "-%s" % ALL_OPT[name]["getopt"][0] opt[short] = value elif value.lower() in ["1", "yes", "on", "true"]: short = "-%s" % ALL_OPT[name]["getopt"] opt[short] = "1" return opt def process_input(avail_opt): """Set standard environment variables, and parse all options.""" # Set standard environment os.putenv("LANG", "C") os.putenv("LC_ALL", "C") # Read options from command line or standard input if len(sys.argv) > 1: return opts_from_command_line(sys.argv[1:], avail_opt) return opts_from_stdin(avail_opt) def atexit_handler(): """Close stdout on exit.""" try: sys.stdout.close() os.close(1) except IOError: sys.exit("%s failed to close standard output" % agent()) def success_mode(options, option, default_value): """Return exit code specified by option.""" if option in options: test_value = options[option] else: test_value = default_value if test_value == "pass": exitcode = 0 elif test_value == "fail": exitcode = 1 else: exitcode = random.randint(0, 1) return exitcode def write_options(options): """Write out all options to debug file.""" with contextlib.suppress(IOError): with io.open(options["-D"], "at", encoding="utf-8") as debugfile: debugfile.write("### %s ###\n" % time.strftime("%Y-%m-%d %H:%M:%S")) for option in sorted(options): debugfile.write("%s=%s\n" % (option, options[option])) debugfile.write("###\n") def main(): """Run the dummy fencing agent.""" auto_unfence = False no_reboot = False no_on = False # Meta-data can't take parameters, so we simulate different meta-data # behavior based on the executable name (which can be a symbolic link). 
if sys.argv[0].endswith("_auto_unfence"): auto_unfence = True elif sys.argv[0].endswith("_no_reboot"): no_reboot = True elif sys.argv[0].endswith("_no_on"): no_on = True device_opt = ALL_OPT.keys() # Defaults for fence agent atexit.register(atexit_handler) options = process_input(device_opt) options["device_opt"] = device_opt show_docs(options, auto_unfence, no_reboot, no_on) action = options.get("-o", "reboot") # dump input to file if "-D" in options and action != "validate-all": write_options(options) if "-f" in options and action != "validate-all": val = int(options["-f"]) print("delay sleep for %d seconds" % val, file=sys.stderr) time.sleep(val) # random sleep for testing if "-R" in options and action != "validate-all": val = int(options["-R"]) ran = random.randint(1, val) print("random sleep for %d seconds" % ran, file=sys.stderr) time.sleep(ran) if action == "monitor": if "-d" in options: time.sleep(int(options["-d"])) exitcode = success_mode(options, "-m", "pass") elif action == "list": print("fence_dummy action (list) called", file=sys.stderr) if "-H" in options: print(options["-H"]) exitcode = 0 else: print("dynamic hostlist requires mock_dynamic_hosts to be set", file=sys.stderr) exitcode = 1 elif action == "validate-all": if "-f" in options: val = int(options["-f"]) if val > 10: exitcode = 1 else: exitcode = 0 else: exitcode = 1 elif action == "off": if "-F" in options: time.sleep(int(options["-F"])) exitcode = success_mode(options, "-M", "random") else: exitcode = success_mode(options, "-M", "random") # Ensure we generate some error output on failure exit. 
if exitcode == 1: print("simulated %s failure" % action, file=sys.stderr) sys.exit(exitcode) if __name__ == "__main__": main() + +# vim: set filetype=python: diff --git a/cts/support/pacemaker-cts-dummyd.in b/cts/support/pacemaker-cts-dummyd.in index 53512484fd..ed1bc63320 100644 --- a/cts/support/pacemaker-cts-dummyd.in +++ b/cts/support/pacemaker-cts-dummyd.in @@ -1,62 +1,64 @@ #!@PYTHON@ """Slow-starting idle daemon that notifies systemd when it starts.""" # pylint doesn't like the module name "pacemaker-cts-dummyd" which is an invalid complaint # for this file but probably something we want to continue warning about elsewhere # pylint: disable=invalid-name __copyright__ = "Copyright 2014-2024 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" from functools import partial import signal import subprocess import sys import time have_systemd_daemon = True try: import systemd.daemon except ImportError: have_systemd_daemon = False def parse_args(): """Return the delay given on the command line, if any.""" delay = None # Lone argument is a number of seconds to delay start and stop if len(sys.argv) > 0: try: delay = float(sys.argv[1]) except ValueError: delay = None return delay def twiddle(delay): """Sleep the given number of seconds.""" if delay is not None: time.sleep(delay) def bye(_signum, _frame, delay=None): """SIGTERM signal handler.""" twiddle(delay) sys.exit(0) if __name__ == "__main__": d = parse_args() signal.signal(signal.SIGTERM, partial(bye, delay=d)) twiddle(d) if have_systemd_daemon: systemd.daemon.notify("READY=1") else: subprocess.call(["systemd-notify", "READY=1"]) # This isn't a "proper" daemon, but that would be overkill for testing purposes while True: time.sleep(600.0) + +# vim: set filetype=python: diff --git a/cts/support/pacemaker-cts-dummyd@.service.in b/cts/support/pacemaker-cts-dummyd@.service.in index 6531e46fa7..2b7356a1ec 100644 --- 
a/cts/support/pacemaker-cts-dummyd@.service.in +++ b/cts/support/pacemaker-cts-dummyd@.service.in @@ -1,9 +1,11 @@ [Unit] Description=Dummy daemon for Pacemaker CTS testing [Service] Type=notify ExecStart=@CRM_DAEMON_DIR@/pacemaker-cts-dummyd %i [Install] DefaultInstance=0 + +# vim: set filetype=systemd: diff --git a/daemons/execd/pacemaker_remote.in b/daemons/execd/pacemaker_remote.in index 2096c5f39c..01f3ec9fee 100644 --- a/daemons/execd/pacemaker_remote.in +++ b/daemons/execd/pacemaker_remote.in @@ -1,176 +1,178 @@ #!@BASH_PATH@ # Authors: # Andrew Beekhof # # License: Revised BSD # chkconfig: - 99 01 # description: Pacemaker Cluster Manager # processname: pacemaker-remoted # ### BEGIN INIT INFO # Provides: pacemaker_remote # Required-Start: $network $remote_fs # Should-Start: $syslog # Required-Stop: $network $remote_fs # Default-Start: # Default-Stop: # Short-Description: Manage the executor for Pacemaker Remote nodes # Description: Manage the executor for Pacemaker Remote nodes ### END INIT INFO desc="Pacemaker Remote Executor" prog="pacemaker-remoted" # set secure PATH PATH="/sbin:/bin:/usr/sbin:/usr/bin:@sbindir@" checkrc() { if [ $? = 0 ]; then success else failure fi } success() { echo -ne "[ OK ]\r" } failure() { echo -ne "[FAILED]\r" } status() { pid=$(pidof $1 2>/dev/null) local rtrn=$? if [ $rtrn -ne 0 ]; then echo "$1 is stopped" if [ -f "@localstatedir@/run/$prog.pid" ]; then rtrn=1 else rtrn=3 fi else echo "$1 (pid $pid) is running..." fi return $rtrn } if [ -d @CONFIGDIR@ ]; then [ -f @INITDIR@/functions ] && . @INITDIR@/functions set -a [ -f @CONFIGDIR@/pacemaker ] && . @CONFIGDIR@/pacemaker [ -f @CONFIGDIR@/sbd ] && . @CONFIGDIR@/sbd set +a fi LOCK_DIR="." 
if [ -d "@localstatedir@/lock/subsys" ]; then LOCK_DIR="@localstatedir@/lock/subsys" elif [ -d "@localstatedir@/lock" ]; then LOCK_DIR="@localstatedir@/lock" fi [ -z "$LOCK_FILE" ] && LOCK_FILE="$LOCK_DIR/pacemaker_remote" # Check if there is a valid watchdog-device configured in sbd config if [ x != "x$SBD_WATCHDOG_DEV" -a "/dev/null" != "$SBD_WATCHDOG_DEV" -a -c "$SBD_WATCHDOG_DEV" ]; then # enhance for unavailable chkconfig - don't touch sbd for now if chkconfig --list sbd_remote_helper 2>/dev/null | grep -q ":on"; then SBD_SERVICE=sbd_remote_helper fi fi start() { echo -n "Starting $desc: " # most recent distributions use tmpfs for $@localstatedir@/run # to avoid to clean it up on every boot. # they also assume that init scripts will create # required subdirectories for proper operations mkdir -p "@localstatedir@/run" if status $prog > /dev/null 2>&1; then success else $prog > /dev/null 2>&1 & # Time to connect to corosync and fail sleep 5 if status $prog > /dev/null 2>&1; then touch "$LOCK_FILE" pidof $prog > "@localstatedir@/run/$prog.pid" success else failure rtrn=1 fi fi echo [ "x$SBD_SERVICE" = "x" ] || service $SBD_SERVICE start } stop() { if status $prog > /dev/null 2>&1; then echo -n "Signaling $desc to terminate: " kill -TERM $(pidof $prog) > /dev/null 2>&1 success echo echo -n "Waiting for $desc to unload:" while status $prog > /dev/null 2>&1; do sleep 1 echo -n "." done else echo -n "$desc is already stopped" fi rm -f "$LOCK_FILE" rm -f "@localstatedir@/run/$prog.pid" success echo [ "x$SBD_SERVICE" = "x" ] || service $SBD_SERVICE stop } rtrn=0 case "$1" in start) start ;; restart|reload|force-reload) stop start ;; condrestart|try-restart) if status $prog > /dev/null 2>&1; then stop start rtrn=$? fi ;; status) status $prog rtrn=$? ;; stop) stop rtrn=$? 
;; *) echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}" rtrn=2 ;; esac exit $rtrn + +# vim: set filetype=sh: diff --git a/daemons/execd/pacemaker_remote.service.in b/daemons/execd/pacemaker_remote.service.in index 1e48d1472e..632fa28002 100644 --- a/daemons/execd/pacemaker_remote.service.in +++ b/daemons/execd/pacemaker_remote.service.in @@ -1,52 +1,54 @@ [Unit] Description=Pacemaker Remote executor daemon Documentation=man:pacemaker-remoted Documentation=https://clusterlabs.org/pacemaker/doc/ # See main pacemaker unit file for descriptions of why these are needed After=network.target After=time-sync.target After=dbus.service Wants=dbus.service After=resource-agents-deps.target Wants=resource-agents-deps.target After=syslog.service After=rsyslog.service [Install] Alias=pacemaker-remote.service WantedBy=multi-user.target [Service] Type=simple KillMode=process NotifyAccess=none EnvironmentFile=-@CONFIGDIR@/pacemaker EnvironmentFile=-@CONFIGDIR@/sbd # Not actually success, but fatal failure -- this ensures no respawn SuccessExitStatus=100 ExecStart=@sbindir@/pacemaker-remoted # Systemd v227 and above can limit the number of processes spawned by a # service. That is a bad idea for an HA cluster resource manager, so disable it # by default. The administrator can create a local override if they really want # a limit. If your systemd version does not support TasksMax, and you want to # get rid of the resulting log warnings, comment out this option. TasksMax=infinity # If connected to the cluster and when the service functions properly, it will # wait to exit until the cluster notifies it all resources on the remote node # have been stopped. The default of 30min should cover most typical cluster # configurations, but it may need an increase to adapt to local conditions # (e.g. a large, clustered database could conceivably take longer to stop). 
TimeoutStopSec=30min TimeoutStartSec=30s # Restart options include: no, on-success, on-failure, on-abort or always Restart=on-failure # crm_perror() writes directly to stderr, so ignore it here # to avoid double-logging with the wrong format StandardError=null + +# vim: set filetype=systemd: diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in index 3fd53d9ffb..1809e179c6 100644 --- a/daemons/pacemakerd/pacemaker.service.in +++ b/daemons/pacemakerd/pacemaker.service.in @@ -1,103 +1,105 @@ [Unit] Description=Pacemaker High Availability Cluster Manager Documentation=man:pacemakerd Documentation=https://clusterlabs.org/pacemaker/doc/ # DefaultDependencies takes care of sysinit.target, # basic.target, and shutdown.target # We need networking to bind to a network address. It is recommended not to # use Wants or Requires with network.target, and not to use # network-online.target for server daemons. After=network.target # Time syncs can make the clock jump backward, which messes with logging # and failure timestamps, so wait until it's done. After=time-sync.target # Managing systemd resources requires DBus. After=dbus.service Wants=dbus.service # Some OCF resources may have dependencies that aren't managed by the cluster; # these must be started before Pacemaker and stopped after it. The # resource-agents package provides this target, which lets system administrators # add drop-ins for those dependencies.
After=resource-agents-deps.target Wants=resource-agents-deps.target After=syslog.service After=rsyslog.service After=corosync.service Requires=corosync.service # If Pacemaker respawns repeatedly, give up after this many tries in this time StartLimitBurst=5 StartLimitIntervalSec=25s [Install] WantedBy=multi-user.target [Service] Type=simple KillMode=process NotifyAccess=main EnvironmentFile=-@CONFIGDIR@/pacemaker EnvironmentFile=-@CONFIGDIR@/sbd SuccessExitStatus=100 ExecStart=@sbindir@/pacemakerd # Systemd v227 and above can limit the number of processes spawned by a # service. That is a bad idea for an HA cluster resource manager, so disable it # by default. The administrator can create a local override if they really want # a limit. If your systemd version does not support TasksMax, and you want to # get rid of the resulting log warnings, comment out this option. TasksMax=infinity # If pacemakerd doesn't stop, it's probably waiting on a cluster # resource. Sending -KILL will just get the node fenced SendSIGKILL=no # Systemd's default of respawning a failed service after 100ms is too aggressive RestartSec=1s # If we ever hit the StartLimitInterval/StartLimitBurst limit, and the # admin wants to stop the cluster while pacemakerd is not running, it # might be a good idea to enable the ExecStopPost directive below. # # However, the node will likely end up being fenced as a result, so it's # not enabled by default. # # ExecStopPost=/usr/bin/killall -TERM pacemaker-attrd pacemaker-based \ # pacemaker-controld pacemaker-execd pacemaker-fenced \ # pacemaker-schedulerd # If you want Corosync to stop whenever Pacemaker is stopped, # uncomment the next line too: # # ExecStopPost=/bin/sh -c 'pidof pacemaker-controld || killall -TERM corosync' # Pacemaker will restart along with Corosync if Corosync is stopped while # Pacemaker is running. # In this case, if you always want the node to be fenced (that is, you do not # want Pacemaker to restart), uncomment ExecStopPost below.
# # ExecStopPost=/bin/sh -c 'pidof corosync || \ # /usr/bin/systemctl --no-block stop pacemaker' # When the service functions properly, it will wait to exit until all resources # have been stopped on the local node, and potentially across all nodes that # are shutting down. The default of 30min should cover most typical cluster # configurations, but it may need an increase to adapt to local conditions # (e.g. a large, clustered database could conceivably take longer to stop). TimeoutStopSec=30min TimeoutStartSec=60s # Restart options include: no, on-success, on-failure, on-abort or always Restart=on-failure # crm_perror() writes directly to stderr, so ignore it here # to avoid double-logging with the wrong format StandardError=null + +# vim: set filetype=systemd: diff --git a/maint/bumplibs.in b/maint/bumplibs.in index 915b5227a5..ef71d50f61 100644 --- a/maint/bumplibs.in +++ b/maint/bumplibs.in @@ -1,298 +1,300 @@ #!@BASH_PATH@ # # Copyright 2012-2024 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # # List regular expressions (not globs) that match all of a library's public API # headers. Any files ending in "internal.h" will be excluded from matches. 
declare -A HEADERS HEADERS[cib]="include/crm/cib.*.h include/crm/cib/.*.h" HEADERS[crmcluster]="include/crm/cluster.h include/crm/cluster/.*.h" HEADERS[crmcommon]="include/crm/crm.*.h include/crm/msg_xml.h include/crm/common/.*.h" HEADERS[crmservice]="include/crm/services.*.h" HEADERS[lrmd]="include/crm/lrmd.*.h" HEADERS[pacemaker]="include/pacemaker.*.h" HEADERS[pe_rules]="include/crm/pengine/ru.*.h" HEADERS[pe_status]="include/crm/pengine/.*.h" HEADERS[stonithd]="include/crm/stonith-ng.h include/crm/fencing/.*.h" yesno() { local RESPONSE read -p "$1 " RESPONSE case $(echo "$RESPONSE" | tr '[:upper:]' '[:lower:]') in y|yes|ano|ja|si|oui) return 0 ;; *) return 1 ;; esac } prompt_to_continue() { yesno "Continue?" || exit 0 } sed_in_place() { cp -p "$1" "$1.$$" sed -e "$2" "$1" > "$1.$$" mv "$1.$$" "$1" } find_last_release() { if [ -n "$1" ]; then echo "$1" else git tag -l | grep Pacemaker | grep -v rc | sort -Vr | head -n 1 fi } find_libs() { find lib -name "*.am" -exec grep "lib.*_la_LDFLAGS.*version-info" \{\} \; \ | sed -e 's/lib\(.*\)_la_LDFLAGS.*/\1/' } find_makefile() { find lib -name Makefile.am -exec grep -l "lib${1}_la.*version-info" \{\} \; } find_sources() { local LIB="$1" local AMFILE="$2" local SOURCES # Library makefiles should use "+=" to break up long sources lines rather # than backslashed continuation lines, to allow this script to detect # source files correctly. Warn if that's not the case. if grep "lib${LIB}_la_SOURCES.*\\\\" "$AMFILE" then echo -e "\033[1;35m -- Sources list for lib$LIB is probably truncated! 
--\033[0m" echo "Edit to use '+=' rather than backslashed continuation lines" prompt_to_continue fi SOURCES=$(grep "^lib${LIB}_la_SOURCES" "$AMFILE" \ | sed -e 's/.*=//' -e 's/\\//' -e 's:\.\./gnu/:lib/gnu/:') for SOURCE in $SOURCES; do if echo "$SOURCE" | grep -q "/" then echo "$SOURCE" else echo "$(dirname "$AMFILE")/$SOURCE" fi done } find_headers_as_of() { local TAG local LIB local FILE local PATTERN TAG="$1" LIB="$2" for FILE in $(git ls-tree -r --name-only "$TAG"); do for PATTERN in ${HEADERS[$LIB]}; do if [[ $FILE =~ $PATTERN ]] && [[ ! $FILE =~ internal.h$ ]]; then echo "$FILE" break fi done done } extract_version() { grep "lib${1}_la.*version-info" | sed -e 's/.*version-info\s*\(\S*\)/\1/' } shared_lib_name() { local LIB="$1" local VERSION="$2" echo "lib${LIB}.so.$(echo "$VERSION" | cut -d: -f 1)" } process_lib() { local LIB="$1" local LAST_RELEASE="$2" local AMFILE local SOURCES local HEADERS_LAST local HEADERS_HEAD local HEADERS_DIFF local HEADERS_GONE local HEADERS_ADDED local CHANGE local DEFAULT_CHANGE if [ -z "${HEADERS[$LIB]}" ]; then echo "Can't check lib$LIB until this script is updated with its headers" prompt_to_continue fi AMFILE="$(find_makefile "$LIB")" # Get current shared library version VER_NOW=$(extract_version "$LIB" < "$AMFILE") # Check whether library existed at last release if ! 
git cat-file -e "$LAST_RELEASE:$AMFILE" 2>/dev/null; then echo "lib$LIB is new, not changing version ($VER_NOW)" prompt_to_continue echo "" return fi HEADERS_LAST="$(find_headers_as_of "$LAST_RELEASE" "$LIB")" HEADERS_HEAD="$(find_headers_as_of "HEAD" "$LIB")" HEADERS_DIFF="$(diff <(echo "$HEADERS_LAST") <(echo "$HEADERS_HEAD"))" HEADERS_GONE="$(echo "$HEADERS_DIFF" | sed -n -e 's/^< //p')" HEADERS_ADDED="$(echo "$HEADERS_DIFF" | sed -n -e 's/^> //p')" # Check whether there were any changes to headers or sources SOURCES="$(find_sources "$LIB" "$AMFILE")" if [ -n "$HEADERS_GONE" ]; then DEFAULT_CHANGE="i" # Removed public header is incompatible change elif [ -n "$HEADERS_ADDED" ]; then DEFAULT_CHANGE="c" # Additions are likely compatible elif git diff --quiet -w "$LAST_RELEASE..HEAD" $HEADERS_HEAD $SOURCES ; then echo "No changes to $LIB interface" prompt_to_continue echo "" return else DEFAULT_CHANGE="f" # Sources changed, so it's at least a fix fi # Show all header changes since last release echo "- Changes in lib$LIB public headers since $LAST_RELEASE:" if [ -n "$HEADERS_GONE" ]; then for HEADER in $HEADERS_GONE; do echo "-- $HEADER was removed" done fi if [ -n "$HEADERS_ADDED" ]; then for HEADER in $HEADERS_ADDED; do echo "++ $HEADER is new" done fi git --no-pager diff --color -w "$LAST_RELEASE..HEAD" $HEADERS_HEAD echo "" if yesno "Show commits (minus refactor/build/merge) touching lib$LIB since $LAST_RELEASE [y/N]?" then git log --color "$LAST_RELEASE..HEAD" -z $HEADERS_HEAD $SOURCES "$AMFILE" \ | grep -vzE "Refactor:|Build:|Merge pull request" echo prompt_to_continue fi # @TODO this seems broken ... #echo "" #if yesno "Show merged PRs touching lib$LIB since $LAST_RELEASE [y/N]?" 
#then # git log --merges $LAST_RELEASE..HEAD $HEADERS_HEAD $SOURCES $AMFILE # echo # prompt_to_continue #fi # Show summary of source changes since last release echo "" echo "- Headers: $HEADERS_HEAD" echo "- Changed sources since $LAST_RELEASE:" git --no-pager diff --color -w "$LAST_RELEASE..HEAD" --stat $SOURCES echo "" # Ask for human guidance echo "Are the changes to lib$LIB:" read -p "[c]ompatible additions, [i]ncompatible additions/removals or [f]ixes? [$DEFAULT_CHANGE]: " CHANGE [ -z "$CHANGE" ] && CHANGE="$DEFAULT_CHANGE" # Get (and show) shared library version at last release VER=$(git show "$LAST_RELEASE:$AMFILE" | extract_version "$LIB") VER_1=$(echo "$VER" | awk -F: '{print $1}') VER_2=$(echo "$VER" | awk -F: '{print $2}') VER_3=$(echo "$VER" | awk -F: '{print $3}') echo "lib$LIB version at $LAST_RELEASE: $VER" # Show current shared library version if changed if [ "$VER_NOW" != "$VER" ]; then echo "lib$LIB version currently: $VER_NOW" fi # Calculate new library version case $CHANGE in i|I) echo "New backwards-incompatible version: x+1:0:0" (( VER_1++ )) VER_2=0 VER_3=0 # Some headers define constants for shared library names, # update them if the name changed for H in $HEADERS_HEAD; do sed_in_place "$H" "s/$(shared_lib_name "$LIB" "$VER_NOW")/$(shared_lib_name "$LIB" "$VER_1:0:0")/" done ;; c|C) echo "New version with backwards-compatible extensions: x+1:0:z+1" (( VER_1++ )) VER_2=0 (( VER_3++ )) ;; F|f) echo "Code changed though interfaces didn't: x:y+1:z" (( VER_2++ )) ;; *) echo "Not updating lib$LIB version" prompt_to_continue CHANGE="" ;; esac VER_NEW=$VER_1:$VER_2:$VER_3 if [ -n "$CHANGE" ]; then if [ "$VER_NEW" != "$VER_NOW" ]; then echo "Updating lib$LIB version from $VER_NOW to $VER_NEW" prompt_to_continue sed_in_place "$AMFILE" "s/version-info\s*$VER_NOW/version-info $VER_NEW/" else echo "No version change needed for lib$LIB" prompt_to_continue fi fi echo "" } echo "Definitions:" echo "- Compatible additions: new public API functions, structs, 
etc." echo "- Incompatible additions/removals: new arguments to public API functions," echo " new members added to the middle of public API structs," echo " removal of any public API, etc." echo "- Fixes: any other code changes at all" echo "" echo "When possible, improve backward compatibility first:" echo "- move new members to the end of structs" echo "- use bitfields instead of booleans" echo "- when adding arguments, create a new function that the old one can wrap" echo "" prompt_to_continue LAST_RELEASE=$(find_last_release "$1") for LIB in $(find_libs); do process_lib "$LIB" "$LAST_RELEASE" done # Show all proposed changes git --no-pager diff --color -w + +# vim: set filetype=sh: diff --git a/python/pacemaker/buildoptions.py.in b/python/pacemaker/buildoptions.py.in index 02f5552a43..178d87cdb8 100644 --- a/python/pacemaker/buildoptions.py.in +++ b/python/pacemaker/buildoptions.py.in @@ -1,89 +1,91 @@ """A module providing information on build-time configuration of pacemaker.""" __all__ = ["BuildOptions"] __copyright__ = "Copyright 2023-2024 the Pacemaker project contributors" __license__ = "GNU Lesser General Public License version 2.1 or later (LGPLv2.1+)" class BuildOptions: """ Variables generated as part of the ./configure && make process. These affect how pacemaker was configured and where its various parts get installed. """ BASH_PATH = "@BASH_PATH@" """Path to the bash shell.""" _BUILD_DIR = "@abs_top_builddir@" """ Top-level build directory. 
NOTE: This is not especially useful on installed systems, but is useful for running various programs from a source checkout """ CIB_DIR = "@CRM_CONFIG_DIR@" """Where CIB files are stored.""" CIB_SCHEMA_VERSION = "@CIB_VERSION@" """Latest supported CIB schema version number.""" COROSYNC_CONFIG_FILE = "@PCMK__COROSYNC_CONF@" """Path to the corosync config file.""" DAEMON_DIR = "@CRM_DAEMON_DIR@" """Where Pacemaker daemons are installed.""" DAEMON_USER = "@CRM_DAEMON_USER@" """User to run Pacemaker daemons as.""" DATA_DIR = "@datadir@" """Where read-only architecture-independent data is stored.""" _FENCE_BINDIR = "@PCMK__FENCE_BINDIR@" """Where executable fence agents are installed.""" # pylint: disable-msg=using-constant-test INIT_DIR = "@INITDIR@" if "@INITDIR@" else None """Where LSB init scripts are stored.""" LIBEXEC_DIR = "@libexecdir@" """Where programs started by other programs are stored.""" LOCAL_STATE_DIR = "@localstatedir@" """Where miscellaneous temporary state files are stored.""" LOG_DIR = "@CRM_LOG_DIR@" """Where Pacemaker log files are stored.""" OCF_RA_INSTALL_DIR = "@OCF_RA_INSTALL_DIR@" """Where resource agents are installed.""" OCF_ROOT_DIR = "@PCMK_OCF_ROOT@" """Root directory for OCF resource agents and libraries.""" PACEMAKER_CONFIG_DIR = "@PACEMAKER_CONFIG_DIR@" """Where configuration files such as authkey are kept.""" RSC_TMP_DIR = "@PCMK__OCF_TMP_DIR@" """Where resource agents should keep state files.""" REMOTE_ENABLED = True """True if Pacemaker Remote support is enabled.""" RUNTIME_STATE_DIR = "@runstatedir@" """Where runtime data is stored.""" SBIN_DIR = "@sbindir@" """Where administrative programs are installed.""" SCHEMA_DIR = "@PCMK_SCHEMA_DIR@" """Where Relax-NG schema files are stored.""" UNIT_DIR = "@systemdsystemunitdir@" """Where system-wide systemd unit files are stored.""" XMLLINT_PATH = "@XMLLINT_PATH@" """Path to the xmllint program.""" + +# vim: set filetype=python: diff --git a/python/setup.py.in b/python/setup.py.in index 
e9d61d0a68..165d9212d5 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -1,20 +1,22 @@ #!@PYTHON@ import re from setuptools import setup # This will match things like "2.1.3" and "2.1.3-100", but not things like # "2.1.3-100.deadbeef". Any other formats (or lack of a match) will result # in an exception during package building, which is probably okay. That's an # error on our part and is something we should fix. ver = re.match("[0-9.]+[0-9-]*", "@PACKAGE_VERSION@")[0] setup(name='pacemaker', version=ver, author='The Pacemaker project contributors', author_email='@PACKAGE_BUGREPORT@', license='LGPLv2.1+', url='https://clusterlabs.org/pacemaker/', description='Python libraries for Pacemaker', packages=['pacemaker', 'pacemaker._cts', 'pacemaker._cts.tests'], ) + +# vim: set filetype=python: diff --git a/tools/cibsecret.in b/tools/cibsecret.in index cbc6d33f7a..264667fbc5 100644 --- a/tools/cibsecret.in +++ b/tools/cibsecret.in @@ -1,440 +1,440 @@ #!@BASH_PATH@ # Copyright 2011-2024 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # # cibsecret # # Manage the secrets directory (by default, /var/lib/pacemaker/lrm/secrets). # Secrets are ASCII files, holding one value per file: # // # These constants must track crm_exit_t values CRM_EX_OK=0 CRM_EX_ERROR=1 CRM_EX_NOT_INSTALLED=5 CRM_EX_USAGE=64 CRM_EX_UNAVAILABLE=69 CRM_EX_OSFILE=72 CRM_EX_CONFIG=78 CRM_EX_DIGEST=104 CRM_EX_NOSUCH=105 CRM_EX_EXISTS=108 LRM_CIBSECRETS="@PCMK__CIB_SECRETS_DIR@" PROG="$(basename "$0")" SSH_OPTS="-o StrictHostKeyChecking=no" MAGIC="lrm://" usage() { cat <] [] Options: --help Show this message, then exit --version Display version information, then exit -C Don't read or write the CIB Commands and their parameters: set Set the value of a sensitive resource parameter. 
get Display the locally stored value of a sensitive resource parameter. check Verify that the locally stored value of a sensitive resource parameter matches its locally stored MD5 hash. stash Make a non-sensitive resource parameter that is already in the CIB sensitive (move its value to a locally stored and protected file). This may not be used with -C. unstash Make a sensitive resource parameter that is already in the CIB non-sensitive (move its value from the locally stored file to the CIB). This may not be used with -C. delete Remove a sensitive resource parameter value. sync Copy all locally stored secrets to all other nodes. This command manages sensitive resource parameter values that should not be stored directly in Pacemaker's Cluster Information Base (CIB). Such values are handled by storing a special string directly in the CIB that tells Pacemaker to look in a separate, protected file for the actual value. The secret files are not encrypted, but protected by file system permissions such that only root can read or modify them. Since the secret files are stored locally, they must be synchronized across all cluster nodes. This command handles the synchronization using (in order of preference) pssh, pdsh, or ssh, so one of those must be installed. Before synchronizing, this command will ping the cluster nodes to determine which are alive, using fping if it is installed, otherwise the ping command. Installing fping is strongly recommended for better performance. Known limitations: This command can only be run from full cluster nodes (not Pacemaker Remote nodes). Changes are not atomic, so the cluster may use different values while a change is in progress. To avoid problems, it is recommended to put the cluster in maintenance mode when making changes with this command. Changes in secret values do not trigger an agent reload or restart of the affected resource, since they do not change the CIB. 
If a response is desired before the next cluster recheck interval, any CIB change (such as setting a node attribute) will trigger it. If any node is down when changes to secrets are made, or a new node is later added to the cluster, it may have different values when it joins the cluster, before "$PROG sync" is run. To avoid this, it is recommended to run the sync command (from another node) before starting Pacemaker on the node. Examples: $PROG set ipmi_node1 passwd SecreT_PASS $PROG get ipmi_node1 passwd $PROG check ipmi_node1 passwd $PROG stash ipmi_node2 passwd $PROG sync EOF exit "$1" } check_usage() { case "$1" in set) [ "$2" -ne 4 ] && [ "$2" -ne 3 ] && usage 1 ;; get) [ "$2" -ne 3 ] && usage 1 ;; check) [ "$2" -ne 3 ] && usage 1 ;; stash) [ "$2" -ne 3 ] && usage 1 ;; unstash) [ "$2" -ne 3 ] && usage 1 ;; delete) [ "$2" -ne 3 ] && usage 1 ;; sync) [ "$2" -ne 1 ] && usage 1 ;; --help) usage $CRM_EX_OK ;; --version) crm_attribute --version; exit $? ;; *) usage $CRM_EX_USAGE ;; esac } fatal() { rc=$1 shift echo "ERROR: $*" exit $rc } warn() { echo "WARNING: $*" } info() { echo "INFO: $*" } check_env() { which md5sum >/dev/null 2>&1 || fatal $CRM_EX_NOT_INSTALLED "please install md5sum to run $PROG" if which pssh >/dev/null 2>&1; then rsh=pssh_fun rcp_to_from=pscp_fun # -q is a SUSE patch not present in upstream pssh PSSH_QUIET_OPTION="" pssh -q 2>&1|grep "no such option: -q" > /dev/null || PSSH_QUIET_OPTION="-q" elif which pdsh >/dev/null 2>&1; then rsh=pdsh_fun rcp_to_from=pdcp_fun elif which ssh >/dev/null 2>&1; then rsh=ssh_fun rcp_to_from=scp_fun else fatal $CRM_EX_NOT_INSTALLED "please install pssh, pdsh, or ssh to run $PROG" fi ps axww | grep '[p]acemaker-controld' >/dev/null || fatal $CRM_EX_UNAVAILABLE "pacemaker not running? $PROG needs pacemaker" } # This must be called (and return success) before calling $rsh or $rcp_to_from get_live_peers() { # Get local node name GLP_LOCAL_NODE="$(crm_node -n)" [ $? 
-eq 0 ] || fatal $CRM_EX_UNAVAILABLE "couldn't get local node name" # Get a list of all other cluster nodes GLP_ALL_PEERS="$(crmadmin -N -q)" [ $? -eq 0 ] || fatal $CRM_EX_UNAVAILABLE "couldn't determine cluster nodes" GLP_ALL_PEERS="$(echo "$GLP_ALL_PEERS" | grep -v "^${GLP_LOCAL_NODE}$")" # Make a list of those that respond to pings if [ "$(id -u)" = "0" ] && which fping >/dev/null 2>&1; then LIVE_NODES=$(fping -a $GLP_ALL_PEERS 2>/dev/null) else LIVE_NODES="" for GLP_NODE in $GLP_ALL_PEERS; do \ ping -c 2 -q "$GLP_NODE" >/dev/null 2>&1 && LIVE_NODES="$LIVE_NODES $GLP_NODE" done fi # Warn the user about any that didn't respond to pings GLP_DOWN="$( (for GLP_NODE in $LIVE_NODES $GLP_ALL_PEERS; do echo "$GLP_NODE"; done) | sort | uniq -u)" if [ "$(echo "$GLP_DOWN" | wc -w)" = "1" ]; then warn "node $GLP_DOWN is down" warn "you'll need to update it using \"$PROG sync\" later" elif [ -n "$GLP_DOWN" ]; then warn "nodes $(echo "$GLP_DOWN" | tr '\n' ' ')are down" warn "you'll need to update them using \"$PROG sync\" later" fi if [ "$LIVE_NODES" = "" ]; then info "no other nodes live" return 1 fi return 0 } pssh_fun() { pssh $PSSH_QUIET_OPTION -i -H "$LIVE_NODES" -x "$SSH_OPTS" -- "$@" } pscp_fun() { PSCP_DEST="$1" shift pscp $PSSH_QUIET_OPTION -H "$LIVE_NODES" -x "-pr" -x "$SSH_OPTS" -- "$@" "$PSCP_DEST" } pdsh_fun() { PDSH_NODES=$(echo "$LIVE_NODES" | tr '[:space:]' ',') export PDSH_SSH_ARGS_APPEND="$SSH_OPTS" pdsh -w "$PDSH_NODES" -- "$@" } pdcp_fun() { PDCP_DEST="$1" shift PDCP_NODES=$(echo "$LIVE_NODES" | tr '[:space:]' ',') export PDSH_SSH_ARGS_APPEND="$SSH_OPTS" pdcp -pr -w "$PDCP_NODES" -- "$@" "$PDCP_DEST" } ssh_fun() { for SSH_NODE in $LIVE_NODES; do ssh $SSH_OPTS "$SSH_NODE" -- "$@" || return done } scp_fun() { SCP_DEST="$1" shift for SCP_NODE in $LIVE_NODES; do scp -pqr $SSH_OPTS "$@" "$SCP_NODE:$SCP_DEST" || return done } # TODO: this procedure should be replaced with csync2 # provided that csync2 has already been configured sync_files() { get_live_peers || 
return if [ "$cmd" != "delete" ]; then info "syncing $LRM_CIBSECRETS to $(echo "$LIVE_NODES" | tr '\n' ' ') ..." else info "deleting $LRM_CIBSECRETS from $(echo "$LIVE_NODES" | tr '\n' ' ') ..." fi $rsh rm -rf "$LRM_CIBSECRETS" && $rsh mkdir -p "$(dirname "$LRM_CIBSECRETS")" && $rcp_to_from "$(dirname "$LRM_CIBSECRETS")" "$LRM_CIBSECRETS" } sync_one() { SO_FILE="$1" get_live_peers || return if [ "$cmd" != "delete" ]; then info "syncing $SO_FILE to $(echo "$LIVE_NODES" | tr '\n' ' ') ..." else info "deleting $SO_FILE from $(echo "$LIVE_NODES" | tr '\n' ' ') ..." fi $rsh mkdir -p "$(dirname "$SO_FILE")" && if [ -f "$SO_FILE" ]; then $rcp_to_from "$(dirname "$SO_FILE")" "$SO_FILE" "${SO_FILE}.sign" else $rsh rm -f "$SO_FILE" "${SO_FILE}.sign" fi } is_secret() { # assume that the secret is in the CIB if we cannot talk to cib [ "$NO_CRM" ] || test "$1" = "$MAGIC" } check_cib_rsc() { CCR_OUT="$($NO_CRM crm_resource -r "$1" -W 2>&1)" || fatal $CRM_EX_NOSUCH "$CCR_OUT" } get_cib_param() { GCP_RSC="$1" GCP_PARAM="$2" $NO_CRM crm_resource -r "$GCP_RSC" -g "$GCP_PARAM" 2>/dev/null } set_cib_param() { SET_RSC="$1" SET_PARAM="$2" SET_VAL="$3" $NO_CRM crm_resource -r "$SET_RSC" -p "$SET_PARAM" -v "$SET_VAL" 2>/dev/null } remove_cib_param() { RM_RSC="$1" RM_PARAM="$2" $NO_CRM crm_resource -r "$RM_RSC" -d "$RM_PARAM" 2>/dev/null } localfiles() { LF_CMD="$1" LF_RSC="$2" LF_PARAM="$3" LF_VALUE="$4" LF_FILE="$LRM_CIBSECRETS/$LF_RSC/$LF_PARAM" case "$LF_CMD" in get) cat "$LF_FILE" 2>/dev/null true ;; getsum) cat "${LF_FILE}.sign" 2>/dev/null true ;; set) LF_SUM="$(printf %s "$LF_VALUE" | md5sum)" || fatal $CRM_EX_ERROR "md5sum failed to produce hash for resource $LF_RSC parameter $LF_PARAM" LF_SUM="$(echo "$LF_SUM" | awk '{print $1}')" mkdir -p "$(dirname "$LF_FILE")" && echo "$LF_VALUE" > "$LF_FILE" && echo "$LF_SUM" > "${LF_FILE}.sign" && sync_one "$LF_FILE" ;; remove) rm -f "$LF_FILE" "${LF_FILE}.sign" sync_one "$LF_FILE" ;; esac } cibsecret_set() { CS_VALUE="$1" if [ "$2" -ne 4 ]; 
then read -p "Enter value: " CS_VALUE fi check_cib_rsc "$rsc" CIBSET_CURRENT="$(get_cib_param "$rsc" "$param")" [ -z "$NO_CRM" ] && [ ! -z "$CIBSET_CURRENT" ] && [ "$CIBSET_CURRENT" != "$MAGIC" ] && [ "$CIBSET_CURRENT" != "$CS_VALUE" ] && fatal $CRM_EX_CONFIG "CIB value <$CIBSET_CURRENT> different for $rsc parameter $param; please delete it first" localfiles set "$rsc" "$param" "$CS_VALUE" && set_cib_param "$rsc" "$param" "$MAGIC" } cibsecret_check() { check_cib_rsc "$rsc" is_secret "$(get_cib_param "$rsc" "$param")" || fatal $CRM_EX_CONFIG "resource $rsc parameter $param not set as secret, nothing to check" CSC_LOCAL_SUM="$(localfiles getsum "$rsc" "$param")" [ "$CSC_LOCAL_SUM" ] || fatal $CRM_EX_OSFILE "no MD5 hash for resource $rsc parameter $param" CSC_LOCAL_VALUE="$(localfiles get "$rsc" "$param")" CSC_CALC_SUM="$(printf "%s" "$CSC_LOCAL_VALUE" | md5sum | awk '{print $1}')" [ "$CSC_CALC_SUM" = "$CSC_LOCAL_SUM" ] || fatal $CRM_EX_DIGEST "MD5 hash mismatch for resource $rsc parameter $param" } cibsecret_get() { cibsecret_check localfiles get "$rsc" "$param" } cibsecret_delete() { check_cib_rsc "$rsc" localfiles remove "$rsc" "$param" && remove_cib_param "$rsc" "$param" } cibsecret_stash() { [ "$NO_CRM" ] && fatal $CRM_EX_USAGE "no access to Pacemaker, stash not supported" check_cib_rsc "$rsc" CIBSTASH_CURRENT="$(get_cib_param "$rsc" "$param")" [ "$CIBSTASH_CURRENT" = "" ] && fatal $CRM_EX_NOSUCH "nothing to stash for resource $rsc parameter $param" is_secret "$CIBSTASH_CURRENT" && fatal $CRM_EX_EXISTS "resource $rsc parameter $param already set as secret, nothing to stash" cibsecret_set "$CIBSTASH_CURRENT" 4 } cibsecret_unstash() { [ "$NO_CRM" ] && fatal $CRM_EX_USAGE "no access to Pacemaker, unstash not supported" UNSTASH_LOCAL_VALUE="$(localfiles get "$rsc" "$param")" [ "$UNSTASH_LOCAL_VALUE" = "" ] && fatal $CRM_EX_NOSUCH "nothing to unstash for resource $rsc parameter $param" check_cib_rsc "$rsc" is_secret "$(get_cib_param "$rsc" "$param")" || warn "resource 
$rsc parameter $param not set as secret, but we have local value so proceeding anyway" localfiles remove "$rsc" "$param" && set_cib_param "$rsc" "$param" "$UNSTASH_LOCAL_VALUE" } cibsecret_sync() { sync_files } # Grab arguments if [ "$1" = "-C" ]; then NO_CRM=':' shift fi cmd="$1" rsc="$2" param="$3" value="$4" # Ensure we have everything we need check_usage "$cmd" $# check_env umask 0077 # for dirname() function (@TODO why are we replacing dirname?) . "@PCMK_OCF_ROOT@/lib/heartbeat/ocf-shellfuncs" "cibsecret_$cmd" "$value" $# rc=$? if [ $rc -ne 0 ]; then fatal $CRM_EX_ERROR "$cmd(): failed with rc: $rc" fi -# vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/tools/cluster-clean.in b/tools/cluster-clean.in index 4b75edf279..91a629479a 100755 --- a/tools/cluster-clean.in +++ b/tools/cluster-clean.in @@ -1,99 +1,101 @@ #!@BASH_PATH@ # # Copyright 2011-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # hosts= group= kill=0 while true; do case "$1" in -x) set -x; shift;; -w) for h in $2; do hosts="$hosts -w $h"; done shift; shift;; -g) group=$2; shift; shift;; --kill) kill=1; shift;; --kill-only) kill=2; shift;; "") break;; *) echo "unknown option: $1"; exit 1;; esac done if [ x"$group" = x -a x"$hosts" = x ]; then group=$CTS_GROUP fi if [ x"$hosts" != x ]; then echo `date` ": Cleaning up hosts:" target=$hosts elif [ x"$group" != x ]; then echo `date` ": Cleaning up group: $group" target="-g $group" else echo "You didn't specify any nodes to clean up" exit 1 fi cluster-helper --list bullet $target if [ $kill != 0 ]; then echo "Cleaning processes" # Bah. 
Force systemd to actually look at the process and realize it's dead cluster-helper $target -- "service corosync stop" &> /dev/null & cluster-helper $target -- "service pacemaker stop" &> /dev/null & cluster-helper $target -- "killall -q -9 corosync pacemakerd pacemaker-attrd pacemaker-based pacemaker-controld pacemaker-execd pacemaker-fenced pacemaker-remoted pacemaker-schedulerd dlm_controld gfs_controld" &> /dev/null cluster-helper $target -- 'kill -9 `pidof valgrind`' &> /dev/null if [ $kill == 2 ]; then exit 0 fi fi #logrotate -f $cluster_rotate echo "Cleaning files" log_files="" log_files="$log_files 'messages*'" log_files="$log_files 'localmessages*'" log_files="$log_files 'cluster*.log'" log_files="$log_files 'corosync.log*'" log_files="$log_files 'pacemaker.log*'" log_files="$log_files '*.journal'" log_files="$log_files '*.journal~'" log_files="$log_files 'secure-*'" state_files="" state_files="$state_files 'cib.xml*'" state_files="$state_files 'valgrind-*'" state_files="$state_files 'cib-*'" state_files="$state_files 'core.*'" state_files="$state_files 'cts.*'" state_files="$state_files 'pe*.bz2'" state_files="$state_files 'fdata-*'" for f in $log_files; do cluster-helper $target -- "find /var/log -name '$f' -exec rm -f \{\} \;" done for f in $state_files; do cluster-helper $target -- "find /var/lib -name '$f' -exec rm -f \{\} \;" done cluster-helper $target -- "find /dev/shm -name 'qb-*' -exec rm -f \{\} \;" cluster-helper $target -- "find @CRM_BLACKBOX_DIR@ -name '*-*' -exec rm -f \{\} \;" cluster-helper $target -- "find /tmp -name '*.valgrind' -exec rm -f \{\} \;" cluster-helper $target -- 'service rsyslog restart' > /dev/null 2>&1 cluster-helper $target -- 'systemctl restart systemd-journald.socket' > /dev/null 2>&1 cluster-helper $target -- logger -i -p daemon.info __clean_logs__ #touch $cluster_log echo `date` ": Clean complete" + +# vim: set filetype=sh: diff --git a/tools/cluster-helper.in b/tools/cluster-helper.in index 5bfe89046b..8cee1b4cb1 
100755 --- a/tools/cluster-helper.in +++ b/tools/cluster-helper.in @@ -1,201 +1,203 @@ #!@BASH_PATH@ # # Copyright 2011-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # hosts= group=$cluster_name user=root pdsh=`which pdsh 2>/dev/null` ssh=`which qarsh 2>/dev/null` scp=`which qacp 2>/dev/null` command=list format=oneline replace="{}" if [ x$ssh = "x" ]; then ssh=ssh scp=scp fi function helptext() { echo "cluster-helper - A tool for running commands on multiple hosts" echo "" echo "Attempt to use pdsh, qarsh, or ssh (in that order) to execute commands" echo "on multiple hosts" echo "" echo "DSH groups can be configured and specified with -g instead of listing" echo "the individual hosts every time" echo "" echo "Usage: cluster-helper [options] [command]" echo "" echo "Options:" echo "--ssh Force the use of ssh instead of qarsh even if it available" echo "-g, --group Specify the group to operate on/with" echo "-w, --host Specify a host to operate on/with. 
May be specified multiple times" echo "-f, --format Specifiy the output format When listing hosts or group contents" echo " Allowed values: [oneline], long, short, pdsh, bullet" echo "" echo "" echo "Commands:" echo "--list format List the contents of a group in the specified format" echo "--add name Add supplied (-w) hosts to the named group" echo "--create name Create the named group with the supplied (-w) hosts" echo "--run, -- Treat all subsequent arguments as a command to perform on" echo " the specified command on the hosts or group" echo "--xargs Run the supplied command having replaced any occurrences" echo " of {} with the node name" echo "" echo "--copy file(s) host:file Pass subsequent arguments to scp or qacp" echo " Any occurrences of {} are replaced with the node name" echo "--key Install an ssh key" echo "" exit $1 } while true ; do case "$1" in --help|-h|-\?) helptext 0;; -x) set -x; shift;; --ssh) ssh="ssh"; scp="scp"; pdsh=""; shift;; -g|--group) group="$2"; shift; shift;; -w|--host) for h in $2; do hosts="$hosts $h"; done shift; shift;; -f|--format) format=$2; shift; shift;; -I) replace=$2; shift; shift;; --list|list) format=$2; command=list; shift; shift;; --add|add) command=group-add; shift;; --create|create) group="$2"; command=group-create; shift; shift;; --run|run) command=run; shift;; --copy|copy) command=copy; shift; break ;; --key|key) command=key; shift; break ;; --xargs) command=xargs; shift; break ;; --) command=run; shift; break ;; "") break;; *) helptext 1;; esac done if [ x"$group" = x -a x"$hosts" = x ]; then group=$CTS_GROUP fi function expand() { fmt=$1 if [ x$group != x -a -f ~/.dsh/group/$group ]; then hosts=`cat ~/.dsh/group/$group` elif [ x$group != x ]; then echo "Unknown group: $group" >&2 exit 1 fi if [ "x$hosts" != x -a $fmt = oneline ]; then echo $hosts elif [ "x$hosts" != x -a $fmt = short ]; then ( for h in $hosts; do echo $h | sed 's:\..*::' done ) | tr '\n' ' ' echo "" elif [ "x$hosts" != x -a $fmt = pdsh ]; then ( 
for h in $hosts; do echo "-w $h" done ) | tr '\n' ' ' echo "" elif [ "x$hosts" != x -a $fmt = long ]; then for h in $hosts; do echo $h done elif [ "x$hosts" != x -a $fmt = bullet ]; then for h in $hosts; do echo " * $h" done elif [ "x$hosts" != x ]; then echo "Unknown format: $fmt" >&2 fi } if [ $command = list ]; then expand $format elif [ $command = key ]; then hosts=`expand oneline` for h in $hosts; do ssh-copy-id root@$h done elif [ $command = group-create ]; then f=`mktemp` mkdir -p ~/.dsh/group if [ -f ~/.dsh/group/$group ]; then echo "Overwriting existing group $group" fi for h in $hosts; do echo $h >> $f done echo "Creating group $group in ~/.dsh/group" sort -u $f > ~/.dsh/group/$group rm -f $f elif [ $command = group-add ]; then if [ x$group = x ]; then echo "Please specify a group to append to" exit 1 fi f=`mktemp` mkdir -p ~/.dsh/group if [ -f ~/.dsh/group/$group ]; then cat ~/.dsh/group/$group > $f fi for h in $hosts; do echo $h >> $f done echo "Appending hosts to group $group in ~/.dsh/group" sort -u $f > ~/.dsh/group/$group rm -f $f elif [ $command = run ]; then if [ x$pdsh != x ]; then hosts=`expand pdsh` $pdsh -l $user $hosts -- $* else hosts=`expand oneline` for n in $hosts; do $ssh -l $user $n -- $* < /dev/null done if [ x"$hosts" = x ]; then echo "No hosts specified" fi fi elif [ $command = copy ]; then hosts=`expand oneline` for n in $hosts; do $scp `echo $* | sed 's@'$replace'@'$n'@'` done elif [ $command = xargs ]; then hosts=`expand oneline` for n in $hosts; do eval `echo $* | sed 's@'$replace'@'$n'@'` done fi + +# vim: set filetype=sh: diff --git a/tools/crm_failcount.in b/tools/crm_failcount.in index 85ad18742f..b8a6b3199f 100755 --- a/tools/crm_failcount.in +++ b/tools/crm_failcount.in @@ -1,293 +1,295 @@ #!@BASH_PATH@ # # Copyright 2009-2018 the Pacemaker project contributors # # The version control history for this file may have further details. 
# # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # USAGE_TEXT="Usage: crm_failcount [] Common options: --help Display this text, then exit --version Display version information, then exit -V, --verbose Specify multiple times to increase debug output -q, --quiet Print only the value (if querying) Commands: -G, --query Query the current value of the resource's fail count -D, --delete Delete resource's recorded failures Additional Options: -r, --resource=value Name of the resource to use (required) -n, --operation=value Name of operation to use (instead of all operations) -I, --interval=value If operation is specified, its interval -N, --node=value Use failcount on named node (instead of local node)" HELP_TEXT="crm_failcount - Query or delete resource fail counts $USAGE_TEXT" # These constants must track crm_exit_t values CRM_EX_OK=0 CRM_EX_USAGE=64 CRM_EX_NOSUCH=105 exit_usage() { if [ $# -gt 0 ]; then echo "error:" "$@" >&2 fi echo echo "$USAGE_TEXT" exit $CRM_EX_USAGE } warn() { echo "warning:" "$@" >&2 } interval_re() { echo "^[[:blank:]]*([0-9]+)[[:blank:]]*(${1})[[:blank:]]*$" } # This function should follow crm_get_interval() as closely as possible parse_interval() { INT_S="$1" INT_8601RE="^P(([0-9]+)Y)?(([0-9]+)M)?(([0-9]+)D)?T?(([0-9]+)H)?(([0-9]+)M)?(([0-9]+)S)?$" if [[ $INT_S =~ $(interval_re "") ]]; then echo $(( ${BASH_REMATCH[1]} * 1000 )) elif [[ $INT_S =~ $(interval_re "s|sec") ]]; then echo $(( ${BASH_REMATCH[1]} * 1000 )) elif [[ $INT_S =~ $(interval_re "ms|msec") ]]; then echo "${BASH_REMATCH[1]}" elif [[ $INT_S =~ $(interval_re "m|min") ]]; then echo $(( ${BASH_REMATCH[1]} * 60000 )) elif [[ $INT_S =~ $(interval_re "h|hr") ]]; then echo $(( ${BASH_REMATCH[1]} * 3600000 )) elif [[ $INT_S =~ $(interval_re "us|usec") ]]; then echo $(( ${BASH_REMATCH[1]} / 1000 )) elif [[ $INT_S =~ ^P([0-9]+)W$ ]]; then echo $(( ${BASH_REMATCH[1]} * 604800000 )) elif [[ $INT_S =~ $INT_8601RE ]]; 
then echo $(( ( ${BASH_REMATCH[2]:-0} * 31536000000 ) \ + ( ${BASH_REMATCH[4]:-0} * 2592000000 ) \ + ( ${BASH_REMATCH[6]:-0} * 86400000 ) \ + ( ${BASH_REMATCH[8]:-0} * 3600000 ) \ + ( ${BASH_REMATCH[10]:-0} * 60000 ) \ + ( ${BASH_REMATCH[12]:-0} * 1000 ) )) else warn "Unrecognized interval, using 0" echo "0" fi } query_single_attr() { QSR_TARGET="$1" QSR_ATTR="$2" crm_attribute $VERBOSE --quiet --query -t status -d 0 \ -N "$QSR_TARGET" -n "$QSR_ATTR" } query_attr_sum() { QAS_TARGET="$1" QAS_PREFIX="$2" # Build xpath to match all transient node attributes with prefix QAS_XPATH="/cib/status/node_state[@uname='${QAS_TARGET}']" QAS_XPATH="${QAS_XPATH}/transient_attributes/instance_attributes" QAS_XPATH="${QAS_XPATH}/nvpair[starts-with(@name,'$QAS_PREFIX')]" # Query attributes that match xpath # @TODO We ignore stderr because we don't want "no results" to look # like an error, but that also makes $VERBOSE pointless. QAS_ALL=$(cibadmin --query --xpath="$QAS_XPATH" 2>/dev/null) QAS_EX=$? # "No results" is not an error if [ $QAS_EX -ne $CRM_EX_OK ] && [ $QAS_EX -ne $CRM_EX_NOSUCH ]; then echo "error: could not query CIB for fail counts" >&2 exit $QAS_EX fi # Extract the attribute values (one per line) from the output QAS_VALUE=$(echo "$QAS_ALL" | sed -n -e \ 's/.*.*/\1/p') # Sum the values QAS_SUM=0 for i in 0 $QAS_VALUE; do if [ "$i" = "INFINITY" ]; then QAS_SUM="INFINITY" break else QAS_SUM=$(($QAS_SUM + $i)) fi done if [ "$QAS_SUM" = "INFINITY" ]; then echo $QAS_SUM elif [ "$QAS_SUM" -ge 1000000 ]; then echo "INFINITY" else echo $QAS_SUM fi } query_failcount() { QF_TARGET="$1" QF_RESOURCE="$2" QF_OPERATION="$3" QF_INTERVAL="$4" QF_ATTR_RSC="fail-count-${QF_RESOURCE}" if [ -n "$QF_OPERATION" ]; then QF_ATTR_DISPLAY="${QF_ATTR_RSC}#${QF_OPERATION}_${QF_INTERVAL}" QF_COUNT=$(query_single_attr "$QF_TARGET" "$QF_ATTR_DISPLAY") else QF_ATTR_DISPLAY="$QF_ATTR_RSC" QF_COUNT=$(query_attr_sum "$QF_TARGET" "${QF_ATTR_RSC}#") fi # @COMPAT attributes set < 1.1.17: # If we didn't 
find any per-operation failcount, # check whether there is a legacy per-resource failcount. if [ "$QF_COUNT" = "0" ]; then QF_COUNT=$(query_single_attr "$QF_TARGET" "$QF_ATTR_RSC") if [ "$QF_COUNT" != "0" ]; then QF_ATTR_DISPLAY="$QF_ATTR_RSC" fi fi # Echo result (comparable to crm_attribute, for backward compatibility) if [ -n "$QUIET" ]; then echo $QF_COUNT else echo "scope=status name=$QF_ATTR_DISPLAY value=$QF_COUNT" fi } clear_failcount() { CF_TARGET="$1" CF_RESOURCE="$2" CF_OPERATION="$3" CF_INTERVAL="$4" if [ -n "$CF_OPERATION" ]; then CF_OPERATION="-n $CF_OPERATION -I ${CF_INTERVAL}ms" fi crm_resource $QUIET $VERBOSE --cleanup \ -N "$CF_TARGET" -r "$CF_RESOURCE" $CF_OPERATION } QUIET="" VERBOSE="" command="" resource="" operation="" interval="0" target=$(crm_node -n 2>/dev/null) SHORTOPTS="qDGQVN:U:v:i:l:r:n:I:" LONGOPTS_COMMON="help,version,verbose,quiet" LONGOPTS_COMMANDS="query,delete" LONGOPTS_OTHER="resource:,node:,operation:,interval:" LONGOPTS_COMPAT="delete-attr,get-value,resource-id:,uname:,lifetime:,attr-value:,attr-id:" LONGOPTS="$LONGOPTS_COMMON,$LONGOPTS_COMMANDS,$LONGOPTS_OTHER,$LONGOPTS_COMPAT" TEMP=$(@GETOPT_PATH@ -o $SHORTOPTS --long $LONGOPTS -n crm_failcount -- "$@") if [ $? -ne 0 ]; then exit_usage fi eval set -- "$TEMP" # Quotes around $TEMP are essential while true ; do case "$1" in --help) echo "$HELP_TEXT" exit $CRM_EX_OK ;; --version) crm_attribute --version exit $? 
;; -q|-Q|--quiet) QUIET="--quiet" shift ;; -V|--verbose) VERBOSE="$VERBOSE $1" shift ;; -G|--query|--get-value) command="--query" shift ;; -D|--delete|--delete-attr) command="--delete" shift ;; -r|--resource|--resource-id) resource="$2" shift 2 ;; -n|--operation) operation="$2" shift 2 ;; -I|--interval) interval="$2" shift 2 ;; -N|--node|-U|--uname) target="$2" shift 2 ;; -v|--attr-value) if [ "$2" = "0" ]; then command="--delete" else warn "ignoring deprecated option '$1' with nonzero value" fi shift 2 ;; -i|--attr-id|-l|--lifetime) warn "ignoring deprecated option '$1'" shift 2 ;; --) shift break ;; *) exit_usage "unknown option '$1'" ;; esac done [ -n "$command" ] || exit_usage "must specify a command" [ -n "$resource" ] || exit_usage "resource name required" [ -n "$target" ] || exit_usage "node name required" interval=$(parse_interval $interval) if [ "$command" = "--query" ]; then query_failcount "$target" "$resource" "$operation" "$interval" else clear_failcount "$target" "$resource" "$operation" "$interval" fi + +# vim: set filetype=sh: diff --git a/tools/crm_master.in b/tools/crm_master.in index a4769f4473..5de941c44c 100755 --- a/tools/crm_master.in +++ b/tools/crm_master.in @@ -1,92 +1,94 @@ #!@BASH_PATH@ # # Copyright 2009-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # USAGE_TEXT="Usage: crm_master [] This command is deprecated. Use crm_attribute with the --promotion option instead." 
exit_usage() { if [ $# -gt 0 ]; then echo "error:" "$@" >&2 fi echo echo "$USAGE_TEXT" exit 1 } SHORTOPTS_DEPRECATED="U:Q" LONGOPTS_DEPRECATED="uname:,get-value,delete-attr,attr-value:,attr-id:" SHORTOPTS="VqGv:DN:l:i:r:" LONGOPTS="help,version,verbose,quiet,query,update:,delete,node:,lifetime:,id:,resource:" TEMP=$(@GETOPT_PATH@ -o ${SHORTOPTS}${SHORTOPTS_DEPRECATED} \ --long ${LONGOPTS},${LONGOPTS_DEPRECATED} \ -n crm_master -- "$@") if [ $? -ne 0 ]; then exit_usage fi eval set -- "$TEMP" # Quotes around $TEMP are essential # Explicitly set the (usual default) lifetime, so the attribute gets set as a # node attribute and not a cluster property. options="--lifetime forever" while true ; do case "$1" in --help) echo "crm_master - Query, update, or delete a resource's promotion score" echo echo "$USAGE_TEXT" exit 0 ;; --version) crm_attribute --version exit 0 ;; --verbose|-V|--quiet|-q|--query|-G|--delete|-D) options="$options $1" shift ;; --update|-v|--node|-N|--lifetime|-l|--id|-i) options="$options $1 $2" shift shift ;; -r|--resource) OCF_RESOURCE_INSTANCE=$2; shift shift ;; --get-value|--delete-attr|-Q) # deprecated options="$options $1" shift ;; --uname|-U|--attr-value|--attr-id) # deprecated options="$options $1 $2" shift shift ;; --) shift break ;; *) exit_usage "unknown option '$1'" ;; esac done if [ -z "$OCF_RESOURCE_INSTANCE" ]; then exit_usage "No resource specified" fi crm_attribute -n master-$OCF_RESOURCE_INSTANCE $options + +# vim: set filetype=sh: diff --git a/tools/crm_report.in b/tools/crm_report.in index 91a8d70726..1df25c044c 100644 --- a/tools/crm_report.in +++ b/tools/crm_report.in @@ -1,481 +1,481 @@ #!/bin/sh # # Copyright 2010-2019 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. 
# TEMP=`@GETOPT_PATH@ \ -o hv?xl:f:t:n:T:L:p:c:dSCu:D:MVse: \ --long help,corosync,cts:,cts-log:,dest:,node:,nodes:,from:,to:,sos-mode,logfile:,as-directory,single-node,cluster:,user:,max-depth:,version,features,rsh: \ -n 'crm_report' -- "$@"` # The quotes around $TEMP are essential eval set -- "$TEMP" progname=$(basename "$0") rsh="ssh -T" tests="" nodes="" compress=1 cluster="any" ssh_user="root" search_logs=1 sos_mode=0 report_data=`dirname $0` maxdepth=5 extra_logs="" sanitize_patterns="passw.*" log_patterns="CRIT: ERROR:" usage() { cat< "$l_base/$HALOG_F" fi for node in $nodes; do cat <$l_base/.env LABEL="$label" REPORT_HOME="$r_base" REPORT_MASTER="$host" REPORT_TARGET="$node" LOG_START=$start LOG_END=$end REMOVE=1 SANITIZE="$sanitize_patterns" CLUSTER=$cluster LOG_PATTERNS="$log_patterns" EXTRA_LOGS="$extra_logs" SEARCH_LOGS=$search_logs SOS_MODE=$sos_mode verbose=$verbose maxdepth=$maxdepth EOF if [ $host = $node ]; then cat <>$l_base/.env REPORT_HOME="$l_base" EOF cat $l_base/.env $report_data/report.common $report_data/report.collector > $l_base/collector bash $l_base/collector else cat $l_base/.env $report_data/report.common $report_data/report.collector \ | $rsh -l $ssh_user $node -- "mkdir -p $r_base; cat > $r_base/collector; bash $r_base/collector" | (cd $l_base && tar mxf -) fi done analyze $l_base > $l_base/$ANALYSIS_F if [ -f $l_base/$HALOG_F ]; then node_events $l_base/$HALOG_F > $l_base/$EVENTS_F fi for node in $nodes; do cat $l_base/$node/$ANALYSIS_F >> $l_base/$ANALYSIS_F if [ -s $l_base/$node/$EVENTS_F ]; then cat $l_base/$node/$EVENTS_F >> $l_base/$EVENTS_F elif [ -s $l_base/$HALOG_F ]; then awk "\$4==\"$nodes\"" $l_base/$EVENTS_F >> $l_base/$n/$EVENTS_F fi done log " " if [ $compress = 1 ]; then fname=`shrink $l_base` rm -rf $l_base log "Collected results are available in $fname" log " " log "Please create a bug entry at" log " @BUG_URL@" log "Include a description of your problem and attach this tarball" log " " log "Thank you for taking 
time to create this report."
    else
        log "Collected results are available in $l_base"
    fi
    log " "
}

#
# check if files have same content in the cluster
#
# cibdiff CIB1 CIB2
# Diffs two CIB files with crm_diff, unless exactly one of the two node
# directories is marked RUNNING (or STOPPED), in which case the CIBs are
# reported as not comparable.
cibdiff() {
    d1=$(dirname $1)
    d2=$(dirname $2)

    # DIFF_OK=0 means the RUNNING/STOPPED markers of the two directories differ
    if [ -f "$d1/RUNNING" ] && [ ! -f "$d2/RUNNING" ]; then
        DIFF_OK=0
    elif [ -f "$d1/STOPPED" ] && [ ! -f "$d2/STOPPED" ]; then
        DIFF_OK=0
    else
        DIFF_OK=1
    fi

    if [ $DIFF_OK -eq 1 ]; then
        if which crm_diff > /dev/null 2>&1; then
            crm_diff -c -n $1 -o $2
        else
            info "crm_diff(8) not found, cannot diff CIBs"
        fi
    else
        echo "can't compare cibs from running and stopped systems"
    fi
}

# diffcheck FILE1 FILE2
# Compares two files: CIB files (basename equal to $CIB_F) go through
# cibdiff, everything else through "diff -u". Returns 1 if either file is
# missing, otherwise the status of the comparison command.
diffcheck() {
    [ -f "$1" ] || {
        echo "$1 does not exist"
        return 1
    }
    [ -f "$2" ] || {
        echo "$2 does not exist"
        return 1
    }
    case $(basename "$1") in
        $CIB_F)
            cibdiff $1 $2
            ;;
        *)
            diff -u $1 $2
            ;;
    esac
}

#
# remove duplicates if files are same, make links instead
#
# consolidate BASEDIR FILE
# For every node in $nodes: keep a single copy of FILE in BASEDIR and
# replace the per-node copy with a symlink to it.
consolidate() {
    for n in $nodes; do
        if [ -f $1/$2 ]; then
            # Shared copy already exists; drop this node's duplicate
            rm $1/$n/$2
        else
            # First node seen: its copy becomes the shared one
            mv $1/$n/$2 $1
        fi
        ln -s ../$2 $1/$n
    done
}

# analyze_one BASEDIR FILE
# Compares the first node's copy of FILE against every other node's copy.
# The return value is the sum of the diffcheck exit statuses (0 = all same).
analyze_one() {
    rc=0
    node0=""
    for n in $nodes; do
        if [ "$node0" ]; then
            diffcheck $1/$node0/$2 $1/$n/$2
            rc=$(($rc+$?))
        else
            # Remember the first node as the reference
            node0=$n
        fi
    done
    return $rc
}

# analyze BASEDIR
# Prints a per-file comparison report for the membership, CIB, crm_mon and
# sysinfo files, consolidating identical copies (except the CIB).
analyze() {
    flist="$MEMBERSHIP_F $CIB_F $CRM_MON_F $SYSINFO_F"
    for f in $flist; do
        printf "Diff $f... "
        # Skip files that no node directory contains
        ls $1/*/$f >/dev/null 2>&1 || {
            echo "no $1/*/$f :/"
            continue
        }
        if analyze_one $1 $f; then
            echo "OK"
            [ "$f" != $CIB_F ] && consolidate $1 $f
        else
            echo ""
        fi
    done
}

# do_cts
# Collects data for each CTS test set listed in $tests (comma-separated;
# each entry is either "N" or "N-M"). Start and end times are looked up in
# the CTS control log ($ctslog, located with findmsg if unset) and handed to
# collect_data.
do_cts() {
    test_sets=`echo $tests | tr ',' ' '`
    for test_set in $test_sets; do

        # "N-M" -> start_test=N, end_test=M; a plain "N" leaves end_test empty
        start_time=0
        start_test=`echo $test_set | tr '-' ' ' | awk '{print $1}'`

        end_time=0
        end_test=`echo $test_set | tr '-' ' ' | awk '{print $2}'`

        # In both cases end_test ends up one past the last requested test, so
        # the "Running test" line of the following test marks the end time.
        if [ x$end_test = x ]; then
            msg="Extracting test $start_test"
            label="CTS-$start_test-`date +"%b-%d-%Y"`"
            end_test=`expr $start_test + 1`
        else
            msg="Extracting tests $start_test to $end_test"
            label="CTS-$start_test-$end_test-`date +"%b-%d-%Y"`"
            end_test=`expr $end_test + 1`
        fi

        # Test 0 is matched by a different log line than the numbered tests
        if [ $start_test = 0 ]; then
            start_pat="BEGINNING [0-9].* TESTS"
        else
            start_pat="Running test.*\[ *$start_test\]"
        fi

        if [ x$ctslog = x ]; then
            ctslog=`findmsg 1 "$start_pat"`

            if [ x$ctslog = x ]; then
                fatal "No CTS control file detected"
            else
                log "Using CTS control file: $ctslog"
            fi
        fi

        # Use the last matching line in the log for each boundary
        line=`grep -n "$start_pat" $ctslog | tail -1 | sed 's/:.*//'`
        if [ ! -z "$line" ]; then
            start_time=`linetime $ctslog $line`
        fi

        line=`grep -n "Running test.*\[ *$end_test\]" $ctslog | tail -1 | sed 's/:.*//'`
        if [ ! -z "$line" ]; then
            end_time=`linetime $ctslog $line`
        fi

        # Without an explicit node list, derive one from the CTS log
        if [ -z "$nodes" ]; then
            nodes=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '`
            log "Calculated node list: $nodes"
        fi

        if [ $end_time -lt $start_time ]; then
            debug "Test didn't complete, grabbing everything up to now"
            end_time=`date +%s`
        fi

        # start_time is still 0 if the start pattern was never found
        if [ $start_time != 0 ];then
            log "$msg (`time2str $start_time` to `time2str $end_time`)"
            collect_data $label $start_time $end_time $ctslog
        else
            fatal "$msg failed: not found"
        fi
    done
}

# node_names_from_xml
# Reads XML on stdin and prints, space-separated on one line, the value of
# the first uname="..." attribute found on each input line.
node_names_from_xml() {
    awk '
        /uname/ {
            for( i=1; i<=NF; i++ )
                if( $i~/^uname=/ ) {
                    sub("uname=.","",$i);
                    sub("\".*","",$i);
                    print $i;
                    next;
                }
        }
    ' | tr '\n' ' '
}

# getnodes CLUSTER
# Prints the cluster's node names, taken from the live CIB if it can be
# queried, otherwise from the on-disk CIB. Prints nothing if neither works.
getnodes() {
    cluster="$1"

    # 1. Live (cluster nodes or Pacemaker Remote nodes)
    # TODO: This will not detect Pacemaker Remote nodes unless they
    # have ever had a permanent node attribute set, because it only
    # searches the nodes section. It should also search the config
    # for resources that create Pacemaker Remote nodes.
    cib_nodes=$(cibadmin -Q -o nodes 2>/dev/null)
    if [ $? -eq 0 ]; then
        debug "Querying CIB for nodes"
        echo "$cib_nodes" | node_names_from_xml
        return
    fi

    # 2. Saved
    if [ -f "@CRM_CONFIG_DIR@/cib.xml" ]; then
        debug "Querying on-disk CIB for nodes"
        grep "node " "@CRM_CONFIG_DIR@/cib.xml" | node_names_from_xml
        return
    fi

    # 3. logs
    # TODO: Look for something like crm_update_peer
}

# Main program: a tarball needs tar, so check for it up front
if [ $compress -eq 1 ]; then
    require_tar
fi

# CTS test extraction takes precedence over a plain time range
if [ "x$tests" != "x" ]; then
    do_cts

elif [ "x$start_time" != "x" ]; then
    masterlog=""

    if [ -z "$sanitize_patterns" ]; then
        log "WARNING: The tarball produced by this program may contain"
        log " sensitive information such as passwords."
        log ""
        log "We will attempt to remove such information if you use the"
        log "-p option. For example: -p \"pass.*\" -p \"user.*\""
        log ""
        log "However, doing this may reduce the ability for the recipients"
        log "to diagnose issues and generally provide assistance."
        log ""
        log "IT IS YOUR RESPONSIBILITY TO PROTECT SENSITIVE DATA FROM EXPOSURE"
        log ""
    fi

    # If user didn't specify a cluster stack, make a best guess if possible.
    if [ -z "$cluster" ] || [ "$cluster" = "any" ]; then
        cluster=$(get_cluster_type)
    fi

    # If user didn't specify node(s), make a best guess if possible.
if [ -z "$nodes" ]; then nodes=`getnodes $cluster` if [ -n "$nodes" ]; then log "Calculated node list: $nodes" else fatal "Cannot determine nodes; specify --nodes or --single-node" fi fi if echo $nodes | grep -qs $host then debug "We are a cluster node" else debug "We are a log master" masterlog=`findmsg 1 "pacemaker-controld\\|CTS"` fi if [ -z $end_time ]; then end_time=`perl -e 'print time()'` fi label="pcmk-`date +"%a-%d-%b-%Y"`" log "Collecting data from $nodes (`time2str $start_time` to `time2str $end_time`)" collect_data $label $start_time $end_time $masterlog else fatal "Not sure what to do, no tests or time ranges to extract" fi -# vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/tools/crm_standby.in b/tools/crm_standby.in index 0911b9daec..1dba198459 100755 --- a/tools/crm_standby.in +++ b/tools/crm_standby.in @@ -1,158 +1,160 @@ #!@BASH_PATH@ # # Copyright 2009-2018 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. 
# USAGE_TEXT="Usage: crm_standby [options] Common options: --help Display this text, then exit --version Display version information, then exit -V, --verbose Specify multiple times to increase debug output -q, --quiet Print only the standby status (if querying) Commands: -G, --query Query the current value of standby mode (on/off) -v, --update=VALUE Update the value of standby mode (on/off) -D, --delete Let standby mode use default value Additional Options: -N, --node=NODE Operate on the named node instead of the current one -l, --lifetime=VALUE Until when should the setting take effect (valid values: reboot, forever) -i, --id=VALUE (Advanced) XML ID used to identify standby attribute" HELP_TEXT="crm_standby - Query, enable, or disable standby mode for a node Nodes in standby mode may not host cluster resources. $USAGE_TEXT " exit_usage() { if [ $# -gt 0 ]; then echo "error:" "$@" >&2 fi echo echo "$USAGE_TEXT" exit 1 } op="" options="" lifetime=0 target="" SHORTOPTS_DEPRECATED="U:Q" LONGOPTS_DEPRECATED="uname:,get-value,delete-attr,attr-value:,attr-id:" SHORTOPTS="VqGv:DN:l:i:" LONGOPTS="help,version,verbose,quiet,query,update:,delete,node:,lifetime:,id:" TEMP=$(@GETOPT_PATH@ -o ${SHORTOPTS}${SHORTOPTS_DEPRECATED} \ --long ${LONGOPTS},${LONGOPTS_DEPRECATED} \ -n crm_standby -- "$@") if [ $? 
-ne 0 ]; then
    exit_usage
fi

eval set -- "$TEMP" # Quotes around $TEMP are essential

# Translate crm_standby options into crm_attribute options, collected in
# $options. $op records the requested command (g=query, u=update, d=delete)
# and $lifetime whether the user gave --lifetime explicitly.
while true ; do
    case "$1" in
        --help)
            echo "$HELP_TEXT"
            exit 0
            ;;
        --version)
            crm_attribute --version
            exit 0
            ;;
        -q|--quiet|-V|--verbose|-Q)
            # Passed through to crm_attribute unchanged
            options="$options $1"
            shift
            ;;
        -N|--node|-U|--uname)
            target="$2"
            shift
            shift
            ;;
        -G|--query|--get-value)
            options="$options --query"
            op=g
            shift
            ;;
        -v|--update|--attr-value)
            options="$options --update $2"
            op=u
            shift
            shift
            ;;
        -D|--delete|--delete-attr)
            options="$options --delete"
            op=d
            shift
            ;;
        -l|--lifetime)
            options="$options --lifetime $2"
            lifetime=1
            shift
            shift
            ;;
        -i|--id|--attr-id)
            options="$options --id $2"
            shift
            shift
            ;;
        --)
            # End of options
            shift
            break
            ;;
        *)
            exit_usage "unknown option '$1'"
            ;;
    esac
done

# It's important to call cluster commands only after arguments are processed,
# so --version and --help work without problems even if those commands don't.
if [ "$target" = "" ]; then
    target=$(crm_node -n)
fi

options="-N $target -n standby $options"
# No command given: default to a query
if [ x$op = x ]; then
    options="$options -G"; op=g
fi

# If the user didn't explicitly specify a lifetime ...
if [ $lifetime -eq 0 ]; then
    case $op in
        g)
            # For query, report the forever entry if one exists, otherwise
            # report the reboot entry if one exists, otherwise report off.
            crm_attribute $options -l forever >/dev/null 2>&1
            if [ $? -eq 0 ]; then
                options="$options -l forever"
            else
                options="$options -l reboot -d off"
            fi
            ;;
        u)
            # For update, default to updating the forever entry.
            options="$options -l forever"
            ;;
        d)
            # For delete, default to deleting both forever and reboot entries.
crm_attribute $options -l forever crm_attribute $options -l reboot exit 0 ;; esac fi crm_attribute $options + +# vim: set filetype=sh: diff --git a/tools/pcmk_simtimes.in b/tools/pcmk_simtimes.in index c8b0af60f5..f30ded716a 100644 --- a/tools/pcmk_simtimes.in +++ b/tools/pcmk_simtimes.in @@ -1,159 +1,159 @@ #!@PYTHON@ """ Timing comparisons for crm_simulate profiling output """ __copyright__ = "Copyright 2019-2023 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import re import sys import errno import argparse import os # These imports allow running from a source checkout after running `make`. # Note that while this doesn't necessarily mean it will successfully run tests, # but being able to see --help output can be useful. if os.path.exists("@abs_top_srcdir@/python"): sys.path.insert(0, "@abs_top_srcdir@/python") if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@": sys.path.insert(0, "@abs_top_builddir@/python") from pacemaker.exitstatus import ExitStatus DESC = """Compare timings from crm_simulate profiling output""" BEFORE_HELP = """Output of "crm_simulate --profile cts/scheduler --repeat " from earlier Pacemaker build""" # line like: * Testing cts/scheduler/xml/1360.xml ... 
# 0.07 secs
# Capture group 1 is the test name (file name without ".xml"), group 2 the
# elapsed time in seconds.
PATTERN = r"""^\s*\*\s+Testing\s+.*/([^/]+)\.xml\s+\.+\s+([.0-9]+)\s+secs\s*$"""

def parse_args(argv=sys.argv):
    """ Parse command-line arguments

    argv is the full argument vector, including the program name in argv[0].
    Returns the argparse namespace.
    """

    parser = argparse.ArgumentParser(description=DESC)

    parser.add_argument('-V', '--verbose', action='count',
                        help='Increase verbosity')

    parser.add_argument('-p', '--threshold-percent', type=float, default=0,
                        help="Don't show tests with less than this percentage difference in times")

    parser.add_argument('-s', '--threshold-seconds', type=float, default=0,
                        help="Don't show tests with less than this seconds difference in times")

    parser.add_argument('-S', '--sort',
                        choices=['test', 'before', 'after', 'diff', 'percent'],
                        default='test', help="Sort results by this column")

    parser.add_argument('-r', '--reverse', action='store_true',
                        help="Sort results in descending order")

    parser.add_argument('before_file', metavar='BEFORE',
                        type=argparse.FileType('r'),
                        help=BEFORE_HELP)

    parser.add_argument('after_file', metavar='AFTER',
                        type=argparse.FileType('r'),
                        help='Output of same command from later Pacemaker build')

    return parser.parse_args(argv[1:])


def extract_times(infile):
    """ Extract test names and times into hash table from file

    infile is any iterable of text lines. Lines that do not match PATTERN are
    ignored. Times are kept as the strings found in the input.
    """

    result = {}
    for line in infile:
        match = re.search(PATTERN, line)
        if match is not None:
            result[match.group(1)] = match.group(2)
    return result


def compare_test(test, before, after, args):
    """ Compare one test's timings

    before and after map test names to time strings (see extract_times).
    Returns a dict with keys 'test', 'before', 'after', 'diff' and 'percent',
    or None if the test has no earlier timing or the change is below either
    threshold in args.
    """

    try:
        before_time = float(before[test])
    except KeyError:
        if args.verbose > 0:
            print("No previous test " + test + " to compare")
        return None

    after_time = float(after[test])

    time_diff = after_time - before_time

    # Timings are printed with two decimals, so a fast test can legitimately
    # read "0.00". Avoid dividing by zero: no change is 0%, any change from
    # zero is reported as an infinite percentage with the sign of the change.
    if before_time != 0:
        time_diff_percent = (time_diff / before_time) * 100
    elif time_diff == 0:
        time_diff_percent = 0.0
    else:
        time_diff_percent = float('inf') if time_diff > 0 else float('-inf')

    if ((abs(time_diff) >= args.threshold_seconds)
        and (abs(time_diff_percent) >= args.threshold_percent)):
        return { 'test': test,
                 'before': before_time,
                 'after': after_time,
                 'diff': time_diff,
                 'percent': time_diff_percent
               }
    return None


def sort_diff(result):
    """ Sort two test results by time difference """

    global sort_field
return result[sort_field] def print_results(results, sort_reverse): """ Output the comparison results """ if results == []: return # Sort and print test differences results.sort(reverse=sort_reverse, key=sort_diff) for result in results: print("%-40s %6.2fs vs %6.2fs (%+.2fs = %+6.2f%%)" % (result['test'], result['before'], result['after'], result['diff'], result['percent'])) # Print average differences diff_total = sum(d['diff'] for d in results) percent_total = sum(d['percent'] for d in results) nresults = len(results) print("\nAverages: %+.2fs %+6.2f%%" % ((diff_total / nresults), (percent_total / nresults))) if __name__ == "__main__": global sort_field try: args = parse_args() if args.verbose is None: args.verbose = 0 before = extract_times(args.before_file) after = extract_times(args.after_file) sort_field = args.sort # Build a list of test differences results = [] for test in after.keys(): result = compare_test(test, before, after, args) if result is not None: results = results + [ result ] print_results(results, sort_reverse=args.reverse) except KeyboardInterrupt: pass except IOError as e: if e.errno != errno.EPIPE: raise sys.exit(ExitStatus.OK) -# vim: set filetype=python expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=120: +# vim: set filetype=python: diff --git a/tools/report.collector.in b/tools/report.collector.in index dd7acf5d06..a26b93a942 100644 --- a/tools/report.collector.in +++ b/tools/report.collector.in @@ -1,885 +1,885 @@ # # Originally based on hb_report # Copyright 2007 Dejan Muhamedagic # Later changes copyright 2010-2024 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. 
# if echo $REPORT_HOME | grep -qs '^/' then debug "Using full path to working directory: $REPORT_HOME" else REPORT_HOME="$HOME/$REPORT_HOME" debug "Canonicalizing working directory path: $REPORT_HOME" fi detect_host # # find files newer than a and older than b # isnumber() { echo "$*" | grep -qs '^[0-9][0-9]*$' } touchfile() { t=`mktemp` && perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' && echo $t } find_files_clean() { [ -z "$from_stamp" ] || rm -f "$from_stamp" [ -z "$to_stamp" ] || rm -f "$to_stamp" from_stamp="" to_stamp="" } find_files() { dirs= from_time=$2 to_time=$3 for d in $1; do if [ -d $d ]; then dirs="$dirs $d" fi done if [ x"$dirs" = x ]; then return fi isnumber "$from_time" && [ "$from_time" -gt 0 ] || { warning "sorry, can't find files in [ $1 ] based on time if you don't supply time" return } trap find_files_clean 0 if ! from_stamp=`touchfile $from_time`; then warning "sorry, can't create temporary file for find_files" return fi findexp="-newer $from_stamp" if isnumber "$to_time" && [ "$to_time" -gt 0 ]; then if ! to_stamp=`touchfile $to_time`; then warning "sorry, can't create temporary file for find_files" find_files_clean return fi findexp="$findexp ! -newer $to_stamp" fi find $dirs -type f $findexp find_files_clean trap "" 0 } # # check permissions of files/dirs # pl_checkperms() { perl -e ' # check permissions and ownership # uid and gid are numeric # everything must match exactly # no error checking! (file should exist, etc) ($filename, $perms, $in_uid, $in_gid) = @ARGV; ($mode,$uid,$gid) = (stat($filename))[2,4,5]; $p=sprintf("%04o", $mode & 07777); $p ne $perms and exit(1); $uid ne $in_uid and exit(1); $gid ne $in_gid and exit(1); ' $* } num_id() { getent $1 $2 | awk -F: '{print $3}' } chk_id() { [ "$2" ] && return 0 echo "$1: id not found" return 1 } check_perms() { while read type f p uid gid; do if [ ! -e "$f" ]; then echo "$f doesn't exist" continue elif [ ! 
-$type "$f" ]; then echo "$f has wrong type" continue fi n_uid=`num_id passwd $uid` chk_id "$uid" "$n_uid" || continue n_gid=`num_id group $gid` chk_id "$gid" "$n_gid" || continue pl_checkperms $f $p $n_uid $n_gid || { echo "wrong permissions or ownership for $f:" ls -ld $f } done } # # coredumps # findbinary() { random_binary=`which cat 2>/dev/null` # suppose we are lucky binary=`gdb $random_binary $1 < /dev/null 2>/dev/null | grep 'Core was generated' | awk '{print $5}' | sed "s/^.//;s/[.':]*$//"` if [ x = x"$binary" ]; then debug "Could not detect the program name for core $1 from the gdb output; will try with file(1)" binary=$(file $1 | awk '/from/{ for( i=1; i<=NF; i++ ) if( $i == "from" ) { print $(i+1) break } }') binary=`echo $binary | tr -d "'"` binary=$(echo $binary | tr -d '`') if [ "$binary" ]; then binary=`which $binary 2>/dev/null` fi fi if [ x = x"$binary" ]; then warning "Could not find the program path for core $1" return fi fullpath=`which $binary 2>/dev/null` if [ x = x"$fullpath" ]; then if [ -x $CRM_DAEMON_DIR/$binary ]; then echo $CRM_DAEMON_DIR/$binary debug "Found the program at $CRM_DAEMON_DIR/$binary for core $1" else warning "Could not find the program path for core $1" fi else echo $fullpath debug "Found the program at $fullpath for core $1" fi } getbt() { which gdb > /dev/null 2>&1 || { warning "Please install gdb to get backtraces" return } for corefile; do absbinpath=`findbinary $corefile` [ x = x"$absbinpath" ] && continue echo "====================== start backtrace ======================" ls -l $corefile # Summary first... gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt"} -ex quit \ $absbinpath $corefile 2>/dev/null echo "====================== start detail ======================" # Now the unreadable details... 
gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \ $absbinpath $corefile 2>/dev/null echo "======================= end backtrace =======================" done } dump_status_and_config() { crm_mon -1 2>&1 | grep -v '^Last upd' > $target/$CRM_MON_F cibadmin -Q 2>/dev/null > $target/${CIB_F}.live } getconfig() { cluster=$1; shift; target=$1; shift; for cf in $*; do if [ -e "$cf" ]; then cp -a "$cf" $target/ fi done if is_running pacemaker-controld; then dump_status_and_config crm_node -p > "$target/$MEMBERSHIP_F" 2>&1 echo "$host" > $target/RUNNING elif is_running pacemaker-remoted; then dump_status_and_config echo "$host" > $target/RUNNING # Pre-2.0.0 daemon name in case we're collecting on a mixed-version cluster elif is_running pacemaker_remoted; then dump_status_and_config echo "$host" > $target/RUNNING else echo "$host" > $target/STOPPED fi } get_readable_cib() { target="$1"; shift; if [ -f "$target/$CIB_F" ]; then crm_verify -V -x "$target/$CIB_F" >"$target/$CRM_VERIFY_F" 2>&1 if which crm >/dev/null 2>&1 ; then CIB_file="$target/$CIB_F" crm configure show >"$target/$CIB_TXT_F" 2>&1 elif which pcs >/dev/null 2>&1 ; then pcs config -f "$target/$CIB_F" >"$target/$CIB_TXT_F" 2>&1 fi fi } # # remove values of sensitive attributes # # this is not proper xml parsing, but it will work under the # circumstances sanitize_xml_attrs() { sed $( for patt in $SANITIZE; do echo "-e /name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/" done ) } sanitize_hacf() { awk ' $1=="stonith_host"{ for( i=5; i<=NF; i++ ) $i="****"; } {print} ' } sanitize_one_clean() { [ -z "$tmp" ] || rm -f "$tmp" tmp="" [ -z "$ref" ] || rm -f "$ref" ref="" } sanitize() { file=$1 compress="" if [ -z "$SANITIZE" ]; then return fi echo $file | grep -qs 'gz$' && compress=gzip echo $file | grep -qs 'bz2$' && compress=bzip2 if [ "$compress" ]; then decompress="$compress -dc" else compress=cat decompress=cat fi trap sanitize_one_clean 0 tmp=`mktemp` ref=`mktemp` if [ -z "$tmp" -o -z "$ref" 
]; then sanitize_one_clean fatal "cannot create temporary files" fi touch -r $file $ref # save the mtime if [ "`basename $file`" = ha.cf ]; then sanitize_hacf else $decompress | sanitize_xml_attrs | $compress fi < $file > $tmp mv $tmp $file # note: cleaning $tmp up is still needed even after it's renamed # because its temp directory is still there. touch -r $ref $file sanitize_one_clean trap "" 0 } # # get some system info # distro() { if which lsb_release >/dev/null 2>&1 then lsb_release -d | sed -e 's/^Description:\s*//' debug "Using lsb_release for distribution info" return fi relf=`ls /etc/debian_version 2>/dev/null` || relf=`ls /etc/slackware-version 2>/dev/null` || relf=`ls -d /etc/*-release 2>/dev/null` && { for f in $relf; do test -f $f && { echo "`ls $f` `cat $f`" debug "Found `echo $relf | tr '\n' ' '` distribution release file(s)" return } done } warning "No lsb_release, no /etc/*-release, no /etc/debian_version: no distro information" } pkg_ver() { if which dpkg >/dev/null 2>&1 ; then pkg_mgr="deb" elif which rpm >/dev/null 2>&1 ; then pkg_mgr="rpm" elif which pkg_info >/dev/null 2>&1 ; then pkg_mgr="pkg_info" elif which pkginfo >/dev/null 2>&1 ; then pkg_mgr="pkginfo" else warning "Unknown package manager" return fi debug "The package manager is: $pkg_mgr" echo "The package manager is: $pkg_mgr" echo "Installed packages:" case $pkg_mgr in deb) dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W | sort echo for pkg in $*; do if dpkg-query -W $pkg 2>/dev/null ; then debug "Verifying installation of: $pkg" echo "Verifying installation of: $pkg" debsums -s $pkg 2>/dev/null fi done ;; rpm) rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' | sort echo for pkg in $*; do if rpm -q $pkg >/dev/null 2>&1 ; then debug "Verifying installation of: $pkg" echo "Verifying installation of: $pkg" rpm --verify $pkg 2>&1 fi done ;; pkg_info) pkg_info ;; pkginfo) pkginfo | awk '{print $3}' # format? 
;; esac } getbacktraces() { debug "Looking for backtraces: $*" flist=$( for f in `find_files "$CRM_CORE_DIRS" $1 $2`; do bf=`basename $f` test `expr match $bf core` -gt 0 && echo $f done) if [ "$flist" ]; then for core in $flist; do log "Found core file: `ls -al $core`" done # Make a copy of them in case we need more data later # Luckily they compress well mkdir cores >/dev/null 2>&1 cp -a $flist cores/ shrink cores rm -rf cores # Now get as much as we can from them automagically for f in $flist; do getbt $f done fi } getpeinputs() { if [ -n "$PCMK_SCHEDULER_INPUT_DIR" ]; then flist=$( find_files "$PCMK_SCHEDULER_INPUT_DIR" "$1" "$2" | sed "s,`dirname $PCMK_SCHEDULER_INPUT_DIR`/,,g" ) if [ "$flist" ]; then (cd $(dirname "$PCMK_SCHEDULER_INPUT_DIR") && tar cf - $flist) | (cd "$3" && tar xf -) debug "found `echo $flist | wc -w` scheduler input files in $PCMK_SCHEDULER_INPUT_DIR" fi fi } getblackboxes() { flist=$( find_files $BLACKBOX_DIR $1 $2 ) for bb in $flist; do bb_short=`basename $bb` qb-blackbox $bb > $3/${bb_short}.blackbox 2>&1 info "Extracting contents of blackbox: $bb_short" done } # # some basic system info and stats # sys_info() { cluster=$1; shift echo "Platform: `uname`" echo "Kernel release: `uname -r`" echo "Architecture: `uname -m`" if [ `uname` = Linux ]; then echo "Distribution: `distro`" fi echo cibadmin --version 2>&1 | head -1 cibadmin -! 
2>&1 case $cluster in corosync) /usr/sbin/corosync -v 2>&1 | head -1 ;; esac # Cluster glue version hash (if available) stonith -V 2>/dev/null # Resource agents version hash echo "resource-agents: `grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs`" echo pkg_ver $* } sys_stats() { set -x uname -n uptime ps axf ps auxw top -b -n 1 ifconfig -a ip addr list netstat -i arp -an test -d /proc && { cat /proc/cpuinfo } lsscsi lspci lsblk mount df set +x } dlm_dump() { if which dlm_tool >/dev/null 2>&1 ; then if is_running dlm_controld; then echo "--- Lockspace overview:" dlm_tool ls -n echo "---Lockspace history:" dlm_tool dump echo "---Lockspace status:" dlm_tool status dlm_tool status -v echo "---Lockspace config:" dlm_tool dump_config dlm_tool log_plock dlm_tool ls | grep name | while read X N ; do echo "--- Lockspace $N:" dlm_tool lockdump "$N" dlm_tool lockdebug -svw "$N" done fi fi } drbd_info() { test -f /proc/drbd && { echo "--- /proc/drbd:" cat /proc/drbd 2>&1 echo } if which drbdadm >/dev/null 2>&1; then echo "--- drbdadm dump:" if [ -z "$SANITIZE"]; then drbdadm dump 2>&1 else drbdadm dump 2>&1 | sed "s/\(shared-secret[ ]*\"\)[^\"]*\";/\1****\";/" fi echo echo "--- drbdadm status:" drbdadm status 2>&1 echo echo "--- drbdadm show-gi:" for res in $(drbdsetup status | grep -e ^\\S | awk '{ print $1 }'); do echo "$res:" drbdadm show-gi $res 2>&1 echo done fi if which drbd-overview >/dev/null 2>&1; then echo "--- drbd-overview:" drbd-overview 2>&1 echo fi if which drbdsetup >/dev/null 2>&1; then echo "--- drbdsetup status:" drbdsetup status --verbose --statistics 2>&1 echo echo "--- drbdsetup events2:" drbdsetup events2 --timestamps --statistics --now 2>&1 echo fi } iscfvarset() { test "`getcfvar $1 $2`" } iscfvartrue() { getcfvar $1 $2 $3 | grep -E -qsi "^(true|y|yes|on|1)" } iscfvarfalse() { getcfvar $1 $2 $3 | grep -E -qsi "^(false|n|no|off|0)" } find_syslog() { priority="$1" # Always include system logs (if we can find them) 
msg="Mark:pcmk:`perl -e 'print time()'`" logger -p "$priority" "$msg" >/dev/null 2>&1 # Force buffer flush killall -HUP rsyslogd >/dev/null 2>&1 sleep 2 # Give syslog time to catch up in case it's busy findmsg 1 "$msg" } get_logfiles_cs() { if [ ! -f "$cf_file" ]; then return fi debug "Reading $cf_type log settings from $cf_file" # The default value of to_syslog is yes. if ! iscfvarfalse $cf_type to_syslog "$cf_file"; then facility_cs=$(getcfvar $cf_type syslog_facility "$cf_file") if [ -z "$facility_cs" ]; then facility_cs="daemon" fi find_syslog "$facility_cs.info" fi if [ "$SOS_MODE" = "1" ]; then return fi if iscfvartrue $cf_type to_logfile "$cf_file"; then logfile=$(getcfvar $cf_type logfile "$cf_file") if [ -f "$logfile" ]; then debug "Log settings found for cluster type $cf_type: $logfile" echo "$logfile" fi fi } get_logfiles() { cf_type=$1 cf_file="$2" case $cf_type in corosync) get_logfiles_cs;; esac . @CONFIGDIR@/pacemaker facility="$PCMK_logfacility" if [ -z "$facility" ]; then facility="daemon" fi if [ "$facility" != "$facility_cs" ]&&[ "$facility" != none ]; then find_syslog "$facility.notice" fi if [ "$SOS_MODE" = "1" ]; then return fi logfile="$PCMK_logfile" if [ "$logfile" != none ]; then if [ -z "$logfile" ]; then for logfile in "@CRM_LOG_DIR@/pacemaker.log" "/var/log/pacemaker.log"; do if [ -f "$logfile" ]; then debug "Log settings not found for Pacemaker, assuming $logfile" echo "$logfile" break fi done elif [ -f "$logfile" ]; then debug "Log settings found for Pacemaker: $logfile" echo "$logfile" fi fi # Look for detail logs: # - initial pacemakerd logs and tracing might go to a different file pattern="Starting Pacemaker" # - make sure we get something from the scheduler pattern="$pattern\\|Calculated transition" # - cib and pacemaker-execd updates # (helpful on non-DC nodes and when cluster has been up for a long time) pattern="$pattern\\|cib_perform_op\\|process_lrm_event" # - pacemaker_remote might use a different file 
pattern="$pattern\\|pacemaker[-_]remoted:" findmsg 3 "$pattern" } essential_files() { cat< /dev/null 2>&1 if [ $? -eq 0 ]; then cl_have_journald=1 else cl_have_journald=0 fi cl_lognames="$CL_LOGFILES" if [ $cl_have_journald -eq 1 ]; then cl_lognames="$cl_lognames journalctl" fi cl_lognames=$(trim "$cl_lognames") if [ -z "$cl_lognames" ]; then return fi # YYYY-MM-DD HH:MM:SS cl_start_ymd=$(date -d @${CL_START} +"%F %T") cl_end_ymd=$(date -d @${CL_END} +"%F %T") debug "Gathering logs from $cl_start_ymd to $cl_end_ymd:" debug " $cl_lognames" # Remove our temporary file if we get interrupted here trap '[ -z "$cl_pattfile" ] || rm -f "$cl_pattfile"' 0 # Create a temporary file with patterns to grep for cl_pattfile=$(mktemp) || fatal "cannot create temporary files" for cl_pattern in $LOG_PATTERNS; do echo "$cl_pattern" done > $cl_pattfile echo "Log pattern matches from $REPORT_TARGET:" > $ANALYSIS_F if [ -n "$CL_LOGFILES" ]; then for cl_logfile in $CL_LOGFILES; do cl_extract="$(basename $cl_logfile).extract.txt" if [ ! 
-f "$cl_logfile" ]; then # Not a file continue elif [ -f "$cl_extract" ]; then # We already have it continue fi dumplogset "$cl_logfile" $LOG_START $LOG_END > "$cl_extract" sanitize "$cl_extract" grep -f "$cl_pattfile" "$cl_extract" >> $ANALYSIS_F done fi # Collect systemd logs if present if [ $cl_have_journald -eq 1 ]; then journalctl --since "$cl_start_ymd" --until "$cl_end_ymd" > journal.log grep -f "$cl_pattfile" journal.log >> $ANALYSIS_F fi rm -f $cl_pattfile trap "" 0 } require_tar debug "Initializing $REPORT_TARGET subdir" if [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then if [ -e $REPORT_HOME/$REPORT_TARGET ]; then warning "Directory $REPORT_HOME/$REPORT_TARGET already exists, using /tmp/$$/$REPORT_TARGET instead" REPORT_HOME=/tmp/$$ fi fi mkdir -p $REPORT_HOME/$REPORT_TARGET cd $REPORT_HOME/$REPORT_TARGET case $CLUSTER in any) cluster=`get_cluster_type`;; *) cluster=$CLUSTER;; esac cluster_cf=`find_cluster_cf $cluster` # If cluster stack is still "any", this might be a Pacemaker Remote node, # so don't complain in that case. if [ -z "$cluster_cf" ] && [ $cluster != "any" ]; then warning "Could not determine the location of your cluster configuration" fi if [ "$SEARCH_LOGS" = "1" ]; then logfiles=$(get_logfiles "$cluster" "$cluster_cf" | sort -u) fi logfiles="$(trim "$logfiles $EXTRA_LOGS")" if [ -z "$logfiles" ]; then which journalctl > /dev/null 2>&1 if [ $? 
-eq 0 ]; then info "Systemd journal will be only log collected" else info "No logs will be collected" fi info "No log files found or specified with --logfile /some/path" fi debug "Config: $cluster ($cluster_cf) $logfiles" sys_info $cluster $PACKAGES > $SYSINFO_F essential_files $cluster | check_perms > $PERMISSIONS_F 2>&1 getconfig $cluster "$REPORT_HOME/$REPORT_TARGET" "$cluster_cf" "$CRM_CONFIG_DIR/$CIB_F" "/etc/drbd.conf" "/etc/drbd.d" "/etc/booth" getpeinputs $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET getbacktraces $LOG_START $LOG_END > $REPORT_HOME/$REPORT_TARGET/$BT_F getblackboxes $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET case $cluster in corosync) if is_running corosync; then corosync-blackbox >corosync-blackbox-live.txt 2>&1 # corosync-fplay > corosync-blackbox.txt tool=`pickfirst corosync-objctl corosync-cmapctl` case $tool in *objctl) $tool -a > corosync.dump 2>/dev/null;; *cmapctl) $tool > corosync.dump 2>/dev/null;; esac corosync-quorumtool -s -i > corosync.quorum 2>&1 fi ;; esac dc=`crm_mon -1 2>/dev/null | awk '/Current DC/ {print $3}'` if [ "$REPORT_TARGET" = "$dc" ]; then echo "$REPORT_TARGET" > DC fi dlm_dump > $DLM_DUMP_F 2>&1 sys_stats > $SYSSTATS_F 2>&1 drbd_info > $DRBD_INFO_F 2>&1 debug "Sanitizing files: $SANITIZE" # # replace sensitive info with '****' # cf="" if [ ! -z "$cluster_cf" ]; then cf=`basename $cluster_cf` fi for f in "$cf" "$CIB_F" "$CIB_F.live" pengine/*; do if [ -f "$f" ]; then sanitize "$f" fi done # For convenience, generate human-readable version of CIB and any XML errors # in it (AFTER sanitizing, so we don't need to sanitize this output). # sosreport does this itself, so we do not need to when run by sosreport. if [ "$SOS_MODE" != "1" ]; then get_readable_cib "$REPORT_HOME/$REPORT_TARGET" fi collect_logs "$LOG_START" "$LOG_END" $logfiles # Purge files containing no information for f in `ls -1`; do if [ -d "$f" ]; then continue elif [ ! 
-s "$f" ]; then case $f in *core*) log "Detected empty core file: $f";; *) debug "Removing empty file: `ls -al $f`" rm -f $f ;; esac fi done # Parse for events for l in $logfiles; do b="$(basename $l).extract.txt" node_events "$b" > $EVENTS_F # Link the first logfile to a standard name if it doesn't yet exist if [ -e "$b" -a ! -e "$HALOG_F" ]; then ln -s "$b" "$HALOG_F" fi done if [ -e $REPORT_HOME/.env ]; then debug "Localhost: $REPORT_MASTER $REPORT_TARGET" elif [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then debug "Streaming report back to $REPORT_MASTER" (cd $REPORT_HOME && tar cf - $REPORT_TARGET) if [ "$REMOVE" = "1" ]; then cd rm -rf $REPORT_HOME fi fi -# vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: +# vim: set filetype=sh: diff --git a/tools/report.common.in b/tools/report.common.in index e272a9a08e..23026eae3f 100644 --- a/tools/report.common.in +++ b/tools/report.common.in @@ -1,890 +1,890 @@ # # Originally based on hb_report # Copyright 2007 Dejan Muhamedagic # Later changes copyright 2010-2024 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. 
# host=`uname -n` shorthost=`echo $host | sed s:\\\\..*::` if [ -z $verbose ]; then verbose=0 fi # Target Files EVENTS_F=events.txt ANALYSIS_F=analysis.txt HALOG_F=cluster-log.txt BT_F=backtraces.txt SYSINFO_F=sysinfo.txt SYSSTATS_F=sysstats.txt DLM_DUMP_F=dlm_dump.txt CRM_MON_F=crm_mon.txt MEMBERSHIP_F=members.txt CRM_VERIFY_F=crm_verify.txt PERMISSIONS_F=permissions.txt CIB_F=cib.xml CIB_TXT_F=cib.txt DRBD_INFO_F=drbd_info.txt EVENT_PATTERNS=" state do_state_transition membership pcmk_peer_update.*(lost|memb): quorum (crmd|pacemaker-controld).*crm_update_quorum pause Process.pause.detected resources (lrmd|pacemaker-execd).*rsc:(start|stop) stonith te_fence_node|fenced.*(requests|(Succeeded|Failed).to.|result=) start_stop shutdown.decision|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete " # superset of all packages of interest on all distros # (the package manager will be used to validate the installation # of any of these packages that are installed) PACKAGES="pacemaker pacemaker-libs pacemaker-cluster-libs libpacemaker3 pacemaker-remote pacemaker-pygui pacemaker-pymgmt pymgmt-client corosync corosynclib libcorosync4 resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord ocfs2-tools ocfs2-tools-o2cb ocfs2console ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen lvm2 lvm2-clvm cmirrord libdlm libdlm2 libdlm3 hawk ruby lighttpd kernel-default kernel-pae kernel-xen glibc " # Potential locations of system log and cluster daemon logs SYSLOGS=" /var/log/* /var/logs/* /var/syslog/* /var/adm/* /var/log/ha/* /var/log/cluster/* /var/log/pacemaker/* " # Whether pacemaker-remoted was found (0 = yes, 1 = no, -1 = haven't looked yet) REMOTED_STATUS=-1 # # keep the user posted # record() { if [ x != 
x"$REPORT_HOME" -a -d "${REPORT_HOME}/$shorthost" ]; then rec="${REPORT_HOME}/$shorthost/report.out" elif [ x != x"${l_base}" -a -d "${l_base}" ]; then rec="${l_base}/report.summary" else rec="/dev/null" fi printf "%-10s $*\n" "$shorthost:" 2>&1 >> "${rec}" } log() { printf "%-10s $*\n" "$shorthost:" 1>&2 record "$*" } debug() { if [ $verbose -gt 0 ]; then log "Debug: $*" else record "Debug: $*" fi } info() { log "$*" } warning() { log "WARN: $*" } fatal() { log "ERROR: $*" exit 1 } require_tar() { which tar >/dev/null 2>&1 if [ $? -ne 0 ]; then fatal "Required program 'tar' not found, please install and re-run" fi } # check if process of given substring in its name does exist; # only look for processes originated by user 0 (by UID), "@CRM_DAEMON_USER@" # or effective user running this script, and/or group 0 (by GID), # "@CRM_DAEMON_GROUP@" or one of the groups the effective user belongs to # (there's no business in probing any other processes) is_running() { ps -G "0 $(getent group '@CRM_DAEMON_GROUP@' 2>/dev/null | cut -d: -f3) $(id -G)" \ -u "0 @CRM_DAEMON_USER@ $(id -u)" -f \ | grep -Eqs $(echo "$1" | sed -e 's/^\(.\)/[\1]/') } has_remoted() { if [ $REMOTED_STATUS -eq -1 ]; then REMOTED_STATUS=1 if which pacemaker-remoted >/dev/null 2>&1; then REMOTED_STATUS=0 # Check for pre-2.0.0 daemon name in case we have mixed-version cluster elif which pacemaker_remoted >/dev/null 2>&1; then REMOTED_STATUS=0 elif [ -x "@sbindir@/pacemaker-remoted" ]; then REMOTED_STATUS=0 elif [ -x "@sbindir@/pacemaker_remoted" ]; then REMOTED_STATUS=0 else # @TODO: the binary might be elsewhere, # but a global search is too expensive for d in /{usr,opt}/{local/,}{s,}bin; do if [ -x "${d}/pacemaker-remoted" ]; then REMOTED_STATUS=0 elif [ -x "${d}/pacemaker_remoted" ]; then REMOTED_STATUS=0 fi done fi fi return $REMOTED_STATUS } # found_dir found_dir() { echo "$2" info "Pacemaker $1 found in: $2" } detect_daemon_dir() { info "Searching for where Pacemaker daemons live... 
this may take a while" for d in \ {/usr,/usr/local,/opt/local,@exec_prefix@}/{libexec,lib64,lib}/pacemaker do # pacemaker and pacemaker-cts packages can install to daemon directory, # so check for a file from each if [ -e $d/pacemaker-schedulerd ] || [ -e $d/cts-exec-helper ]; then found_dir "daemons" "$d" return fi done # Pacemaker Remote nodes don't need to install daemons if has_remoted; then info "Pacemaker daemons not found (this appears to be a Pacemaker Remote node)" return fi for f in $(find / -maxdepth $maxdepth -type f -name pacemaker-schedulerd -o -name cts-exec-helper); do d=$(dirname "$f") found_dir "daemons" "$d" return done fatal "Pacemaker daemons not found (nonstandard installation?)" } detect_cib_dir() { d="${local_state_dir}/lib/pacemaker/cib" if [ -f "$d/cib.xml" ]; then found_dir "config files" "$d" return fi # Pacemaker Remote nodes don't need a CIB if has_remoted; then info "Pacemaker config not found (this appears to be a Pacemaker Remote node)" return fi info "Searching for where Pacemaker keeps config information... this may take a while" # TODO: What about false positives where someone copied the CIB? for f in $(find / -maxdepth $maxdepth -type f -name cib.xml); do d=$(dirname $f) found_dir "config files" "$d" return done warning "Pacemaker config not found (nonstandard installation?)" } detect_state_dir() { if [ -n "$CRM_CONFIG_DIR" ]; then # Assume new layout # $local_state_dir/lib/pacemaker/(cib,pengine,blackbox,cores) dirname "$CRM_CONFIG_DIR" # Pacemaker Remote nodes might not have a CRM_CONFIG_DIR elif [ -d "$local_state_dir/lib/pacemaker" ]; then echo $local_state_dir/lib/pacemaker fi } detect_pe_dir() { config_root="$1" d="$config_root/pengine" if [ -d "$d" ]; then found_dir "scheduler inputs" "$d" return fi if has_remoted; then info "Pacemaker scheduler inputs not found (this appears to be a Pacemaker Remote node)" return fi info "Searching for where Pacemaker keeps scheduler inputs... 
this may take a while" for d in $(find / -maxdepth $maxdepth -type d -name pengine); do found_dir "scheduler inputs" "$d" return done fatal "Pacemaker scheduler inputs not found (nonstandard installation?)" } detect_host() { local_state_dir=@localstatedir@ if [ -d $local_state_dir/run ]; then CRM_STATE_DIR=$local_state_dir/run/crm else info "Searching for where Pacemaker keeps runtime data... this may take a while" for d in `find / -maxdepth $maxdepth -type d -name run`; do local_state_dir=`dirname $d` CRM_STATE_DIR=$d/crm break done info "Found: $CRM_STATE_DIR" fi debug "Machine runtime directory: $local_state_dir" debug "Pacemaker runtime data located in: $CRM_STATE_DIR" CRM_DAEMON_DIR=$(detect_daemon_dir) CRM_CONFIG_DIR=$(detect_cib_dir) config_root=$(detect_state_dir) # Older versions had none BLACKBOX_DIR=$config_root/blackbox debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR" PCMK_SCHEDULER_INPUT_DIR=$(detect_pe_dir "$config_root") CRM_CORE_DIRS="" for d in $config_root/cores $local_state_dir/lib/corosync; do if [ -d $d ]; then CRM_CORE_DIRS="$CRM_CORE_DIRS $d" fi done debug "Core files located under: $CRM_CORE_DIRS" } time2str() { perl -e "use POSIX; print strftime('%x %X',localtime($1));" } get_time() { perl -e "\$time=\"$*\";" -e ' $unix_tm = 0; eval "use Date::Parse"; if (index($time, ":") < 0) { } elsif (!$@) { $unix_tm = str2time($time); } else { eval "use Date::Manip"; if (!$@) { $unix_tm = UnixDate(ParseDateString($time), "%s"); } } if ($unix_tm != "") { print int($unix_tm); } else { print ""; } ' } get_time_syslog() { awk '{print $1,$2,$3}' } get_time_legacy() { awk '{print $2}' | sed 's/_/ /' } get_time_iso8601() { awk '{print $1}' } get_time_format_for_string() { l="$*" t=$(get_time `echo $l | get_time_syslog`) if [ "x$t" != x ]; then echo syslog return fi t=$(get_time `echo $l | get_time_iso8601`) if [ "x$t" != x ]; then echo iso8601 return fi t=$(get_time `echo $l | get_time_legacy`) if [ "x$t" != x ]; then echo legacy return fi } 
# Read up to 10 lines from stdin and print the time format ("syslog",
# "iso8601" or "legacy") of the first line whose format can be determined.
get_time_format() {
    t=0 l="" func=""
    trycnt=10
    while [ $trycnt -gt 0 ] && read l; do
        func=$(get_time_format_for_string $l)
        if [ "x$func" != x ]; then
            break
        fi
        trycnt=$(($trycnt-1))
    done
    #debug "Logfile uses the $func time format"
    echo $func
}

# get_time_from_line <format> <log line...>
#
# Print the line's timestamp as seconds since the epoch.  An empty <format>
# means the format is detected from the line itself.
get_time_from_line() {
    GTFL_FORMAT="$1"
    shift
    if [ "$GTFL_FORMAT" = "" ]; then
        GTFL_FORMAT=$(get_time_format_for_string "$@")
    fi
    case $GTFL_FORMAT in
        syslog|legacy|iso8601)
            get_time $(echo "$@" | get_time_${GTFL_FORMAT})
            ;;
        *)
            warning "Unknown time format in: $@"
            ;;
    esac
}

# get_first_time <format>
#
# Print the timestamp of the first line on stdin that has a parsable one.
get_first_time() {
    l=""
    format=$1
    while read l; do
        ts=$(get_time_from_line "$format" "$l")
        if [ "x$ts" != x ]; then
            echo "$ts"
            return
        fi
    done
}

# get_last_time <format>
#
# Print the timestamp of the last line on stdin that has a parsable one,
# or the current time if no line has.
get_last_time() {
    l=""
    best=`date +%s` # Now
    format=$1
    while read l; do
        ts=$(get_time_from_line "$format" "$l")
        if [ "x$ts" != x ]; then
            best=$ts
        fi
    done
    echo $best
}

# linetime <file> <line number>
#
# Print the timestamp of the first line at or after <line number> that
# looks like it contains a clock value.
linetime() {
    get_time_from_line "" $(tail -n +$2 $1 | grep -a ":[0-5][0-9]:" | head -n 1)
}

#
# findmsg <max> <pattern>
#
# Print the names of up to <max> system logs that contain <pattern>,
# ordered by most recently modified.
#
findmsg() {
    max=$1
    pattern="$2"
    found=0

    # List all potential system logs ordered by most recently modified.
    candidates=$(ls -1td $SYSLOGS 2>/dev/null)
    if [ -z "$candidates" ]; then
        debug "No system logs found to search for pattern \'$pattern\'"
        return
    fi

    # Portable way to handle files with spaces in their names.
    SAVE_IFS=$IFS
    IFS="
"

    # Check each log file for matches.
    logfiles=""
    for f in $candidates; do
        local cat=""

        # We only care about readable files with something in them.
        if [ ! -f "$f" ] || [ ! -r "$f" ] || [ ! -s "$f" ] ; then
            continue
        fi

        cat=$(find_decompressor "$f")

        # We want to avoid grepping through potentially huge binary logs such
        # as lastlog. However, control characters sometimes find their way into
        # text logs, so we use a heuristic of more than 256 nonprintable
        # characters in the file's first kilobyte.
        if [ $($cat "$f" 2>/dev/null | head -c 1024 | tr -d '[:print:][:space:]' | wc -c) -gt 256 ]
        then
            continue
        fi

        # Our patterns are ASCII, so we can use LC_ALL="C" to speed up grep
        $cat "$f" 2>/dev/null | LC_ALL="C" grep -q -e "$pattern"
        if [ $? -eq 0 ]; then

            # Add this file to the list of hits
            # (using newline as separator to handle spaces in names).
            if [ -z "$logfiles" ]; then
                logfiles="$f"
            else
                logfiles="$logfiles
$f"
            fi

            # If we have enough hits, print them and return.
            found=$(($found+1))
            if [ $found -ge $max ]; then
                break
            fi
        fi
    done 2>/dev/null
    IFS=$SAVE_IFS
    if [ -z "$logfiles" ]; then
        debug "Pattern \'$pattern\' not found in any system logs"
    else
        debug "Pattern \'$pattern\' found in: [ $logfiles ]"
        echo "$logfiles"
    fi
}

# node_events <file>
#
# Print the lines of <file> matching any of the EVENT_PATTERNS expressions.
node_events() {
    if [ -e $1 ]; then
        # Join all patterns as "|p1|p2|...", then drop the leading "|".
        # printf is used instead of the non-portable "echo -n".
        Epatt=`echo "$EVENT_PATTERNS" |
            while read title p; do [ -n "$p" ] && printf '|%s' "$p"; done |
            sed 's/.//'
            `
        grep -E "$Epatt" $1
    fi
}

# pickfirst <program>...
#
# Print the first of the given programs that 'which' can find and return 0;
# return 1 if none is found.
pickfirst() {
    for x; do
        which $x >/dev/null 2>&1 && {
            echo $x
            return 0
        }
    done
    return 1
}

# shrink <directory>
#
# Archive <directory> into <directory>.tar, compressed with the first of
# bzip2, gzip or xz that is available, and print the archive's name.
shrink() {
    olddir=$PWD
    dir=`dirname $1`
    base=`basename $1`

    target=$1.tar
    tar_options="cf"

    variant=`pickfirst bzip2 gzip xz false`
    case $variant in
        bz*)
            tar_options="jcf"
            target="$target.bz2"
            ;;
        gz*)
            tar_options="zcf"
            target="$target.gz"
            ;;
        xz*)
            tar_options="Jcf"
            target="$target.xz"
            ;;
        *)
            warning "Could not find a compression program, the resulting tarball may be huge"
            ;;
    esac

    if [ -e $target ]; then
        fatal "Destination $target already exists, specify an alternate name with --dest"
    fi

    cd $dir >/dev/null 2>&1
    tar $tar_options $target $base >/dev/null 2>&1
    if [ $? -ne 0 ]; then
        fatal "Could not archive $base, please investigate and collect manually"
    fi
    cd $olddir >/dev/null 2>&1

    echo $target
}

# findln_by_time <logfile> <epoch-seconds>
#
# Binary-search <logfile> for the given time and print the line number
# where the search ended.  Prints nothing if the file is skipped for its
# size or no timestamps can be extracted.
findln_by_time() {
    local logf=$1
    local tm=$2
    local first=1

    # Some logs can be massive (over 1,500,000,000 lines have been seen in the wild)
    # Even just 'wc -l' on these files can take 10+ minutes

    local fileSize=`ls -lh "$logf" | awk '{ print $5 }' | grep -ie G`
    if [ x$fileSize != x ]; then
        warning "$logf is ${fileSize} in size and could take many hours to process. Skipping."
        return
    fi

    local last=`wc -l < $logf`
    while [ $first -le $last ]; do
        mid=$((($last+$first)/2))
        trycnt=10
        while [ $trycnt -gt 0 ]; do
            tmid=`linetime $logf $mid`
            [ "$tmid" ] && break
            warning "cannot extract time: $logf:$mid; will try the next one"
            trycnt=$(($trycnt-1))
            # shift the whole first-last segment
            first=$(($first-1))
            last=$(($last-1))
            mid=$((($last+$first)/2))
        done
        if [ -z "$tmid" ]; then
            warning "giving up on log..."
            return
        fi
        if [ $tmid -gt $tm ]; then
            last=$(($mid-1))
        elif [ $tmid -lt $tm ]; then
            first=$(($mid+1))
        else
            break
        fi
    done
    echo $mid
}

# dumplog <logfile> <from_line> [<to_line>]
#
# Print lines <from_line> through <to_line> of <logfile> (through the end
# of the file if <to_line> is empty).  Prints nothing without <from_line>.
dumplog() {
    local logf=$1
    local from_line=$2
    local to_line=$3
    [ "$from_line" ] || return
    tail -n +$from_line $logf |
        if [ "$to_line" ]; then
            # "head -n N" is the standard spelling of the obsolescent "head -N"
            head -n $(($to_line-$from_line+1))
        else
            cat
        fi
}

#
# find log/set of logs which are interesting for us
#
#
# find log slices
#

# find_decompressor <file>
#
# Print the command that writes the (decompressed) contents of <file> to
# stdout, chosen by the file name's suffix.
find_decompressor() {
    case $1 in
        *bz2) echo "bzip2 -dc" ;;
        *gz) echo "gzip -dc" ;;
        *xz) echo "xz -dc" ;;
        *) echo "cat" ;;
    esac
}

#
# check if the log contains a piece of our segment
#
# is_our_log <logfile> <from_time> <to_time>
#
# Return 0 to skip the log, 1 to include it and keep looking at older logs,
# 2 if we are already past the interesting logs, 3 if this is the last
# (oldest) interesting log.
is_our_log() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    local cat=`find_decompressor $logf`
    local format=`$cat $logf | get_time_format`
    local first_time=`$cat $logf | head -10 | get_first_time $format`
    local last_time=`$cat $logf | tail -10 | get_last_time $format`

    if [ x = "x$first_time" -o x = "x$last_time" ]; then
        warning "Skipping bad logfile '$1': Could not determine log dates"
        return 0 # skip (empty log?)
    fi
    if [ $from_time -gt $last_time ]; then
        # we shouldn't get here anyway if the logs are in order
        return 2 # we're past good logs; exit
    fi
    if [ $from_time -ge $first_time ]; then
        return 3 # this is the last good log
    fi
    # have to go further back
    if [ x = "x$to_time" -o $to_time -ge $first_time ]; then
        return 1 # include this log
    else
        return 0 # don't include this log
    fi
}

#
# go through archived logs (timewise backwards) and see if there
# are lines belonging to us
# (we rely on untouched log files, i.e. that modify time
# hasn't been changed)
#
arch_logs() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    # look for files such as: ha-log-20090308 or
    # ha-log-20090308.gz (.bz2) or ha-log.0, etc
    ls -t $logf $logf*[0-9z] 2>/dev/null |
    while read next_log; do
        is_our_log $next_log $from_time $to_time
        case $? in
            0) ;; # noop, continue
            1) echo $next_log # include log and continue
                debug "Found log $next_log"
                ;;
            2) break;; # don't go through older logs!
            3) echo $next_log # include log and continue
                debug "Found log $next_log"
                break
                ;; # don't go through older logs!
        esac
    done
}

#
# print part of the log
#

# Remove the temporary file named by $tmp, if any
drop_tmp_file() {
    [ -z "$tmp" ] || rm -f "$tmp"
}

# print_logseg <logfile> <from_time> <to_time>
#
# Print the part of <logfile> between the two times.  A <from_time> of 0
# means "from the first line"; a <to_time> of 0 means "through the end".
print_logseg() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    # uncompress to a temp file (if necessary)
    local cat=`find_decompressor $logf`
    if [ "$cat" != "cat" ]; then
        tmp=`mktemp`
        $cat $logf > $tmp
        trap drop_tmp_file 0
        sourcef=$tmp
    else
        sourcef=$logf
        tmp=""
    fi

    if [ "$from_time" = 0 ]; then
        FROM_LINE=1
    else
        FROM_LINE=`findln_by_time $sourcef $from_time`
    fi
    if [ -z "$FROM_LINE" ]; then
        warning "couldn't find line for time $from_time; corrupt log file?"
        return
    fi

    TO_LINE=""
    if [ "$to_time" != 0 ]; then
        TO_LINE=`findln_by_time $sourcef $to_time`
        if [ -z "$TO_LINE" ]; then
            warning "couldn't find line for time $to_time; corrupt log file?"
            return
        fi
        if [ $FROM_LINE -lt $TO_LINE ]; then
            dumplog $sourcef $FROM_LINE $TO_LINE
            log "Including segment [$FROM_LINE-$TO_LINE] from $logf"
        else
            debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf"
        fi
    else
        dumplog $sourcef $FROM_LINE $TO_LINE
        log "Including all logs after line $FROM_LINE from $logf"
    fi
    drop_tmp_file
    trap "" 0
}

#
# find log/set of logs which are interesting for us
#
dumplogset() {
    local logf=$1
    local from_time=$2
    local to_time=$3

    local logf_set=`arch_logs $logf $from_time $to_time`
    if [ x = "x$logf_set" ]; then
        return
    fi

    local num_logs=`echo "$logf_set" | wc -l`
    local oldest=`echo $logf_set | awk '{print $NF}'`
    local newest=`echo $logf_set | awk '{print $1}'`
    local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'`

    # the first logfile: from $from_time to $to_time (or end)
    # logfiles in the middle: all
    # the last logfile: from beginning to $to_time (or end)
    case $num_logs in
        1) print_logseg $newest $from_time $to_time;;
        *)
            print_logseg $oldest $from_time 0
            for f in $mid_logfiles; do
                `find_decompressor $f` $f
                debug "including complete $f logfile"
            done
            print_logseg $newest 0 $to_time
            ;;
    esac
}

# cut out a stanza
getstanza() {
    awk -v name="$1" '
    !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start
        if ($1 == name)
            in_stanza = 1
    }
    in_stanza { print }
    in_stanza && NF==1 && $1 == "}" { exit }
    '
}

# supply stanza in $1 and variable name in $2
# (stanza is optional)
getcfvar() {
    cf_type=$1; shift;
    cf_var=$1; shift;
    cf_file=$*

    [ -f "$cf_file" ] || return
    case $cf_type in
        corosync)
            sed 's/#.*//' < $cf_file |
                if [ $# -eq 2 ]; then
                    getstanza "$cf_var"
                    shift 1
                else
                    cat
                fi |
                awk -v varname="$cf_var" '
                NF==2 && match($1,varname":$")==1 { print $2; exit; }
                '
            ;;
    esac
}

#
# figure out the cluster type, depending on the process list
# and existence of configuration files
#
get_cluster_type() {
    if is_running corosync; then
        tool=`pickfirst corosync-objctl corosync-cmapctl`
        case $tool in
            *objctl) quorum=`$tool -a | grep quorum.provider | sed 's/.*=\s*//'`;;
            *cmapctl) quorum=`$tool | grep quorum.provider | sed 's/.*=\s*//'`;;
        esac
        stack="corosync"

    # Now we're guessing...

    # TODO: Technically these could be anywhere :-/
    elif [ -f "@PCMK__COROSYNC_CONF@" ]; then
        stack="corosync"

    else
        # We still don't know. This might be a Pacemaker Remote node,
        # or the configuration might be in a nonstandard location.
        stack="any"
    fi

    debug "Detected the '$stack' cluster stack"
    echo $stack
}

# find_cluster_cf <cluster type>
#
# Print the location of the cluster stack's configuration file, if any.
find_cluster_cf() {
    case $1 in
        corosync)
            best_size=0
            best_file=""

            # TODO: Technically these could be anywhere :-/
            for cf in "@PCMK__COROSYNC_CONF@"; do
                if [ -f $cf ]; then
                    size=`wc -l $cf | awk '{print $1}'`
                    if [ $size -gt $best_size ]; then
                        best_size=$size
                        best_file=$cf
                    fi
                fi
            done
            if [ -z "$best_file" ]; then
                debug "Looking for corosync configuration file. This may take a while..."
                for f in `find / -maxdepth $maxdepth -type f -name corosync.conf`; do
                    best_file=$f
                    break
                done
            fi
            debug "Located corosync config file: $best_file"
            echo "$best_file"
            ;;
        any)
            # Cluster type is undetermined. Don't complain, because this
            # might be a Pacemaker Remote node.
            ;;
        *)
            warning "Unknown cluster type: $1"
            ;;
    esac
}

#
# check for the major prereq for a) parameter parsing and b)
# parsing logs
#
t=`get_time "12:00"`
if [ "$t" = "" ]; then
    fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)"
fi

# Override any locale settings so collected output is in a common language
LC_ALL="C"
export LC_ALL

# vim: set filetype=sh: