diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in index 2d006cad8e..ac5e3c23fa 100644 --- a/cts/cts-scheduler.in +++ b/cts/cts-scheduler.in @@ -1,1294 +1,1295 @@ #!@BASH_PATH@ # # Copyright 2004-2018 Andrew Beekhof # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # USAGE_TEXT="Usage: cts-scheduler [] Options: --help Display this text, then exit -V, --verbose Display any differences from expected output --run TEST Run only single specified test --update Update expected results with actual results -b, --binary PATH Specify path to crm_simulate -i, --io-dir PATH Specify path to regression test data directory -v, --valgrind Run all commands under valgrind --valgrind-dhat Run all commands under valgrind with heap analyzer --valgrind-skip-output If running under valgrind, don't display output --testcmd-options Additional options for command under test" SBINDIR="@sbindir@" BUILDDIR="@abs_top_builddir@" CRM_SCHEMA_DIRECTORY="@CRM_SCHEMA_DIRECTORY@" # If readlink supports -e (i.e. GNU), use it readlink -e / >/dev/null 2>/dev/null if [ $? -eq 0 ]; then test_home="$(dirname "$(readlink -e "$0")")" else test_home="$(dirname "$0")" fi io_dir="$test_home/scheduler" failed="$test_home/.regression.failed.diff" test_binary= testcmd_options= single_test= verbose=0 num_failed=0 num_tests=0 VALGRIND_CMD="" VALGRIND_OPTS="-q --gen-suppressions=all --log-file=%q{valgrind_output} --time-stamp=yes --trace-children=no --show-reachable=no --leak-check=full --num-callers=20 --suppressions=$test_home/valgrind-pcmk.suppressions" VALGRIND_DHAT_OPTS="--tool=exp-dhat --log-file=%q{valgrind_output} --time-stamp=yes --trace-children=no --show-top-n=100 --num-callers=4" diff_opts="--ignore-all-space --ignore-blank-lines -u -N" # These constants must track crm_exit_t values CRM_EX_OK=0 CRM_EX_ERROR=1 CRM_EX_NOT_INSTALLED=5 CRM_EX_USAGE=64 CRM_EX_NOINPUT=66 EXITCODE=$CRM_EX_OK function info() { printf "$*\n" } function error() { printf " * ERROR: $*\n" } function failed() { printf " * FAILED: $*\n" } function show_test() { name=$1; shift printf " Test %-25s $*\n" "$name:" } # Normalize scheduler output for comparison normalize() { for NORMALIZE_FILE in "$@"; do # sed -i is not portable :-( sed -e 's/crm_feature_set="[^"]*"//' \ -e 's/batch-limit="[0-9]*"//' \ "$NORMALIZE_FILE" > "${NORMALIZE_FILE}.$$" mv -- "${NORMALIZE_FILE}.$$" "$NORMALIZE_FILE" done } info "Test home is:\t$test_home" create_mode="false" while [ $# -gt 0 ] ; do case "$1" in -V|--verbose) verbose=1 shift ;; -v|--valgrind) export G_SLICE=always-malloc VALGRIND_CMD="valgrind $VALGRIND_OPTS" shift ;; --valgrind-dhat) VALGRIND_CMD="valgrind $VALGRIND_DHAT_OPTS" shift ;; --valgrind-skip-output) VALGRIND_SKIP_OUTPUT=1 shift ;; --update) create_mode="true" shift ;; --run) single_test=$(basename "$2" ".xml") shift 2 break # any remaining arguments will be passed to test command ;; -b|--binary) test_binary="$2" shift 2 ;; -i|--io-dir) io_dir="$2" shift 2 ;; --help) echo "$USAGE_TEXT" exit $CRM_EX_OK ;; --testcmd-options) testcmd_options=$2 shift 2 ;; *) error "unknown option: $1" exit $CRM_EX_USAGE ;; esac done if [ -z "$PCMK_schema_directory" ]; then if [ -d "$BUILDDIR/xml" ]; then export PCMK_schema_directory="$BUILDDIR/xml" elif [ -d "$CRM_SCHEMA_DIRECTORY" ]; then export PCMK_schema_directory="$CRM_SCHEMA_DIRECTORY" fi fi if [ -z "$test_binary" ]; then if [ -x "$BUILDDIR/tools/crm_simulate" ]; then test_binary="$BUILDDIR/tools/crm_simulate" elif [ -x "$SBINDIR/crm_simulate" ]; then test_binary="$SBINDIR/crm_simulate" fi fi if [ ! -x "$test_binary" ]; then error "Test binary $test_binary not found" exit $CRM_EX_NOT_INSTALLED fi info "Test binary is:\t$test_binary" if [ -n "$PCMK_schema_directory" ]; then info "Schema home is:\t$PCMK_schema_directory" fi if [ "x$VALGRIND_CMD" != "x" ]; then info "Activating memory testing with valgrind"; fi info " " test_cmd="$VALGRIND_CMD $test_binary $testcmd_options" #echo $test_cmd if [ "$(whoami)" != "root" ]; then declare -x CIB_shadow_dir=/tmp fi do_test() { did_fail=0 expected_rc=0 num_tests=$(( $num_tests + 1 )) base=$1; shift name=$1; shift input=$io_dir/${base}.xml output=$io_dir/${base}.out expected=$io_dir/${base}.exp dot_expected=$io_dir/${base}.dot dot_output=$io_dir/${base}.pe.dot scores=$io_dir/${base}.scores score_output=$io_dir/${base}.scores.pe stderr_expected=$io_dir/${base}.stderr stderr_output=$io_dir/${base}.stderr.pe summary=$io_dir/${base}.summary summary_output=$io_dir/${base}.summary.pe valgrind_output=$io_dir/${base}.valgrind export valgrind_output if [ "x$1" = "x--rc" ]; then expected_rc=$2 shift; shift; fi show_test "$base" "$name" if [ ! -f $input ]; then error "No input"; did_fail=1 num_failed=$(( $num_failed + 1 )) return $CRM_EX_NOINPUT; fi if [ "$create_mode" != "true" ] && [ ! -f "$expected" ]; then error "no stored output"; return $CRM_EX_NOINPUT; fi # ../admin/crm_verify -X $input if [ ! -z "$single_test" ]; then echo "CIB_shadow_dir=\"$io_dir\" $test_cmd -x \"$input\" -D \"$dot_output\" -G \"$output\" -S" "$@" CIB_shadow_dir="$io_dir" $test_cmd -x "$input" -D "$dot_output" \ -G "$output" -S "$@" 2>&1 | tee "$summary_output" else CIB_shadow_dir="$io_dir" $test_cmd -x "$input" -S &> "$summary_output" fi CIB_shadow_dir="$io_dir" $test_cmd -x "$input" -D "$dot_output" \ -G "$output" -SQ -s "$@" 2> "$stderr_output" > "$score_output" rc=$? if [ $rc -ne $expected_rc ]; then failed "Test returned: $rc"; did_fail=1 echo "CIB_shadow_dir=\"$io_dir\" $test_cmd -x \"$input\" -D \"$dot_output\" -G \"$output\" -SQ -s" "$@" fi if [ -z "$VALGRIND_SKIP_OUTPUT" ]; then if [ -s "${valgrind_output}" ]; then error "Valgrind reported errors"; did_fail=1 cat ${valgrind_output} fi rm -f ${valgrind_output} fi if [ -s core ]; then error "Core-file detected: core.${base}"; did_fail=1 rm -f $test_home/core.$base mv core $test_home/core.$base fi if [ -e "$stderr_expected" ]; then diff $diff_opts $stderr_expected $stderr_output >/dev/null rc2=$? if [ $rc2 -ne 0 ]; then failed "stderr changed"; diff $diff_opts $stderr_expected $stderr_output 2>/dev/null >> $failed echo "" >> $failed did_fail=1 fi elif [ -s "$stderr_output" ]; then error "Output was written to stderr" did_fail=1 cat $stderr_output fi rm -f $stderr_output if [ ! -s $output ]; then error "No graph produced"; did_fail=1 num_failed=$(( $num_failed + 1 )) rm -f $output return $CRM_EX_ERROR; fi if [ ! -s $dot_output ]; then error "No dot-file summary produced"; did_fail=1 num_failed=$(( $num_failed + 1 )) rm -f $output return $CRM_EX_ERROR; else echo "digraph \"g\" {" > $dot_output.sort LC_ALL=POSIX sort -u $dot_output | grep -v -e '^}$' -e digraph >> $dot_output.sort echo "}" >> $dot_output.sort mv -f $dot_output.sort $dot_output fi if [ ! -s $score_output ]; then error "No allocation scores produced"; did_fail=1 num_failed=$(( $num_failed + 1 )) rm $output return $CRM_EX_ERROR; else LC_ALL=POSIX sort $score_output > $score_output.sorted mv -f $score_output.sorted $score_output fi if [ "$create_mode" = "true" ]; then cp "$output" "$expected" cp "$dot_output" "$dot_expected" cp "$score_output" "$scores" cp "$summary_output" "$summary" info " Updated expected outputs" fi diff $diff_opts $summary $summary_output >/dev/null rc2=$? if [ $rc2 -ne 0 ]; then failed "summary changed"; diff $diff_opts $summary $summary_output 2>/dev/null >> $failed echo "" >> $failed did_fail=1 fi diff $diff_opts $dot_expected $dot_output >/dev/null rc=$? if [ $rc -ne 0 ]; then failed "dot-file summary changed"; diff $diff_opts $dot_expected $dot_output 2>/dev/null >> $failed echo "" >> $failed did_fail=1 else rm -f $dot_output fi normalize "$expected" "$output" diff $diff_opts $expected $output >/dev/null rc2=$? if [ $rc2 -ne 0 ]; then failed "xml-file changed"; diff $diff_opts $expected $output 2>/dev/null >> $failed echo "" >> $failed did_fail=1 fi diff $diff_opts $scores $score_output >/dev/null rc=$? if [ $rc -ne 0 ]; then failed "scores-file changed"; diff $diff_opts $scores $score_output 2>/dev/null >> $failed echo "" >> $failed did_fail=1 fi rm -f $output $score_output $summary_output if [ $did_fail -eq 1 ]; then num_failed=$(( $num_failed + 1 )) return $CRM_EX_ERROR fi return $CRM_EX_OK } function test_results { if [ $num_failed -ne 0 ]; then if [ -s "$failed" ]; then if [ $verbose -eq 1 ]; then error "Results of $num_failed failed tests (out of $num_tests):" cat $failed else error "Results of $num_failed failed tests (out of $num_tests) are in $failed" error "Use -V to display them after running the tests" fi else error "$num_failed (of $num_tests) tests failed (no diff results)" rm $failed fi EXITCODE=$CRM_EX_ERROR fi } # zero out the error log true > $failed if [ -n "$single_test" ]; then do_test "$single_test" "Single shot" "$@" TEST_RC=$? cat "$failed" exit $TEST_RC fi DO_VERSIONED_TESTS=0 info Performing the following tests from $io_dir echo "" do_test simple1 "Offline " do_test simple2 "Start " do_test simple3 "Start 2 " do_test simple4 "Start Failed" do_test simple6 "Stop Start " do_test simple7 "Shutdown " #do_test simple8 "Stonith " #do_test simple9 "Lower version" #do_test simple10 "Higher version" do_test simple11 "Priority (ne)" do_test simple12 "Priority (eq)" do_test simple8 "Stickiness" echo "" do_test group1 "Group " do_test group2 "Group + Native " do_test group3 "Group + Group " do_test group4 "Group + Native (nothing)" do_test group5 "Group + Native (move) " do_test group6 "Group + Group (move) " do_test group7 "Group colocation" do_test group13 "Group colocation (cant run)" do_test group8 "Group anti-colocation" do_test group9 "Group recovery" do_test group10 "Group partial recovery" do_test group11 "Group target_role" do_test group14 "Group stop (graph terminated)" do_test group15 "Negative group colocation" do_test bug-1573 "Partial stop of a group with two children" do_test bug-1718 "Mandatory group ordering - Stop group_FUN" do_test bug-lf-2613 "Move group on failure" do_test bug-lf-2619 "Move group on clone failure" do_test group-fail "Ensure stop order is preserved for partially active groups" do_test group-unmanaged "No need to restart r115 because r114 is unmanaged" do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails" do_test group-dependents "Account for the location preferences of things colocated with a group" echo "" do_test rsc_dep1 "Must not " do_test rsc_dep3 "Must " do_test rsc_dep5 "Must not 3 " do_test rsc_dep7 "Must 3 " do_test rsc_dep10 "Must (but cant)" do_test rsc_dep2 "Must (running) " do_test rsc_dep8 "Must (running : alt) " do_test rsc_dep4 "Must (running + move)" do_test asymmetric "Asymmetric - require explicit location constraints" echo "" do_test orphan-0 "Orphan ignore" do_test orphan-1 "Orphan stop" do_test orphan-2 "Orphan stop, remove failcount" echo "" do_test params-0 "Params: No change" do_test params-1 "Params: Changed" do_test params-2 "Params: Resource definition" do_test params-4 "Params: Reload" do_test params-5 "Params: Restart based on probe digest" do_test novell-251689 "Resource definition change + target_role=stopped" do_test bug-lf-2106 "Restart all anonymous clone instances after config change" do_test params-6 "Params: Detect reload in previously migrated resource" do_test nvpair-id-ref "Support id-ref in nvpair with optional name" do_test not-reschedule-unneeded-monitor "Do not reschedule unneeded monitors while resource definitions have changed" do_test reload-becomes-restart "Cancel reload if restart becomes required" echo "" do_test target-0 "Target Role : baseline" do_test target-1 "Target Role : master" do_test target-2 "Target Role : invalid" echo "" do_test base-score "Set a node's default score for all nodes" echo "" do_test date-1 "Dates" -t "2005-020" do_test date-2 "Date Spec - Pass" -t "2005-020T12:30" do_test date-3 "Date Spec - Fail" -t "2005-020T11:30" do_test origin "Timing of recurring operations" -t "2014-05-07 00:28:00" do_test probe-0 "Probe (anon clone)" do_test probe-1 "Pending Probe" do_test probe-2 "Correctly re-probe cloned groups" do_test probe-3 "Probe (pending node)" do_test probe-4 "Probe (pending node + stopped resource)" do_test standby "Standby" do_test comments "Comments" echo "" do_test one-or-more-0 "Everything starts" do_test one-or-more-1 "Nothing starts because of A" do_test one-or-more-2 "D can start because of C" do_test one-or-more-3 "D cannot start because of B and C" do_test one-or-more-4 "D cannot start because of target-role" do_test one-or-more-5 "Start A and F even though C and D are stopped" do_test one-or-more-6 "Leave A running even though B is stopped" do_test one-or-more-7 "Leave A running even though C is stopped" do_test bug-5140-require-all-false "Allow basegrp:0 to stop" do_test clone-require-all-1 "clone B starts node 3 and 4" do_test clone-require-all-2 "clone B remains stopped everywhere" do_test clone-require-all-3 "clone B stops everywhere because A stops everywhere" do_test clone-require-all-4 "clone B remains on node 3 and 4 with only one instance of A remaining." do_test clone-require-all-5 "clone B starts on node 1 3 and 4" do_test clone-require-all-6 "clone B remains active after shutting down instances of A" do_test clone-require-all-7 "clone A and B both start at the same time. all instances of A start before B." do_test clone-require-all-no-interleave-1 "C starts everywhere after A and B" do_test clone-require-all-no-interleave-2 "C starts on nodes 1, 2, and 4 with only one active instance of B" do_test clone-require-all-no-interleave-3 "C remains active when instance of B is stopped on one node and started on another." do_test one-or-more-unrunnable-instances "Avoid dependencies on instances that won't ever be started" echo "" do_test order1 "Order start 1 " do_test order2 "Order start 2 " do_test order3 "Order stop " do_test order4 "Order (multiple) " do_test order5 "Order (move) " do_test order6 "Order (move w/ restart) " do_test order7 "Order (mandatory) " do_test order-optional "Order (score=0) " do_test order-required "Order (score=INFINITY) " do_test bug-lf-2171 "Prevent group start when clone is stopped" do_test order-clone "Clone ordering should be able to prevent startup of dependent clones" do_test order-sets "Ordering for resource sets" do_test order-serialize "Serialize resources without inhibiting migration" do_test order-serialize-set "Serialize a set of resources without inhibiting migration" do_test clone-order-primitive "Order clone start after a primitive" do_test clone-order-16instances "Verify ordering of 16 cloned resources" do_test order-optional-keyword "Order (optional keyword)" do_test order-mandatory "Order (mandatory keyword)" do_test bug-lf-2493 "Don't imply colocation requirements when applying ordering constraints with clones" do_test ordered-set-basic-startup "Constraint set with default order settings." do_test ordered-set-natural "Allow natural set ordering" do_test order-wrong-kind "Order (error)" echo "" do_test coloc-loop "Colocation - loop" do_test coloc-many-one "Colocation - many-to-one" do_test coloc-list "Colocation - many-to-one with list" do_test coloc-group "Colocation - groups" do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation" do_test coloc-attr "Colocation based on node attributes" do_test coloc-negative-group "Negative colocation with a group" do_test coloc-intra-set "Intra-set colocation" do_test bug-lf-2435 "Colocation sets with a negative score" do_test coloc-clone-stays-active "Ensure clones don't get stopped/demoted because a dependent must stop" do_test coloc_fp_logic "Verify floating point calculations in colocation are working" do_test colo_master_w_native "cl#5070 - Verify promotion order is affected when colocating master to native rsc." do_test colo_slave_w_native "cl#5070 - Verify promotion order is affected when colocating slave to native rsc." do_test anti-colocation-order "cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node" do_test anti-colocation-master "Organize order of actions for master resources in anti-colocations" do_test anti-colocation-slave "Organize order of actions for slave resources in anti-colocations" do_test enforce-colo1 "Always enforce B with A INFINITY." do_test complex_enforce_colo "Always enforce B with A INFINITY. (make sure heat-engine stops)" echo "" do_test rsc-sets-seq-true "Resource Sets - sequential=false" do_test rsc-sets-seq-false "Resource Sets - sequential=true" do_test rsc-sets-clone "Resource Sets - Clone" do_test rsc-sets-master "Resource Sets - Master" do_test rsc-sets-clone-1 "Resource Sets - Clone (lf#2404)" #echo "" #do_test agent1 "version: lt (empty)" #do_test agent2 "version: eq " #do_test agent3 "version: gt " echo "" do_test attrs1 "string: eq (and) " do_test attrs2 "string: lt / gt (and)" do_test attrs3 "string: ne (or) " do_test attrs4 "string: exists " do_test attrs5 "string: not_exists " do_test attrs6 "is_dc: true " do_test attrs7 "is_dc: false " do_test attrs8 "score_attribute " do_test per-node-attrs "Per node resource parameters" echo "" do_test mon-rsc-1 "Schedule Monitor - start" do_test mon-rsc-2 "Schedule Monitor - move " do_test mon-rsc-3 "Schedule Monitor - pending start " do_test mon-rsc-4 "Schedule Monitor - move/pending start" echo "" do_test rec-rsc-0 "Resource Recover - no start " do_test rec-rsc-1 "Resource Recover - start " do_test rec-rsc-2 "Resource Recover - monitor " do_test rec-rsc-3 "Resource Recover - stop - ignore" do_test rec-rsc-4 "Resource Recover - stop - block " do_test rec-rsc-5 "Resource Recover - stop - fence " do_test rec-rsc-6 "Resource Recover - multiple - restart" do_test rec-rsc-7 "Resource Recover - multiple - stop " do_test rec-rsc-8 "Resource Recover - multiple - block " do_test rec-rsc-9 "Resource Recover - group/group" do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor" do_test stop-failure-no-quorum "Stop failure without quorum" do_test stop-failure-no-fencing "Stop failure without fencing available" do_test stop-failure-with-fencing "Stop failure with fencing available" do_test multiple-active-block-group "Support of multiple-active=block for resource groups" do_test multiple-monitor-one-failed "Consider resource failed if any of the configured monitor operations failed" echo "" do_test quorum-1 "No quorum - ignore" do_test quorum-2 "No quorum - freeze" do_test quorum-3 "No quorum - stop " do_test quorum-4 "No quorum - start anyway" do_test quorum-5 "No quorum - start anyway (group)" do_test quorum-6 "No quorum - start anyway (clone)" do_test bug-cl-5212 "No promotion with no-quorum-policy=freeze" do_test suicide-needed-inquorate "no-quorum-policy=suicide: suicide necessary" do_test suicide-not-needed-initial-quorum "no-quorum-policy=suicide: suicide not necessary at initial quorum" do_test suicide-not-needed-never-quorate "no-quorum-policy=suicide: suicide not necessary if never quorate" do_test suicide-not-needed-quorate "no-quorum-policy=suicide: suicide necessary if quorate" echo "" do_test rec-node-1 "Node Recover - Startup - no fence" do_test rec-node-2 "Node Recover - Startup - fence " do_test rec-node-3 "Node Recover - HA down - no fence" do_test rec-node-4 "Node Recover - HA down - fence " do_test rec-node-5 "Node Recover - CRM down - no fence" do_test rec-node-6 "Node Recover - CRM down - fence " do_test rec-node-7 "Node Recover - no quorum - ignore " do_test rec-node-8 "Node Recover - no quorum - freeze " do_test rec-node-9 "Node Recover - no quorum - stop " do_test rec-node-10 "Node Recover - no quorum - stop w/fence" do_test rec-node-11 "Node Recover - CRM down w/ group - fence " do_test rec-node-12 "Node Recover - nothing active - fence " do_test rec-node-13 "Node Recover - failed resource + shutdown - fence " do_test rec-node-15 "Node Recover - unknown lrm section" do_test rec-node-14 "Serialize all stonith's" echo "" do_test multi1 "Multiple Active (stop/start)" echo "" do_test migrate-begin "Normal migration" do_test migrate-success "Completed migration" do_test migrate-partial-1 "Completed migration, missing stop on source" do_test migrate-partial-2 "Successful migrate_to only" do_test migrate-partial-3 "Successful migrate_to only, target down" do_test migrate-partial-4 "Migrate from the correct host after migrate_to+migrate_from" do_test bug-5186-partial-migrate "Handle partial migration when src node loses membership" do_test migrate-fail-2 "Failed migrate_from" do_test migrate-fail-3 "Failed migrate_from + stop on source" do_test migrate-fail-4 "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-5 "Failed migrate_from + stop on source and target" do_test migrate-fail-6 "Failed migrate_to" do_test migrate-fail-7 "Failed migrate_to + stop on source" do_test migrate-fail-8 "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-9 "Failed migrate_to + stop on source and target" do_test migrate-stop "Migration in a stopping stack" do_test migrate-start "Migration in a starting stack" do_test migrate-stop_start "Migration in a restarting stack" do_test migrate-stop-complex "Migration in a complex stopping stack" do_test migrate-start-complex "Migration in a complex starting stack" do_test migrate-stop-start-complex "Migration in a complex moving stack" do_test migrate-shutdown "Order the post-migration 'stop' before node shutdown" do_test migrate-1 "Migrate (migrate)" do_test migrate-2 "Migrate (stable)" do_test migrate-3 "Migrate (failed migrate_to)" do_test migrate-4 "Migrate (failed migrate_from)" do_test novell-252693 "Migration in a stopping stack" do_test novell-252693-2 "Migration in a starting stack" do_test novell-252693-3 "Non-Migration in a starting and stopping stack" do_test bug-1820 "Migration in a group" do_test bug-1820-1 "Non-migration in a group" do_test migrate-5 "Primitive migration with a clone" do_test migrate-fencing "Migration after Fencing" do_test migrate-both-vms "Migrate two VMs that have no colocation" do_test migration-behind-migrating-remote "Migrate resource behind migrating remote connection" do_test 1-a-then-bm-move-b "Advanced migrate logic. A then B. migrate B." do_test 2-am-then-b-move-a "Advanced migrate logic, A then B, migrate A without stopping B" do_test 3-am-then-bm-both-migrate "Advanced migrate logic. A then B. migrate both" do_test 4-am-then-bm-b-not-migratable "Advanced migrate logic, A then B, B not migratable" do_test 5-am-then-bm-a-not-migratable "Advanced migrate logic. A then B. move both, a not migratable" do_test 6-migrate-group "Advanced migrate logic, migrate a group" do_test 7-migrate-group-one-unmigratable "Advanced migrate logic, migrate group mixed with allow-migrate true/false" do_test 8-am-then-bm-a-migrating-b-stopping "Advanced migrate logic, A then B, A migrating, B stopping" do_test 9-am-then-bm-b-migrating-a-stopping "Advanced migrate logic, A then B, B migrate, A stopping" do_test 10-a-then-bm-b-move-a-clone "Advanced migrate logic, A clone then B, migrate B while stopping A" do_test 11-a-then-bm-b-move-a-clone-starting "Advanced migrate logic, A clone then B, B moving while A is start/stopping" do_test a-promote-then-b-migrate "A promote then B start. migrate B" do_test a-demote-then-b-migrate "A demote then B stop. migrate B" if [ $DO_VERSIONED_TESTS -eq 1 ]; then do_test migrate-versioned "Disable migration for versioned resources" fi #echo "" #do_test complex1 "Complex " do_test bug-lf-2422 "Dependency on partially active group - stop ocfs:*" echo "" do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" do_test clone-anon-failcount "Merge failcounts for anonymous clones" do_test force-anon-clone-max "Update clone-max properly when forcing a clone to be anonymous" do_test anon-instance-pending "Assign anonymous clone instance numbers properly when action pending" do_test inc0 "Incarnation start" do_test inc1 "Incarnation start order" do_test inc2 "Incarnation silent restart, stop, move" do_test inc3 "Inter-incarnation ordering, silent restart, stop, move" do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)" do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)" do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)" do_test inc7 "Clone colocation" do_test inc8 "Clone anti-colocation" do_test inc9 "Non-unique clone" do_test inc10 "Non-unique clone (stop)" do_test inc11 "Primitive colocation with clones" do_test inc12 "Clone shutdown" do_test cloned-group "Make sure only the correct number of cloned groups are started" do_test cloned-group-stop "Ensure stopping qpidd also stops glance and cinder" do_test clone-no-shuffle "Don't prioritize allocation of instances that must be moved" do_test clone-max-zero "Orphan processing with clone-max=0" do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node" do_test bug-lf-2160 "Don't shuffle clones due to colocation" do_test bug-lf-2213 "clone-node-max enforcement for cloned groups" do_test bug-lf-2153 "Clone ordering constraints" do_test bug-lf-2361 "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable" do_test bug-lf-2317 "Avoid needless restart of primitive depending on a clone" do_test clone-colocate-instance-1 "Colocation with a specific clone instance (negative example)" do_test clone-colocate-instance-2 "Colocation with a specific clone instance" do_test clone-order-instance "Ordering with specific clone instances" do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation" do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups" do_test bug-lf-2544 "Balanced clone placement" do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0" do_test bug-lf-2574 "Avoid clone shuffle" do_test bug-lf-2581 "Avoid group restart due to unrelated clone (re)start" do_test bug-cl-5168 "Don't shuffle clones" do_test bug-cl-5170 "Prevent clone from starting with on-fail=block" do_test clone-fail-block-colocation "Move colocated group when failed clone has on-fail=block" do_test clone-interleave-1 "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)" do_test clone-interleave-2 "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)" do_test clone-interleave-3 "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)" do_test rebalance-unique-clones "Rebalance unique clone instances with no stickiness" do_test clone-requires-quorum-recovery "Clone with requires=quorum on failed node needing recovery" do_test clone-requires-quorum "Clone with requires=quorum with presumed-inactive instance on failed node" echo "" do_test cloned_start_one "order first clone then clone... first clone_min=2" do_test cloned_start_two "order first clone then clone... first clone_min=2" do_test cloned_stop_one "order first clone then clone... first clone_min=2" do_test cloned_stop_two "order first clone then clone... first clone_min=2" do_test clone_min_interleave_start_one "order first clone then clone... first clone_min=2 and then has interleave=true" do_test clone_min_interleave_start_two "order first clone then clone... first clone_min=2 and then has interleave=true" do_test clone_min_interleave_stop_one "order first clone then clone... first clone_min=2 and then has interleave=true" do_test clone_min_interleave_stop_two "order first clone then clone... first clone_min=2 and then has interleave=true" do_test clone_min_start_one "order first clone then primitive... first clone_min=2" do_test clone_min_start_two "order first clone then primitive... first clone_min=2" do_test clone_min_stop_all "order first clone then primitive... first clone_min=2" do_test clone_min_stop_one "order first clone then primitive... first clone_min=2" do_test clone_min_stop_two "order first clone then primitive... first clone_min=2" echo "" do_test unfence-startup "Clean unfencing" do_test unfence-definition "Unfencing when the agent changes" do_test unfence-parameters "Unfencing when the agent parameters changes" do_test unfence-device "Unfencing when a cluster has only fence devices" echo "" do_test master-0 "Stopped -> Slave" do_test master-1 "Stopped -> Promote" do_test master-2 "Stopped -> Promote : notify" do_test master-3 "Stopped -> Promote : master location" do_test master-4 "Started -> Promote : master location" do_test master-5 "Promoted -> Promoted" do_test master-6 "Promoted -> Promoted (2)" do_test master-7 "Promoted -> Fenced" do_test master-8 "Promoted -> Fenced -> Moved" do_test master-9 "Stopped + Promotable + No quorum" do_test master-10 "Stopped -> Promotable : notify with monitor" do_test master-11 "Stopped -> Promote : colocation" do_test novell-239082 "Demote/Promote ordering" do_test novell-239087 "Stable master placement" do_test master-12 "Promotion based solely on rsc_location constraints" do_test master-13 "Include preferences of colocated resources when placing master" do_test master-demote "Ordering when actions depends on demoting a slave resource" do_test master-ordering "Prevent resources from starting that need a master" do_test bug-1765 "Master-Master Colocation (dont stop the slaves)" do_test master-group "Promotion of cloned groups" do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily" do_test master-failed-demote "Don't retry failed demote actions" do_test master-failed-demote-2 "Don't retry failed demote actions (notify=false)" do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does" do_test master-reattach "Re-attach to a running master" do_test master-allow-start "Don't include master score if it would prevent allocation" do_test master-colocation "Allow master instances placemaker to be influenced by colocation constraints" do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly" do_test master-role "Prevent target-role from promoting more than master-max instances" do_test bug-lf-2358 "Master-Master anti-colocation" do_test master-promotion-constraint "Mandatory master colocation constraints" do_test unmanaged-master "Ensure role is preserved for unmanaged resources" do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters" do_test master-demote-2 "Demote does not clear past failure" do_test master-move "Move master based on failure of colocated group" do_test master-probed-score "Observe the promotion score of probed resources" do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint" do_test colocation_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint" do_test order_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by order constraint" do_test order_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by order constraint" do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion." do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive" do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score" do_test master-demote-block "Block promotion if demote fails with on-fail=block" do_test master-dependent-ban "Don't stop instances from being active because a dependent is banned from that host" do_test master-stop "Stop instances due to location constraint with role=Started" do_test master-partially-demoted-group "Allow partially demoted group to finish demoting" do_test bug-cl-5213 "Ensure role colocation with -INFINITY is enforced" do_test bug-cl-5219 "Allow unrelated resources with a common colocation target to remain promoted" do_test master-asymmetrical-order "Fix the behaviors of multi-state resources with asymmetrical ordering" do_test master-notify "Master promotion with notifies" do_test master-score-startup "Use permanent master scores without LRM history" do_test failed-demote-recovery "Recover resource in slave role after demote fails" do_test failed-demote-recovery-master "Recover resource in master role after demote fails" echo "" do_test history-1 "Correctly parse stateful-1 resource state" echo "" do_test managed-0 "Managed (reference)" do_test managed-1 "Not managed - down " do_test managed-2 "Not managed - up " do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource" do_test bug-5028-detach "Ensure detach still works" do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack" do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged " do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged " do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged " do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged " do_test unmanaged-block-restart "Block restart of resources if any dependent resource in a group is unmanaged" echo "" do_test interleave-0 "Interleave (reference)" do_test interleave-1 "coloc - not interleaved" do_test interleave-2 "coloc - interleaved " do_test interleave-3 "coloc - interleaved (2)" do_test interleave-pseudo-stop "Interleaved clone during stonith" do_test interleave-stop "Interleaved clone during stop" do_test interleave-restart "Interleaved clone during dependency restart" echo "" do_test notify-0 "Notify reference" do_test notify-1 "Notify simple" do_test notify-2 "Notify simple, confirm" do_test notify-3 "Notify move, confirm" do_test novell-239079 "Notification priority" #do_test notify-2 "Notify - 764" do_test notifs-for-unrunnable "Don't schedule notifications for an unrunnable action" do_test route-remote-notify "Route remote notify actions through correct cluster node" echo "" do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition" do_test 662 "OSDL #662 - Two resources start on one node when incarnation_node_max = 1" do_test 696 "OSDL #696 - CRM starts stonith RA without monitor" do_test 726 "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop" do_test 735 "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3" do_test 764 "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1" do_test 797 "OSDL #797 - Assert triggered: task_id_i > max_call_id" do_test 829 "OSDL #829" do_test 994 "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted" do_test 994-2 "OSDL #994 - with a dependent resource" do_test 1360 "OSDL #1360 - Clone stickiness" do_test 1484 "OSDL #1484 - on_fail=stop" do_test 1494 "OSDL #1494 - Clone stability" do_test unrunnable-1 "Unrunnable" do_test unrunnable-2 "Unrunnable 2" do_test stonith-0 "Stonith loop - 1" do_test stonith-1 "Stonith loop - 2" do_test stonith-2 "Stonith loop - 3" do_test stonith-3 "Stonith startup" do_test stonith-4 "Stonith node state" do_test bug-1572-1 "Recovery of groups depending on master/slave" do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted" do_test bug-1685 "Depends-on-master ordering" do_test bug-1822 "Don't promote partially active groups" do_test bug-pm-11 "New resource added to a m/s group" do_test bug-pm-12 "Recover only the failed portion of a cloned group" do_test bug-n-387749 "Don't shuffle clone instances" do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped" do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node" do_test bug-lf-1920 "Correctly handle probes that find active resources" do_test bnc-515172 "Location constraint with multiple expressions" do_test colocate-primitive-with-clone "Optional colocation with a clone" do_test use-after-free-merge "Use-after-free in native_merge_weights" do_test bug-lf-2551 "STONITH ordering for stop" do_test bug-lf-2606 "Stonith implies demote" do_test bug-lf-2474 "Ensure resource op timeout takes precedence over op_defaults" do_test bug-suse-707150 "Prevent vm-01 from starting due to colocation/ordering" do_test bug-5014-A-start-B-start "Verify when A starts B starts using symmetrical=false" do_test bug-5014-A-stop-B-started "Verify when A stops B does not stop if it has already started using symmetric=false" do_test bug-5014-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using symmetric=false" do_test bug-5014-CthenAthenB-C-stopped "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts." do_test bug-5014-CLONE-A-start-B-start "Verify when A starts B starts using clone resources with symmetric=false" do_test bug-5014-CLONE-A-stop-B-started "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false." do_test bug-5014-GROUP-A-start-B-start "Verify when A starts B starts when using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-started "Verify when A stops B does not stop if it has already started using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false." do_test bug-5014-ordered-set-symmetrical-false "Verify ordered sets work with symmetrical=false" do_test bug-5014-ordered-set-symmetrical-true "Verify ordered sets work with symmetrical=true" do_test bug-5007-masterslave_colocation "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources." do_test bug-5038 "Prevent restart of anonymous clones when clone-max decreases" do_test bug-5025-1 "Automatically clean up failcount after resource config change with reload" do_test bug-5025-2 "Make sure clear failcount action isn't set when config does not change." do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart" do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed." do_test failcount "Ensure failcounts are correctly expired" do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present" do_test per-op-failcount "Ensure per-operation failcount is handled and not passed to fence agent" do_test on-fail-ignore "Ensure on-fail=ignore works even beyond migration-threshold" do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart" do_test monitor-onfail-stop "bug-5058 - Monitor failure wiht on-fail set to stop" do_test bug-5059 "No need to restart p_stateful1:*" do_test bug-5069-op-enabled "Test on-fail=ignore with failure when monitor is enabled." do_test bug-5069-op-disabled "Test on-fail-ignore with failure when monitor is disabled." do_test obsolete-lrm-resource "cl#5115 - Do not use obsolete lrm_resource sections" do_test expire-non-blocked-failure "Ignore failure-timeout only if the failed operation has on-fail=block" do_test asymmetrical-order-move "Respect asymmetrical ordering when trying to move resources" do_test asymmetrical-order-restart "Respect asymmetrical ordering when restarting dependent resource" do_test start-then-stop-with-unfence "Avoid graph loop with start-then-stop constraint plus unfencing" do_test order-expired-failure "Order failcount cleanup after remote fencing" do_test ignore_stonith_rsc_order1 "cl#5056- Ignore order constraint between stonith and non-stonith rsc." do_test ignore_stonith_rsc_order2 "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith." do_test ignore_stonith_rsc_order3 "cl#5056- Ignore order constraint, stonith clone and mixed group" do_test ignore_stonith_rsc_order4 "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group" do_test honor_stonith_rsc_order1 "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)." do_test honor_stonith_rsc_order2 "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)" do_test honor_stonith_rsc_order3 "cl#5056- Honor order constraint, stonith clones with nested pure stonith group." do_test honor_stonith_rsc_order4 "cl#5056- Honor order constraint, between two native stonith rscs." do_test multiply-active-stonith do_test probe-timeout "cl#5099 - Default probe timeout" do_test concurrent-fencing "Allow performing fencing operations in parallel" echo "" do_test systemhealth1 "System Health () #1" do_test systemhealth2 "System Health () #2" do_test systemhealth3 "System Health () #3" do_test systemhealthn1 "System Health (None) #1" do_test systemhealthn2 "System Health (None) #2" do_test systemhealthn3 "System Health (None) #3" do_test systemhealthm1 "System Health (Migrate On Red) #1" do_test systemhealthm2 "System Health (Migrate On Red) #2" do_test systemhealthm3 "System Health (Migrate On Red) #3" do_test systemhealtho1 "System Health (Only Green) #1" do_test systemhealtho2 "System Health (Only Green) #2" do_test systemhealtho3 "System Health (Only Green) #3" do_test systemhealthp1 "System Health (Progessive) #1" do_test systemhealthp2 "System Health (Progessive) #2" do_test systemhealthp3 "System Health (Progessive) #3" echo "" do_test utilization "Placement Strategy - utilization" do_test minimal "Placement Strategy - minimal" do_test balanced "Placement Strategy - balanced" echo "" do_test placement-stickiness "Optimized Placement Strategy - stickiness" do_test placement-priority "Optimized Placement Strategy - priority" do_test placement-location "Optimized Placement Strategy - location" do_test placement-capacity "Optimized Placement Strategy - capacity" echo "" do_test utilization-order1 "Utilization Order - Simple" do_test utilization-order2 "Utilization Order - Complex" do_test utilization-order3 "Utilization Order - Migrate" do_test utilization-order4 "Utilization Order - Live Migration (bnc#695440)" do_test utilization-shuffle "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3" do_test load-stopped-loop "Avoid transition loop due to load_stopped (cl#5044)" do_test load-stopped-loop-2 "cl#5235 - Prevent graph loops that can be introduced by load_stopped -> migrate_to ordering" echo "" do_test colocated-utilization-primitive-1 "Colocated Utilization - Primitive" do_test colocated-utilization-primitive-2 "Colocated Utilization - Choose the most capable node" do_test colocated-utilization-group "Colocated Utilization - Group" do_test colocated-utilization-clone "Colocated Utilization - Clone" do_test utilization-check-allowed-nodes "Only check the capacities of the nodes that can run the resource" echo "" do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources" do_test node-maintenance-1 "cl#5128 - Node maintenance" do_test node-maintenance-2 "cl#5128 - Node maintenance (coming out of maintenance mode)" do_test shutdown-maintenance-node "Do not fence a maintenance node if it shuts down cleanly" do_test rsc-maintenance "Per-resource maintenance" echo "" do_test not-installed-agent "The resource agent is missing" do_test not-installed-tools "Something the resource agent needs is missing" echo "" do_test stopped-monitor-00 "Stopped Monitor - initial start" do_test stopped-monitor-01 "Stopped Monitor - failed started" do_test stopped-monitor-02 "Stopped Monitor - started multi-up" do_test stopped-monitor-03 "Stopped Monitor - stop started" do_test stopped-monitor-04 "Stopped Monitor - failed stop" do_test stopped-monitor-05 "Stopped Monitor - start unmanaged" do_test stopped-monitor-06 "Stopped Monitor - unmanaged multi-up" do_test stopped-monitor-07 "Stopped Monitor - start unmanaged multi-up" do_test stopped-monitor-08 "Stopped Monitor - migrate" do_test stopped-monitor-09 "Stopped Monitor - unmanage started" do_test stopped-monitor-10 "Stopped Monitor - unmanaged started multi-up" do_test stopped-monitor-11 "Stopped Monitor - stop unmanaged started" do_test stopped-monitor-12 "Stopped Monitor - unmanaged started multi-up (target-role=Stopped)" do_test stopped-monitor-20 "Stopped Monitor - initial stop" do_test stopped-monitor-21 "Stopped Monitor - stopped single-up" do_test stopped-monitor-22 "Stopped Monitor - stopped multi-up" do_test stopped-monitor-23 "Stopped Monitor - start stopped" do_test stopped-monitor-24 "Stopped Monitor - unmanage stopped" do_test stopped-monitor-25 "Stopped Monitor - unmanaged stopped multi-up" do_test stopped-monitor-26 "Stopped Monitor - start unmanaged stopped" do_test stopped-monitor-27 "Stopped Monitor - unmanaged stopped multi-up (target-role=Started)" do_test stopped-monitor-30 "Stopped Monitor - new node started" do_test stopped-monitor-31 "Stopped Monitor - new node stopped" echo "" # This is a combo test to check: # - probe timeout defaults to the minimum-interval monitor's # - duplicate recurring operations are ignored # - if timeout spec is bad, the default timeout is used # - failure is blocked with on-fail=block even if ISO8601 interval is specified # - started/stopped role monitors are started/stopped on right nodes do_test intervals "Recurring monitor interval handling" echo"" do_test ticket-primitive-1 "Ticket - Primitive (loss-policy=stop, initial)" do_test ticket-primitive-2 "Ticket - Primitive (loss-policy=stop, granted)" do_test ticket-primitive-3 "Ticket - Primitive (loss-policy-stop, revoked)" do_test ticket-primitive-4 "Ticket - Primitive (loss-policy=demote, initial)" do_test ticket-primitive-5 "Ticket - Primitive (loss-policy=demote, granted)" do_test ticket-primitive-6 "Ticket - Primitive (loss-policy=demote, revoked)" do_test ticket-primitive-7 "Ticket - Primitive (loss-policy=fence, initial)" do_test ticket-primitive-8 "Ticket - Primitive (loss-policy=fence, granted)" do_test ticket-primitive-9 "Ticket - Primitive (loss-policy=fence, revoked)" do_test ticket-primitive-10 "Ticket - Primitive (loss-policy=freeze, initial)" do_test ticket-primitive-11 "Ticket - Primitive (loss-policy=freeze, granted)" do_test ticket-primitive-12 "Ticket - Primitive (loss-policy=freeze, revoked)" do_test ticket-primitive-13 "Ticket - Primitive (loss-policy=stop, standby, granted)" do_test ticket-primitive-14 "Ticket - Primitive (loss-policy=stop, granted, standby)" do_test ticket-primitive-15 "Ticket - Primitive (loss-policy=stop, standby, revoked)" do_test ticket-primitive-16 "Ticket - Primitive (loss-policy=demote, standby, granted)" do_test ticket-primitive-17 "Ticket - Primitive (loss-policy=demote, granted, standby)" do_test ticket-primitive-18 "Ticket - Primitive (loss-policy=demote, standby, revoked)" do_test ticket-primitive-19 "Ticket - Primitive (loss-policy=fence, standby, granted)" do_test ticket-primitive-20 "Ticket - Primitive (loss-policy=fence, granted, standby)" do_test ticket-primitive-21 "Ticket - Primitive (loss-policy=fence, standby, revoked)" do_test ticket-primitive-22 "Ticket - Primitive (loss-policy=freeze, standby, granted)" do_test ticket-primitive-23 "Ticket - Primitive (loss-policy=freeze, granted, standby)" do_test ticket-primitive-24 "Ticket - Primitive (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-group-1 "Ticket - Group (loss-policy=stop, initial)" do_test ticket-group-2 "Ticket - Group (loss-policy=stop, granted)" do_test ticket-group-3 "Ticket - Group (loss-policy-stop, revoked)" do_test ticket-group-4 "Ticket - Group (loss-policy=demote, initial)" do_test ticket-group-5 "Ticket - Group (loss-policy=demote, granted)" do_test ticket-group-6 "Ticket - Group (loss-policy=demote, revoked)" do_test ticket-group-7 "Ticket - Group (loss-policy=fence, initial)" do_test ticket-group-8 "Ticket - Group (loss-policy=fence, granted)" do_test ticket-group-9 "Ticket - Group (loss-policy=fence, revoked)" do_test ticket-group-10 "Ticket - Group (loss-policy=freeze, initial)" do_test ticket-group-11 "Ticket - Group (loss-policy=freeze, granted)" do_test ticket-group-12 "Ticket - Group (loss-policy=freeze, revoked)" do_test ticket-group-13 "Ticket - Group (loss-policy=stop, standby, granted)" do_test ticket-group-14 "Ticket - Group (loss-policy=stop, granted, standby)" do_test ticket-group-15 "Ticket - Group (loss-policy=stop, standby, revoked)" do_test ticket-group-16 "Ticket - Group (loss-policy=demote, standby, granted)" do_test ticket-group-17 "Ticket - Group (loss-policy=demote, granted, standby)" do_test ticket-group-18 "Ticket - Group (loss-policy=demote, standby, revoked)" do_test ticket-group-19 "Ticket - Group (loss-policy=fence, standby, granted)" do_test ticket-group-20 "Ticket - Group (loss-policy=fence, granted, standby)" do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)" do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)" do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)" do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)" do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)" do_test ticket-clone-3 "Ticket - Clone (loss-policy-stop, revoked)" do_test ticket-clone-4 "Ticket - Clone (loss-policy=demote, initial)" do_test ticket-clone-5 "Ticket - Clone (loss-policy=demote, granted)" do_test ticket-clone-6 "Ticket - Clone (loss-policy=demote, revoked)" do_test ticket-clone-7 "Ticket - Clone (loss-policy=fence, initial)" do_test ticket-clone-8 "Ticket - Clone (loss-policy=fence, granted)" do_test ticket-clone-9 "Ticket - Clone (loss-policy=fence, revoked)" do_test ticket-clone-10 "Ticket - Clone (loss-policy=freeze, initial)" do_test ticket-clone-11 "Ticket - Clone (loss-policy=freeze, granted)" do_test ticket-clone-12 "Ticket - Clone (loss-policy=freeze, revoked)" do_test ticket-clone-13 "Ticket - Clone (loss-policy=stop, standby, granted)" do_test ticket-clone-14 "Ticket - Clone (loss-policy=stop, granted, standby)" do_test ticket-clone-15 "Ticket - Clone (loss-policy=stop, standby, revoked)" do_test ticket-clone-16 "Ticket - Clone (loss-policy=demote, standby, granted)" do_test ticket-clone-17 "Ticket - Clone (loss-policy=demote, granted, standby)" do_test ticket-clone-18 "Ticket - Clone (loss-policy=demote, standby, revoked)" do_test ticket-clone-19 "Ticket - Clone (loss-policy=fence, standby, granted)" do_test ticket-clone-20 "Ticket - Clone (loss-policy=fence, granted, standby)" do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)" do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)" do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)" do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)" do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)" do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)" do_test ticket-master-4 "Ticket - Master (loss-policy=demote, initial)" do_test ticket-master-5 "Ticket - Master (loss-policy=demote, granted)" do_test ticket-master-6 "Ticket - Master (loss-policy=demote, revoked)" do_test ticket-master-7 "Ticket - Master (loss-policy=fence, initial)" do_test ticket-master-8 "Ticket - Master (loss-policy=fence, granted)" do_test ticket-master-9 "Ticket - Master (loss-policy=fence, revoked)" do_test ticket-master-10 "Ticket - Master (loss-policy=freeze, initial)" do_test ticket-master-11 "Ticket - Master (loss-policy=freeze, granted)" do_test ticket-master-12 "Ticket - Master (loss-policy=freeze, revoked)" do_test ticket-master-13 "Ticket - Master (loss-policy=stop, standby, granted)" do_test ticket-master-14 "Ticket - Master (loss-policy=stop, granted, standby)" do_test ticket-master-15 "Ticket - Master (loss-policy=stop, standby, revoked)" do_test ticket-master-16 "Ticket - Master (loss-policy=demote, standby, granted)" do_test ticket-master-17 "Ticket - Master (loss-policy=demote, granted, standby)" do_test ticket-master-18 "Ticket - Master (loss-policy=demote, standby, revoked)" do_test ticket-master-19 "Ticket - Master (loss-policy=fence, standby, granted)" do_test ticket-master-20 "Ticket - Master (loss-policy=fence, granted, standby)" do_test ticket-master-21 "Ticket - Master (loss-policy=fence, standby, revoked)" do_test ticket-master-22 "Ticket - Master (loss-policy=freeze, standby, granted)" do_test ticket-master-23 "Ticket - Master (loss-policy=freeze, granted, standby)" do_test ticket-master-24 "Ticket - Master (loss-policy=freeze, standby, revoked)" echo "" do_test ticket-rsc-sets-1 "Ticket - Resource sets (1 ticket, initial)" do_test ticket-rsc-sets-2 "Ticket - Resource sets (1 ticket, granted)" do_test ticket-rsc-sets-3 "Ticket - Resource sets (1 ticket, revoked)" do_test ticket-rsc-sets-4 "Ticket - Resource sets (2 tickets, initial)" do_test ticket-rsc-sets-5 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-6 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-7 "Ticket - Resource sets (2 tickets, revoked)" do_test ticket-rsc-sets-8 "Ticket - Resource sets (1 ticket, standby, granted)" do_test ticket-rsc-sets-9 "Ticket - Resource sets (1 ticket, granted, standby)" do_test ticket-rsc-sets-10 "Ticket - Resource sets (1 ticket, standby, revoked)" do_test ticket-rsc-sets-11 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-12 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-13 "Ticket - Resource sets (2 tickets, granted, standby)" do_test ticket-rsc-sets-14 "Ticket - Resource sets (2 tickets, standby, revoked)" do_test cluster-specific-params "Cluster-specific instance attributes based on rules" do_test site-specific-params "Site-specific instance attributes based on rules" echo "" do_test template-1 "Template - 1" do_test template-2 "Template - 2" do_test template-3 "Template - 3 (merge operations)" do_test template-coloc-1 "Template - Colocation 1" do_test template-coloc-2 "Template - Colocation 2" do_test template-coloc-3 "Template - Colocation 3" do_test template-order-1 "Template - Order 1" do_test template-order-2 "Template - Order 2" do_test template-order-3 "Template - Order 3" do_test template-ticket "Template - Ticket" do_test template-rsc-sets-1 "Template - Resource Sets 1" do_test template-rsc-sets-2 "Template - Resource Sets 2" do_test template-rsc-sets-3 "Template - Resource Sets 3" do_test template-rsc-sets-4 "Template - Resource Sets 4" do_test template-clone-primitive "Cloned primitive from template" do_test template-clone-group "Cloned group from template" do_test location-sets-templates "Resource sets and templates - Location" do_test tags-coloc-order-1 "Tags - Colocation and Order (Simple)" do_test tags-coloc-order-2 "Tags - Colocation and Order (Resource Sets with Templates)" do_test tags-location "Tags - Location" do_test tags-ticket "Tags - Ticket" echo "" do_test container-1 "Container - initial" do_test container-2 "Container - monitor failed" do_test container-3 "Container - stop failed" do_test container-4 "Container - reached migration-threshold" do_test container-group-1 "Container in group - initial" do_test container-group-2 "Container in group - monitor failed" do_test container-group-3 "Container in group - stop failed" do_test container-group-4 "Container in group - reached migration-threshold" do_test container-is-remote-node "Place resource within container when container is remote-node" do_test bug-rh-1097457 "Kill user defined container/contents ordering" do_test bug-cl-5247 "Graph loop when recovering m/s resource in a container" do_test bundle-order-startup "Bundle startup ordering" do_test bundle-order-partial-start "Bundle startup ordering when some dependancies are already running" do_test bundle-order-partial-start-2 "Bundle startup ordering when some dependancies and the container are already running" do_test bundle-order-stop "Bundle stop ordering" do_test bundle-order-partial-stop "Bundle startup ordering when some dependancies are already stopped" do_test bundle-order-stop-on-remote "Stop nested resource after bringing up the connection" do_test bundle-order-startup-clone "Prevent startup because bundle isn't promoted" do_test bundle-order-startup-clone-2 "Bundle startup with clones" do_test bundle-order-stop-clone "Stop bundle because clone is stopping" do_test bundle-nested-colocation "Colocation of nested connection resources" do_test bundle-order-fencing "Order pseudo bundle fencing after parent node fencing if both are happening" do_test bundle-probe-order-1 "order 1" do_test bundle-probe-order-2 "order 2" do_test bundle-probe-order-3 "order 3" do_test bundle-probe-remotes "Ensure remotes get probed too" do_test bundle-replicas-change "Change bundle from 1 replica to multiple" +do_test nested-remote-recovery "Recover bundle's container hosted on remote node" echo "" do_test whitebox-fail1 "Fail whitebox container rsc." do_test whitebox-fail2 "Fail cluster connection to guest node" do_test whitebox-fail3 "Failed containers should not run nested on remote nodes." do_test whitebox-start "Start whitebox container with resources assigned to it" do_test whitebox-stop "Stop whitebox container with resources assigned to it" do_test whitebox-move "Move whitebox container with resources assigned to it" do_test whitebox-asymmetric "Verify connection rsc opts-in based on container resource" do_test whitebox-ms-ordering "Verify promote/demote can not occur before connection is established" do_test whitebox-ms-ordering-move "Stop/Start cycle within a moving container" do_test whitebox-orphaned "Properly shutdown orphaned whitebox container" do_test whitebox-orphan-ms "Properly tear down orphan ms resources on remote-nodes" do_test whitebox-unexpectedly-running "Recover container nodes the cluster did not start." do_test whitebox-migrate1 "Migrate both container and connection resource" do_test whitebox-imply-stop-on-fence "imply stop action on container node rsc when host node is fenced" do_test whitebox-nested-group "Verify guest remote-node works nested in a group" do_test guest-node-host-dies "Verify guest node is recovered if host goes away" do_test guest-node-cleanup "Order guest node connection recovery after container probe" echo "" do_test remote-startup-probes "Baremetal remote-node startup probes" do_test remote-startup "Startup a newly discovered remote-nodes with no status." do_test remote-fence-unclean "Fence unclean baremetal remote-node" do_test remote-fence-unclean2 "Fence baremetal remote-node after cluster node fails and connection can not be recovered" do_test remote-fence-unclean-3 "Probe failed remote nodes (triggers fencing)" do_test remote-move "Move remote-node connection resource" do_test remote-disable "Disable a baremetal remote-node" do_test remote-probe-disable "Probe then stop a baremetal remote-node" do_test remote-orphaned "Properly shutdown orphaned connection resource" do_test remote-orphaned2 "verify we can handle orphaned remote connections with active resources on the remote" do_test remote-recover "Recover connection resource after cluster-node fails." do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section" do_test remote-partial-migrate "Make sure partial migrations are handled before ops on the remote node." do_test remote-partial-migrate2 "Make sure partial migration target is prefered for remote connection." do_test remote-recover-fail "Make sure start failure causes fencing if rsc are active on remote." do_test remote-start-fail "Make sure a start failure does not result in fencing if no active resources are on remote." do_test remote-unclean2 "Make monitor failure always results in fencing, even if no rsc are active on remote." do_test remote-fence-before-reconnect "Fence before clearing recurring monitor failure" do_test remote-recovery "Recover remote connections before attempting demotion" do_test remote-recover-connection "Optimistically recovery of only the connection" do_test remote-recover-all "Fencing when the connection has no home" do_test remote-recover-no-resources "Fencing when the connection has no home and no active resources" do_test remote-recover-unknown "Fencing when the connection has no home and the remote has no operation history" do_test remote-reconnect-delay "Waiting for remote reconnect interval to expire" do_test remote-connection-unrecoverable "Remote connection host must be fenced, with connection unrecoverable" echo "" do_test resource-discovery "Exercises resource-discovery location constraint option." do_test rsc-discovery-per-node "Disable resource discovery per node" if [ $DO_VERSIONED_TESTS -eq 1 ]; then echo "" do_test versioned-resources "Start resources with #ra-version rules" do_test restart-versioned "Restart resources on #ra-version change" do_test reload-versioned "Reload resources on #ra-version change" echo "" do_test versioned-operations-1 "Use #ra-version to configure operations of native resources" do_test versioned-operations-2 "Use #ra-version to configure operations of stonith resources" do_test versioned-operations-3 "Use #ra-version to configure operations of master/slave resources" do_test versioned-operations-4 "Use #ra-version to configure operations of groups of the resources" fi echo "" test_results exit $EXITCODE diff --git a/cts/scheduler/bundle-probe-order-2.dot b/cts/scheduler/bundle-probe-order-2.dot index ee8e34209f..8f91bed9a7 100644 --- a/cts/scheduler/bundle-probe-order-2.dot +++ b/cts/scheduler/bundle-probe-order-2.dot @@ -1,8 +1,6 @@ digraph "g" { -"galera-bundle-0_monitor_30000 centos2" [ style=bold color="green" fontcolor="black"] "galera-bundle-docker-1_monitor_0 centos2" [ style=bold color="green" fontcolor="black"] "galera-bundle-docker-2_monitor_0 centos1" [ style=bold color="green" fontcolor="black"] "galera-bundle-docker-2_monitor_0 centos2" [ style=bold color="green" fontcolor="black"] "galera-bundle-docker-2_monitor_0 centos3" [ style=bold color="green" fontcolor="black"] -"galera:0_monitor_0 galera-bundle-0" [ style=bold color="green" fontcolor="black"] } diff --git a/cts/scheduler/bundle-probe-order-2.exp b/cts/scheduler/bundle-probe-order-2.exp index 8c97b05d50..d6174e74df 100644 --- a/cts/scheduler/bundle-probe-order-2.exp +++ b/cts/scheduler/bundle-probe-order-2.exp @@ -1,56 +1,38 @@ - - - - - - - - - - - - - - - - - - - + - + - + - + - + - + - + diff --git a/cts/scheduler/bundle-probe-order-2.summary b/cts/scheduler/bundle-probe-order-2.summary index 932aa5ed4b..eb8791429a 100644 --- a/cts/scheduler/bundle-probe-order-2.summary +++ b/cts/scheduler/bundle-probe-order-2.summary @@ -1,31 +1,29 @@ Using the original execution date of: 2017-10-12 07:31:57Z Current cluster status: ContainerNode galera-bundle-0:galera-bundle-docker-0: maintenance Online: [ centos1 centos2 centos3 ] Docker container set: galera-bundle [docker.io/tripleoupstream/centos-binary-mariadb:latest] (unmanaged) galera-bundle-0 (ocf::heartbeat:galera): Stopped centos2 (unmanaged) galera-bundle-1 (ocf::heartbeat:galera): Stopped (unmanaged) galera-bundle-2 (ocf::heartbeat:galera): Stopped (unmanaged) Transition Summary: Executing cluster transition: - * Resource action: galera:0 monitor on galera-bundle-0 - * Resource action: galera-bundle-0 monitor=30000 on centos2 * Resource action: galera-bundle-docker-1 monitor on centos2 * Resource action: galera-bundle-docker-2 monitor on centos3 * Resource action: galera-bundle-docker-2 monitor on centos2 * Resource action: galera-bundle-docker-2 monitor on centos1 Using the original execution date of: 2017-10-12 07:31:57Z Revised cluster status: ContainerNode galera-bundle-0:galera-bundle-docker-0: maintenance Online: [ centos1 centos2 centos3 ] Docker container set: galera-bundle [docker.io/tripleoupstream/centos-binary-mariadb:latest] (unmanaged) galera-bundle-0 (ocf::heartbeat:galera): Stopped centos2 (unmanaged) galera-bundle-1 (ocf::heartbeat:galera): Stopped (unmanaged) galera-bundle-2 (ocf::heartbeat:galera): Stopped (unmanaged) diff --git a/cts/scheduler/nested-remote-recovery.dot b/cts/scheduler/nested-remote-recovery.dot new file mode 100644 index 0000000000..0e1f6d26d4 --- /dev/null +++ b/cts/scheduler/nested-remote-recovery.dot @@ -0,0 +1,150 @@ +digraph "g" { +"all_stopped" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-0_monitor_30000 controller-0" [ style=bold color="green" fontcolor="black"] +"galera-bundle-0_start_0 controller-0" -> "galera-bundle-0_monitor_30000 controller-0" [ style = bold] +"galera-bundle-0_start_0 controller-0" -> "galera_monitor_10000 galera-bundle-0" [ style = bold] +"galera-bundle-0_start_0 controller-0" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"galera-bundle-0_start_0 controller-0" -> "galera_start_0 galera-bundle-0" [ style = bold] +"galera-bundle-0_start_0 controller-0" [ style=bold color="green" fontcolor="black"] +"galera-bundle-0_stop_0 controller-0" -> "all_stopped" [ style = bold] +"galera-bundle-0_stop_0 controller-0" -> "galera-bundle-0_start_0 controller-0" [ style = bold] +"galera-bundle-0_stop_0 controller-0" -> "galera-bundle-docker-0_stop_0 database-0" [ style = bold] +"galera-bundle-0_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] +"galera-bundle-docker-0_monitor_60000 database-0" [ style=bold color="green" fontcolor="black"] +"galera-bundle-docker-0_start_0 database-0" -> "galera-bundle-0_start_0 controller-0" [ style = bold] +"galera-bundle-docker-0_start_0 database-0" -> "galera-bundle-docker-0_monitor_60000 database-0" [ style = bold] +"galera-bundle-docker-0_start_0 database-0" -> "galera-bundle_running_0" [ style = bold] +"galera-bundle-docker-0_start_0 database-0" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"galera-bundle-docker-0_start_0 database-0" -> "galera_start_0 galera-bundle-0" [ style = bold] +"galera-bundle-docker-0_start_0 database-0" [ style=bold color="green" fontcolor="black"] +"galera-bundle-docker-0_stop_0 database-0" -> "all_stopped" [ style = bold] +"galera-bundle-docker-0_stop_0 database-0" -> "galera-bundle-docker-0_start_0 database-0" [ style = bold] +"galera-bundle-docker-0_stop_0 database-0" -> "galera-bundle_stopped_0" [ style = bold] +"galera-bundle-docker-0_stop_0 database-0" -> "stonith 'reboot' galera-bundle-0" [ style = bold] +"galera-bundle-docker-0_stop_0 database-0" [ style=bold color="green" fontcolor="black"] +"galera-bundle-master_demote_0" -> "galera-bundle-master_demoted_0" [ style = bold] +"galera-bundle-master_demote_0" -> "galera_demote_0 galera-bundle-0" [ style = bold] +"galera-bundle-master_demote_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_demoted_0" -> "galera-bundle-master_promote_0" [ style = bold] +"galera-bundle-master_demoted_0" -> "galera-bundle-master_start_0" [ style = bold] +"galera-bundle-master_demoted_0" -> "galera-bundle-master_stop_0" [ style = bold] +"galera-bundle-master_demoted_0" -> "galera-bundle_demoted_0" [ style = bold] +"galera-bundle-master_demoted_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_promote_0" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"galera-bundle-master_promote_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_promoted_0" -> "galera-bundle_promoted_0" [ style = bold] +"galera-bundle-master_promoted_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_running_0" -> "galera-bundle-master_promote_0" [ style = bold] +"galera-bundle-master_running_0" -> "galera-bundle_running_0" [ style = bold] +"galera-bundle-master_running_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_start_0" -> "galera-bundle-master_running_0" [ style = bold] +"galera-bundle-master_start_0" -> "galera_start_0 galera-bundle-0" [ style = bold] +"galera-bundle-master_start_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_stop_0" -> "galera-bundle-master_stopped_0" [ style = bold] +"galera-bundle-master_stop_0" -> "galera_stop_0 galera-bundle-0" [ style = bold] +"galera-bundle-master_stop_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_stopped_0" -> "galera-bundle-master_promote_0" [ style = bold] +"galera-bundle-master_stopped_0" -> "galera-bundle-master_start_0" [ style = bold] +"galera-bundle-master_stopped_0" -> "galera-bundle_stopped_0" [ style = bold] +"galera-bundle-master_stopped_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_demote_0" -> "galera-bundle-master_demote_0" [ style = bold] +"galera-bundle_demote_0" -> "galera-bundle_demoted_0" [ style = bold] +"galera-bundle_demote_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_demoted_0" -> "galera-bundle_promote_0" [ style = bold] +"galera-bundle_demoted_0" -> "galera-bundle_start_0" [ style = bold] +"galera-bundle_demoted_0" -> "galera-bundle_stop_0" [ style = bold] +"galera-bundle_demoted_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_promote_0" -> "galera-bundle-master_promote_0" [ style = bold] +"galera-bundle_promote_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_promoted_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_running_0" -> "galera-bundle_promote_0" [ style = bold] +"galera-bundle_running_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_start_0" -> "galera-bundle-docker-0_start_0 database-0" [ style = bold] +"galera-bundle_start_0" -> "galera-bundle-master_start_0" [ style = bold] +"galera-bundle_start_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_stop_0" -> "galera-bundle-docker-0_stop_0 database-0" [ style = bold] +"galera-bundle_stop_0" -> "galera-bundle-master_stop_0" [ style = bold] +"galera-bundle_stop_0" -> "galera_stop_0 galera-bundle-0" [ style = bold] +"galera-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_stopped_0" -> "galera-bundle_promote_0" [ style = bold] +"galera-bundle_stopped_0" -> "galera-bundle_start_0" [ style = bold] +"galera-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] +"galera_demote_0 galera-bundle-0" -> "galera-bundle-master_demoted_0" [ style = bold] +"galera_demote_0 galera-bundle-0" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"galera_demote_0 galera-bundle-0" -> "galera_stop_0 galera-bundle-0" [ style = bold] +"galera_demote_0 galera-bundle-0" [ style=bold color="green" fontcolor="orange"] +"galera_monitor_10000 galera-bundle-0" [ style=bold color="green" fontcolor="black"] +"galera_promote_0 galera-bundle-0" -> "galera-bundle-master_promoted_0" [ style = bold] +"galera_promote_0 galera-bundle-0" -> "galera_monitor_10000 galera-bundle-0" [ style = bold] +"galera_promote_0 galera-bundle-0" [ style=bold color="green" fontcolor="black"] +"galera_start_0 galera-bundle-0" -> "galera-bundle-master_running_0" [ style = bold] +"galera_start_0 galera-bundle-0" -> "galera_monitor_10000 galera-bundle-0" [ style = bold] +"galera_start_0 galera-bundle-0" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"galera_start_0 galera-bundle-0" [ style=bold color="green" fontcolor="black"] +"galera_stop_0 galera-bundle-0" -> "all_stopped" [ style = bold] +"galera_stop_0 galera-bundle-0" -> "galera-bundle-master_stopped_0" [ style = bold] +"galera_stop_0 galera-bundle-0" -> "galera_start_0 galera-bundle-0" [ style = bold] +"galera_stop_0 galera-bundle-0" [ style=bold color="green" fontcolor="orange"] +"stonith 'reboot' galera-bundle-0" -> "galera-bundle-master_stop_0" [ style = bold] +"stonith 'reboot' galera-bundle-0" -> "stonith_complete" [ style = bold] +"stonith 'reboot' galera-bundle-0" [ style=bold color="green" fontcolor="orange"] +"stonith-fence_ipmilan-5254000203a2_monitor_60000 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254000203a2_start_0 controller-2" -> "stonith-fence_ipmilan-5254000203a2_monitor_60000 controller-2" [ style = bold] +"stonith-fence_ipmilan-5254000203a2_start_0 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254000203a2_stop_0 controller-2" -> "all_stopped" [ style = bold] +"stonith-fence_ipmilan-5254000203a2_stop_0 controller-2" -> "stonith-fence_ipmilan-5254000203a2_start_0 controller-2" [ style = bold] +"stonith-fence_ipmilan-5254000203a2_stop_0 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254002f6d57_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254002f6d57_start_0 controller-1" -> "stonith-fence_ipmilan-5254002f6d57_monitor_60000 controller-1" [ style = bold] +"stonith-fence_ipmilan-5254002f6d57_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254002f6d57_stop_0 controller-1" -> "all_stopped" [ style = bold] +"stonith-fence_ipmilan-5254002f6d57_stop_0 controller-1" -> "stonith-fence_ipmilan-5254002f6d57_start_0 controller-1" [ style = bold] +"stonith-fence_ipmilan-5254002f6d57_stop_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254003296a5_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254003296a5_start_0 controller-1" -> "stonith-fence_ipmilan-5254003296a5_monitor_60000 controller-1" [ style = bold] +"stonith-fence_ipmilan-5254003296a5_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254003296a5_stop_0 controller-1" -> "all_stopped" [ style = bold] +"stonith-fence_ipmilan-5254003296a5_stop_0 controller-1" -> "stonith-fence_ipmilan-5254003296a5_start_0 controller-1" [ style = bold] +"stonith-fence_ipmilan-5254003296a5_stop_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254005f9a33_monitor_60000 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254005f9a33_start_0 controller-2" -> "stonith-fence_ipmilan-5254005f9a33_monitor_60000 controller-2" [ style = bold] +"stonith-fence_ipmilan-5254005f9a33_start_0 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254005f9a33_stop_0 controller-2" -> "all_stopped" [ style = bold] +"stonith-fence_ipmilan-5254005f9a33_stop_0 controller-2" -> "stonith-fence_ipmilan-5254005f9a33_start_0 controller-2" [ style = bold] +"stonith-fence_ipmilan-5254005f9a33_stop_0 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-52540065418e_monitor_60000 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-52540065418e_start_0 controller-2" -> "stonith-fence_ipmilan-52540065418e_monitor_60000 controller-2" [ style = bold] +"stonith-fence_ipmilan-52540065418e_start_0 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-52540065418e_stop_0 controller-2" -> "all_stopped" [ style = bold] +"stonith-fence_ipmilan-52540065418e_stop_0 controller-2" -> "stonith-fence_ipmilan-52540065418e_start_0 controller-2" [ style = bold] +"stonith-fence_ipmilan-52540065418e_stop_0 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-52540066e27e_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-52540066e27e_start_0 controller-1" -> "stonith-fence_ipmilan-52540066e27e_monitor_60000 controller-1" [ style = bold] +"stonith-fence_ipmilan-52540066e27e_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-52540066e27e_stop_0 controller-1" -> "all_stopped" [ style = bold] +"stonith-fence_ipmilan-52540066e27e_stop_0 controller-1" -> "stonith-fence_ipmilan-52540066e27e_start_0 controller-1" [ style = bold] +"stonith-fence_ipmilan-52540066e27e_stop_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-52540098c9ff_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-52540098c9ff_start_0 controller-1" -> "stonith-fence_ipmilan-52540098c9ff_monitor_60000 controller-1" [ style = bold] +"stonith-fence_ipmilan-52540098c9ff_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-52540098c9ff_stop_0 controller-1" -> "all_stopped" [ style = bold] +"stonith-fence_ipmilan-52540098c9ff_stop_0 controller-1" -> "stonith-fence_ipmilan-52540098c9ff_start_0 controller-1" [ style = bold] +"stonith-fence_ipmilan-52540098c9ff_stop_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400a16c0d_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400a16c0d_start_0 controller-1" -> "stonith-fence_ipmilan-525400a16c0d_monitor_60000 controller-1" [ style = bold] +"stonith-fence_ipmilan-525400a16c0d_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400a16c0d_stop_0 controller-1" -> "all_stopped" [ style = bold] +"stonith-fence_ipmilan-525400a16c0d_stop_0 controller-1" -> "stonith-fence_ipmilan-525400a16c0d_start_0 controller-1" [ style = bold] +"stonith-fence_ipmilan-525400a16c0d_stop_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400aab9d9_monitor_60000 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400aab9d9_start_0 controller-2" -> "stonith-fence_ipmilan-525400aab9d9_monitor_60000 controller-2" [ style = bold] +"stonith-fence_ipmilan-525400aab9d9_start_0 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400aab9d9_stop_0 controller-2" -> "all_stopped" [ style = bold] +"stonith-fence_ipmilan-525400aab9d9_stop_0 controller-2" -> "stonith-fence_ipmilan-525400aab9d9_start_0 controller-2" [ style = bold] +"stonith-fence_ipmilan-525400aab9d9_stop_0 controller-2" [ style=bold color="green" fontcolor="black"] +"stonith_complete" -> "all_stopped" [ style = bold] +"stonith_complete" -> "galera-bundle-docker-0_start_0 database-0" [ style = bold] +"stonith_complete" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"stonith_complete" -> "galera_start_0 galera-bundle-0" [ style = bold] +"stonith_complete" [ style=bold color="green" fontcolor="orange"] +} diff --git a/cts/scheduler/nested-remote-recovery.exp b/cts/scheduler/nested-remote-recovery.exp new file mode 100644 index 0000000000..b3ac896304 --- /dev/null +++ b/cts/scheduler/nested-remote-recovery.exp @@ -0,0 +1,819 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/cts/scheduler/nested-remote-recovery.scores b/cts/scheduler/nested-remote-recovery.scores new file mode 100644 index 0000000000..fb7e779ccb --- /dev/null +++ b/cts/scheduler/nested-remote-recovery.scores @@ -0,0 +1,953 @@ +Allocation scores: +Using the original execution date of: 2018-09-11 21:23:25Z +clone_color: galera-bundle-master allocation score on controller-0: -INFINITY +clone_color: galera-bundle-master allocation score on controller-1: -INFINITY +clone_color: galera-bundle-master allocation score on controller-2: -INFINITY +clone_color: galera-bundle-master allocation score on database-0: -INFINITY +clone_color: galera-bundle-master allocation score on database-1: -INFINITY +clone_color: galera-bundle-master allocation score on database-2: -INFINITY +clone_color: galera-bundle-master allocation score on galera-bundle-0: 0 +clone_color: galera-bundle-master allocation score on galera-bundle-1: 0 +clone_color: galera-bundle-master allocation score on galera-bundle-2: 0 +clone_color: galera-bundle-master allocation score on messaging-0: -INFINITY +clone_color: galera-bundle-master allocation score on messaging-1: -INFINITY +clone_color: galera-bundle-master allocation score on messaging-2: -INFINITY +clone_color: galera:0 allocation score on galera-bundle-0: INFINITY +clone_color: galera:1 allocation score on galera-bundle-1: INFINITY +clone_color: galera:2 allocation score on galera-bundle-2: INFINITY +clone_color: rabbitmq-bundle-clone allocation score on controller-0: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on controller-1: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on controller-2: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on database-0: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on database-1: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on database-2: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on messaging-0: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on messaging-1: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on messaging-2: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-0: 0 +clone_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-1: 0 +clone_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-2: 0 +clone_color: rabbitmq:0 allocation score on rabbitmq-bundle-0: INFINITY +clone_color: rabbitmq:1 allocation score on rabbitmq-bundle-1: INFINITY +clone_color: rabbitmq:2 allocation score on rabbitmq-bundle-2: INFINITY +clone_color: redis-bundle-master allocation score on controller-0: -INFINITY +clone_color: redis-bundle-master allocation score on controller-1: -INFINITY +clone_color: redis-bundle-master allocation score on controller-2: -INFINITY +clone_color: redis-bundle-master allocation score on database-0: -INFINITY +clone_color: redis-bundle-master allocation score on database-1: -INFINITY +clone_color: redis-bundle-master allocation score on database-2: -INFINITY +clone_color: redis-bundle-master allocation score on messaging-0: -INFINITY +clone_color: redis-bundle-master allocation score on messaging-1: -INFINITY +clone_color: redis-bundle-master allocation score on messaging-2: -INFINITY +clone_color: redis-bundle-master allocation score on redis-bundle-0: 0 +clone_color: redis-bundle-master allocation score on redis-bundle-1: 0 +clone_color: redis-bundle-master allocation score on redis-bundle-2: 0 +clone_color: redis:0 allocation score on redis-bundle-0: INFINITY +clone_color: redis:1 allocation score on redis-bundle-1: INFINITY +clone_color: redis:2 allocation score on redis-bundle-2: INFINITY +container_color: galera-bundle allocation score on controller-0: -INFINITY +container_color: galera-bundle allocation score on controller-1: -INFINITY +container_color: galera-bundle allocation score on controller-2: -INFINITY +container_color: galera-bundle allocation score on database-0: 0 +container_color: galera-bundle allocation score on database-1: 0 +container_color: galera-bundle allocation score on database-2: 0 +container_color: galera-bundle allocation score on messaging-0: -INFINITY +container_color: galera-bundle allocation score on messaging-1: -INFINITY +container_color: galera-bundle allocation score on messaging-2: -INFINITY +container_color: galera-bundle-0 allocation score on controller-0: INFINITY +container_color: galera-bundle-0 allocation score on controller-1: 0 +container_color: galera-bundle-0 allocation score on controller-2: 0 +container_color: galera-bundle-0 allocation score on database-0: -INFINITY +container_color: galera-bundle-0 allocation score on database-1: -INFINITY +container_color: galera-bundle-0 allocation score on database-2: -INFINITY +container_color: galera-bundle-0 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-0 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-0 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-1 allocation score on controller-0: 0 +container_color: galera-bundle-1 allocation score on controller-1: INFINITY +container_color: galera-bundle-1 allocation score on controller-2: 0 +container_color: galera-bundle-1 allocation score on database-0: -INFINITY +container_color: galera-bundle-1 allocation score on database-1: -INFINITY +container_color: galera-bundle-1 allocation score on database-2: -INFINITY +container_color: galera-bundle-1 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-1 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-1 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-2 allocation score on controller-0: 0 +container_color: galera-bundle-2 allocation score on controller-1: 0 +container_color: galera-bundle-2 allocation score on controller-2: INFINITY +container_color: galera-bundle-2 allocation score on database-0: -INFINITY +container_color: galera-bundle-2 allocation score on database-1: -INFINITY +container_color: galera-bundle-2 allocation score on database-2: -INFINITY +container_color: galera-bundle-2 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-2 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-2 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-docker-0 allocation score on controller-0: -INFINITY +container_color: galera-bundle-docker-0 allocation score on controller-1: -INFINITY +container_color: galera-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: galera-bundle-docker-0 allocation score on database-0: INFINITY +container_color: galera-bundle-docker-0 allocation score on database-1: 0 +container_color: galera-bundle-docker-0 allocation score on database-2: 0 +container_color: galera-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-docker-1 allocation score on controller-0: -INFINITY +container_color: galera-bundle-docker-1 allocation score on controller-1: -INFINITY +container_color: galera-bundle-docker-1 allocation score on controller-2: -INFINITY +container_color: galera-bundle-docker-1 allocation score on database-0: 0 +container_color: galera-bundle-docker-1 allocation score on database-1: INFINITY +container_color: galera-bundle-docker-1 allocation score on database-2: 0 +container_color: galera-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-docker-2 allocation score on controller-0: -INFINITY +container_color: galera-bundle-docker-2 allocation score on controller-1: -INFINITY +container_color: galera-bundle-docker-2 allocation score on controller-2: -INFINITY +container_color: galera-bundle-docker-2 allocation score on database-0: 0 +container_color: galera-bundle-docker-2 allocation score on database-1: 0 +container_color: galera-bundle-docker-2 allocation score on database-2: INFINITY +container_color: galera-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-master allocation score on controller-0: 0 +container_color: galera-bundle-master allocation score on controller-1: 0 +container_color: galera-bundle-master allocation score on controller-2: 0 +container_color: galera-bundle-master allocation score on database-0: 0 +container_color: galera-bundle-master allocation score on database-1: 0 +container_color: galera-bundle-master allocation score on database-2: 0 +container_color: galera-bundle-master allocation score on galera-bundle-0: -INFINITY +container_color: galera-bundle-master allocation score on galera-bundle-1: -INFINITY +container_color: galera-bundle-master allocation score on galera-bundle-2: -INFINITY +container_color: galera-bundle-master allocation score on messaging-0: 0 +container_color: galera-bundle-master allocation score on messaging-1: 0 +container_color: galera-bundle-master allocation score on messaging-2: 0 +container_color: galera:0 allocation score on galera-bundle-0: INFINITY +container_color: galera:1 allocation score on galera-bundle-1: INFINITY +container_color: galera:2 allocation score on galera-bundle-2: INFINITY +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: 0 +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: openstack-cinder-volume allocation score on controller-0: 0 +container_color: openstack-cinder-volume allocation score on controller-1: 0 +container_color: openstack-cinder-volume allocation score on controller-2: 0 +container_color: openstack-cinder-volume allocation score on database-0: -INFINITY +container_color: openstack-cinder-volume allocation score on database-1: -INFINITY +container_color: openstack-cinder-volume allocation score on database-2: -INFINITY +container_color: openstack-cinder-volume allocation score on messaging-0: -INFINITY +container_color: openstack-cinder-volume allocation score on messaging-1: -INFINITY +container_color: openstack-cinder-volume allocation score on messaging-2: -INFINITY +container_color: openstack-cinder-volume-docker-0 allocation score on controller-0: INFINITY +container_color: openstack-cinder-volume-docker-0 allocation score on controller-1: 0 +container_color: openstack-cinder-volume-docker-0 allocation score on controller-2: 0 +container_color: openstack-cinder-volume-docker-0 allocation score on database-0: -INFINITY +container_color: openstack-cinder-volume-docker-0 allocation score on database-1: -INFINITY +container_color: openstack-cinder-volume-docker-0 allocation score on database-2: -INFINITY +container_color: openstack-cinder-volume-docker-0 allocation score on messaging-0: -INFINITY +container_color: openstack-cinder-volume-docker-0 allocation score on messaging-1: -INFINITY +container_color: openstack-cinder-volume-docker-0 allocation score on messaging-2: -INFINITY +container_color: rabbitmq-bundle allocation score on controller-0: -INFINITY +container_color: rabbitmq-bundle allocation score on controller-1: -INFINITY +container_color: rabbitmq-bundle allocation score on controller-2: -INFINITY +container_color: rabbitmq-bundle allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle allocation score on messaging-0: 0 +container_color: rabbitmq-bundle allocation score on messaging-1: 0 +container_color: rabbitmq-bundle allocation score on messaging-2: 0 +container_color: rabbitmq-bundle-0 allocation score on controller-0: 0 +container_color: rabbitmq-bundle-0 allocation score on controller-1: 0 +container_color: rabbitmq-bundle-0 allocation score on controller-2: INFINITY +container_color: rabbitmq-bundle-0 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-0 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-0 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-0 allocation score on messaging-0: -INFINITY +container_color: rabbitmq-bundle-0 allocation score on messaging-1: -INFINITY +container_color: rabbitmq-bundle-0 allocation score on messaging-2: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on controller-0: 0 +container_color: rabbitmq-bundle-1 allocation score on controller-1: INFINITY +container_color: rabbitmq-bundle-1 allocation score on controller-2: 0 +container_color: rabbitmq-bundle-1 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on messaging-0: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on messaging-1: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on messaging-2: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on controller-0: 0 +container_color: rabbitmq-bundle-2 allocation score on controller-1: INFINITY +container_color: rabbitmq-bundle-2 allocation score on controller-2: 0 +container_color: rabbitmq-bundle-2 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on messaging-0: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on messaging-1: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on messaging-2: -INFINITY +container_color: rabbitmq-bundle-clone allocation score on controller-0: 0 +container_color: rabbitmq-bundle-clone allocation score on controller-1: 0 +container_color: rabbitmq-bundle-clone allocation score on controller-2: 0 +container_color: rabbitmq-bundle-clone allocation score on database-0: 0 +container_color: rabbitmq-bundle-clone allocation score on database-1: 0 +container_color: rabbitmq-bundle-clone allocation score on database-2: 0 +container_color: rabbitmq-bundle-clone allocation score on messaging-0: 0 +container_color: rabbitmq-bundle-clone allocation score on messaging-1: 0 +container_color: rabbitmq-bundle-clone allocation score on messaging-2: 0 +container_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-0: -INFINITY +container_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-1: -INFINITY +container_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-2: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on controller-0: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on controller-1: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on messaging-0: INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on messaging-1: 0 +container_color: rabbitmq-bundle-docker-0 allocation score on messaging-2: 0 +container_color: rabbitmq-bundle-docker-1 allocation score on controller-0: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on controller-1: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on controller-2: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on messaging-0: 0 +container_color: rabbitmq-bundle-docker-1 allocation score on messaging-1: INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on messaging-2: 0 +container_color: rabbitmq-bundle-docker-2 allocation score on controller-0: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on controller-1: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on controller-2: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on messaging-0: 0 +container_color: rabbitmq-bundle-docker-2 allocation score on messaging-1: 0 +container_color: rabbitmq-bundle-docker-2 allocation score on messaging-2: INFINITY +container_color: rabbitmq:0 allocation score on rabbitmq-bundle-0: INFINITY +container_color: rabbitmq:1 allocation score on rabbitmq-bundle-1: INFINITY +container_color: rabbitmq:2 allocation score on rabbitmq-bundle-2: INFINITY +container_color: redis-bundle allocation score on controller-0: 0 +container_color: redis-bundle allocation score on controller-1: 0 +container_color: redis-bundle allocation score on controller-2: 0 +container_color: redis-bundle allocation score on database-0: -INFINITY +container_color: redis-bundle allocation score on database-1: -INFINITY +container_color: redis-bundle allocation score on database-2: -INFINITY +container_color: redis-bundle allocation score on messaging-0: -INFINITY +container_color: redis-bundle allocation score on messaging-1: -INFINITY +container_color: redis-bundle allocation score on messaging-2: -INFINITY +container_color: redis-bundle-0 allocation score on controller-0: INFINITY +container_color: redis-bundle-0 allocation score on controller-1: 0 +container_color: redis-bundle-0 allocation score on controller-2: 0 +container_color: redis-bundle-0 allocation score on database-0: -INFINITY +container_color: redis-bundle-0 allocation score on database-1: -INFINITY +container_color: redis-bundle-0 allocation score on database-2: -INFINITY +container_color: redis-bundle-0 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-0 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-0 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-1 allocation score on controller-0: 0 +container_color: redis-bundle-1 allocation score on controller-1: INFINITY +container_color: redis-bundle-1 allocation score on controller-2: 0 +container_color: redis-bundle-1 allocation score on database-0: -INFINITY +container_color: redis-bundle-1 allocation score on database-1: -INFINITY +container_color: redis-bundle-1 allocation score on database-2: -INFINITY +container_color: redis-bundle-1 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-1 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-1 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-2 allocation score on controller-0: 0 +container_color: redis-bundle-2 allocation score on controller-1: 0 +container_color: redis-bundle-2 allocation score on controller-2: INFINITY +container_color: redis-bundle-2 allocation score on database-0: -INFINITY +container_color: redis-bundle-2 allocation score on database-1: -INFINITY +container_color: redis-bundle-2 allocation score on database-2: -INFINITY +container_color: redis-bundle-2 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-2 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-2 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: redis-bundle-docker-0 allocation score on controller-1: 0 +container_color: redis-bundle-docker-0 allocation score on controller-2: 0 +container_color: redis-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: redis-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: redis-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: redis-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-docker-1 allocation score on controller-0: 0 +container_color: redis-bundle-docker-1 allocation score on controller-1: INFINITY +container_color: redis-bundle-docker-1 allocation score on controller-2: 0 +container_color: redis-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: redis-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: redis-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: redis-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-docker-2 allocation score on controller-0: 0 +container_color: redis-bundle-docker-2 allocation score on controller-1: 0 +container_color: redis-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: redis-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: redis-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: redis-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: redis-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-master allocation score on controller-0: 0 +container_color: redis-bundle-master allocation score on controller-1: 0 +container_color: redis-bundle-master allocation score on controller-2: 0 +container_color: redis-bundle-master allocation score on database-0: 0 +container_color: redis-bundle-master allocation score on database-1: 0 +container_color: redis-bundle-master allocation score on database-2: 0 +container_color: redis-bundle-master allocation score on messaging-0: 0 +container_color: redis-bundle-master allocation score on messaging-1: 0 +container_color: redis-bundle-master allocation score on messaging-2: 0 +container_color: redis-bundle-master allocation score on redis-bundle-0: -INFINITY +container_color: redis-bundle-master allocation score on redis-bundle-1: -INFINITY +container_color: redis-bundle-master allocation score on redis-bundle-2: -INFINITY +container_color: redis:0 allocation score on redis-bundle-0: INFINITY +container_color: redis:1 allocation score on redis-bundle-1: INFINITY +container_color: redis:2 allocation score on redis-bundle-2: INFINITY +galera:0 promotion score on galera-bundle-0: 100 +galera:1 promotion score on galera-bundle-1: 100 +galera:2 promotion score on galera-bundle-2: 100 +native_color: database-0 allocation score on controller-0: INFINITY +native_color: database-0 allocation score on controller-1: 0 +native_color: database-0 allocation score on controller-2: 0 +native_color: database-0 allocation score on database-0: -INFINITY +native_color: database-0 allocation score on database-1: -INFINITY +native_color: database-0 allocation score on database-2: -INFINITY +native_color: database-0 allocation score on messaging-0: -INFINITY +native_color: database-0 allocation score on messaging-1: -INFINITY +native_color: database-0 allocation score on messaging-2: -INFINITY +native_color: database-1 allocation score on controller-0: 0 +native_color: database-1 allocation score on controller-1: INFINITY +native_color: database-1 allocation score on controller-2: 0 +native_color: database-1 allocation score on database-0: -INFINITY +native_color: database-1 allocation score on database-1: -INFINITY +native_color: database-1 allocation score on database-2: -INFINITY +native_color: database-1 allocation score on messaging-0: -INFINITY +native_color: database-1 allocation score on messaging-1: -INFINITY +native_color: database-1 allocation score on messaging-2: -INFINITY +native_color: database-2 allocation score on controller-0: 0 +native_color: database-2 allocation score on controller-1: 0 +native_color: database-2 allocation score on controller-2: INFINITY +native_color: database-2 allocation score on database-0: -INFINITY +native_color: database-2 allocation score on database-1: -INFINITY +native_color: database-2 allocation score on database-2: -INFINITY +native_color: database-2 allocation score on messaging-0: -INFINITY +native_color: database-2 allocation score on messaging-1: -INFINITY +native_color: database-2 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-0 allocation score on controller-0: INFINITY +native_color: galera-bundle-0 allocation score on controller-1: -INFINITY +native_color: galera-bundle-0 allocation score on controller-2: -INFINITY +native_color: galera-bundle-0 allocation score on database-0: -INFINITY +native_color: galera-bundle-0 allocation score on database-1: -INFINITY +native_color: galera-bundle-0 allocation score on database-2: -INFINITY +native_color: galera-bundle-0 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-0 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-0 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-1 allocation score on controller-0: -INFINITY +native_color: galera-bundle-1 allocation score on controller-1: INFINITY +native_color: galera-bundle-1 allocation score on controller-2: -INFINITY +native_color: galera-bundle-1 allocation score on database-0: -INFINITY +native_color: galera-bundle-1 allocation score on database-1: -INFINITY +native_color: galera-bundle-1 allocation score on database-2: -INFINITY +native_color: galera-bundle-1 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-1 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-1 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-2 allocation score on controller-0: -INFINITY +native_color: galera-bundle-2 allocation score on controller-1: -INFINITY +native_color: galera-bundle-2 allocation score on controller-2: INFINITY +native_color: galera-bundle-2 allocation score on database-0: -INFINITY +native_color: galera-bundle-2 allocation score on database-1: -INFINITY +native_color: galera-bundle-2 allocation score on database-2: -INFINITY +native_color: galera-bundle-2 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-2 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-2 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-docker-0 allocation score on controller-0: -INFINITY +native_color: galera-bundle-docker-0 allocation score on controller-1: -INFINITY +native_color: galera-bundle-docker-0 allocation score on controller-2: -INFINITY +native_color: galera-bundle-docker-0 allocation score on database-0: INFINITY +native_color: galera-bundle-docker-0 allocation score on database-1: -INFINITY +native_color: galera-bundle-docker-0 allocation score on database-2: -INFINITY +native_color: galera-bundle-docker-0 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-docker-0 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-docker-0 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-docker-1 allocation score on controller-0: -INFINITY +native_color: galera-bundle-docker-1 allocation score on controller-1: -INFINITY +native_color: galera-bundle-docker-1 allocation score on controller-2: -INFINITY +native_color: galera-bundle-docker-1 allocation score on database-0: -10000 +native_color: galera-bundle-docker-1 allocation score on database-1: INFINITY +native_color: galera-bundle-docker-1 allocation score on database-2: -10000 +native_color: galera-bundle-docker-1 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-docker-1 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-docker-1 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-docker-2 allocation score on controller-0: -INFINITY +native_color: galera-bundle-docker-2 allocation score on controller-1: -INFINITY +native_color: galera-bundle-docker-2 allocation score on controller-2: -INFINITY +native_color: galera-bundle-docker-2 allocation score on database-0: -10000 +native_color: galera-bundle-docker-2 allocation score on database-1: -INFINITY +native_color: galera-bundle-docker-2 allocation score on database-2: INFINITY +native_color: galera-bundle-docker-2 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-docker-2 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-docker-2 allocation score on messaging-2: -INFINITY +native_color: galera:0 allocation score on galera-bundle-0: INFINITY +native_color: galera:1 allocation score on galera-bundle-1: INFINITY +native_color: galera:2 allocation score on galera-bundle-2: INFINITY +native_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +native_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on controller-0: 0 +native_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY +native_color: haproxy-bundle-docker-1 allocation score on controller-2: INFINITY +native_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +native_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +native_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +native_color: ip-10.0.0.109 allocation score on controller-0: 0 +native_color: ip-10.0.0.109 allocation score on controller-1: 0 +native_color: ip-10.0.0.109 allocation score on controller-2: INFINITY +native_color: ip-10.0.0.109 allocation score on database-0: -INFINITY +native_color: ip-10.0.0.109 allocation score on database-1: -INFINITY +native_color: ip-10.0.0.109 allocation score on database-2: -INFINITY +native_color: ip-10.0.0.109 allocation score on messaging-0: -INFINITY +native_color: ip-10.0.0.109 allocation score on messaging-1: -INFINITY +native_color: ip-10.0.0.109 allocation score on messaging-2: -INFINITY +native_color: ip-172.17.1.12 allocation score on controller-0: 0 +native_color: ip-172.17.1.12 allocation score on controller-1: 0 +native_color: ip-172.17.1.12 allocation score on controller-2: INFINITY +native_color: ip-172.17.1.12 allocation score on database-0: -INFINITY +native_color: ip-172.17.1.12 allocation score on database-1: -INFINITY +native_color: ip-172.17.1.12 allocation score on database-2: -INFINITY +native_color: ip-172.17.1.12 allocation score on messaging-0: -INFINITY +native_color: ip-172.17.1.12 allocation score on messaging-1: -INFINITY +native_color: ip-172.17.1.12 allocation score on messaging-2: -INFINITY +native_color: ip-172.17.1.18 allocation score on controller-0: 0 +native_color: ip-172.17.1.18 allocation score on controller-1: INFINITY +native_color: ip-172.17.1.18 allocation score on controller-2: 0 +native_color: ip-172.17.1.18 allocation score on database-0: -INFINITY +native_color: ip-172.17.1.18 allocation score on database-1: -INFINITY +native_color: ip-172.17.1.18 allocation score on database-2: -INFINITY +native_color: ip-172.17.1.18 allocation score on messaging-0: -INFINITY +native_color: ip-172.17.1.18 allocation score on messaging-1: -INFINITY +native_color: ip-172.17.1.18 allocation score on messaging-2: -INFINITY +native_color: ip-172.17.3.18 allocation score on controller-0: 0 +native_color: ip-172.17.3.18 allocation score on controller-1: INFINITY +native_color: ip-172.17.3.18 allocation score on controller-2: 0 +native_color: ip-172.17.3.18 allocation score on database-0: -INFINITY +native_color: ip-172.17.3.18 allocation score on database-1: -INFINITY +native_color: ip-172.17.3.18 allocation score on database-2: -INFINITY +native_color: ip-172.17.3.18 allocation score on messaging-0: -INFINITY +native_color: ip-172.17.3.18 allocation score on messaging-1: -INFINITY +native_color: ip-172.17.3.18 allocation score on messaging-2: -INFINITY +native_color: ip-172.17.4.14 allocation score on controller-0: 0 +native_color: ip-172.17.4.14 allocation score on controller-1: INFINITY +native_color: ip-172.17.4.14 allocation score on controller-2: 0 +native_color: ip-172.17.4.14 allocation score on database-0: -INFINITY +native_color: ip-172.17.4.14 allocation score on database-1: -INFINITY +native_color: ip-172.17.4.14 allocation score on database-2: -INFINITY +native_color: ip-172.17.4.14 allocation score on messaging-0: -INFINITY +native_color: ip-172.17.4.14 allocation score on messaging-1: -INFINITY +native_color: ip-172.17.4.14 allocation score on messaging-2: -INFINITY +native_color: ip-192.168.24.12 allocation score on controller-0: 0 +native_color: ip-192.168.24.12 allocation score on controller-1: INFINITY +native_color: ip-192.168.24.12 allocation score on controller-2: 0 +native_color: ip-192.168.24.12 allocation score on database-0: -INFINITY +native_color: ip-192.168.24.12 allocation score on database-1: -INFINITY +native_color: ip-192.168.24.12 allocation score on database-2: -INFINITY +native_color: ip-192.168.24.12 allocation score on messaging-0: -INFINITY +native_color: ip-192.168.24.12 allocation score on messaging-1: -INFINITY +native_color: ip-192.168.24.12 allocation score on messaging-2: -INFINITY +native_color: messaging-0 allocation score on controller-0: 0 +native_color: messaging-0 allocation score on controller-1: 0 +native_color: messaging-0 allocation score on controller-2: INFINITY +native_color: messaging-0 allocation score on database-0: -INFINITY +native_color: messaging-0 allocation score on database-1: -INFINITY +native_color: messaging-0 allocation score on database-2: -INFINITY +native_color: messaging-0 allocation score on messaging-0: -INFINITY +native_color: messaging-0 allocation score on messaging-1: -INFINITY +native_color: messaging-0 allocation score on messaging-2: -INFINITY +native_color: messaging-1 allocation score on controller-0: 0 +native_color: messaging-1 allocation score on controller-1: INFINITY +native_color: messaging-1 allocation score on controller-2: 0 +native_color: messaging-1 allocation score on database-0: -INFINITY +native_color: messaging-1 allocation score on database-1: -INFINITY +native_color: messaging-1 allocation score on database-2: -INFINITY +native_color: messaging-1 allocation score on messaging-0: -INFINITY +native_color: messaging-1 allocation score on messaging-1: -INFINITY +native_color: messaging-1 allocation score on messaging-2: -INFINITY +native_color: messaging-2 allocation score on controller-0: 0 +native_color: messaging-2 allocation score on controller-1: INFINITY +native_color: messaging-2 allocation score on controller-2: 0 +native_color: messaging-2 allocation score on database-0: -INFINITY +native_color: messaging-2 allocation score on database-1: -INFINITY +native_color: messaging-2 allocation score on database-2: -INFINITY +native_color: messaging-2 allocation score on messaging-0: -INFINITY +native_color: messaging-2 allocation score on messaging-1: -INFINITY +native_color: messaging-2 allocation score on messaging-2: -INFINITY +native_color: openstack-cinder-volume-docker-0 allocation score on controller-0: INFINITY +native_color: openstack-cinder-volume-docker-0 allocation score on controller-1: 0 +native_color: openstack-cinder-volume-docker-0 allocation score on controller-2: 0 +native_color: openstack-cinder-volume-docker-0 allocation score on database-0: -INFINITY +native_color: openstack-cinder-volume-docker-0 allocation score on database-1: -INFINITY +native_color: openstack-cinder-volume-docker-0 allocation score on database-2: -INFINITY +native_color: openstack-cinder-volume-docker-0 allocation score on messaging-0: -INFINITY +native_color: openstack-cinder-volume-docker-0 allocation score on messaging-1: -INFINITY +native_color: openstack-cinder-volume-docker-0 allocation score on messaging-2: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on controller-1: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on controller-2: INFINITY +native_color: rabbitmq-bundle-0 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on messaging-0: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on messaging-1: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on messaging-2: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on controller-1: INFINITY +native_color: rabbitmq-bundle-1 allocation score on controller-2: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on messaging-0: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on messaging-1: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on messaging-2: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on controller-1: INFINITY +native_color: rabbitmq-bundle-2 allocation score on controller-2: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on messaging-0: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on messaging-1: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on messaging-2: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on controller-1: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on controller-2: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on messaging-0: INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on messaging-1: -10000 +native_color: rabbitmq-bundle-docker-0 allocation score on messaging-2: -10000 +native_color: rabbitmq-bundle-docker-1 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on controller-1: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on controller-2: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on messaging-0: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on messaging-1: INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on messaging-2: -10000 +native_color: rabbitmq-bundle-docker-2 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on controller-1: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on controller-2: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on messaging-0: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on messaging-1: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on messaging-2: INFINITY +native_color: rabbitmq:0 allocation score on rabbitmq-bundle-0: INFINITY +native_color: rabbitmq:1 allocation score on rabbitmq-bundle-1: INFINITY +native_color: rabbitmq:2 allocation score on rabbitmq-bundle-2: INFINITY +native_color: redis-bundle-0 allocation score on controller-0: INFINITY +native_color: redis-bundle-0 allocation score on controller-1: 0 +native_color: redis-bundle-0 allocation score on controller-2: 0 +native_color: redis-bundle-0 allocation score on database-0: -INFINITY +native_color: redis-bundle-0 allocation score on database-1: -INFINITY +native_color: redis-bundle-0 allocation score on database-2: -INFINITY +native_color: redis-bundle-0 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-0 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-0 allocation score on messaging-2: -INFINITY +native_color: redis-bundle-1 allocation score on controller-0: 0 +native_color: redis-bundle-1 allocation score on controller-1: INFINITY +native_color: redis-bundle-1 allocation score on controller-2: 0 +native_color: redis-bundle-1 allocation score on database-0: -INFINITY +native_color: redis-bundle-1 allocation score on database-1: -INFINITY +native_color: redis-bundle-1 allocation score on database-2: -INFINITY +native_color: redis-bundle-1 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-1 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-1 allocation score on messaging-2: -INFINITY +native_color: redis-bundle-2 allocation score on controller-0: 0 +native_color: redis-bundle-2 allocation score on controller-1: 0 +native_color: redis-bundle-2 allocation score on controller-2: INFINITY +native_color: redis-bundle-2 allocation score on database-0: -INFINITY +native_color: redis-bundle-2 allocation score on database-1: -INFINITY +native_color: redis-bundle-2 allocation score on database-2: -INFINITY +native_color: redis-bundle-2 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-2 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-2 allocation score on messaging-2: -INFINITY +native_color: redis-bundle-docker-0 allocation score on controller-0: INFINITY +native_color: redis-bundle-docker-0 allocation score on controller-1: 0 +native_color: redis-bundle-docker-0 allocation score on controller-2: 0 +native_color: redis-bundle-docker-0 allocation score on database-0: -INFINITY +native_color: redis-bundle-docker-0 allocation score on database-1: -INFINITY +native_color: redis-bundle-docker-0 allocation score on database-2: -INFINITY +native_color: redis-bundle-docker-0 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-docker-0 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-docker-0 allocation score on messaging-2: -INFINITY +native_color: redis-bundle-docker-1 allocation score on controller-0: -INFINITY +native_color: redis-bundle-docker-1 allocation score on controller-1: INFINITY +native_color: redis-bundle-docker-1 allocation score on controller-2: 0 +native_color: redis-bundle-docker-1 allocation score on database-0: -INFINITY +native_color: redis-bundle-docker-1 allocation score on database-1: -INFINITY +native_color: redis-bundle-docker-1 allocation score on database-2: -INFINITY +native_color: redis-bundle-docker-1 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-docker-1 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-docker-1 allocation score on messaging-2: -INFINITY +native_color: redis-bundle-docker-2 allocation score on controller-0: -INFINITY +native_color: redis-bundle-docker-2 allocation score on controller-1: -INFINITY +native_color: redis-bundle-docker-2 allocation score on controller-2: INFINITY +native_color: redis-bundle-docker-2 allocation score on database-0: -INFINITY +native_color: redis-bundle-docker-2 allocation score on database-1: -INFINITY +native_color: redis-bundle-docker-2 allocation score on database-2: -INFINITY +native_color: redis-bundle-docker-2 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-docker-2 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-docker-2 allocation score on messaging-2: -INFINITY +native_color: redis:0 allocation score on redis-bundle-0: INFINITY +native_color: redis:1 allocation score on redis-bundle-1: INFINITY +native_color: redis:2 allocation score on redis-bundle-2: INFINITY +native_color: stonith-fence_ipmilan-5254000203a2 allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-5254000203a2 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-5254000203a2 allocation score on controller-2: INFINITY +native_color: stonith-fence_ipmilan-5254000203a2 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-5254000203a2 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-5254000203a2 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-5254000203a2 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-5254000203a2 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-5254000203a2 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-5254002f6d57 allocation score on controller-0: -INFINITY +native_color: stonith-fence_ipmilan-5254002f6d57 allocation score on controller-1: INFINITY +native_color: stonith-fence_ipmilan-5254002f6d57 allocation score on controller-2: 0 +native_color: stonith-fence_ipmilan-5254002f6d57 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-5254002f6d57 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-5254002f6d57 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-5254002f6d57 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-5254002f6d57 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-5254002f6d57 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-5254003296a5 allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-5254003296a5 allocation score on controller-1: INFINITY +native_color: stonith-fence_ipmilan-5254003296a5 allocation score on controller-2: 0 +native_color: stonith-fence_ipmilan-5254003296a5 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-5254003296a5 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-5254003296a5 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-5254003296a5 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-5254003296a5 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-5254003296a5 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-5254005f9a33 allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-5254005f9a33 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-5254005f9a33 allocation score on controller-2: INFINITY +native_color: stonith-fence_ipmilan-5254005f9a33 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-5254005f9a33 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-5254005f9a33 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-5254005f9a33 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-5254005f9a33 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-5254005f9a33 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-52540065418e allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-52540065418e allocation score on controller-1: -INFINITY +native_color: stonith-fence_ipmilan-52540065418e allocation score on controller-2: INFINITY +native_color: stonith-fence_ipmilan-52540065418e allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-52540065418e allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-52540065418e allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-52540065418e allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-52540065418e allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-52540065418e allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-52540066e27e allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-52540066e27e allocation score on controller-1: INFINITY +native_color: stonith-fence_ipmilan-52540066e27e allocation score on controller-2: 0 +native_color: stonith-fence_ipmilan-52540066e27e allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-52540066e27e allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-52540066e27e allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-52540066e27e allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-52540066e27e allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-52540066e27e allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-52540098c9ff allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-52540098c9ff allocation score on controller-1: INFINITY +native_color: stonith-fence_ipmilan-52540098c9ff allocation score on controller-2: 0 +native_color: stonith-fence_ipmilan-52540098c9ff allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-52540098c9ff allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-52540098c9ff allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-52540098c9ff allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-52540098c9ff allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-52540098c9ff allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-525400a16c0d allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-525400a16c0d allocation score on controller-1: INFINITY +native_color: stonith-fence_ipmilan-525400a16c0d allocation score on controller-2: -INFINITY +native_color: stonith-fence_ipmilan-525400a16c0d allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-525400a16c0d allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-525400a16c0d allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-525400a16c0d allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-525400a16c0d allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-525400a16c0d allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-525400aab9d9 allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-525400aab9d9 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-525400aab9d9 allocation score on controller-2: INFINITY +native_color: stonith-fence_ipmilan-525400aab9d9 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-525400aab9d9 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-525400aab9d9 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-525400aab9d9 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-525400aab9d9 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-525400aab9d9 allocation score on messaging-2: -INFINITY +redis:0 promotion score on redis-bundle-0: 1 +redis:1 promotion score on redis-bundle-1: 1 +redis:2 promotion score on redis-bundle-2: 1 diff --git a/cts/scheduler/nested-remote-recovery.summary b/cts/scheduler/nested-remote-recovery.summary new file mode 100644 index 0000000000..8356f486e0 --- /dev/null +++ b/cts/scheduler/nested-remote-recovery.summary @@ -0,0 +1,167 @@ +Using the original execution date of: 2018-09-11 21:23:25Z + +Current cluster status: +Online: [ controller-0 controller-1 controller-2 ] +RemoteOnline: [ database-0 database-1 database-2 messaging-0 messaging-1 messaging-2 ] +Containers: [ galera-bundle-1:galera-bundle-docker-1 galera-bundle-2:galera-bundle-docker-2 rabbitmq-bundle-0:rabbitmq-bundle-docker-0 rabbitmq-bundle-1:rabbitmq-bundle-docker-1 rabbitmq-bundle-2:rabbitmq-bundle-docker-2 redis-bundle-0:redis-bundle-docker-0 redis-bundle-1:redis-bundle-docker-1 redis-bundle-2:redis-bundle-docker-2 ] + + database-0 (ocf::pacemaker:remote): Started controller-0 + database-1 (ocf::pacemaker:remote): Started controller-1 + database-2 (ocf::pacemaker:remote): Started controller-2 + messaging-0 (ocf::pacemaker:remote): Started controller-2 + messaging-1 (ocf::pacemaker:remote): Started controller-1 + messaging-2 (ocf::pacemaker:remote): Started controller-1 + Docker container set: galera-bundle [192.168.24.1:8787/rhosp13/openstack-mariadb:pcmklatest] + galera-bundle-0 (ocf::heartbeat:galera): FAILED Master database-0 + galera-bundle-1 (ocf::heartbeat:galera): Master database-1 + galera-bundle-2 (ocf::heartbeat:galera): Master database-2 + Docker container set: rabbitmq-bundle [192.168.24.1:8787/rhosp13/openstack-rabbitmq:pcmklatest] + rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started messaging-0 + rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started messaging-1 + rabbitmq-bundle-2 (ocf::heartbeat:rabbitmq-cluster): Started messaging-2 + Docker container set: redis-bundle [192.168.24.1:8787/rhosp13/openstack-redis:pcmklatest] + redis-bundle-0 (ocf::heartbeat:redis): Slave controller-0 + redis-bundle-1 (ocf::heartbeat:redis): Master controller-1 + redis-bundle-2 (ocf::heartbeat:redis): Slave controller-2 + ip-192.168.24.12 (ocf::heartbeat:IPaddr2): Started controller-1 + ip-10.0.0.109 (ocf::heartbeat:IPaddr2): Started controller-2 + ip-172.17.1.18 (ocf::heartbeat:IPaddr2): Started controller-1 + ip-172.17.1.12 (ocf::heartbeat:IPaddr2): Started controller-2 + ip-172.17.3.18 (ocf::heartbeat:IPaddr2): Started controller-1 + ip-172.17.4.14 (ocf::heartbeat:IPaddr2): Started controller-1 + Docker container set: haproxy-bundle [192.168.24.1:8787/rhosp13/openstack-haproxy:pcmklatest] + haproxy-bundle-docker-0 (ocf::heartbeat:docker): Started controller-0 + haproxy-bundle-docker-1 (ocf::heartbeat:docker): Started controller-1 + haproxy-bundle-docker-2 (ocf::heartbeat:docker): Started controller-2 + Docker container: openstack-cinder-volume [192.168.24.1:8787/rhosp13/openstack-cinder-volume:pcmklatest] + openstack-cinder-volume-docker-0 (ocf::heartbeat:docker): Started controller-0 + stonith-fence_ipmilan-5254005f9a33 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-52540098c9ff (stonith:fence_ipmilan): Started controller-1 + stonith-fence_ipmilan-5254000203a2 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-5254003296a5 (stonith:fence_ipmilan): Started controller-1 + stonith-fence_ipmilan-52540066e27e (stonith:fence_ipmilan): Started controller-1 + stonith-fence_ipmilan-52540065418e (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-525400aab9d9 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-525400a16c0d (stonith:fence_ipmilan): Started controller-1 + stonith-fence_ipmilan-5254002f6d57 (stonith:fence_ipmilan): Started controller-1 + +Transition Summary: + * Fence (reboot) galera-bundle-0 (resource: galera-bundle-docker-0) 'guest is unclean' + * Recover galera-bundle-docker-0 ( database-0 ) + * Recover galera-bundle-0 ( controller-0 ) + * Recover galera:0 ( Master galera-bundle-0 ) + * Restart stonith-fence_ipmilan-5254005f9a33 ( controller-2 ) due to resource definition change + * Restart stonith-fence_ipmilan-52540098c9ff ( controller-1 ) due to resource definition change + * Restart stonith-fence_ipmilan-5254000203a2 ( controller-2 ) due to resource definition change + * Restart stonith-fence_ipmilan-5254003296a5 ( controller-1 ) due to resource definition change + * Restart stonith-fence_ipmilan-52540066e27e ( controller-1 ) due to resource definition change + * Restart stonith-fence_ipmilan-52540065418e ( controller-2 ) due to resource definition change + * Restart stonith-fence_ipmilan-525400aab9d9 ( controller-2 ) due to resource definition change + * Restart stonith-fence_ipmilan-525400a16c0d ( controller-1 ) due to resource definition change + * Restart stonith-fence_ipmilan-5254002f6d57 ( controller-1 ) due to resource definition change + +Executing cluster transition: + * Resource action: galera-bundle-0 stop on controller-0 + * Resource action: stonith-fence_ipmilan-5254005f9a33 stop on controller-2 + * Resource action: stonith-fence_ipmilan-5254005f9a33 start on controller-2 + * Resource action: stonith-fence_ipmilan-5254005f9a33 monitor=60000 on controller-2 + * Resource action: stonith-fence_ipmilan-52540098c9ff stop on controller-1 + * Resource action: stonith-fence_ipmilan-52540098c9ff start on controller-1 + * Resource action: stonith-fence_ipmilan-52540098c9ff monitor=60000 on controller-1 + * Resource action: stonith-fence_ipmilan-5254000203a2 stop on controller-2 + * Resource action: stonith-fence_ipmilan-5254000203a2 start on controller-2 + * Resource action: stonith-fence_ipmilan-5254000203a2 monitor=60000 on controller-2 + * Resource action: stonith-fence_ipmilan-5254003296a5 stop on controller-1 + * Resource action: stonith-fence_ipmilan-5254003296a5 start on controller-1 + * Resource action: stonith-fence_ipmilan-5254003296a5 monitor=60000 on controller-1 + * Resource action: stonith-fence_ipmilan-52540066e27e stop on controller-1 + * Resource action: stonith-fence_ipmilan-52540066e27e start on controller-1 + * Resource action: stonith-fence_ipmilan-52540066e27e monitor=60000 on controller-1 + * Resource action: stonith-fence_ipmilan-52540065418e stop on controller-2 + * Resource action: stonith-fence_ipmilan-52540065418e start on controller-2 + * Resource action: stonith-fence_ipmilan-52540065418e monitor=60000 on controller-2 + * Resource action: stonith-fence_ipmilan-525400aab9d9 stop on controller-2 + * Resource action: stonith-fence_ipmilan-525400aab9d9 start on controller-2 + * Resource action: stonith-fence_ipmilan-525400aab9d9 monitor=60000 on controller-2 + * Resource action: stonith-fence_ipmilan-525400a16c0d stop on controller-1 + * Resource action: stonith-fence_ipmilan-525400a16c0d start on controller-1 + * Resource action: stonith-fence_ipmilan-525400a16c0d monitor=60000 on controller-1 + * Resource action: stonith-fence_ipmilan-5254002f6d57 stop on controller-1 + * Resource action: stonith-fence_ipmilan-5254002f6d57 start on controller-1 + * Resource action: stonith-fence_ipmilan-5254002f6d57 monitor=60000 on controller-1 + * Pseudo action: galera-bundle_demote_0 + * Pseudo action: galera-bundle-master_demote_0 + * Pseudo action: galera_demote_0 + * Pseudo action: galera-bundle-master_demoted_0 + * Pseudo action: galera-bundle_demoted_0 + * Pseudo action: galera-bundle_stop_0 + * Resource action: galera-bundle-docker-0 stop on database-0 + * Pseudo action: stonith-galera-bundle-0-reboot on galera-bundle-0 + * Pseudo action: stonith_complete + * Pseudo action: galera-bundle-master_stop_0 + * Pseudo action: galera_stop_0 + * Pseudo action: galera-bundle-master_stopped_0 + * Pseudo action: galera-bundle_stopped_0 + * Pseudo action: galera-bundle_start_0 + * Pseudo action: all_stopped + * Pseudo action: galera-bundle-master_start_0 + * Resource action: galera-bundle-docker-0 start on database-0 + * Resource action: galera-bundle-docker-0 monitor=60000 on database-0 + * Resource action: galera-bundle-0 start on controller-0 + * Resource action: galera-bundle-0 monitor=30000 on controller-0 + * Resource action: galera start on galera-bundle-0 + * Pseudo action: galera-bundle-master_running_0 + * Pseudo action: galera-bundle_running_0 + * Pseudo action: galera-bundle_promote_0 + * Pseudo action: galera-bundle-master_promote_0 + * Resource action: galera promote on galera-bundle-0 + * Pseudo action: galera-bundle-master_promoted_0 + * Pseudo action: galera-bundle_promoted_0 + * Resource action: galera monitor=10000 on galera-bundle-0 +Using the original execution date of: 2018-09-11 21:23:25Z + +Revised cluster status: +Online: [ controller-0 controller-1 controller-2 ] +RemoteOnline: [ database-0 database-1 database-2 messaging-0 messaging-1 messaging-2 ] +Containers: [ galera-bundle-0:galera-bundle-docker-0 galera-bundle-1:galera-bundle-docker-1 galera-bundle-2:galera-bundle-docker-2 rabbitmq-bundle-0:rabbitmq-bundle-docker-0 rabbitmq-bundle-1:rabbitmq-bundle-docker-1 rabbitmq-bundle-2:rabbitmq-bundle-docker-2 redis-bundle-0:redis-bundle-docker-0 redis-bundle-1:redis-bundle-docker-1 redis-bundle-2:redis-bundle-docker-2 ] + + database-0 (ocf::pacemaker:remote): Started controller-0 + database-1 (ocf::pacemaker:remote): Started controller-1 + database-2 (ocf::pacemaker:remote): Started controller-2 + messaging-0 (ocf::pacemaker:remote): Started controller-2 + messaging-1 (ocf::pacemaker:remote): Started controller-1 + messaging-2 (ocf::pacemaker:remote): Started controller-1 + Docker container set: galera-bundle [192.168.24.1:8787/rhosp13/openstack-mariadb:pcmklatest] + galera-bundle-0 (ocf::heartbeat:galera): Master database-0 + galera-bundle-1 (ocf::heartbeat:galera): Master database-1 + galera-bundle-2 (ocf::heartbeat:galera): Master database-2 + Docker container set: rabbitmq-bundle [192.168.24.1:8787/rhosp13/openstack-rabbitmq:pcmklatest] + rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started messaging-0 + rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started messaging-1 + rabbitmq-bundle-2 (ocf::heartbeat:rabbitmq-cluster): Started messaging-2 + Docker container set: redis-bundle [192.168.24.1:8787/rhosp13/openstack-redis:pcmklatest] + redis-bundle-0 (ocf::heartbeat:redis): Slave controller-0 + redis-bundle-1 (ocf::heartbeat:redis): Master controller-1 + redis-bundle-2 (ocf::heartbeat:redis): Slave controller-2 + ip-192.168.24.12 (ocf::heartbeat:IPaddr2): Started controller-1 + ip-10.0.0.109 (ocf::heartbeat:IPaddr2): Started controller-2 + ip-172.17.1.18 (ocf::heartbeat:IPaddr2): Started controller-1 + ip-172.17.1.12 (ocf::heartbeat:IPaddr2): Started controller-2 + ip-172.17.3.18 (ocf::heartbeat:IPaddr2): Started controller-1 + ip-172.17.4.14 (ocf::heartbeat:IPaddr2): Started controller-1 + Docker container set: haproxy-bundle [192.168.24.1:8787/rhosp13/openstack-haproxy:pcmklatest] + haproxy-bundle-docker-0 (ocf::heartbeat:docker): Started controller-0 + haproxy-bundle-docker-1 (ocf::heartbeat:docker): Started controller-1 + haproxy-bundle-docker-2 (ocf::heartbeat:docker): Started controller-2 + Docker container: openstack-cinder-volume [192.168.24.1:8787/rhosp13/openstack-cinder-volume:pcmklatest] + openstack-cinder-volume-docker-0 (ocf::heartbeat:docker): Started controller-0 + stonith-fence_ipmilan-5254005f9a33 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-52540098c9ff (stonith:fence_ipmilan): Started controller-1 + stonith-fence_ipmilan-5254000203a2 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-5254003296a5 (stonith:fence_ipmilan): Started controller-1 + stonith-fence_ipmilan-52540066e27e (stonith:fence_ipmilan): Started controller-1 + stonith-fence_ipmilan-52540065418e (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-525400aab9d9 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-525400a16c0d (stonith:fence_ipmilan): Started controller-1 + stonith-fence_ipmilan-5254002f6d57 (stonith:fence_ipmilan): Started controller-1 + diff --git a/cts/scheduler/nested-remote-recovery.xml b/cts/scheduler/nested-remote-recovery.xml new file mode 100644 index 0000000000..c15d1f0067 --- /dev/null +++ b/cts/scheduler/nested-remote-recovery.xml @@ -0,0 +1,1273 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/daemons/schedulerd/sched_allocate.c b/daemons/schedulerd/sched_allocate.c index 17d9e026d4..c22b29c821 100644 --- a/daemons/schedulerd/sched_allocate.c +++ b/daemons/schedulerd/sched_allocate.c @@ -1,2498 +1,2566 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_allocate); void set_alloc_actions(pe_working_set_t * data_set); extern void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set); extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set); static void apply_remote_node_ordering(pe_working_set_t *data_set); static enum remote_connection_state get_remote_node_state(pe_node_t *node); enum remote_connection_state { remote_state_unknown = 0, remote_state_alive = 1, remote_state_resting = 2, remote_state_failed = 3, remote_state_stopped = 4 }; static const char * state2text(enum remote_connection_state state) { switch (state) { case remote_state_unknown: return "unknown"; case remote_state_alive: return "alive"; case remote_state_resting: return "resting"; case remote_state_failed: return "failed"; case remote_state_stopped: return "stopped"; } return "impossible"; } resource_alloc_functions_t resource_class_alloc_functions[] = { { native_merge_weights, native_color, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, native_rsc_location, native_action_flags, native_update_actions, native_expand, native_append_meta, }, { group_merge_weights, group_color, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, group_rsc_location, group_action_flags, group_update_actions, group_expand, group_append_meta, }, { clone_merge_weights, clone_color, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, clone_rsc_location, clone_action_flags, container_update_actions, clone_expand, clone_append_meta, }, { container_merge_weights, container_color, container_create_actions, container_create_probe, container_internal_constraints, container_rsc_colocation_lh, container_rsc_colocation_rh, container_rsc_location, container_action_flags, container_update_actions, container_expand, container_append_meta, } }; gboolean update_action_flags(action_t * action, enum pe_action_flags flags, const char *source, int line) { static unsigned long calls = 0; gboolean changed = FALSE; gboolean clear = is_set(flags, pe_action_clear); enum pe_action_flags last = action->flags; if (clear) { action->flags = crm_clear_bit(source, line, action->uuid, action->flags, flags); } else { action->flags = crm_set_bit(source, line, action->uuid, action->flags, flags); } if (last != action->flags) { calls++; changed = TRUE; /* Useful for tracking down _who_ changed a specific flag */ /* CRM_ASSERT(calls != 534); */ clear_bit(flags, pe_action_clear); crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)", action->uuid, action->node ? action->node->details->uname : "[none]", clear ? "un-" : "", flags, last, action->flags, calls, source); } return changed; } static gboolean check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry, gboolean active_here, pe_working_set_t * data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; gboolean changed = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for (; attr_lpc < DIMOF(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if (value == old_value /* i.e. NULL */ || crm_str_eq(value, old_value, TRUE)) { continue; } changed = TRUE; trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set); if (active_here) { force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } } if (force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); set_bit(rsc->flags, pe_rsc_start_pending); delete_resource = TRUE; } else if (changed) { delete_resource = TRUE; } return delete_resource; } static void CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node, const char *reason, pe_working_set_t * data_set) { guint interval_ms = 0; action_t *cancel = NULL; const char *task = NULL; const char *call_id = NULL; const char *interval_ms_s = NULL; CRM_CHECK(xml_op != NULL, return); CRM_CHECK(active_node != NULL, return); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS); interval_ms = crm_parse_ms(interval_ms_s); crm_info("Action " CRM_OP_FMT " on %s will be stopped: %s", rsc->id, task, interval_ms, active_node->details->uname, (reason? reason : "unknown")); cancel = pe_cancel_op(rsc, task, interval_ms, active_node, data_set); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); } static gboolean check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op, pe_working_set_t * data_set) { char *key = NULL; guint interval_ms = 0; const char *interval_ms_s = NULL; const op_digest_cache_t *digest_data = NULL; gboolean did_change = FALSE; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_secure = NULL; CRM_CHECK(active_node != NULL, return FALSE); interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS); interval_ms = crm_parse_ms(interval_ms_s); if (interval_ms > 0) { xmlNode *op_match = NULL; /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) { CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set); free(key); return TRUE; } else if (op_match == NULL) { pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname); free(key); return TRUE; } free(key); key = NULL; } crm_trace("Testing " CRM_OP_FMT " on %s", rsc->id, task, interval_ms, active_node->details->uname); if ((interval_ms == 0) && safe_str_eq(task, RSC_STATUS)) { /* Reload based on the start action not a probe */ task = RSC_START; } else if ((interval_ms == 0) && safe_str_eq(task, RSC_MIGRATED)) { /* Reload based on the start action not a migrate */ task = RSC_START; } else if ((interval_ms == 0) && safe_str_eq(task, RSC_PROMOTE)) { /* Reload based on the start action not a promote */ task = RSC_START; } digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set); if(is_set(data_set->flags, pe_flag_sanitized)) { digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST); } if(digest_data->rc != RSC_DIGEST_MATCH && digest_secure && digest_data->digest_secure_calc && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) { if (is_set(data_set->flags, pe_flag_stdout)) { printf("Only 'private' parameters to " CRM_OP_FMT " on %s changed: %s\n", rsc->id, task, interval_ms, active_node->details->uname, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); } } else if (digest_data->rc == RSC_DIGEST_RESTART) { /* Changes that force a restart */ pe_action_t *required = NULL; did_change = TRUE; key = generate_op_key(rsc->id, task, interval_ms); crm_log_xml_info(digest_data->params_restart, "params:restart"); required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set); pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL, "resource definition change", pe_action_optional, TRUE); trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set); } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) { /* Changes that can potentially be handled by a reload */ const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); did_change = TRUE; trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set); crm_log_xml_info(digest_data->params_all, "params:reload"); key = generate_op_key(rsc->id, task, interval_ms); if (interval_ms > 0) { action_t *op = NULL; #if 0 /* Always reload/restart the entire resource */ ReloadRsc(rsc, active_node, data_set); #else /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */ op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set); set_bit(op->flags, pe_action_reschedule); #endif } else if (digest_restart) { pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id); /* Reload this resource */ ReloadRsc(rsc, active_node, data_set); free(key); } else { pe_action_t *required = NULL; pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id); /* Re-send the start/demote/promote op * Recurring ops will be detected independently */ required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set); pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL, "resource definition change", pe_action_optional, TRUE); } } return did_change; } +/*! + * \internal + * \brief Do deferred action checks after allocation + * + * \param[in] data_set Working set for cluster + */ +static void +check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op, + enum pe_check_parameters check, pe_working_set_t *data_set) +{ + const char *reason = NULL; + op_digest_cache_t *digest_data = NULL; + + switch (check) { + case pe_check_active: + if (check_action_definition(rsc, node, rsc_op, data_set) + && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, + data_set)) { + + reason = "action definition changed"; + } + break; + + case pe_check_last_failure: + digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set); + switch (digest_data->rc) { + case RSC_DIGEST_UNKNOWN: + crm_trace("Resource %s history entry %s on %s has no digest to compare", + rsc->id, ID(rsc_op), node->details->id); + break; + case RSC_DIGEST_MATCH: + break; + default: + reason = "resource parameters have changed"; + break; + } + break; + } + + if (reason) { + pe__clear_failcount(rsc, node, reason, data_set); + } +} + static void check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GListPtr gIter = NULL; int offset = -1; guint interval_ms = 0; int stop_index = 0; int start_index = 0; const char *task = NULL; const char *interval_ms_s = NULL; xmlNode *rsc_op = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; CRM_CHECK(node != NULL, return); if (is_set(rsc->flags, pe_rsc_orphan)) { resource_t *parent = uber_parent(rsc); if(parent == NULL || pe_rsc_is_clone(parent) == FALSE || is_set(parent->flags, pe_rsc_unique)) { pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); DeleteRsc(rsc, node, FALSE, data_set); } else { pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id); } return; } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname); if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; offset++; if (start_index < stop_index) { /* stopped */ continue; } else if (offset < start_index) { /* action occurred prior to a start */ continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS); interval_ms = crm_parse_ms(interval_ms_s); if ((interval_ms > 0) && (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) { // Maintenance mode cancels recurring operations CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); } else if ((interval_ms > 0) || safe_str_eq(task, RSC_STATUS) || safe_str_eq(task, RSC_START) || safe_str_eq(task, RSC_PROMOTE) || safe_str_eq(task, RSC_MIGRATED)) { + /* If a resource operation failed, and the operation's definition * has changed, clear any fail count so they can be retried fresh. */ - if (check_action_definition(rsc, node, rsc_op, data_set) + + if (container_fix_remote_addr(rsc)) { + /* We haven't allocated resources to nodes yet, so if the + * REMOTE_CONTAINER_HACK is used, we may calculate the digest + * based on the literal "#uname" value rather than the properly + * substituted value. That would mistakenly make the action + * definition appear to have been changed. Defer the check until + * later in this case. + */ + pe__add_param_check(rsc_op, rsc, node, pe_check_active, + data_set); + + } else if (check_action_definition(rsc, node, rsc_op, data_set) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { pe__clear_failcount(rsc, node, "action definition changed", data_set); } } } g_list_free(sorted_op_list); } static GListPtr find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones, gboolean partial, pe_working_set_t * data_set) { GListPtr gIter = NULL; gboolean match = FALSE; if (id == NULL) { return NULL; } else if (rsc == NULL && data_set) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } return result; } else if (rsc == NULL) { return NULL; } if (partial) { if (strstr(rsc->id, id)) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) { match = TRUE; } } else { if (strcmp(rsc->id, id) == 0) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } } if (match) { result = g_list_prepend(result, rsc); } if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } } return result; } static void check_actions(pe_working_set_t * data_set) { const char *id = NULL; node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if (node == NULL) { continue; /* Still need to check actions for a maintenance node to cancel existing monitor operations */ } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) { crm_trace("Skipping param check for %s: can't run resources", node->details->uname); continue; } crm_trace("Processing node %s", node->details->uname); if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child(lrm_rscs); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { if (xml_has_children(rsc_entry)) { GListPtr gIter = NULL; GListPtr result = NULL; const char *rsc_id = ID(rsc_entry); CRM_CHECK(rsc_id != NULL, return); result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set); for (gIter = result; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->variant != pe_native) { continue; } check_actions_for(rsc_entry, rsc, node, data_set); } g_list_free(result); } } } } } } } static gboolean apply_placement_constraints(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying constraints..."); for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { pe__location_t *cons = gIter->data; cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); } return TRUE; } static gboolean failcount_clear_action_exists(node_t * node, resource_t * rsc) { gboolean rc = FALSE; char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); GListPtr list = find_actions_exact(rsc->actions, key, node); if (list) { rc = TRUE; } g_list_free(list); free(key); return rc; } /*! * \internal * \brief Force resource away if failures hit migration threshold * * \param[in,out] rsc Resource to check for failures * \param[in,out] node Node to check for failures * \param[in,out] data_set Cluster working set to update */ static void check_migration_threshold(resource_t *rsc, node_t *node, pe_working_set_t *data_set) { int fail_count, countdown; resource_t *failed; /* Migration threshold of 0 means never force away */ if (rsc->migration_threshold == 0) { return; } // If we're ignoring failures, also ignore the migration threshold if (is_set(rsc->flags, pe_rsc_failure_ignored)) { return; } /* If there are no failures, there's no need to force away */ fail_count = pe_get_failcount(node, rsc, NULL, pe_fc_effective|pe_fc_fillers, NULL, data_set); if (fail_count <= 0) { return; } /* How many more times recovery will be tried on this node */ countdown = QB_MAX(rsc->migration_threshold - fail_count, 0); /* If failed resource has a parent, we'll force the parent away */ failed = rsc; if (is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } if (countdown == 0) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_info("%s can fail %d more times on %s before being forced off", failed->id, countdown, node->details->uname); } } static void common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; common_apply_stickiness(child_rsc, node, data_set); } return; } if (is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) { node_t *current = pe_find_node_id(rsc->running_on, node->details->id); node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (current == NULL) { } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) { resource_t *sticky_rsc = rsc; resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set); pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location" " (node=%s, weight=%d)", sticky_rsc->id, node->details->uname, rsc->stickiness); } else { GHashTableIter iter; node_t *nIter = NULL; pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric" " and node %s is not explicitly allowed", rsc->id, node->details->uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) { crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight); } } } /* Check the migration threshold only if a failcount clear action * has not already been placed for this resource on the node. * There is no sense in potentially forcing the resource from this - * node if the failcount is being reset anyway. */ + * node if the failcount is being reset anyway. + * + * @TODO A clear_failcount operation can be scheduled in stage4() via + * check_actions_for(), or in stage5() via check_params(). This runs in + * stage2(), so it cannot detect those, meaning we might check the migration + * threshold when we shouldn't -- worst case, we stop or move the resource, + * then move it back next transition. + */ if (failcount_clear_action_exists(node, rsc) == FALSE) { check_migration_threshold(rsc, node, data_set); } } void complex_set_cmds(resource_t * rsc) { GListPtr gIter = rsc->children; rsc->cmds = &resource_class_alloc_functions[rsc->variant]; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; complex_set_cmds(child_rsc); } } void set_alloc_actions(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; complex_set_cmds(rsc); } } static void calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data) { const char *key = (const char *)gKey; const char *value = (const char *)gValue; int *system_health = (int *)user_data; if (!gKey || !gValue || !user_data) { return; } if (crm_starts_with(key, "#health")) { int score; /* Convert the value into an integer */ score = char2score(value); /* Add it to the running total */ *system_health = merge_weights(score, *system_health); } } static gboolean apply_system_health(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy"); int base_health = 0; if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) { /* Prevent any accidental health -> score translation */ node_score_red = 0; node_score_yellow = 0; node_score_green = 0; return TRUE; } else if (safe_str_eq(health_strategy, "migrate-on-red")) { /* Resources on nodes which have health values of red are * weighted away from that node. */ node_score_red = -INFINITY; node_score_yellow = 0; node_score_green = 0; } else if (safe_str_eq(health_strategy, "only-green")) { /* Resources on nodes which have health values of red or yellow * are forced away from that node. */ node_score_red = -INFINITY; node_score_yellow = -INFINITY; node_score_green = 0; } else if (safe_str_eq(health_strategy, "progressive")) { /* Same as the above, but use the r/y/g scores provided by the user * Defaults are provided by the pe_prefs table * Also, custom health "base score" can be used */ base_health = crm_parse_int(pe_pref(data_set->config_hash, "node-health-base"), "0"); } else if (safe_str_eq(health_strategy, "custom")) { /* Requires the admin to configure the rsc_location constaints for * processing the stored health scores */ /* TODO: Check for the existence of appropriate node health constraints */ return TRUE; } else { crm_err("Unknown node health strategy: %s", health_strategy); return FALSE; } crm_info("Applying automated node health strategy: %s", health_strategy); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { int system_health = base_health; node_t *node = (node_t *) gIter->data; /* Search through the node hash table for system health entries. */ g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health); crm_info(" Node %s has an combined system health of %d", node->details->uname, system_health); /* If the health is non-zero, then create a new rsc2node so that the * weight will be added later on. */ if (system_health != 0) { GListPtr gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set); } } } return TRUE; } gboolean stage0(pe_working_set_t * data_set) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); if (data_set->input == NULL) { return FALSE; } if (is_set(data_set->flags, pe_flag_have_status) == FALSE) { crm_trace("Calculating status"); cluster_status(data_set); } set_alloc_actions(data_set); apply_system_health(data_set); unpack_constraints(cib_constraints, data_set); return TRUE; } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t * data_set) { action_t *probe_node_complete = NULL; for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED); if (node->details->online == FALSE) { if (is_baremetal_remote_node(node) && node->details->remote_rsc && (get_remote_node_state(node) == remote_state_failed)) { pe_fence_node(data_set, node, "the connection is unrecoverable"); } continue; } else if (node->details->unclean) { continue; } else if (node->details->rsc_discovery_enabled == FALSE) { /* resource discovery is disabled for this node */ continue; } if (probed != NULL && crm_is_true(probed) == FALSE) { action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname), CRM_OP_REPROBE, node, FALSE, TRUE, data_set); add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); continue; } for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set); } } return TRUE; } static void rsc_discover_filter(resource_t *rsc, node_t *node) { GListPtr gIter = rsc->children; resource_t *top = uber_parent(rsc); node_t *match; if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) { return; } for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_discover_filter(child_rsc, node); } match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match && match->rsc_discover_mode != pe_discover_exclusive) { match->weight = -INFINITY; } } /* * Count how many valid nodes we have (so we know the maximum number of * colors we can resolve). * * Apply node constraints (i.e. filter the "allowed_nodes" part of resources) */ gboolean stage2(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying placement constraints"); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node == NULL) { /* error */ } else if (node->weight >= 0.0 /* global weight */ && node->details->online && node->details->type != node_ping) { data_set->max_valid_nodes++; } } apply_placement_constraints(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { GListPtr gIter2 = NULL; node_t *node = (node_t *) gIter->data; gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; common_apply_stickiness(rsc, node, data_set); rsc_discover_filter(rsc, node); } } return TRUE; } /* * Create internal resource constraints before allocation */ gboolean stage3(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc->cmds->internal_constraints(rsc, data_set); } return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t * data_set) { check_actions(data_set); return TRUE; } static gint sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) { int rc = 0; int r1_weight = -INFINITY; int r2_weight = -INFINITY; const char *reason = "existence"; const GListPtr nodes = (GListPtr) data; resource_t *resource1 = (resource_t *) convert_const_pointer(a); resource_t *resource2 = (resource_t *) convert_const_pointer(b); node_t *r1_node = NULL; node_t *r2_node = NULL; GListPtr gIter = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; if (a == NULL && b == NULL) { goto done; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } reason = "priority"; r1_weight = resource1->priority; r2_weight = resource2->priority; if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "no node list"; if (nodes == NULL) { goto done; } r1_nodes = rsc_merge_weights(resource1, resource1->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); dump_node_scores(LOG_TRACE, NULL, resource1->id, r1_nodes); r2_nodes = rsc_merge_weights(resource2, resource2->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); dump_node_scores(LOG_TRACE, NULL, resource2->id, r2_nodes); /* Current location score */ reason = "current location"; r1_weight = -INFINITY; r2_weight = -INFINITY; if (resource1->running_on) { r1_node = pe__current_node(resource1); r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id); if (r1_node != NULL) { r1_weight = r1_node->weight; } } if (resource2->running_on) { r2_node = pe__current_node(resource2); r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id); if (r2_node != NULL) { r2_weight = r2_node->weight; } } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "score"; for (gIter = nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; r1_node = NULL; r2_node = NULL; r1_weight = -INFINITY; if (r1_nodes) { r1_node = g_hash_table_lookup(r1_nodes, node->details->id); } if (r1_node) { r1_weight = r1_node->weight; } r2_weight = -INFINITY; if (r2_nodes) { r2_node = g_hash_table_lookup(r2_nodes, node->details->id); } if (r2_node) { r2_weight = r2_node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } } done: crm_trace("%s (%d) on %s %c %s (%d) on %s: %s", resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a", rc < 0 ? '>' : rc > 0 ? '<' : '=', resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason); if (r1_nodes) { g_hash_table_destroy(r1_nodes); } if (r2_nodes) { g_hash_table_destroy(r2_nodes); } return rc; } static void allocate_resources(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (is_set(data_set->flags, pe_flag_have_remote_nodes)) { /* Force remote connection resources to be allocated first. This * also forces any colocation dependencies to be allocated as well */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->is_remote_node == FALSE) { continue; } pe_rsc_trace(rsc, "Allocating: %s", rsc->id); /* For remote node connection resources, always prefer the partial * migration target during resource allocation, if the rsc is in the * middle of a migration. */ rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set); } } /* now do the rest of the resources */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->is_remote_node == TRUE) { continue; } pe_rsc_trace(rsc, "Allocating: %s", rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } } /* We always use pe_order_preserve with these convenience functions to exempt * internally generated constraints from the prohibition of user constraints * involving remote connection resources. * * The start ordering additionally uses pe_order_runnable_left so that the * specified action is not runnable if the start is not runnable. */ static inline void order_start_then_action(resource_t *lh_rsc, action_t *rh_action, enum pe_ordering extra, pe_working_set_t *data_set) { if (lh_rsc && rh_action && data_set) { custom_action_order(lh_rsc, start_key(lh_rsc), NULL, rh_action->rsc, NULL, rh_action, pe_order_preserve | pe_order_runnable_left | extra, data_set); } } static inline void order_action_then_stop(action_t *lh_action, resource_t *rh_rsc, enum pe_ordering extra, pe_working_set_t *data_set) { if (lh_action && rh_rsc && data_set) { custom_action_order(lh_action->rsc, NULL, lh_action, rh_rsc, stop_key(rh_rsc), NULL, pe_order_preserve | extra, data_set); } } static void cleanup_orphans(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { return; } /* Don't recurse into ->children, those are just unallocated clone instances */ if(is_not_set(rsc->flags, pe_rsc_orphan)) { return; } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node->details->online && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { pe_action_t *clear_op = NULL; clear_op = pe__clear_failcount(rsc, node, "it is orphaned", data_set); /* We can't use order_action_then_stop() here because its * pe_order_preserve breaks things */ custom_action_order(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), NULL, pe_order_optional, data_set); } } } gboolean stage5(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (safe_str_neq(data_set->placement_strategy, "default")) { GListPtr nodes = g_list_copy(data_set->nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); data_set->resources = g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes); g_list_free(nodes); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original", node); } crm_trace("Allocating services"); /* Take (next) highest resource, assign it and create its actions */ allocate_resources(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining", node); } + // Process deferred action checks + pe__foreach_param_check(data_set, check_params); + pe__free_param_checks(data_set); + if (is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Calculating needed probes"); /* This code probably needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=100: With probes: ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints 36s ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph Without probes: ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph */ probe_resources(data_set); } crm_trace("Handle orphans"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; cleanup_orphans(rsc, data_set); } crm_trace("Creating actions"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc->cmds->create_actions(rsc, data_set); } crm_trace("Creating done"); return TRUE; } static gboolean is_managed(const resource_t * rsc) { GListPtr gIter = rsc->children; if (is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; if (is_managed(child_rsc)) { return TRUE; } } return FALSE; } static gboolean any_managed_resources(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (is_managed(rsc)) { return TRUE; } } return FALSE; } /*! * \internal * \brief Create pseudo-op for guest node fence, and order relative to it * * \param[in] node Guest node to fence * \param[in] done STONITH_DONE operation * \param[in] data_set Working set of CIB state */ static void fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set) { resource_t *container = node->details->remote_rsc->container; pe_action_t *stop = NULL; pe_action_t *stonith_op = NULL; /* The fence action is just a label; we don't do anything differently for * off vs. reboot. We specify it explicitly, rather than let it default to * cluster's default action, because we are not _initiating_ fencing -- we * are creating a pseudo-event to describe fencing that is already occurring * by other means (container recovery). */ const char *fence_action = "off"; /* Check whether guest's container resource has any explicit stop or * start (the stop may be implied by fencing of the guest's host). */ if (container) { stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL); if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) { fence_action = "reboot"; } } /* Create a fence pseudo-event, so we have an event to order actions * against, and the controller can always detect it. */ stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", data_set); update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable, __FUNCTION__, __LINE__); /* We want to imply stops/demotes after the guest is stopped, not wait until * it is restarted, so we always order pseudo-fencing after stop, not start * (even though start might be closer to what is done for a real reboot). */ if(stop && is_set(stop->flags, pe_action_pseudo)) { pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, data_set); crm_info("Implying guest node %s is down (action %d) after %s fencing", node->details->uname, stonith_op->id, stop->node->details->uname); order_actions(parent_stonith_op, stonith_op, pe_order_runnable_left|pe_order_implies_then); } else if (stop) { order_actions(stop, stonith_op, pe_order_runnable_left|pe_order_implies_then); crm_info("Implying guest node %s is down (action %d) " "after container %s is stopped (action %d)", node->details->uname, stonith_op->id, container->id, stop->id); } else { /* If we're fencing the guest node but there's no stop for the guest * resource, we must think the guest is already stopped. However, we may * think so because its resource history was just cleaned. To avoid * unnecessarily considering the guest node down if it's really up, * order the pseudo-fencing after any stop of the connection resource, * which will be ordered after any container (re-)probe. */ stop = find_first_action(node->details->remote_rsc->actions, NULL, RSC_STOP, NULL); if (stop) { order_actions(stop, stonith_op, pe_order_optional); crm_info("Implying guest node %s is down (action %d) " "after connection is stopped (action %d)", node->details->uname, stonith_op->id, stop->id); } else { /* Not sure why we're fencing, but everything must already be * cleanly stopped. */ crm_info("Implying guest node %s is down (action %d) ", node->details->uname, stonith_op->id); } } /* Order/imply other actions relative to pseudo-fence as with real fence */ stonith_constraints(node, stonith_op, data_set); if(done) { order_actions(stonith_op, done, pe_order_implies_then); } } /* * Create dependencies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t * data_set) { action_t *dc_down = NULL; action_t *dc_fence = NULL; action_t *stonith_op = NULL; action_t *last_stonith = NULL; gboolean integrity_lost = FALSE; action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *done = get_pseudo_op(STONITH_DONE, data_set); gboolean need_stonith = TRUE; GListPtr gIter; GListPtr stonith_ops = NULL; /* Remote ordering constraints need to happen prior to calculate * fencing because it is one more place we will mark the node as * dirty. * * A nice side-effect of doing it first is that we can remove a * bunch of special logic from apply_*_ordering() because its * already part of pe_fence_node() */ crm_trace("Creating remote ordering constraints"); apply_remote_node_ordering(data_set); crm_trace("Processing fencing and shutdown cases"); if (any_managed_resources(data_set) == FALSE) { crm_notice("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } /* Check each node for stonith/shutdown */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; /* Guest nodes are "fenced" by recovering their container resource, * so handle them separately. */ if (is_container_remote_node(node)) { if (node->details->remote_requires_reset && need_stonith) { fence_guest(node, done, data_set); } continue; } stonith_op = NULL; if (node->details->unclean && need_stonith && pe_can_fence(data_set, node)) { stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", data_set); pe_warn("Scheduling Node %s for STONITH", node->details->uname); stonith_constraints(node, stonith_op, data_set); if (node->details->is_dc) { dc_down = stonith_op; dc_fence = stonith_op; } else if (is_set(data_set->flags, pe_flag_concurrent_fencing) == FALSE) { if (last_stonith) { order_actions(last_stonith, stonith_op, pe_order_optional); } last_stonith = stonith_op; } else { order_actions(stonith_op, done, pe_order_implies_then); stonith_ops = g_list_append(stonith_ops, stonith_op); } } else if (node->details->online && node->details->shutdown && /* TODO define what a shutdown op means for a remote node. * For now we do not send shutdown operations for remote nodes, but * if we can come up with a good use for this in the future, we will. */ is_remote_node(node) == FALSE) { action_t *down_op = NULL; crm_notice("Scheduling Node %s for shutdown", node->details->uname); down_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN, node->details->uname), CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set); shutdown_constraints(node, down_op, data_set); add_hash_param(down_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); if (node->details->is_dc) { dc_down = down_op; } } if (node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } } if (integrity_lost) { if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no-quorum-policy is set to ignore)"); } } if (dc_down != NULL) { GListPtr gIter = NULL; crm_trace("Ordering shutdowns before %s on %s (DC)", dc_down->task, dc_down->node->details->uname); add_hash_param(dc_down->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { action_t *node_stop = (action_t *) gIter->data; if (safe_str_neq(CRM_OP_SHUTDOWN, node_stop->task)) { continue; } else if (node_stop->node->details->is_dc) { continue; } crm_debug("Ordering shutdown on %s before %s on %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_optional); } if (last_stonith) { if (dc_down != last_stonith) { order_actions(last_stonith, dc_down, pe_order_optional); } } else { GListPtr gIter2 = NULL; for (gIter2 = stonith_ops; gIter2 != NULL; gIter2 = gIter2->next) { stonith_op = (action_t *) gIter2->data; if (dc_down != stonith_op) { order_actions(stonith_op, dc_down, pe_order_optional); } } } } if (dc_fence) { order_actions(dc_down, done, pe_order_implies_then); } else if (last_stonith) { order_actions(last_stonith, done, pe_order_implies_then); } order_actions(done, all_stopped, pe_order_implies_then); g_list_free(stonith_ops); return TRUE; } /* * Determine the sets of independent actions and the correct order for the * actions in each set. * * Mark dependencies of un-runnable actions un-runnable * */ static GListPtr find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key) { GListPtr list = NULL; list = find_actions(actions, original_key, NULL); if (list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *task = NULL; guint interval_ms = 0; if (parse_op_key(original_key, NULL, &task, &interval_ms)) { key = generate_op_key(rsc->id, task, interval_ms); list = find_actions(actions, key, NULL); } else { crm_err("search key: %s", original_key); } free(key); free(task); } return list; } static void rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc, pe__ordering_t *order) { GListPtr gIter = NULL; GListPtr rh_actions = NULL; action_t *rh_action = NULL; enum pe_ordering type; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); type = order->type; rh_action = order->rh_action; crm_trace("Processing RH of ordering constraint %d", order->id); if (rh_action != NULL) { rh_actions = g_list_prepend(NULL, rh_action); } else if (rsc != NULL) { rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task); } if (rh_actions == NULL) { pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..." " ignoring", rsc->id, order->rh_action_task); if (lh_action) { pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid); } return; } if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) { pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid, order->rh_action_task); clear_bit(type, pe_order_implies_then); } gIter = rh_actions; for (; gIter != NULL; gIter = gIter->next) { action_t *rh_action_iter = (action_t *) gIter->data; if (lh_action) { order_actions(lh_action, rh_action_iter, type); } else if (type & pe_order_implies_then) { update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type); } } g_list_free(rh_actions); } static void rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order, pe_working_set_t *data_set) { GListPtr gIter = NULL; GListPtr lh_actions = NULL; action_t *lh_action = order->lh_action; resource_t *rh_rsc = order->rh_rsc; crm_trace("Processing LH of ordering constraint %d", order->id); CRM_ASSERT(lh_rsc != NULL); if (lh_action != NULL) { lh_actions = g_list_prepend(NULL, lh_action); } else { lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task); } if (lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; char *op_type = NULL; guint interval_ms = 0; parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms); key = generate_op_key(lh_rsc->id, op_type, interval_ms); if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) { free(key); pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(op_type, RSC_DEMOTE)) { free(key); pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } else { pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); lh_actions = g_list_prepend(NULL, lh_action); } free(op_type); } gIter = lh_actions; for (; gIter != NULL; gIter = gIter->next) { action_t *lh_action_iter = (action_t *) gIter->data; if (rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if (rh_rsc) { rsc_order_then(lh_action_iter, rh_rsc, order); } else if (order->rh_action) { order_actions(lh_action_iter, order->rh_action, order->type); } } g_list_free(lh_actions); } extern void update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set); static int is_recurring_action(action_t *action) { const char *interval_ms_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS); guint interval_ms = crm_parse_ms(interval_ms_s); return (interval_ms > 0); } static void apply_container_ordering(action_t *action, pe_working_set_t *data_set) { /* VMs are also classified as containers for these purposes... in * that they both involve a 'thing' running on a real or remote * cluster node. * * This allows us to be smarter about the type and extent of * recovery actions required in various scenarios */ resource_t *remote_rsc = NULL; resource_t *container = NULL; enum action_tasks task = text2task(action->task); CRM_ASSERT(action->rsc); CRM_ASSERT(action->node); CRM_ASSERT(is_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); container = remote_rsc->container; CRM_ASSERT(container); if(is_set(container->flags, pe_rsc_failed)) { pe_fence_node(data_set, action->node, "container failed"); } crm_trace("Order %s action %s relative to %s%s for %s%s", action->task, action->uuid, is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, is_set(container->flags, pe_rsc_failed)? "failed " : "", container->id); if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE) || safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: /* Force resource recovery if the container is recovered */ order_start_then_action(container, action, pe_order_implies_then, data_set); /* Wait for the connection resource to be up too */ order_start_then_action(remote_rsc, action, pe_order_none, data_set); break; case stop_rsc: case action_demote: if (is_set(container->flags, pe_rsc_failed)) { /* When the container representing a guest node fails, any stop * or demote actions for resources running on the guest node * are implied by the container stopping. This is similar to * how fencing operations work for cluster nodes and remote * nodes. */ } else { /* Ensure the operation happens before the connection is brought * down. * * If we really wanted to, we could order these after the * connection start, IFF the container's current role was * stopped (otherwise we re-introduce an ordering loop when the * connection is restarting). */ order_action_then_stop(action, remote_rsc, pe_order_none, data_set); } break; default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ if(task != no_action) { order_start_then_action(remote_rsc, action, pe_order_implies_then, data_set); } } else { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } break; } } static enum remote_connection_state get_remote_node_state(pe_node_t *node) { resource_t *remote_rsc = NULL; node_t *cluster_node = NULL; CRM_ASSERT(node); remote_rsc = node->details->remote_rsc; CRM_ASSERT(remote_rsc); cluster_node = pe__current_node(remote_rsc); /* If the cluster node the remote connection resource resides on * is unclean or went offline, we can't process any operations * on that remote node until after it starts elsewhere. */ if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) { /* The connection resource is not going to run anywhere */ if (cluster_node && cluster_node->details->unclean) { /* The remote connection is failed because its resource is on a * failed node and can't be recovered elsewhere, so we must fence. */ return remote_state_failed; } if (is_not_set(remote_rsc->flags, pe_rsc_failed)) { /* Connection resource is cleanly stopped */ return remote_state_stopped; } /* Connection resource is failed */ if ((remote_rsc->next_role == RSC_ROLE_STOPPED) && remote_rsc->remote_reconnect_ms && node->details->remote_was_fenced) { /* We won't know whether the connection is recoverable until the * reconnect interval expires and we reattempt connection. */ return remote_state_unknown; } /* The remote connection is in a failed state. If there are any * resources known to be active on it (stop) or in an unknown state * (probe), we must assume the worst and fence it. */ return remote_state_failed; } else if (cluster_node == NULL) { /* Connection is recoverable but not currently running anywhere, see if we can recover it first */ return remote_state_unknown; } else if(cluster_node->details->unclean == TRUE || cluster_node->details->online == FALSE) { /* Connection is running on a dead node, see if we can recover it first */ return remote_state_resting; } else if (g_list_length(remote_rsc->running_on) > 1 && remote_rsc->partial_migration_source && remote_rsc->partial_migration_target) { /* We're in the middle of migrating a connection resource, * wait until after the resource migrates before performing * any actions. */ return remote_state_resting; } return remote_state_alive; } /*! * \internal * \brief Order actions on remote node relative to actions for the connection */ static void apply_remote_ordering(action_t *action, pe_working_set_t *data_set) { resource_t *remote_rsc = NULL; enum action_tasks task = text2task(action->task); enum remote_connection_state state = get_remote_node_state(action->node); enum pe_ordering order_opts = pe_order_none; if (action->rsc == NULL) { return; } CRM_ASSERT(action->node); CRM_ASSERT(is_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); crm_trace("Order %s action %s relative to %s%s (state: %s)", action->task, action->uuid, is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, state2text(state)); if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE) || safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: order_opts = pe_order_none; if (state == remote_state_failed) { /* Force recovery, by making this action required */ order_opts |= pe_order_implies_then; } /* Ensure connection is up before running this action */ order_start_then_action(remote_rsc, action, order_opts, data_set); break; case stop_rsc: if(state == remote_state_alive) { order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else if(state == remote_state_failed) { /* We would only be here if the resource is * running on the remote node. Since we have no * way to stop it, it is necessary to fence the * node. */ pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable"); order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) { /* State must be remote_state_unknown or remote_state_stopped. * Since the connection is not coming back up in this * transition, stop this resource first. */ order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else { /* The connection is going to be started somewhere else, so * stop this resource after that completes. */ order_start_then_action(remote_rsc, action, pe_order_none, data_set); } break; case action_demote: /* Only order this demote relative to the connection start if the * connection isn't being torn down. Otherwise, the demote would be * blocked because the connection start would not be allowed. */ if(state == remote_state_resting || state == remote_state_unknown) { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } /* Otherwise we can rely on the stop ordering */ break; default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ order_start_then_action(remote_rsc, action, pe_order_implies_then, data_set); } else { node_t *cluster_node = pe__current_node(remote_rsc); if(task == monitor_rsc && state == remote_state_failed) { /* We would only be here if we do not know the * state of the resource on the remote node. * Since we have no way to find out, it is * necessary to fence the node. */ pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable"); } if(cluster_node && state == remote_state_stopped) { /* The connection is currently up, but is going * down permanently. * * Make sure we check services are actually * stopped _before_ we let the connection get * closed */ order_action_then_stop(action, remote_rsc, pe_order_runnable_left, data_set); } else { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } } break; } } static void apply_remote_node_ordering(pe_working_set_t *data_set) { if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { return; } for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; resource_t *remote = NULL; // We are only interested in resource actions if (action->rsc == NULL) { continue; } /* Special case: If we are clearing the failcount of an actual * remote connection resource, then make sure this happens before * any start of the resource in this transition. */ if (action->rsc->is_remote_node && safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) { custom_action_order(action->rsc, NULL, action, action->rsc, generate_op_key(action->rsc->id, RSC_START, 0), NULL, pe_order_optional, data_set); continue; } // We are only interested in actions allocated to a node if (action->node == NULL) { continue; } if (is_remote_node(action->node) == FALSE) { continue; } /* We are only interested in real actions. * * @TODO This is probably wrong; pseudo-actions might be converted to * real actions and vice versa later in update_actions() at the end of * stage7(). */ if (is_set(action->flags, pe_action_pseudo)) { continue; } remote = action->node->details->remote_rsc; if (remote == NULL) { // Orphaned continue; } /* The action occurs across a remote connection, so create * ordering constraints that guarantee the action occurs while the node * is active (after start, before stop ... things like that). * * This is somewhat brittle in that we need to make sure the results of * this ordering are compatible with the result of get_router_node(). * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part * of this logic rather than action2xml(). */ if (remote->container) { crm_trace("Container ordering for %s", action->uuid); apply_container_ordering(action, data_set); } else { crm_trace("Remote ordering for %s", action->uuid); apply_remote_ordering(action, data_set); } } } static void order_probes(pe_working_set_t * data_set) { #if 0 GListPtr gIter = NULL; for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; /* Given "A then B", we would prefer to wait for A to be * started before probing B. * * If A was a filesystem on which the binaries and data for B * lived, it would have been useful if the author of B's agent * could assume that A is running before B.monitor will be * called. * * However we can't _only_ probe once A is running, otherwise * we'd not detect the state of B if A could not be started * for some reason. * * In practice however, we cannot even do an opportunistic * version of this because B may be moving: * * B.probe -> B.start * B.probe -> B.stop * B.stop -> B.start * A.stop -> A.start * A.start -> B.probe * * So far so good, but if we add the result of this code: * * B.stop -> A.stop * * Then we get a loop: * * B.probe -> B.stop -> A.stop -> A.start -> B.probe * * We could kill the 'B.probe -> B.stop' dependency, but that * could mean stopping B "too" soon, because B.start must wait * for the probes to complete. * * Another option is to allow it only if A is a non-unique * clone with clone-max == node-max (since we'll never be * moving it). However, we could still be stopping one * instance at the same time as starting another. * The complexity of checking for allowed conditions combined * with the ever narrowing usecase suggests that this code * should remain disabled until someone gets smarter. */ action_t *start = NULL; GListPtr actions = NULL; GListPtr probes = NULL; char *key = NULL; key = start_key(rsc); actions = find_actions(rsc->actions, key, NULL); free(key); if (actions) { start = actions->data; g_list_free(actions); } if(start == NULL) { crm_err("No start action for %s", rsc->id); continue; } key = generate_op_key(rsc->id, CRMD_ACTION_STATUS, 0); probes = find_actions(rsc->actions, key, NULL); free(key); for (actions = start->actions_before; actions != NULL; actions = actions->next) { action_wrapper_t *before = (action_wrapper_t *) actions->data; GListPtr pIter = NULL; action_t *first = before->action; resource_t *first_rsc = first->rsc; if(first->required_runnable_before) { GListPtr clone_actions = NULL; for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) { before = (action_wrapper_t *) clone_actions->data; crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid); CRM_ASSERT(before->action->rsc); first_rsc = before->action->rsc; break; } } else if(safe_str_neq(first->task, RSC_START)) { crm_trace("Not a start op %s for %s", first->uuid, start->uuid); } if(first_rsc == NULL) { continue; } else if(uber_parent(first_rsc) == uber_parent(start->rsc)) { crm_trace("Same parent %s for %s", first_rsc->id, start->uuid); continue; } else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) { crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid); continue; } crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant); for (pIter = probes; pIter != NULL; pIter = pIter->next) { action_t *probe = (action_t *) pIter->data; crm_err("Ordering %s before %s", first->uuid, probe->uuid); order_actions(first, probe, pe_order_optional); } } } #endif } gboolean stage7(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying ordering constraints"); /* Don't ask me why, but apparently they need to be processed in * the order they were created in... go figure * * Also g_list_append() has horrendous performance characteristics * So we need to use g_list_prepend() and then reverse the list here */ data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints); for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) { pe__ordering_t *order = gIter->data; resource_t *rsc = order->lh_rsc; crm_trace("Applying ordering constraint: %d", order->id); if (rsc != NULL) { crm_trace("rsc_action-to-*"); rsc_order_first(rsc, order, data_set); continue; } rsc = order->rh_rsc; if (rsc != NULL) { crm_trace("action-to-rsc_action"); rsc_order_then(order->lh_action, rsc, order); } else { crm_trace("action-to-action"); order_actions(order->lh_action, order->rh_action, order->type); } } for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; update_colo_start_chain(action, data_set); } crm_trace("Ordering probes"); order_probes(data_set); crm_trace("Updating %d actions", g_list_length(data_set->actions)); for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; update_action(action, data_set); } LogNodeActions(data_set, FALSE); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; LogActions(rsc, data_set, FALSE); } return TRUE; } int transition_id = -1; /* * Create a dependency graph to send to the transitioner (via the controller) */ gboolean stage8(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *value = NULL; transition_id++; crm_trace("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if (crm_int_helper(value, NULL) > 0) { crm_xml_add(data_set->graph, "migration-limit", value); } /* errors... slist_iter(action, action_t, action_list, lpc, if(action->optional == FALSE && action->runnable == FALSE) { print_action("Ignoring", action, TRUE); } ); */ gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id); rsc->cmds->expand(rsc, data_set); } crm_log_xml_trace(data_set->graph, "created resource-driven action list"); /* pseudo action to distribute list of nodes with maintenance state update */ add_maintenance_update(data_set); /* catch any non-resource specific actions */ crm_trace("processing non-resource actions"); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->rsc && action->node && action->node->details->shutdown && is_not_set(action->rsc->flags, pe_rsc_maintenance) && is_not_set(action->flags, pe_action_optional) && is_not_set(action->flags, pe_action_runnable) && crm_str_eq(action->task, RSC_STOP, TRUE) ) { /* Eventually we should just ignore the 'fence' case * But for now it's the best way to detect (in CTS) when * CIB resource updates are being lost */ if (is_set(data_set->flags, pe_flag_have_quorum) || data_set->no_quorum_policy == no_quorum_ignore) { crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)", action->node->details->unclean ? "fence" : "shut down", action->node->details->uname, action->rsc->id, is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked", is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "", action->uuid); } } graph_element_from_action(action, data_set); } crm_log_xml_trace(data_set->graph, "created generic action list"); crm_trace("Created transition graph %d.", transition_id); return TRUE; } void LogNodeActions(pe_working_set_t * data_set, gboolean terminal) { GListPtr gIter = NULL; for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { char *node_name = NULL; char *task = NULL; action_t *action = (action_t *) gIter->data; if (action->rsc != NULL) { continue; } else if (is_set(action->flags, pe_action_optional)) { continue; } if (is_container_remote_node(action->node)) { node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id); } else if(action->node) { node_name = crm_strdup_printf("%s", action->node->details->uname); } if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { task = strdup("Shutdown"); } else if (safe_str_eq(action->task, CRM_OP_FENCE)) { const char *op = g_hash_table_lookup(action->meta, "stonith_action"); task = crm_strdup_printf("Fence (%s)", op); } if(task == NULL) { /* Nothing to report */ } else if(terminal && action->reason) { printf(" * %s %s '%s'\n", task, node_name, action->reason); } else if(terminal) { printf(" * %s %s\n", task, node_name); } else if(action->reason) { crm_notice(" * %s %s '%s'\n", task, node_name, action->reason); } else { crm_notice(" * %s %s\n", task, node_name); } free(node_name); free(task); } } diff --git a/daemons/schedulerd/sched_bundle.c b/daemons/schedulerd/sched_bundle.c index a73bc5fd32..abc19679e5 100644 --- a/daemons/schedulerd/sched_bundle.c +++ b/daemons/schedulerd/sched_bundle.c @@ -1,988 +1,995 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #define VARIANT_CONTAINER 1 #include static bool is_child_container_node(container_variant_data_t *data, pe_node_t *node) { for (GListPtr gIter = data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; if(node->details == tuple->node->details) { return TRUE; } } return FALSE; } gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); void distribute_children(resource_t *rsc, GListPtr children, GListPtr nodes, int max, int per_host_max, pe_working_set_t * data_set); static GListPtr get_container_list(resource_t *rsc) { GListPtr containers = NULL; container_variant_data_t *data = NULL; if(rsc->variant == pe_container) { get_container_variant_data(data, rsc); for (GListPtr gIter = data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; containers = g_list_append(containers, tuple->docker); } } return containers; } static GListPtr get_containers_or_children(resource_t *rsc) { GListPtr containers = NULL; container_variant_data_t *data = NULL; if(rsc->variant == pe_container) { get_container_variant_data(data, rsc); for (GListPtr gIter = data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; containers = g_list_append(containers, tuple->docker); } return containers; } else { return rsc->children; } } static bool migration_threshold_reached(resource_t *rsc, node_t *node, pe_working_set_t *data_set) { int fail_count, countdown; /* Migration threshold of 0 means never force away */ if (rsc->migration_threshold == 0) { return FALSE; } // If we're ignoring failures, also ignore the migration threshold if (is_set(rsc->flags, pe_rsc_failure_ignored)) { return FALSE; } /* If there are no failures, there's no need to force away */ fail_count = pe_get_failcount(node, rsc, NULL, pe_fc_effective|pe_fc_fillers, NULL, data_set); if (fail_count <= 0) { return FALSE; } /* How many more times recovery will be tried on this node */ countdown = QB_MAX(rsc->migration_threshold - fail_count, 0); if (countdown == 0) { crm_warn("Forcing %s away from %s after %d failures (max=%d)", rsc->id, node->details->uname, fail_count, rsc->migration_threshold); return TRUE; } crm_info("%s can fail %d more times on %s before being forced off", rsc->id, countdown, node->details->uname); return FALSE; } node_t * container_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { GListPtr containers = NULL; GListPtr nodes = NULL; container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return NULL); get_container_variant_data(container_data, rsc); set_bit(rsc->flags, pe_rsc_allocating); containers = get_container_list(rsc); dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__, rsc->allowed_nodes); nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); containers = g_list_sort_with_data(containers, sort_clone_instance, data_set); distribute_children(rsc, containers, nodes, container_data->replicas, container_data->replicas_per_host, data_set); g_list_free(nodes); g_list_free(containers); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; pe_node_t *docker_host = tuple->docker->allocated_to; CRM_ASSERT(tuple); if(tuple->ip) { tuple->ip->cmds->allocate(tuple->ip, prefer, data_set); } if(tuple->remote && is_remote_node(docker_host)) { /* We need 'nested' connection resources to be on the same * host because pacemaker-remoted only supports a single * active connection */ rsc_colocation_new("child-remote-with-docker-remote", NULL, INFINITY, tuple->remote, docker_host->details->remote_rsc, NULL, NULL, data_set); } if(tuple->remote) { tuple->remote->cmds->allocate(tuple->remote, prefer, data_set); } // Explicitly allocate tuple->child before the container->child if(tuple->child) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, tuple->child->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if(node->details != tuple->node->details) { node->weight = -INFINITY; } else if(migration_threshold_reached(tuple->child, node, data_set) == FALSE) { node->weight = INFINITY; } } set_bit(tuple->child->parent->flags, pe_rsc_allocating); tuple->child->cmds->allocate(tuple->child, tuple->node, data_set); clear_bit(tuple->child->parent->flags, pe_rsc_allocating); } } if(container_data->child) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, container_data->child->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if(is_child_container_node(container_data, node)) { node->weight = 0; } else { node->weight = -INFINITY; } } container_data->child->cmds->allocate(container_data->child, prefer, data_set); } clear_bit(rsc->flags, pe_rsc_allocating); clear_bit(rsc->flags, pe_rsc_provisional); return NULL; } void container_create_actions(resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *action = NULL; GListPtr containers = NULL; container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return); containers = get_container_list(rsc); get_container_variant_data(container_data, rsc); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(tuple->ip) { tuple->ip->cmds->create_actions(tuple->ip, data_set); } if(tuple->docker) { tuple->docker->cmds->create_actions(tuple->docker, data_set); } if(tuple->remote) { tuple->remote->cmds->create_actions(tuple->remote, data_set); } } clone_create_pseudo_actions(rsc, containers, NULL, NULL, data_set); if(container_data->child) { container_data->child->cmds->create_actions(container_data->child, data_set); if (is_set(container_data->child->flags, pe_rsc_promotable)) { /* promote */ action = create_pseudo_resource_op(rsc, RSC_PROMOTE, TRUE, TRUE, data_set); action = create_pseudo_resource_op(rsc, RSC_PROMOTED, TRUE, TRUE, data_set); action->priority = INFINITY; /* demote */ action = create_pseudo_resource_op(rsc, RSC_DEMOTE, TRUE, TRUE, data_set); action = create_pseudo_resource_op(rsc, RSC_DEMOTED, TRUE, TRUE, data_set); action->priority = INFINITY; } } g_list_free(containers); } void container_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) { container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return); get_container_variant_data(container_data, rsc); if(container_data->child) { new_rsc_order(rsc, RSC_START, container_data->child, RSC_START, pe_order_implies_first_printed, data_set); new_rsc_order(rsc, RSC_STOP, container_data->child, RSC_STOP, pe_order_implies_first_printed, data_set); if(container_data->child->children) { new_rsc_order(container_data->child, RSC_STARTED, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); new_rsc_order(container_data->child, RSC_STOPPED, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); } else { new_rsc_order(container_data->child, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); new_rsc_order(container_data->child, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); } } for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); CRM_ASSERT(tuple->docker); tuple->docker->cmds->internal_constraints(tuple->docker, data_set); order_start_start(rsc, tuple->docker, pe_order_runnable_left | pe_order_implies_first_printed); if(tuple->child) { order_stop_stop(rsc, tuple->child, pe_order_implies_first_printed); } order_stop_stop(rsc, tuple->docker, pe_order_implies_first_printed); new_rsc_order(tuple->docker, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); new_rsc_order(tuple->docker, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); if(tuple->ip) { tuple->ip->cmds->internal_constraints(tuple->ip, data_set); // Start ip then docker new_rsc_order(tuple->ip, RSC_START, tuple->docker, RSC_START, pe_order_runnable_left|pe_order_preserve, data_set); new_rsc_order(tuple->docker, RSC_STOP, tuple->ip, RSC_STOP, pe_order_implies_first|pe_order_preserve, data_set); rsc_colocation_new("ip-with-docker", NULL, INFINITY, tuple->ip, tuple->docker, NULL, NULL, data_set); } if(tuple->remote) { /* This handles ordering and colocating remote relative to docker * (via "resource-with-container"). Since IP is also ordered and * colocated relative to docker, we don't need to do anything * explicit here with IP. */ tuple->remote->cmds->internal_constraints(tuple->remote, data_set); } if(tuple->child) { CRM_ASSERT(tuple->remote); // "Start remote then child" is implicit in scheduler's remote logic } } if(container_data->child) { container_data->child->cmds->internal_constraints(container_data->child, data_set); if (is_set(container_data->child->flags, pe_rsc_promotable)) { promote_demote_constraints(rsc, data_set); /* child demoted before global demoted */ new_rsc_order(container_data->child, RSC_DEMOTED, rsc, RSC_DEMOTED, pe_order_implies_then_printed, data_set); /* global demote before child demote */ new_rsc_order(rsc, RSC_DEMOTE, container_data->child, RSC_DEMOTE, pe_order_implies_first_printed, data_set); /* child promoted before global promoted */ new_rsc_order(container_data->child, RSC_PROMOTED, rsc, RSC_PROMOTED, pe_order_implies_then_printed, data_set); /* global promote before child promote */ new_rsc_order(rsc, RSC_PROMOTE, container_data->child, RSC_PROMOTE, pe_order_implies_first_printed, data_set); } } else { // int type = pe_order_optional | pe_order_implies_then | pe_order_restart; // custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, // rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional, data_set); } } static resource_t * find_compatible_tuple_by_node(resource_t * rsc_lh, node_t * candidate, resource_t * rsc, enum rsc_role_e filter, gboolean current) { container_variant_data_t *container_data = NULL; CRM_CHECK(candidate != NULL, return NULL); get_container_variant_data(container_data, rsc); crm_trace("Looking for compatible child from %s for %s on %s", rsc_lh->id, rsc->id, candidate->details->uname); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; if(is_child_compatible(tuple->docker, candidate, filter, current)) { crm_trace("Pairing %s with %s on %s", rsc_lh->id, tuple->docker->id, candidate->details->uname); return tuple->docker; } } crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id); return NULL; } static resource_t * find_compatible_tuple(resource_t *rsc_lh, resource_t * rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { GListPtr scratch = NULL; resource_t *pair = NULL; node_t *active_node_lh = NULL; active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current); if (active_node_lh) { return find_compatible_tuple_by_node(rsc_lh, active_node_lh, rsc, filter, current); } scratch = g_hash_table_get_values(rsc_lh->allowed_nodes); scratch = sort_nodes_by_weight(scratch, NULL, data_set); for (GListPtr gIter = scratch; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; pair = find_compatible_tuple_by_node(rsc_lh, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? rsc->id : "none")); done: g_list_free(scratch); return pair; } void container_rsc_colocation_lh(pe_resource_t *rsc, pe_resource_t *rsc_rh, rsc_colocation_t *constraint, pe_working_set_t *data_set) { /* -- Never called -- * * Instead we add the colocation constraints to the child and call from there */ CRM_ASSERT(FALSE); } int copies_per_node(resource_t * rsc) { /* Strictly speaking, there should be a 'copies_per_node' addition * to the resource function table and each case would be a * function. However that would be serious overkill to return an * int. In fact, it seems to me that both function tables * could/should be replaced by resources.{c,h} full of * rsc_{some_operation} functions containing a switch as below * which calls out to functions named {variant}_{some_operation} * as needed. */ switch(rsc->variant) { case pe_unknown: return 0; case pe_native: case pe_group: return 1; case pe_clone: { const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); return crm_parse_int(max_clones_node, "1"); } case pe_container: { container_variant_data_t *data = NULL; get_container_variant_data(data, rsc); return data->replicas_per_host; } } return 0; } void container_rsc_colocation_rh(pe_resource_t *rsc_lh, pe_resource_t *rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set) { GListPtr allocated_rhs = NULL; container_variant_data_t *container_data = NULL; CRM_CHECK(constraint != NULL, return); CRM_CHECK(rsc_lh != NULL, pe_err("rsc_lh was NULL for %s", constraint->id); return); CRM_CHECK(rsc != NULL, pe_err("rsc was NULL for %s", constraint->id); return); CRM_ASSERT(rsc_lh->variant == pe_native); if (is_set(rsc->flags, pe_rsc_provisional)) { pe_rsc_trace(rsc, "%s is still provisional", rsc->id); return; } else if(constraint->rsc_lh->variant > pe_group) { resource_t *rh_child = find_compatible_tuple(rsc_lh, rsc, RSC_ROLE_UNKNOWN, FALSE, data_set); if (rh_child) { pe_rsc_debug(rsc, "Pairing %s with %s", rsc_lh->id, rh_child->id); rsc_lh->cmds->rsc_colocation_lh(rsc_lh, rh_child, constraint, data_set); } else if (constraint->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", rsc_lh->id, rsc->id); assign_node(rsc_lh, NULL, TRUE); } else { pe_rsc_debug(rsc, "Cannot pair %s with instance of %s", rsc_lh->id, rsc->id); } return; } get_container_variant_data(container_data, rsc); pe_rsc_trace(rsc, "Processing constraint %s: %s -> %s %d", constraint->id, rsc_lh->id, rsc->id, constraint->score); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; if (constraint->score < INFINITY) { tuple->docker->cmds->rsc_colocation_rh(rsc_lh, tuple->docker, constraint, data_set); } else { node_t *chosen = tuple->docker->fns->location(tuple->docker, NULL, FALSE); if (chosen == NULL || is_set_recursive(tuple->docker, pe_rsc_block, TRUE)) { continue; } if(constraint->role_rh >= RSC_ROLE_MASTER && tuple->child == NULL) { continue; } if(constraint->role_rh >= RSC_ROLE_MASTER && tuple->child->next_role < RSC_ROLE_MASTER) { continue; } pe_rsc_trace(rsc, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); allocated_rhs = g_list_prepend(allocated_rhs, chosen); } } if (constraint->score >= INFINITY) { node_list_exclude(rsc_lh->allowed_nodes, allocated_rhs, FALSE); } g_list_free(allocated_rhs); } enum pe_action_flags container_action_flags(action_t * action, node_t * node) { GListPtr containers = NULL; enum pe_action_flags flags = 0; container_variant_data_t *data = NULL; get_container_variant_data(data, action->rsc); if(data->child) { enum action_tasks task = get_complex_task(data->child, action->task, TRUE); switch(task) { case no_action: case action_notify: case action_notified: case action_promote: case action_promoted: case action_demote: case action_demoted: return summary_action_flags(action, data->child->children, node); default: break; } } containers = get_container_list(action->rsc); flags = summary_action_flags(action, containers, node); g_list_free(containers); return flags; } resource_t * find_compatible_child_by_node(resource_t * local_child, node_t * local_node, resource_t * rsc, enum rsc_role_e filter, gboolean current) { GListPtr gIter = NULL; GListPtr children = NULL; if (local_node == NULL) { crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id); return NULL; } crm_trace("Looking for compatible child from %s for %s on %s", local_child->id, rsc->id, local_node->details->uname); children = get_containers_or_children(rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; if(is_child_compatible(child_rsc, local_node, filter, current)) { crm_trace("Pairing %s with %s on %s", local_child->id, child_rsc->id, local_node->details->uname); return child_rsc; } } crm_trace("Can't pair %s with %s", local_child->id, rsc->id); if(children != rsc->children) { g_list_free(children); } return NULL; } static container_grouping_t * tuple_for_docker(resource_t *rsc, resource_t *docker, node_t *node) { if(rsc->variant == pe_container) { container_variant_data_t *data = NULL; get_container_variant_data(data, rsc); for (GListPtr gIter = data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; if(tuple->child && docker == tuple->docker && node->details == tuple->node->details) { return tuple; } } } return NULL; } static enum pe_graph_flags container_update_interleave_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set) { GListPtr gIter = NULL; GListPtr children = NULL; gboolean current = FALSE; enum pe_graph_flags changed = pe_graph_none; /* Fix this - lazy */ if (crm_ends_with(first->uuid, "_stopped_0") || crm_ends_with(first->uuid, "_demoted_0")) { current = TRUE; } children = get_containers_or_children(then->rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *then_child = gIter->data; pe_resource_t *first_child = find_compatible_child(then_child, first->rsc, RSC_ROLE_UNKNOWN, current, data_set); if (first_child == NULL && current) { crm_trace("Ignore"); } else if (first_child == NULL) { crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid, then->uuid); /* Me no like this hack - but what else can we do? * * If there is no-one active or about to be active * on the same node as then_child, then they must * not be allowed to start */ if (type & (pe_order_runnable_left | pe_order_implies_then) /* Mandatory */ ) { pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id); if(assign_node(then_child, NULL, TRUE)) { changed |= pe_graph_updated_then; } } } else { pe_action_t *first_action = NULL; pe_action_t *then_action = NULL; enum action_tasks task = clone_child_action(first); const char *first_task = task2text(task); container_grouping_t *first_tuple = tuple_for_docker(first->rsc, first_child, node); container_grouping_t *then_tuple = tuple_for_docker(then->rsc, then_child, node); if(strstr(first->task, "stop") && first_tuple && first_tuple->child) { /* Except for 'stopped' we should be looking at the * in-container resource, actions for the child will * happen later and are therefor more likely to align * with the user's intent. */ first_action = find_first_action(first_tuple->child->actions, NULL, task2text(task), node); } else { first_action = find_first_action(first_child->actions, NULL, task2text(task), node); } if(strstr(then->task, "mote") && then_tuple && then_tuple->child) { /* Promote/demote actions will never be found for the * docker resource, look in the child instead * * Alternatively treat: * 'XXXX then promote YYYY' as 'XXXX then start container for YYYY', and * 'demote XXXX then stop YYYY' as 'stop container for XXXX then stop YYYY' */ then_action = find_first_action(then_tuple->child->actions, NULL, then->task, node); } else { then_action = find_first_action(then_child->actions, NULL, then->task, node); } if (first_action == NULL) { if (is_not_set(first_child->flags, pe_rsc_orphan) && crm_str_eq(first_task, RSC_STOP, TRUE) == FALSE && crm_str_eq(first_task, RSC_DEMOTE, TRUE) == FALSE) { crm_err("Internal error: No action found for %s in %s (first)", first_task, first_child->id); } else { crm_trace("No action found for %s in %s%s (first)", first_task, first_child->id, is_set(first_child->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); } continue; } /* We're only interested if 'then' is neither stopping nor being demoted */ if (then_action == NULL) { if (is_not_set(then_child->flags, pe_rsc_orphan) && crm_str_eq(then->task, RSC_STOP, TRUE) == FALSE && crm_str_eq(then->task, RSC_DEMOTE, TRUE) == FALSE) { crm_err("Internal error: No action found for %s in %s (then)", then->task, then_child->id); } else { crm_trace("No action found for %s in %s%s (then)", then->task, then_child->id, is_set(then_child->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); } continue; } if (order_actions(first_action, then_action, type)) { crm_debug("Created constraint for %s (%d) -> %s (%d) %.6x", first_action->uuid, is_set(first_action->flags, pe_action_optional), then_action->uuid, is_set(then_action->flags, pe_action_optional), type); changed |= (pe_graph_updated_first | pe_graph_updated_then); } if(first_action && then_action) { changed |= then_child->cmds->update_actions(first_action, then_action, node, first_child->cmds->action_flags(first_action, node), filter, type, data_set); } else { crm_err("Nothing found either for %s (%p) or %s (%p) %s", first_child->id, first_action, then_child->id, then_action, task2text(task)); } } } if(children != then->rsc->children) { g_list_free(children); } return changed; } bool can_interleave_actions(pe_action_t *first, pe_action_t *then) { bool interleave = FALSE; resource_t *rsc = NULL; const char *interleave_s = NULL; if(first->rsc == NULL || then->rsc == NULL) { crm_trace("Not interleaving %s with %s (both must be resources)", first->uuid, then->uuid); return FALSE; } else if(first->rsc == then->rsc) { crm_trace("Not interleaving %s with %s (must belong to different resources)", first->uuid, then->uuid); return FALSE; } else if(first->rsc->variant < pe_clone || then->rsc->variant < pe_clone) { crm_trace("Not interleaving %s with %s (both sides must be clones or bundles)", first->uuid, then->uuid); return FALSE; } if (crm_ends_with(then->uuid, "_stop_0") || crm_ends_with(then->uuid, "_demote_0")) { rsc = first->rsc; } else { rsc = then->rsc; } interleave_s = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE); interleave = crm_is_true(interleave_s); crm_trace("Interleave %s -> %s: %s (based on %s)", first->uuid, then->uuid, interleave ? "yes" : "no", rsc->id); return interleave; } enum pe_graph_flags container_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set) { enum pe_graph_flags changed = pe_graph_none; crm_trace("%s -> %s", first->uuid, then->uuid); if(can_interleave_actions(first, then)) { changed = container_update_interleave_actions(first, then, node, flags, filter, type, data_set); } else if(then->rsc) { GListPtr gIter = NULL; GListPtr children = NULL; // Handle the 'primitive' ordering case changed |= native_update_actions(first, then, node, flags, filter, type, data_set); // Now any children (or containers in the case of a bundle) children = get_containers_or_children(then->rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { resource_t *then_child = (resource_t *) gIter->data; enum pe_graph_flags then_child_changed = pe_graph_none; action_t *then_child_action = find_first_action(then_child->actions, NULL, then->task, node); if (then_child_action) { enum pe_action_flags then_child_flags = then_child->cmds->action_flags(then_child_action, node); if (is_set(then_child_flags, pe_action_runnable)) { then_child_changed |= then_child->cmds->update_actions(first, then_child_action, node, flags, filter, type, data_set); } changed |= then_child_changed; if (then_child_changed & pe_graph_updated_then) { for (GListPtr lpc = then_child_action->actions_after; lpc != NULL; lpc = lpc->next) { action_wrapper_t *next = (action_wrapper_t *) lpc->data; update_action(next->action, data_set); } } } } if(children != then->rsc->children) { g_list_free(children); } } return changed; } void container_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { container_variant_data_t *container_data = NULL; get_container_variant_data(container_data, rsc); pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; if (tuple->docker) { tuple->docker->cmds->rsc_location(tuple->docker, constraint); } if(tuple->ip) { tuple->ip->cmds->rsc_location(tuple->ip, constraint); } } if(container_data->child && (constraint->role_filter == RSC_ROLE_SLAVE || constraint->role_filter == RSC_ROLE_MASTER)) { container_data->child->cmds->rsc_location(container_data->child, constraint); container_data->child->rsc_location = g_list_prepend(container_data->child->rsc_location, constraint); } } void container_expand(resource_t * rsc, pe_working_set_t * data_set) { container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return); get_container_variant_data(container_data, rsc); if(container_data->child) { container_data->child->cmds->expand(container_data->child, data_set); } for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if (tuple->remote && tuple->docker && container_fix_remote_addr(tuple->remote)) { // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside xmlNode *nvpair = get_xpath_object("//nvpair[@name='addr']", tuple->remote->xml, LOG_ERR); const char *calculated_addr = container_fix_remote_addr_in(tuple->remote, nvpair, "value"); if (calculated_addr) { crm_trace("Set address for bundle connection %s to bundle host %s", tuple->remote->id, calculated_addr); g_hash_table_replace(tuple->remote->parameters, strdup("addr"), strdup(calculated_addr)); } else { - crm_err("Could not determine address for bundle connection %s", - tuple->remote->id); + /* The only way to get here is if the remote connection is + * neither currently running nor scheduled to run. That means we + * won't be doing any operations that require addr (only start + * requires it; we additionally use it to compare digests when + * unpacking status, promote, and migrate_from history, but + * that's already happened by this point). + */ + crm_info("Unable to determine address for bundle %s remote connection", + rsc->id); } } if(tuple->ip) { tuple->ip->cmds->expand(tuple->ip, data_set); } if(tuple->docker) { tuple->docker->cmds->expand(tuple->docker, data_set); } if(tuple->remote) { tuple->remote->cmds->expand(tuple->remote, data_set); } } } gboolean container_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set) { bool any_created = FALSE; container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return FALSE); get_container_variant_data(container_data, rsc); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(tuple->ip) { any_created |= tuple->ip->cmds->create_probe(tuple->ip, node, complete, force, data_set); } if(tuple->child && node->details == tuple->node->details) { any_created |= tuple->child->cmds->create_probe(tuple->child, node, complete, force, data_set); } if(tuple->docker) { bool created = tuple->docker->cmds->create_probe(tuple->docker, node, complete, force, data_set); if(created) { any_created = TRUE; /* If we're limited to one replica per host (due to * the lack of an IP range probably), then we don't * want any of our peer containers starting until * we've established that no other copies are already * running. * * Partly this is to ensure that replicas_per_host is * observed, but also to ensure that the containers * don't fail to start because the necessary port * mappings (which won't include an IP for uniqueness) * are already taken */ for (GListPtr tIter = container_data->tuples; tIter != NULL && container_data->replicas_per_host == 1; tIter = tIter->next) { container_grouping_t *other = (container_grouping_t *)tIter->data; if ((other != tuple) && (other != NULL) && (other->docker != NULL)) { custom_action_order(tuple->docker, generate_op_key(tuple->docker->id, RSC_STATUS, 0), NULL, other->docker, generate_op_key(other->docker->id, RSC_START, 0), NULL, pe_order_optional|pe_order_same_node, data_set); } } } } if (tuple->docker && tuple->remote && tuple->remote->cmds->create_probe(tuple->remote, node, complete, force, data_set)) { /* Do not probe the remote resource until we know where docker is running * Required for REMOTE_CONTAINER_HACK to correctly probe remote resources */ char *probe_uuid = generate_op_key(tuple->remote->id, RSC_STATUS, 0); action_t *probe = find_first_action(tuple->remote->actions, probe_uuid, NULL, node); free(probe_uuid); if (probe) { any_created = TRUE; crm_trace("Ordering %s probe on %s", tuple->remote->id, node->details->uname); custom_action_order(tuple->docker, generate_op_key(tuple->docker->id, RSC_START, 0), NULL, tuple->remote, NULL, probe, pe_order_probe, data_set); } } } return any_created; } void container_append_meta(resource_t * rsc, xmlNode * xml) { } GHashTable * container_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { return rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags); } void container_LogActions( resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) { container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return); get_container_variant_data(container_data, rsc); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(tuple->ip) { LogActions(tuple->ip, data_set, terminal); } if(tuple->docker) { LogActions(tuple->docker, data_set, terminal); } if(tuple->remote) { LogActions(tuple->remote, data_set, terminal); } if(tuple->child) { LogActions(tuple->child, data_set, terminal); } } } diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index 09f928abb4..1913d90e00 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -1,347 +1,353 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PE_INTERNAL__H # define PE_INTERNAL__H # include # include # include # define pe_rsc_info(rsc, fmt, args...) crm_log_tag(LOG_INFO, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_debug(rsc, fmt, args...) crm_log_tag(LOG_DEBUG, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_trace(rsc, fmt, args...) crm_log_tag(LOG_TRACE, rsc ? rsc->id : "", fmt, ##args) # define pe_err(fmt...) { was_processing_error = TRUE; crm_config_error = TRUE; crm_err(fmt); } # define pe_warn(fmt...) { was_processing_warning = TRUE; crm_config_warning = TRUE; crm_warn(fmt); } # define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); } # define pe_proc_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); } # define pe_set_action_bit(action, bit) action->flags = crm_set_bit(__FUNCTION__, __LINE__, action->uuid, action->flags, bit) # define pe_clear_action_bit(action, bit) action->flags = crm_clear_bit(__FUNCTION__, __LINE__, action->uuid, action->flags, bit) typedef struct pe__location_constraint_s { char *id; // Constraint XML ID pe_resource_t *rsc_lh; // Resource being located enum rsc_role_e role_filter; // Role to locate enum pe_discover_e discover_mode; // Resource discovery GListPtr node_list_rh; // List of pe_node_t* } pe__location_t; typedef struct pe__order_constraint_s { int id; enum pe_ordering type; void *lh_opaque; resource_t *lh_rsc; action_t *lh_action; char *lh_action_task; void *rh_opaque; resource_t *rh_rsc; action_t *rh_action; char *rh_action_task; } pe__ordering_t; typedef struct notify_data_s { GHashTable *keys; const char *action; action_t *pre; action_t *post; action_t *pre_done; action_t *post_done; GListPtr active; /* notify_entry_t* */ GListPtr inactive; /* notify_entry_t* */ GListPtr start; /* notify_entry_t* */ GListPtr stop; /* notify_entry_t* */ GListPtr demote; /* notify_entry_t* */ GListPtr promote; /* notify_entry_t* */ GListPtr master; /* notify_entry_t* */ GListPtr slave; /* notify_entry_t* */ GHashTable *allowed_nodes; } notify_data_t; bool pe_can_fence(pe_working_set_t *data_set, node_t *node); int merge_weights(int w1, int w2); void add_hash_param(GHashTable * hash, const char *name, const char *value); char *native_parameter(resource_t * rsc, node_t * node, gboolean create, const char *name, pe_working_set_t * data_set); pe_node_t *native_location(const pe_resource_t *rsc, GList **list, int current); void pe_metadata(void); void verify_pe_options(GHashTable * options); void common_update_score(resource_t * rsc, const char *id, int score); void native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set); gboolean native_unpack(resource_t * rsc, pe_working_set_t * data_set); gboolean group_unpack(resource_t * rsc, pe_working_set_t * data_set); gboolean clone_unpack(resource_t * rsc, pe_working_set_t * data_set); gboolean container_unpack(resource_t * rsc, pe_working_set_t * data_set); resource_t *native_find_rsc(resource_t *rsc, const char *id, const node_t *node, int flags); gboolean native_active(resource_t * rsc, gboolean all); gboolean group_active(resource_t * rsc, gboolean all); gboolean clone_active(resource_t * rsc, gboolean all); gboolean container_active(resource_t * rsc, gboolean all); void native_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void group_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void clone_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void container_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void native_free(resource_t * rsc); void group_free(resource_t * rsc); void clone_free(resource_t * rsc); void container_free(resource_t * rsc); enum rsc_role_e native_resource_state(const resource_t * rsc, gboolean current); enum rsc_role_e group_resource_state(const resource_t * rsc, gboolean current); enum rsc_role_e clone_resource_state(const resource_t * rsc, gboolean current); enum rsc_role_e container_resource_state(const resource_t * rsc, gboolean current); gboolean common_unpack(xmlNode * xml_obj, resource_t ** rsc, resource_t * parent, pe_working_set_t * data_set); void common_free(resource_t * rsc); extern node_t *node_copy(const node_t *this_node); extern time_t get_effective_time(pe_working_set_t * data_set); /* Failure handling utilities (from failcounts.c) */ // bit flags for fail count handling options enum pe_fc_flags_e { pe_fc_default = 0x00, pe_fc_effective = 0x01, // don't count expired failures pe_fc_fillers = 0x02, // if container, include filler failures in count }; int pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set); pe_action_t *pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_working_set_t *data_set); /* Functions for finding/counting a resource's active nodes */ pe_node_t *pe__find_active_on(const pe_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean); pe_node_t *pe__find_active_requires(const pe_resource_t *rsc, unsigned int *count); static inline pe_node_t * pe__current_node(const pe_resource_t *rsc) { return pe__find_active_on(rsc, NULL, NULL); } /* Binary like operators for lists of nodes */ extern void node_list_exclude(GHashTable * list, GListPtr list2, gboolean merge_scores); extern GListPtr node_list_dup(GListPtr list, gboolean reset, gboolean filter); extern GHashTable *node_hash_from_list(GListPtr list); static inline gpointer pe_hash_table_lookup(GHashTable * hash, gconstpointer key) { if (hash) { return g_hash_table_lookup(hash, key); } return NULL; } extern action_t *get_pseudo_op(const char *name, pe_working_set_t * data_set); extern gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order); GHashTable *node_hash_dup(GHashTable * hash); /* Printing functions for debug */ extern void print_node(const char *pre_text, node_t * node, gboolean details); extern void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details); extern void dump_node_scores_worker(int level, const char *file, const char *function, int line, resource_t * rsc, const char *comment, GHashTable * nodes); extern void dump_node_capacity(int level, const char *comment, node_t * node); extern void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node); # define dump_node_scores(level, rsc, text, nodes) do { \ dump_node_scores_worker(level, __FILE__, __FUNCTION__, __LINE__, rsc, text, nodes); \ } while(0) /* Sorting functions */ extern gint sort_rsc_priority(gconstpointer a, gconstpointer b); extern gint sort_rsc_index(gconstpointer a, gconstpointer b); extern xmlNode *find_rsc_op_entry(resource_t * rsc, const char *key); extern action_t *custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node, gboolean optional, gboolean foo, pe_working_set_t * data_set); # define delete_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DELETE, 0) # define delete_action(rsc, node, optional) custom_action( \ rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \ optional, TRUE, data_set); # define stopped_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STOPPED, 0) # define stopped_action(rsc, node, optional) custom_action( \ rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \ optional, TRUE, data_set); # define stop_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STOP, 0) # define stop_action(rsc, node, optional) custom_action( \ rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \ optional, TRUE, data_set); # define reload_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_RELOAD, 0) # define start_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_START, 0) # define start_action(rsc, node, optional) custom_action( \ rsc, start_key(rsc), CRMD_ACTION_START, node, \ optional, TRUE, data_set) # define started_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STARTED, 0) # define started_action(rsc, node, optional) custom_action( \ rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \ optional, TRUE, data_set) # define promote_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_PROMOTE, 0) # define promote_action(rsc, node, optional) custom_action( \ rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \ optional, TRUE, data_set) # define promoted_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_PROMOTED, 0) # define promoted_action(rsc, node, optional) custom_action( \ rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \ optional, TRUE, data_set) # define demote_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DEMOTE, 0) # define demote_action(rsc, node, optional) custom_action( \ rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \ optional, TRUE, data_set) # define demoted_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DEMOTED, 0) # define demoted_action(rsc, node, optional) custom_action( \ rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \ optional, TRUE, data_set) extern int pe_get_configured_timeout(resource_t *rsc, const char *action, pe_working_set_t *data_set); extern action_t *find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node); extern enum action_tasks get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic); extern GListPtr find_actions(GListPtr input, const char *key, const node_t *on_node); extern GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node); extern GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node); extern void pe_free_action(action_t * action); extern void resource_location(resource_t * rsc, node_t * node, int score, const char *tag, pe_working_set_t * data_set); extern gint sort_op_by_callid(gconstpointer a, gconstpointer b); extern gboolean get_target_role(resource_t * rsc, enum rsc_role_e *role); extern resource_t *find_clone_instance(resource_t * rsc, const char *sub_id, pe_working_set_t * data_set); extern void destroy_ticket(gpointer data); extern ticket_t *ticket_new(const char *ticket_id, pe_working_set_t * data_set); // Resources for manipulating resource names const char *pe_base_name_end(const char *id); char *clone_strip(const char *last_rsc_id); char *clone_zero(const char *last_rsc_id); static inline bool pe_base_name_eq(resource_t *rsc, const char *id) { if (id && rsc && rsc->id) { // Number of characters in rsc->id before any clone suffix size_t base_len = pe_base_name_end(rsc->id) - rsc->id + 1; return (strlen(id) == base_len) && !strncmp(id, rsc->id, base_len); } return FALSE; } int get_target_rc(xmlNode * xml_op); gint sort_node_uname(gconstpointer a, gconstpointer b); bool is_set_recursive(resource_t * rsc, long long flag, bool any); enum rsc_digest_cmp_val { /*! Digests are the same */ RSC_DIGEST_MATCH = 0, /*! Params that require a restart changed */ RSC_DIGEST_RESTART, /*! Some parameter changed. */ RSC_DIGEST_ALL, /*! rsc op didn't have a digest associated with it, so * it is unknown if parameters changed or not. */ RSC_DIGEST_UNKNOWN, }; typedef struct op_digest_cache_s { enum rsc_digest_cmp_val rc; xmlNode *params_all; xmlNode *params_secure; xmlNode *params_restart; char *digest_all_calc; char *digest_secure_calc; char *digest_restart_calc; } op_digest_cache_t; op_digest_cache_t *rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node, pe_working_set_t * data_set); action_t *pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe_working_set_t * data_set); void trigger_unfencing( resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set); void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite); void pe_action_set_flag_reason(const char *function, long line, pe_action_t *action, pe_action_t *reason, const char *text, enum pe_action_flags flags, bool overwrite); #define pe_action_required(action, reason, text) pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, reason, text, pe_action_optional, FALSE) #define pe_action_implies(action, reason, flag) pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, reason, NULL, flag, FALSE) void set_bit_recursive(resource_t * rsc, unsigned long long flag); void clear_bit_recursive(resource_t * rsc, unsigned long long flag); gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref); void print_rscs_brief(GListPtr rsc_list, const char * pre_text, long options, void * print_data, gboolean print_all); void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason); node_t *pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); bool remote_id_conflict(const char *remote_name, pe_working_set_t *data); void common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *node, long options, void *print_data); resource_t *find_container_child(const resource_t *bundle, const node_t *node); bool container_fix_remote_addr(resource_t *rsc); const char *container_fix_remote_addr_in(resource_t *rsc, xmlNode *xml, const char *field); const char *pe_node_attribute_calculated(const pe_node_t *node, const char *name, const resource_t *rsc); const char *pe_node_attribute_raw(pe_node_t *node, const char *name); bool pe__is_universal_clone(pe_resource_t *rsc, pe_working_set_t *data_set); - +void pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, + enum pe_check_parameters, pe_working_set_t *data_set); +void pe__foreach_param_check(pe_working_set_t *data_set, + void (*cb)(pe_resource_t*, pe_node_t*, xmlNode*, + enum pe_check_parameters, + pe_working_set_t*)); +void pe__free_param_checks(pe_working_set_t *data_set); #endif diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index f59306b436..2341fda963 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -1,530 +1,545 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PENGINE_STATUS__H # define PENGINE_STATUS__H #ifdef __cplusplus extern "C" { #endif /*! * \file * \brief Cluster status and scheduling * \ingroup pengine */ # include # include # include # include typedef struct pe_node_s pe_node_t; typedef struct pe_action_s pe_action_t; typedef struct pe_resource_s pe_resource_t; typedef struct pe_working_set_s pe_working_set_t; # include enum pe_quorum_policy { no_quorum_freeze, no_quorum_stop, no_quorum_ignore, no_quorum_suicide }; enum node_type { node_ping, node_member, node_remote }; //! \deprecated will be removed in a future release enum pe_restart { pe_restart_restart, pe_restart_ignore }; //! Determine behavior of pe_find_resource_with_flags() enum pe_find { pe_find_renamed = 0x001, //!< match resource ID or LRM history ID pe_find_anon = 0x002, //!< match base name of anonymous clone instances pe_find_clone = 0x004, //!< match only clone instances pe_find_current = 0x008, //!< match resource active on specified node pe_find_inactive = 0x010, //!< match resource not running anywhere pe_find_any = 0x020, //!< match base name of any clone instance }; # define pe_flag_have_quorum 0x00000001ULL # define pe_flag_symmetric_cluster 0x00000002ULL # define pe_flag_maintenance_mode 0x00000008ULL # define pe_flag_stonith_enabled 0x00000010ULL # define pe_flag_have_stonith_resource 0x00000020ULL # define pe_flag_enable_unfencing 0x00000040ULL # define pe_flag_concurrent_fencing 0x00000080ULL # define pe_flag_stop_rsc_orphans 0x00000100ULL # define pe_flag_stop_action_orphans 0x00000200ULL # define pe_flag_stop_everything 0x00000400ULL # define pe_flag_start_failure_fatal 0x00001000ULL # define pe_flag_remove_after_stop 0x00002000ULL # define pe_flag_startup_fencing 0x00004000ULL # define pe_flag_startup_probes 0x00010000ULL # define pe_flag_have_status 0x00020000ULL # define pe_flag_have_remote_nodes 0x00040000ULL # define pe_flag_quick_location 0x00100000ULL # define pe_flag_sanitized 0x00200000ULL # define pe_flag_stdout 0x00400000ULL struct pe_working_set_s { xmlNode *input; crm_time_t *now; /* options extracted from the input */ char *dc_uuid; pe_node_t *dc_node; const char *stonith_action; const char *placement_strategy; unsigned long long flags; int stonith_timeout; enum pe_quorum_policy no_quorum_policy; GHashTable *config_hash; GHashTable *tickets; // Actions for which there can be only one (e.g. fence nodeX) GHashTable *singletons; GListPtr nodes; GListPtr resources; GListPtr placement_constraints; GListPtr ordering_constraints; GListPtr colocation_constraints; GListPtr ticket_constraints; GListPtr actions; xmlNode *failed; xmlNode *op_defaults; xmlNode *rsc_defaults; /* stats */ int num_synapse; int max_valid_nodes; int order_id; int action_id; /* final output */ xmlNode *graph; GHashTable *template_rsc_sets; const char *localhost; GHashTable *tags; int blocked_resources; int disabled_resources; + + GList *param_check; // History entries that need to be checked + GList *stop_needed; // Containers that need stop actions +}; + +enum pe_check_parameters { + /* Clear fail count if parameters changed for un-expired start or monitor + * last_failure. + */ + pe_check_last_failure, + + /* Clear fail count if parameters changed for start, monitor, promote, or + * migrate_from actions for active resources. + */ + pe_check_active, }; struct pe_node_shared_s { const char *id; const char *uname; enum node_type type; /* @TODO convert these flags into a bitfield */ gboolean online; gboolean standby; gboolean standby_onfail; gboolean pending; gboolean unclean; gboolean unseen; gboolean shutdown; gboolean expected_up; gboolean is_dc; gboolean maintenance; gboolean rsc_discovery_enabled; gboolean remote_requires_reset; gboolean remote_was_fenced; gboolean remote_maintenance; /* what the remote-rsc is thinking */ gboolean unpacked; int num_resources; pe_resource_t *remote_rsc; GListPtr running_rsc; /* pe_resource_t* */ GListPtr allocated_rsc; /* pe_resource_t* */ GHashTable *attrs; /* char* => char* */ GHashTable *utilization; GHashTable *digest_cache; //!< cache of calculated resource digests }; struct pe_node_s { int weight; gboolean fixed; int count; struct pe_node_shared_s *details; int rsc_discover_mode; }; # define pe_rsc_orphan 0x00000001ULL # define pe_rsc_managed 0x00000002ULL # define pe_rsc_block 0x00000004ULL # define pe_rsc_orphan_container_filler 0x00000008ULL # define pe_rsc_notify 0x00000010ULL # define pe_rsc_unique 0x00000020ULL # define pe_rsc_fence_device 0x00000040ULL # define pe_rsc_promotable 0x00000080ULL # define pe_rsc_provisional 0x00000100ULL # define pe_rsc_allocating 0x00000200ULL # define pe_rsc_merging 0x00000400ULL # define pe_rsc_reload 0x00002000ULL # define pe_rsc_allow_remote_remotes 0x00004000ULL # define pe_rsc_failed 0x00010000ULL # define pe_rsc_runnable 0x00040000ULL # define pe_rsc_start_pending 0x00080000ULL # define pe_rsc_starting 0x00100000ULL # define pe_rsc_stopping 0x00200000ULL # define pe_rsc_allow_migrate 0x00800000ULL # define pe_rsc_failure_ignored 0x01000000ULL # define pe_rsc_maintenance 0x04000000ULL # define pe_rsc_is_container 0x08000000ULL # define pe_rsc_needs_quorum 0x10000000ULL # define pe_rsc_needs_fencing 0x20000000ULL # define pe_rsc_needs_unfencing 0x40000000ULL enum pe_graph_flags { pe_graph_none = 0x00000, pe_graph_updated_first = 0x00001, pe_graph_updated_then = 0x00002, pe_graph_disable = 0x00004, }; /* *INDENT-OFF* */ enum pe_action_flags { pe_action_pseudo = 0x00001, pe_action_runnable = 0x00002, pe_action_optional = 0x00004, pe_action_print_always = 0x00008, pe_action_have_node_attrs = 0x00010, pe_action_implied_by_stonith = 0x00040, pe_action_migrate_runnable = 0x00080, pe_action_dumped = 0x00100, pe_action_processed = 0x00200, pe_action_clear = 0x00400, pe_action_dangle = 0x00800, /* This action requires one or more of its dependencies to be runnable. * We use this to clear the runnable flag before checking dependencies. */ pe_action_requires_any = 0x01000, pe_action_reschedule = 0x02000, pe_action_tracking = 0x04000, }; /* *INDENT-ON* */ struct pe_resource_s { char *id; char *clone_name; xmlNode *xml; xmlNode *orig_xml; xmlNode *ops_xml; pe_working_set_t *cluster; pe_resource_t *parent; enum pe_obj_types variant; void *variant_opaque; resource_object_functions_t *fns; resource_alloc_functions_t *cmds; enum rsc_recovery_type recovery_type; // @TODO only pe_restart_restart is of interest, so merge into flags enum pe_restart restart_type; //!< \deprecated will be removed in future release int priority; int stickiness; int sort_index; int failure_timeout; int migration_threshold; guint remote_reconnect_ms; char *pending_task; unsigned long long flags; // @TODO merge these into flags gboolean is_remote_node; gboolean exclusive_discover; //!@{ //! This field should be treated as internal to Pacemaker GListPtr rsc_cons_lhs; // List of rsc_colocation_t* GListPtr rsc_cons; // List of rsc_colocation_t* GListPtr rsc_location; // List of pe__location_t* GListPtr actions; // List of pe_action_t* GListPtr rsc_tickets; // List of rsc_ticket* //!@} pe_node_t *allocated_to; pe_node_t *partial_migration_target; pe_node_t *partial_migration_source; GListPtr running_on; /* pe_node_t* */ GHashTable *known_on; /* pe_node_t* */ GHashTable *allowed_nodes; /* pe_node_t* */ enum rsc_role_e role; enum rsc_role_e next_role; GHashTable *meta; GHashTable *parameters; GHashTable *utilization; GListPtr children; /* pe_resource_t* */ GListPtr dangling_migrations; /* pe_node_t* */ pe_resource_t *container; GListPtr fillers; pe_node_t *pending_node; // Node on which pending_task is happening #if ENABLE_VERSIONED_ATTRS xmlNode *versioned_parameters; #endif }; #if ENABLE_VERSIONED_ATTRS // Used as action->action_details if action->rsc is not NULL typedef struct pe_rsc_action_details_s { xmlNode *versioned_parameters; xmlNode *versioned_meta; } pe_rsc_action_details_t; #endif struct pe_action_s { int id; int priority; pe_resource_t *rsc; pe_node_t *node; xmlNode *op_entry; char *task; char *uuid; char *cancel_task; char *reason; enum pe_action_flags flags; enum rsc_start_requirement needs; enum action_fail_response on_fail; enum rsc_role_e fail_role; GHashTable *meta; GHashTable *extra; /* * These two varables are associated with the constraint logic * that involves first having one or more actions runnable before * then allowing this action to execute. * * These varables are used with features such as 'clone-min' which * requires at minimum X number of cloned instances to be running * before an order dependency can run. Another option that uses * this is 'require-all=false' in ordering constrants. This option * says "only require one instance of a resource to start before * allowing dependencies to start" -- basically, require-all=false is * the same as clone-min=1. */ /* current number of known runnable actions in the before list. */ int runnable_before; /* the number of "before" runnable actions required for this action * to be considered runnable */ int required_runnable_before; GListPtr actions_before; /* pe_action_wrapper_t* */ GListPtr actions_after; /* pe_action_wrapper_t* */ /* Some of the above fields could be moved to the details, * except for API backward compatibility. */ void *action_details; // varies by type of action }; typedef struct pe_ticket_s { char *id; gboolean granted; time_t last_granted; gboolean standby; GHashTable *state; } pe_ticket_t; typedef struct pe_tag_s { char *id; GListPtr refs; } pe_tag_t; enum pe_link_state { pe_link_not_dumped, pe_link_dumped, pe_link_dup, }; enum pe_discover_e { pe_discover_always = 0, pe_discover_never, pe_discover_exclusive, }; /* *INDENT-OFF* */ enum pe_ordering { pe_order_none = 0x0, /* deleted */ pe_order_optional = 0x1, /* pure ordering, nothing implied */ pe_order_apply_first_non_migratable = 0x2, /* Only apply this constraint's ordering if first is not migratable. */ pe_order_implies_first = 0x10, /* If 'then' is required, ensure 'first' is too */ pe_order_implies_then = 0x20, /* If 'first' is required, ensure 'then' is too */ pe_order_implies_first_master = 0x40, /* Imply 'first' is required when 'then' is required and then's rsc holds Master role. */ /* first requires then to be both runnable and migrate runnable. */ pe_order_implies_first_migratable = 0x80, pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */ pe_order_pseudo_left = 0x200, /* 'then' can only be pseudo if 'first' is runnable */ pe_order_implies_then_on_node = 0x400, /* If 'first' is required on 'nodeX', * ensure instances of 'then' on 'nodeX' are too. * Only really useful if 'then' is a clone and 'first' is not */ pe_order_probe = 0x800, /* If 'first->rsc' is * - running but about to stop, ignore the constraint * - otherwise, behave as runnable_left */ pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */ pe_order_stonith_stop = 0x2000, /* only applies if the action is non-pseudo */ pe_order_serialize_only = 0x4000, /* serialize */ pe_order_same_node = 0x8000, /* applies only if 'first' and 'then' are on same node */ pe_order_implies_first_printed = 0x10000, /* Like ..implies_first but only ensures 'first' is printed, not mandatory */ pe_order_implies_then_printed = 0x20000, /* Like ..implies_then but only ensures 'then' is printed, not mandatory */ pe_order_asymmetrical = 0x100000, /* Indicates asymmetrical one way ordering constraint. */ pe_order_load = 0x200000, /* Only relevant if... */ pe_order_one_or_more = 0x400000, /* 'then' is runnable only if one or more of its dependencies are too */ pe_order_anti_colocation = 0x800000, pe_order_preserve = 0x1000000, /* Hack for breaking user ordering constraints with container resources */ pe_order_then_cancels_first = 0x2000000, // if 'then' becomes required, 'first' becomes optional pe_order_trace = 0x4000000, /* test marker */ }; /* *INDENT-ON* */ typedef struct pe_action_wrapper_s { enum pe_ordering type; enum pe_link_state state; pe_action_t *action; } pe_action_wrapper_t; const char *rsc_printable_id(pe_resource_t *rsc); gboolean cluster_status(pe_working_set_t * data_set); pe_working_set_t *pe_new_working_set(void); void pe_free_working_set(pe_working_set_t *data_set); void set_working_set_defaults(pe_working_set_t * data_set); void cleanup_calculations(pe_working_set_t * data_set); void pe_reset_working_set(pe_working_set_t *data_set); pe_resource_t *pe_find_resource(GListPtr rsc_list, const char *id_rh); pe_resource_t *pe_find_resource_with_flags(GListPtr rsc_list, const char *id, enum pe_find flags); pe_node_t *pe_find_node(GListPtr node_list, const char *uname); pe_node_t *pe_find_node_id(GListPtr node_list, const char *id); pe_node_t *pe_find_node_any(GListPtr node_list, const char *id, const char *uname); GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set); int pe_bundle_replicas(const pe_resource_t *rsc); #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t *pe_rsc_action_details(pe_action_t *action); #endif /*! * \brief Check whether a resource is any clone type * * \param[in] rsc Resource to check * * \return TRUE if resource is clone, FALSE otherwise */ static inline bool pe_rsc_is_clone(pe_resource_t *rsc) { return rsc && (rsc->variant == pe_clone); } /*! * \brief Check whether a resource is a globally unique clone * * \param[in] rsc Resource to check * * \return TRUE if resource is unique clone, FALSE otherwise */ static inline bool pe_rsc_is_unique_clone(pe_resource_t *rsc) { return pe_rsc_is_clone(rsc) && is_set(rsc->flags, pe_rsc_unique); } /*! * \brief Check whether a resource is an anonymous clone * * \param[in] rsc Resource to check * * \return TRUE if resource is anonymous clone, FALSE otherwise */ static inline bool pe_rsc_is_anon_clone(pe_resource_t *rsc) { return pe_rsc_is_clone(rsc) && is_not_set(rsc->flags, pe_rsc_unique); } /*! * \brief Check whether a resource is part of a bundle * * \param[in] rsc Resource to check * * \return TRUE if resource is part of a bundle, FALSE otherwise */ static inline bool pe_rsc_is_bundled(pe_resource_t *rsc) { return uber_parent(rsc)->parent != NULL; } // Deprecated type aliases typedef struct pe_action_s action_t; //!< \deprecated Use pe_action_t instead typedef struct pe_action_wrapper_s action_wrapper_t; //!< \deprecated Use pe_action_wrapper_t instead typedef struct pe_node_s node_t; //!< \deprecated Use pe_node_t instead typedef struct pe_resource_s resource_t; //!< \deprecated Use pe_resource_t instead typedef struct pe_tag_s tag_t; //!< \deprecated Use pe_tag_t instead typedef struct pe_ticket_s ticket_t; //!< \deprecated Use pe_ticket_t instead typedef enum pe_quorum_policy no_quorum_policy_t; //!< \deprecated Use enum pe_quorum_policy instead #ifdef __cplusplus } #endif #endif diff --git a/lib/pengine/Makefile.am b/lib/pengine/Makefile.am index f54e6b94fd..b13488cf95 100644 --- a/lib/pengine/Makefile.am +++ b/lib/pengine/Makefile.am @@ -1,33 +1,33 @@ # # Copyright 2004-2018 Andrew Beekhof # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/Makefile.common ## libraries lib_LTLIBRARIES = libpe_rules.la libpe_status.la ## SOURCES noinst_HEADERS = unpack.h variant.h pe_status_private.h libpe_rules_la_LDFLAGS = -version-info 26:0:0 libpe_rules_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpe_rules_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libpe_rules_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la libpe_rules_la_SOURCES = rules.c rules_alerts.c common.c -libpe_status_la_LDFLAGS = -version-info 27:0:0 +libpe_status_la_LDFLAGS = -version-info 28:0:0 libpe_status_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpe_status_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libpe_status_la_LIBADD = @CURSESLIBS@ $(top_builddir)/lib/common/libcrmcommon.la libpe_status_la_SOURCES = status.c unpack.c utils.c complex.c native.c container.c \ group.c clone.c rules.c common.c failcounts.c remote.c clean-generic: rm -f *.log *.debug *~ diff --git a/lib/pengine/container.c b/lib/pengine/container.c index 275a41b196..7f6b226b87 100644 --- a/lib/pengine/container.c +++ b/lib/pengine/container.c @@ -1,1633 +1,1628 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #define VARIANT_CONTAINER 1 #include "./variant.h" void tuple_free(container_grouping_t *tuple); static char * next_ip(const char *last_ip) { unsigned int oct1 = 0; unsigned int oct2 = 0; unsigned int oct3 = 0; unsigned int oct4 = 0; int rc = sscanf(last_ip, "%u.%u.%u.%u", &oct1, &oct2, &oct3, &oct4); if (rc != 4) { /*@ TODO check for IPv6 */ return NULL; } else if (oct3 > 253) { return NULL; } else if (oct4 > 253) { ++oct3; oct4 = 1; } else { ++oct4; } return crm_strdup_printf("%u.%u.%u.%u", oct1, oct2, oct3, oct4); } static int allocate_ip(container_variant_data_t *data, container_grouping_t *tuple, char *buffer, int max) { if(data->ip_range_start == NULL) { return 0; } else if(data->ip_last) { tuple->ipaddr = next_ip(data->ip_last); } else { tuple->ipaddr = strdup(data->ip_range_start); } data->ip_last = tuple->ipaddr; #if 0 return snprintf(buffer, max, " --add-host=%s-%d:%s --link %s-docker-%d:%s-link-%d", data->prefix, tuple->offset, tuple->ipaddr, data->prefix, tuple->offset, data->prefix, tuple->offset); #else if (data->type == PE_CONTAINER_TYPE_DOCKER || data->type == PE_CONTAINER_TYPE_PODMAN) { if (data->add_host == FALSE) { return 0; } return snprintf(buffer, max, " --add-host=%s-%d:%s", data->prefix, tuple->offset, tuple->ipaddr); } else if (data->type == PE_CONTAINER_TYPE_RKT) { return snprintf(buffer, max, " --hosts-entry=%s=%s-%d", tuple->ipaddr, data->prefix, tuple->offset); } else { return 0; } #endif } static xmlNode * create_resource(const char *name, const char *provider, const char *kind) { xmlNode *rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, name); crm_xml_add(rsc, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF); crm_xml_add(rsc, XML_AGENT_ATTR_PROVIDER, provider); crm_xml_add(rsc, XML_ATTR_TYPE, kind); return rsc; } /*! * \internal * \brief Check whether cluster can manage resource inside container * * \param[in] data Container variant data * * \return TRUE if networking configuration is acceptable, FALSE otherwise * * \note The resource is manageable if an IP range or control port has been * specified. If a control port is used without an IP range, replicas per * host must be 1. */ static bool valid_network(container_variant_data_t *data) { if(data->ip_range_start) { return TRUE; } if(data->control_port) { if(data->replicas_per_host > 1) { pe_err("Specifying the 'control-port' for %s requires 'replicas-per-host=1'", data->prefix); data->replicas_per_host = 1; /* @TODO to be sure: clear_bit(rsc->flags, pe_rsc_unique); */ } return TRUE; } return FALSE; } static bool create_ip_resource( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { if(data->ip_range_start) { char *id = NULL; xmlNode *xml_ip = NULL; xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-ip-%s", data->prefix, tuple->ipaddr); crm_xml_sanitize_id(id); xml_ip = create_resource(id, "heartbeat", "IPaddr2"); free(id); xml_obj = create_xml_node(xml_ip, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset); crm_create_nvpair_xml(xml_obj, NULL, "ip", tuple->ipaddr); if(data->host_network) { crm_create_nvpair_xml(xml_obj, NULL, "nic", data->host_network); } if(data->host_netmask) { crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", data->host_netmask); } else { crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", "32"); } xml_obj = create_xml_node(xml_ip, "operations"); crm_create_op_xml(xml_obj, ID(xml_ip), "monitor", "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? - crm_log_xml_trace(xml_ip, "Container-ip"); if (common_unpack(xml_ip, &tuple->ip, parent, data_set) == false) { return FALSE; } parent->children = g_list_append(parent->children, tuple->ip); } return TRUE; } static bool create_docker_resource( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { int offset = 0, max = 4096; char *buffer = calloc(1, max+1); int doffset = 0, dmax = 1024; char *dbuffer = calloc(1, dmax+1); char *id = NULL; xmlNode *xml_docker = NULL; xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-docker-%d", data->prefix, tuple->offset); crm_xml_sanitize_id(id); xml_docker = create_resource(id, "heartbeat", "docker"); free(id); xml_obj = create_xml_node(xml_docker, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset); crm_create_nvpair_xml(xml_obj, NULL, "image", data->image); crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", XML_BOOLEAN_TRUE); crm_create_nvpair_xml(xml_obj, NULL, "force_kill", XML_BOOLEAN_FALSE); crm_create_nvpair_xml(xml_obj, NULL, "reuse", XML_BOOLEAN_FALSE); offset += snprintf(buffer+offset, max-offset, " --restart=no"); /* Set a container hostname only if we have an IP to map it to. * The user can set -h or --uts=host themselves if they want a nicer * name for logs, but this makes applications happy who need their * hostname to match the IP they bind to. */ if (data->ip_range_start != NULL) { offset += snprintf(buffer+offset, max-offset, " -h %s-%d", data->prefix, tuple->offset); } offset += snprintf(buffer+offset, max-offset, " -e PCMK_stderr=1"); if(data->docker_network) { // offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", tuple->ipaddr); offset += snprintf(buffer+offset, max-offset, " --net=%s", data->docker_network); } if(data->control_port) { offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%s", data->control_port); } else { offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%d", DEFAULT_REMOTE_PORT); } for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) { container_mount_t *mount = pIter->data; if(mount->flags) { char *source = crm_strdup_printf( "%s/%s-%d", mount->source, data->prefix, tuple->offset); if(doffset > 0) { doffset += snprintf(dbuffer+doffset, dmax-doffset, ","); } doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source); offset += snprintf(buffer+offset, max-offset, " -v %s:%s", source, mount->target); free(source); } else { offset += snprintf(buffer+offset, max-offset, " -v %s:%s", mount->source, mount->target); } if(mount->options) { offset += snprintf(buffer+offset, max-offset, ":%s", mount->options); } } for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) { container_port_t *port = pIter->data; if(tuple->ipaddr) { offset += snprintf(buffer+offset, max-offset, " -p %s:%s:%s", tuple->ipaddr, port->source, port->target); } else if(safe_str_neq(data->docker_network, "host")) { // No need to do port mapping if net=host offset += snprintf(buffer+offset, max-offset, " -p %s:%s", port->source, port->target); } } if(data->docker_run_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->docker_run_options); } if(data->docker_host_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->docker_host_options); } crm_create_nvpair_xml(xml_obj, NULL, "run_opts", buffer); free(buffer); crm_create_nvpair_xml(xml_obj, NULL, "mount_points", dbuffer); free(dbuffer); if(tuple->child) { if(data->docker_run_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->docker_run_command); } else { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", SBIN_DIR "/pacemaker-remoted"); } /* TODO: Allow users to specify their own? * * We just want to know if the container is alive, we'll * monitor the child independently */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); /* } else if(child && data->untrusted) { * Support this use-case? * * The ability to have resources started/stopped by us, but * unable to set attributes, etc. * * Arguably better to control API access this with ACLs like * "normal" remote nodes * * crm_create_nvpair_xml(xml_obj, NULL, * "run_cmd", * "/usr/libexec/pacemaker/pacemaker-execd"); * crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", * "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke"); */ } else { if(data->docker_run_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->docker_run_command); } /* TODO: Allow users to specify their own? * * We don't know what's in the container, so we just want * to know if it is alive */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); } xml_obj = create_xml_node(xml_docker, "operations"); crm_create_op_xml(xml_obj, ID(xml_docker), "monitor", "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? - crm_log_xml_trace(xml_docker, "Container-docker"); if (common_unpack(xml_docker, &tuple->docker, parent, data_set) == FALSE) { return FALSE; } parent->children = g_list_append(parent->children, tuple->docker); return TRUE; } static bool create_podman_resource(resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { int offset = 0, max = 4096; char *buffer = calloc(1, max+1); int doffset = 0, dmax = 1024; char *dbuffer = calloc(1, dmax+1); char *id = NULL; xmlNode *xml_podman = NULL; xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-podman-%d", data->prefix, tuple->offset); crm_xml_sanitize_id(id); xml_podman = create_resource(id, "heartbeat", "podman"); free(id); xml_obj = create_xml_node(xml_podman, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset); crm_create_nvpair_xml(xml_obj, NULL, "image", data->image); crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", XML_BOOLEAN_TRUE); crm_create_nvpair_xml(xml_obj, NULL, "force_kill", XML_BOOLEAN_FALSE); crm_create_nvpair_xml(xml_obj, NULL, "reuse", XML_BOOLEAN_FALSE); // FIXME: (bandini 2018-08) podman has no restart policies //offset += snprintf(buffer+offset, max-offset, " --restart=no"); /* Set a container hostname only if we have an IP to map it to. * The user can set -h or --uts=host themselves if they want a nicer * name for logs, but this makes applications happy who need their * hostname to match the IP they bind to. */ if (data->ip_range_start != NULL) { offset += snprintf(buffer+offset, max-offset, " -h %s-%d", data->prefix, tuple->offset); } offset += snprintf(buffer+offset, max-offset, " -e PCMK_stderr=1"); if(data->docker_network) { // FIXME: (bandini 2018-08) podman has no support for --link-local-ip //offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", tuple->ipaddr); offset += snprintf(buffer+offset, max-offset, " --net=%s", data->docker_network); } if(data->control_port) { offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%s", data->control_port); } else { offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%d", DEFAULT_REMOTE_PORT); } for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) { container_mount_t *mount = pIter->data; if(mount->flags) { char *source = crm_strdup_printf( "%s/%s-%d", mount->source, data->prefix, tuple->offset); if(doffset > 0) { doffset += snprintf(dbuffer+doffset, dmax-doffset, ","); } doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source); offset += snprintf(buffer+offset, max-offset, " -v %s:%s", source, mount->target); free(source); } else { offset += snprintf(buffer+offset, max-offset, " -v %s:%s", mount->source, mount->target); } if(mount->options) { offset += snprintf(buffer+offset, max-offset, ":%s", mount->options); } } for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) { container_port_t *port = pIter->data; if(tuple->ipaddr) { offset += snprintf(buffer+offset, max-offset, " -p %s:%s:%s", tuple->ipaddr, port->source, port->target); } else if(safe_str_neq(data->docker_network, "host")) { // No need to do port mapping if net=host offset += snprintf(buffer+offset, max-offset, " -p %s:%s", port->source, port->target); } } if(data->docker_run_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->docker_run_options); } if(data->docker_host_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->docker_host_options); } crm_create_nvpair_xml(xml_obj, NULL, "run_opts", buffer); free(buffer); crm_create_nvpair_xml(xml_obj, NULL, "mount_points", dbuffer); free(dbuffer); if(tuple->child) { if(data->docker_run_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->docker_run_command); } else { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", SBIN_DIR "/pacemaker-remoted"); } /* TODO: Allow users to specify their own? * * We just want to know if the container is alive, we'll * monitor the child independently */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); /* } else if(child && data->untrusted) { * Support this use-case? * * The ability to have resources started/stopped by us, but * unable to set attributes, etc. * * Arguably better to control API access this with ACLs like * "normal" remote nodes * * crm_create_nvpair_xml(xml_obj, NULL, * "run_cmd", * "/usr/libexec/pacemaker/pacemaker-execd"); * crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", * "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke"); */ } else { if(data->docker_run_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->docker_run_command); } /* TODO: Allow users to specify their own? * * We don't know what's in the container, so we just want * to know if it is alive */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); } xml_obj = create_xml_node(xml_podman, "operations"); crm_create_op_xml(xml_obj, ID(xml_podman), "monitor", "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? - crm_log_xml_trace(xml_podman, "Container-podman"); if (common_unpack(xml_podman, &tuple->docker, parent, data_set) == FALSE) { return FALSE; } parent->children = g_list_append(parent->children, tuple->docker); return TRUE; } static bool create_rkt_resource( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { int offset = 0, max = 4096; char *buffer = calloc(1, max+1); int doffset = 0, dmax = 1024; char *dbuffer = calloc(1, dmax+1); char *id = NULL; xmlNode *xml_docker = NULL; xmlNode *xml_obj = NULL; int volid = 0; id = crm_strdup_printf("%s-rkt-%d", data->prefix, tuple->offset); crm_xml_sanitize_id(id); xml_docker = create_resource(id, "heartbeat", "rkt"); free(id); xml_obj = create_xml_node(xml_docker, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset); crm_create_nvpair_xml(xml_obj, NULL, "image", data->image); crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", "true"); crm_create_nvpair_xml(xml_obj, NULL, "force_kill", "false"); crm_create_nvpair_xml(xml_obj, NULL, "reuse", "false"); /* Set a container hostname only if we have an IP to map it to. * The user can set -h or --uts=host themselves if they want a nicer * name for logs, but this makes applications happy who need their * hostname to match the IP they bind to. */ if (data->ip_range_start != NULL) { offset += snprintf(buffer+offset, max-offset, " --hostname=%s-%d", data->prefix, tuple->offset); } offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_stderr=1"); if(data->docker_network) { // offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", tuple->ipaddr); offset += snprintf(buffer+offset, max-offset, " --net=%s", data->docker_network); } if(data->control_port) { offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_remote_port=%s", data->control_port); } else { offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_remote_port=%d", DEFAULT_REMOTE_PORT); } for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) { container_mount_t *mount = pIter->data; if(mount->flags) { char *source = crm_strdup_printf( "%s/%s-%d", mount->source, data->prefix, tuple->offset); if(doffset > 0) { doffset += snprintf(dbuffer+doffset, dmax-doffset, ","); } doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source); offset += snprintf(buffer+offset, max-offset, " --volume vol%d,kind=host,source=%s", volid, source); if(mount->options) { offset += snprintf(buffer+offset, max-offset, ",%s", mount->options); } offset += snprintf(buffer+offset, max-offset, " --mount volume=vol%d,target=%s", volid, mount->target); free(source); } else { offset += snprintf(buffer+offset, max-offset, " --volume vol%d,kind=host,source=%s", volid, mount->source); if(mount->options) { offset += snprintf(buffer+offset, max-offset, ",%s", mount->options); } offset += snprintf(buffer+offset, max-offset, " --mount volume=vol%d,target=%s", volid, mount->target); } volid++; } for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) { container_port_t *port = pIter->data; if(tuple->ipaddr) { offset += snprintf(buffer+offset, max-offset, " --port=%s:%s:%s", port->target, tuple->ipaddr, port->source); } else { offset += snprintf(buffer+offset, max-offset, " --port=%s:%s", port->target, port->source); } } if(data->docker_run_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->docker_run_options); } if(data->docker_host_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->docker_host_options); } crm_create_nvpair_xml(xml_obj, NULL, "run_opts", buffer); free(buffer); crm_create_nvpair_xml(xml_obj, NULL, "mount_points", dbuffer); free(dbuffer); if(tuple->child) { if(data->docker_run_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->docker_run_command); } else { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", SBIN_DIR "/pacemaker-remoted"); } /* TODO: Allow users to specify their own? * * We just want to know if the container is alive, we'll * monitor the child independently */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); /* } else if(child && data->untrusted) { * Support this use-case? * * The ability to have resources started/stopped by us, but * unable to set attributes, etc. * * Arguably better to control API access this with ACLs like * "normal" remote nodes * * crm_create_nvpair_xml(xml_obj, NULL, * "run_cmd", * "/usr/libexec/pacemaker/pacemaker-execd"); * crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", * "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke"); */ } else { if(data->docker_run_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->docker_run_command); } /* TODO: Allow users to specify their own? * * We don't know what's in the container, so we just want * to know if it is alive */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); } xml_obj = create_xml_node(xml_docker, "operations"); crm_create_op_xml(xml_obj, ID(xml_docker), "monitor", "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? - crm_log_xml_trace(xml_docker, "Container-rkt"); if (common_unpack(xml_docker, &tuple->docker, parent, data_set) == FALSE) { return FALSE; } parent->children = g_list_append(parent->children, tuple->docker); return TRUE; } /*! * \brief Ban a node from a resource's (and its children's) allowed nodes list * * \param[in,out] rsc Resource to modify * \param[in] uname Name of node to ban */ static void disallow_node(resource_t *rsc, const char *uname) { gpointer match = g_hash_table_lookup(rsc->allowed_nodes, uname); if (match) { ((pe_node_t *) match)->weight = -INFINITY; ((pe_node_t *) match)->rsc_discover_mode = pe_discover_never; } if (rsc->children) { GListPtr child; for (child = rsc->children; child != NULL; child = child->next) { disallow_node((resource_t *) (child->data), uname); } } } static bool create_remote_resource( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { if (tuple->child && valid_network(data)) { GHashTableIter gIter; GListPtr rsc_iter = NULL; node_t *node = NULL; xmlNode *xml_remote = NULL; char *id = crm_strdup_printf("%s-%d", data->prefix, tuple->offset); char *port_s = NULL; const char *uname = NULL; const char *connect_name = NULL; if (remote_id_conflict(id, data_set)) { free(id); // The biggest hammer we have id = crm_strdup_printf("pcmk-internal-%s-remote-%d", tuple->child->id, tuple->offset); CRM_ASSERT(remote_id_conflict(id, data_set) == FALSE); } /* REMOTE_CONTAINER_HACK: Using "#uname" as the server name when the * connection does not have its own IP is a magic string that we use to * support nested remotes (i.e. a bundle running on a remote node). */ connect_name = (tuple->ipaddr? tuple->ipaddr : "#uname"); if (data->control_port == NULL) { port_s = crm_itoa(DEFAULT_REMOTE_PORT); } /* This sets tuple->docker as tuple->remote's container, which is * similar to what happens with guest nodes. This is how the PE knows * that the bundle node is fenced by recovering docker, and that * remote should be ordered relative to docker. */ xml_remote = pe_create_remote_xml(NULL, id, tuple->docker->id, NULL, NULL, NULL, connect_name, (data->control_port? data->control_port : port_s)); free(port_s); /* Abandon our created ID, and pull the copy from the XML, because we * need something that will get freed during data set cleanup to use as * the node ID and uname. */ free(id); id = NULL; uname = ID(xml_remote); /* Ensure a node has been created for the guest (it may have already * been, if it has a permanent node attribute), and ensure its weight is * -INFINITY so no other resources can run on it. */ node = pe_find_node(data_set->nodes, uname); if (node == NULL) { node = pe_create_node(uname, uname, "remote", "-INFINITY", data_set); } else { node->weight = -INFINITY; } node->rsc_discover_mode = pe_discover_never; /* unpack_remote_nodes() ensures that each remote node and guest node * has a pe_node_t entry. Ideally, it would do the same for bundle nodes. * Unfortunately, a bundle has to be mostly unpacked before it's obvious * what nodes will be needed, so we do it just above. * * Worse, that means that the node may have been utilized while * unpacking other resources, without our weight correction. The most * likely place for this to happen is when common_unpack() calls * resource_location() to set a default score in symmetric clusters. * This adds a node *copy* to each resource's allowed nodes, and these * copies will have the wrong weight. * * As a hacky workaround, fix those copies here. * * @TODO Possible alternative: ensure bundles are unpacked before other * resources, so the weight is correct before any copies are made. */ for (rsc_iter = data_set->resources; rsc_iter; rsc_iter = rsc_iter->next) { disallow_node((resource_t *) (rsc_iter->data), uname); } tuple->node = node_copy(node); tuple->node->weight = 500; tuple->node->rsc_discover_mode = pe_discover_exclusive; /* Ensure the node shows up as allowed and with the correct discovery set */ if (tuple->child->allowed_nodes != NULL) { g_hash_table_destroy(tuple->child->allowed_nodes); } tuple->child->allowed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); g_hash_table_insert(tuple->child->allowed_nodes, (gpointer) tuple->node->details->id, node_copy(tuple->node)); { node_t *copy = node_copy(tuple->node); copy->weight = -INFINITY; g_hash_table_insert(tuple->child->parent->allowed_nodes, (gpointer) tuple->node->details->id, copy); } - crm_log_xml_trace(xml_remote, "Container-remote"); if (common_unpack(xml_remote, &tuple->remote, parent, data_set) == FALSE) { return FALSE; } g_hash_table_iter_init(&gIter, tuple->remote->allowed_nodes); while (g_hash_table_iter_next(&gIter, NULL, (void **)&node)) { if(is_remote_node(node)) { /* Remote resources can only run on 'normal' cluster node */ node->weight = -INFINITY; } } tuple->node->details->remote_rsc = tuple->remote; tuple->remote->container = tuple->docker; // Ensures is_container_remote_node() functions correctly immediately /* A bundle's #kind is closer to "container" (guest node) than the * "remote" set by pe_create_node(). */ g_hash_table_insert(tuple->node->details->attrs, strdup(CRM_ATTR_KIND), strdup("container")); /* One effect of this is that setup_container() will add * tuple->remote to tuple->docker's fillers, which will make * rsc_contains_remote_node() true for tuple->docker. * * tuple->child does NOT get added to tuple->docker's fillers. * The only noticeable effect if it did would be for its fail count to * be taken into account when checking tuple->docker's migration * threshold. */ parent->children = g_list_append(parent->children, tuple->remote); } return TRUE; } static bool create_container( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { if (data->type == PE_CONTAINER_TYPE_DOCKER && create_docker_resource(parent, data, tuple, data_set) == FALSE) { return FALSE; } if (data->type == PE_CONTAINER_TYPE_PODMAN && create_podman_resource(parent, data, tuple, data_set) == FALSE) { return FALSE; } if (data->type == PE_CONTAINER_TYPE_RKT && create_rkt_resource(parent, data, tuple, data_set) == FALSE) { return FALSE; } if(create_ip_resource(parent, data, tuple, data_set) == FALSE) { return FALSE; } if(create_remote_resource(parent, data, tuple, data_set) == FALSE) { return FALSE; } if(tuple->child && tuple->ipaddr) { add_hash_param(tuple->child->meta, "external-ip", tuple->ipaddr); } if(tuple->remote) { /* * Allow the remote connection resource to be allocated to a * different node than the one on which the docker container * is active. * * This makes it possible to have Pacemaker Remote nodes running * containers with pacemaker-remoted inside in order to start * services inside those containers. */ set_bit(tuple->remote->flags, pe_rsc_allow_remote_remotes); } return TRUE; } static void mount_add(container_variant_data_t *container_data, const char *source, const char *target, const char *options, int flags) { container_mount_t *mount = calloc(1, sizeof(container_mount_t)); mount->source = strdup(source); mount->target = strdup(target); if (options) { mount->options = strdup(options); } mount->flags = flags; container_data->mounts = g_list_append(container_data->mounts, mount); } static void mount_free(container_mount_t *mount) { free(mount->source); free(mount->target); free(mount->options); free(mount); } static void port_free(container_port_t *port) { free(port->source); free(port->target); free(port); } static container_grouping_t * tuple_for_remote(resource_t *remote) { resource_t *top = remote; container_variant_data_t *container_data = NULL; if (top == NULL) { return NULL; } while (top->parent != NULL) { top = top->parent; } get_container_variant_data(container_data, top); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; if(tuple->remote == remote) { return tuple; } } CRM_LOG_ASSERT(FALSE); return NULL; } bool container_fix_remote_addr(resource_t *rsc) { const char *name; const char *value; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; const char *value_list[] = { "remote", PCMK_RESOURCE_CLASS_OCF, "pacemaker" }; if(rsc == NULL) { return FALSE; } name = "addr"; value = g_hash_table_lookup(rsc->parameters, name); if (safe_str_eq(value, "#uname") == FALSE) { return FALSE; } for (int lpc = 0; lpc < DIMOF(attr_list); lpc++) { value = crm_element_value(rsc->xml, attr_list[lpc]); if (safe_str_eq(value, value_list[lpc]) == FALSE) { return FALSE; } } return TRUE; } const char * container_fix_remote_addr_in(resource_t *rsc, xmlNode *xml, const char *field) { // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside pe_node_t *node = NULL; container_grouping_t *tuple = NULL; if(container_fix_remote_addr(rsc) == FALSE) { return NULL; } tuple = tuple_for_remote(rsc); if(tuple == NULL) { return NULL; } node = tuple->docker->allocated_to; if (node == NULL) { /* If it won't be running anywhere after the * transition, go with where it's running now. */ node = pe__current_node(tuple->docker); } if(node == NULL) { crm_trace("Cannot determine address for bundle connection %s", rsc->id); return NULL; } crm_trace("Setting address for bundle connection %s to bundle host %s", rsc->id, node->details->uname); if(xml != NULL && field != NULL) { crm_xml_add(xml, field, node->details->uname); } return node->details->uname; } gboolean container_unpack(resource_t * rsc, pe_working_set_t * data_set) { const char *value = NULL; xmlNode *xml_obj = NULL; xmlNode *xml_resource = NULL; container_variant_data_t *container_data = NULL; CRM_ASSERT(rsc != NULL); pe_rsc_trace(rsc, "Processing resource %s...", rsc->id); container_data = calloc(1, sizeof(container_variant_data_t)); rsc->variant_opaque = container_data; container_data->prefix = strdup(rsc->id); xml_obj = first_named_child(rsc->xml, "docker"); if (xml_obj != NULL) { container_data->type = PE_CONTAINER_TYPE_DOCKER; } else { xml_obj = first_named_child(rsc->xml, "rkt"); if (xml_obj != NULL) { container_data->type = PE_CONTAINER_TYPE_RKT; } else { xml_obj = first_named_child(rsc->xml, "podman"); if (xml_obj != NULL) { container_data->type = PE_CONTAINER_TYPE_PODMAN; } else { return FALSE; } } } value = crm_element_value(xml_obj, XML_RSC_ATTR_PROMOTED_MAX); if (value == NULL) { // @COMPAT deprecated since 2.0.0 value = crm_element_value(xml_obj, "masters"); } container_data->promoted_max = crm_parse_int(value, "0"); if (container_data->promoted_max < 0) { pe_err("%s for %s must be nonnegative integer, using 0", XML_RSC_ATTR_PROMOTED_MAX, rsc->id); container_data->promoted_max = 0; } value = crm_element_value(xml_obj, "replicas"); if ((value == NULL) && container_data->promoted_max) { container_data->replicas = container_data->promoted_max; } else { container_data->replicas = crm_parse_int(value, "1"); } if (container_data->replicas < 1) { pe_err("'replicas' for %s must be positive integer, using 1", rsc->id); container_data->replicas = 1; } /* * Communication between containers on the same host via the * floating IPs only works if docker is started with: * --userland-proxy=false --ip-masq=false */ value = crm_element_value(xml_obj, "replicas-per-host"); container_data->replicas_per_host = crm_parse_int(value, "1"); if (container_data->replicas_per_host < 1) { pe_err("'replicas-per-host' for %s must be positive integer, using 1", rsc->id); container_data->replicas_per_host = 1; } if (container_data->replicas_per_host == 1) { clear_bit(rsc->flags, pe_rsc_unique); } container_data->docker_run_command = crm_element_value_copy(xml_obj, "run-command"); container_data->docker_run_options = crm_element_value_copy(xml_obj, "options"); container_data->image = crm_element_value_copy(xml_obj, "image"); container_data->docker_network = crm_element_value_copy(xml_obj, "network"); xml_obj = first_named_child(rsc->xml, "network"); if(xml_obj) { container_data->ip_range_start = crm_element_value_copy(xml_obj, "ip-range-start"); container_data->host_netmask = crm_element_value_copy(xml_obj, "host-netmask"); container_data->host_network = crm_element_value_copy(xml_obj, "host-interface"); container_data->control_port = crm_element_value_copy(xml_obj, "control-port"); value = crm_element_value(xml_obj, "add-host"); if (check_boolean(value) == FALSE) { container_data->add_host = TRUE; } else { crm_str_to_boolean(value, &container_data->add_host); } for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL; xml_child = __xml_next_element(xml_child)) { container_port_t *port = calloc(1, sizeof(container_port_t)); port->source = crm_element_value_copy(xml_child, "port"); if(port->source == NULL) { port->source = crm_element_value_copy(xml_child, "range"); } else { port->target = crm_element_value_copy(xml_child, "internal-port"); } if(port->source != NULL && strlen(port->source) > 0) { if(port->target == NULL) { port->target = strdup(port->source); } container_data->ports = g_list_append(container_data->ports, port); } else { pe_err("Invalid port directive %s", ID(xml_child)); port_free(port); } } } xml_obj = first_named_child(rsc->xml, "storage"); for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL; xml_child = __xml_next_element(xml_child)) { const char *source = crm_element_value(xml_child, "source-dir"); const char *target = crm_element_value(xml_child, "target-dir"); const char *options = crm_element_value(xml_child, "options"); int flags = 0; if (source == NULL) { source = crm_element_value(xml_child, "source-dir-root"); flags = 1; } if (source && target) { mount_add(container_data, source, target, options, flags); } else { pe_err("Invalid mount directive %s", ID(xml_child)); } } xml_obj = first_named_child(rsc->xml, "primitive"); if (xml_obj && valid_network(container_data)) { char *value = NULL; xmlNode *xml_set = NULL; xml_resource = create_xml_node(NULL, XML_CIB_TAG_INCARNATION); /* @COMPAT We no longer use the tag, but we need to keep it as * part of the resource name, so that bundles don't restart in a rolling * upgrade. (It also avoids needing to change regression tests.) */ crm_xml_set_id(xml_resource, "%s-%s", container_data->prefix, (container_data->promoted_max? "master" : (const char *)xml_resource->name)); xml_set = create_xml_node(xml_resource, XML_TAG_META_SETS); crm_xml_set_id(xml_set, "%s-%s-meta", container_data->prefix, xml_resource->name); crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_ORDERED, XML_BOOLEAN_TRUE); value = crm_itoa(container_data->replicas); crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_INCARNATION_MAX, value); free(value); value = crm_itoa(container_data->replicas_per_host); crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_INCARNATION_NODEMAX, value); free(value); crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_UNIQUE, (container_data->replicas_per_host > 1)? XML_BOOLEAN_TRUE : XML_BOOLEAN_FALSE); if (container_data->promoted_max) { crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_PROMOTABLE, XML_BOOLEAN_TRUE); value = crm_itoa(container_data->promoted_max); crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_PROMOTED_MAX, value); free(value); } //crm_xml_add(xml_obj, XML_ATTR_ID, container_data->prefix); add_node_copy(xml_resource, xml_obj); } else if(xml_obj) { pe_err("Cannot control %s inside %s without either ip-range-start or control-port", rsc->id, ID(xml_obj)); return FALSE; } if(xml_resource) { int lpc = 0; GListPtr childIter = NULL; resource_t *new_rsc = NULL; container_port_t *port = NULL; int offset = 0, max = 1024; char *buffer = NULL; if (common_unpack(xml_resource, &new_rsc, rsc, data_set) == FALSE) { pe_err("Failed unpacking resource %s", ID(rsc->xml)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } return FALSE; } container_data->child = new_rsc; /* Currently, we always map the default authentication key location * into the same location inside the container. * * Ideally, we would respect the host's PCMK_authkey_location, but: * - it may be different on different nodes; * - the actual connection will do extra checking to make sure the key * file exists and is readable, that we can't do here on the DC * - tools such as crm_resource and crm_simulate may not have the same * environment variables as the cluster, causing operation digests to * differ * * Always using the default location inside the container is fine, * because we control the pacemaker_remote environment, and it avoids * having to pass another environment variable to the container. * * @TODO A better solution may be to have only pacemaker_remote use the * environment variable, and have the cluster nodes use a new * cluster option for key location. This would introduce the limitation * of the location being the same on all cluster nodes, but that's * reasonable. */ mount_add(container_data, DEFAULT_REMOTE_KEY_LOCATION, DEFAULT_REMOTE_KEY_LOCATION, NULL, 0); mount_add(container_data, CRM_BUNDLE_DIR, "/var/log", NULL, 1); port = calloc(1, sizeof(container_port_t)); if(container_data->control_port) { port->source = strdup(container_data->control_port); } else { /* If we wanted to respect PCMK_remote_port, we could use * crm_default_remote_port() here and elsewhere in this file instead * of DEFAULT_REMOTE_PORT. * * However, it gains nothing, since we control both the container * environment and the connection resource parameters, and the user * can use a different port if desired by setting control-port. */ port->source = crm_itoa(DEFAULT_REMOTE_PORT); } port->target = strdup(port->source); container_data->ports = g_list_append(container_data->ports, port); buffer = calloc(1, max+1); for(childIter = container_data->child->children; childIter != NULL; childIter = childIter->next) { container_grouping_t *tuple = calloc(1, sizeof(container_grouping_t)); tuple->child = childIter->data; tuple->child->exclusive_discover = TRUE; tuple->offset = lpc++; // Ensure the child's notify gets set based on the underlying primitive's value if(is_set(tuple->child->flags, pe_rsc_notify)) { set_bit(container_data->child->flags, pe_rsc_notify); } offset += allocate_ip(container_data, tuple, buffer+offset, max-offset); container_data->tuples = g_list_append(container_data->tuples, tuple); container_data->attribute_target = g_hash_table_lookup(tuple->child->meta, XML_RSC_ATTR_TARGET); } container_data->docker_host_options = buffer; if(container_data->attribute_target) { g_hash_table_replace(rsc->meta, strdup(XML_RSC_ATTR_TARGET), strdup(container_data->attribute_target)); g_hash_table_replace(container_data->child->meta, strdup(XML_RSC_ATTR_TARGET), strdup(container_data->attribute_target)); } } else { // Just a naked container, no pacemaker-remote int offset = 0, max = 1024; char *buffer = calloc(1, max+1); for(int lpc = 0; lpc < container_data->replicas; lpc++) { container_grouping_t *tuple = calloc(1, sizeof(container_grouping_t)); tuple->offset = lpc; offset += allocate_ip(container_data, tuple, buffer+offset, max-offset); container_data->tuples = g_list_append(container_data->tuples, tuple); } container_data->docker_host_options = buffer; } for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; if (create_container(rsc, container_data, tuple, data_set) == FALSE) { pe_err("Failed unpacking resource %s", rsc->id); rsc->fns->free(rsc); return FALSE; } } if(container_data->child) { rsc->children = g_list_append(rsc->children, container_data->child); } return TRUE; } static int tuple_rsc_active(resource_t *rsc, gboolean all) { if (rsc) { gboolean child_active = rsc->fns->active(rsc, all); if (child_active && !all) { return TRUE; } else if (!child_active && all) { return FALSE; } } return -1; } gboolean container_active(resource_t * rsc, gboolean all) { container_variant_data_t *container_data = NULL; GListPtr iter = NULL; get_container_variant_data(container_data, rsc); for (iter = container_data->tuples; iter != NULL; iter = iter->next) { container_grouping_t *tuple = (container_grouping_t *)(iter->data); int rsc_active; rsc_active = tuple_rsc_active(tuple->ip, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } rsc_active = tuple_rsc_active(tuple->child, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } rsc_active = tuple_rsc_active(tuple->docker, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } rsc_active = tuple_rsc_active(tuple->remote, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } } /* If "all" is TRUE, we've already checked that no resources were inactive, * so return TRUE; if "all" is FALSE, we didn't find any active resources, * so return FALSE. */ return all; } /*! * \internal * \brief Find the container child corresponding to a given node * * \param[in] bundle Top-level bundle resource * \param[in] node Node to search for * * \return Container child if found, NULL otherwise */ resource_t * find_container_child(const resource_t *bundle, const node_t *node) { container_variant_data_t *container_data = NULL; CRM_ASSERT(bundle && node); get_container_variant_data(container_data, bundle); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple && tuple->node); if (tuple->node->details == node->details) { return tuple->child; } } return NULL; } static void print_rsc_in_list(resource_t *rsc, const char *pre_text, long options, void *print_data) { if (rsc != NULL) { if (options & pe_print_html) { status_print("
  • "); } rsc->fns->print(rsc, pre_text, options, print_data); if (options & pe_print_html) { status_print("
  • \n"); } } } static const char* container_type_as_string(enum container_type t) { if (t == PE_CONTAINER_TYPE_DOCKER) { return PE_CONTAINER_TYPE_DOCKER_S; } else if (t == PE_CONTAINER_TYPE_RKT) { return PE_CONTAINER_TYPE_RKT_S; } else if (t == PE_CONTAINER_TYPE_PODMAN) { return PE_CONTAINER_TYPE_PODMAN_S; } else { return PE_CONTAINER_TYPE_UNKNOWN_S; } } static void container_print_xml(resource_t * rsc, const char *pre_text, long options, void *print_data) { container_variant_data_t *container_data = NULL; char *child_text = NULL; CRM_CHECK(rsc != NULL, return); if (pre_text == NULL) { pre_text = ""; } child_text = crm_concat(pre_text, " ", ' '); get_container_variant_data(container_data, rsc); status_print("%sid); // Always lowercase the container technology type for use as XML value status_print("type=\""); for (const char *c = container_type_as_string(container_data->type); *c; ++c) { status_print("%c", tolower(*c)); } status_print("\" "); status_print("image=\"%s\" ", container_data->image); status_print("unique=\"%s\" ", is_set(rsc->flags, pe_rsc_unique)? "true" : "false"); status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false"); status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? "true" : "false"); status_print(">\n"); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); status_print("%s \n", pre_text, tuple->offset); print_rsc_in_list(tuple->ip, child_text, options, print_data); print_rsc_in_list(tuple->child, child_text, options, print_data); print_rsc_in_list(tuple->docker, child_text, options, print_data); print_rsc_in_list(tuple->remote, child_text, options, print_data); status_print("%s \n", pre_text); } status_print("%s\n", pre_text); free(child_text); } static void tuple_print(container_grouping_t * tuple, const char *pre_text, long options, void *print_data) { node_t *node = NULL; resource_t *rsc = tuple->child; int offset = 0; char buffer[LINE_MAX]; if(rsc == NULL) { rsc = tuple->docker; } if(tuple->remote) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(tuple->remote)); } else { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(tuple->docker)); } if(tuple->ipaddr) { offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", tuple->ipaddr); } node = pe__current_node(tuple->docker); common_print(rsc, pre_text, buffer, node, options, print_data); } void container_print(resource_t * rsc, const char *pre_text, long options, void *print_data) { container_variant_data_t *container_data = NULL; char *child_text = NULL; CRM_CHECK(rsc != NULL, return); if (options & pe_print_xml) { container_print_xml(rsc, pre_text, options, print_data); return; } get_container_variant_data(container_data, rsc); if (pre_text == NULL) { pre_text = " "; } status_print("%s%s container%s: %s [%s]%s%s\n", pre_text, container_type_as_string(container_data->type), container_data->replicas>1?" set":"", rsc->id, container_data->image, is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); if (options & pe_print_html) { status_print("
    \n
      \n"); } for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if (options & pe_print_html) { status_print("
    • "); } if (is_set(options, pe_print_implicit)) { child_text = crm_strdup_printf(" %s", pre_text); if(g_list_length(container_data->tuples) > 1) { status_print(" %sReplica[%d]\n", pre_text, tuple->offset); } if (options & pe_print_html) { status_print("
      \n
        \n"); } print_rsc_in_list(tuple->ip, child_text, options, print_data); print_rsc_in_list(tuple->docker, child_text, options, print_data); print_rsc_in_list(tuple->remote, child_text, options, print_data); print_rsc_in_list(tuple->child, child_text, options, print_data); if (options & pe_print_html) { status_print("
      \n"); } } else { child_text = crm_strdup_printf("%s ", pre_text); tuple_print(tuple, child_text, options, print_data); } free(child_text); if (options & pe_print_html) { status_print("
    • \n"); } } if (options & pe_print_html) { status_print("
    \n"); } } void tuple_free(container_grouping_t *tuple) { if(tuple == NULL) { return; } if(tuple->node) { free(tuple->node); tuple->node = NULL; } if(tuple->ip) { free_xml(tuple->ip->xml); tuple->ip->xml = NULL; tuple->ip->fns->free(tuple->ip); tuple->ip = NULL; } if(tuple->docker) { free_xml(tuple->docker->xml); tuple->docker->xml = NULL; tuple->docker->fns->free(tuple->docker); tuple->docker = NULL; } if(tuple->remote) { free_xml(tuple->remote->xml); tuple->remote->xml = NULL; tuple->remote->fns->free(tuple->remote); tuple->remote = NULL; } free(tuple->ipaddr); free(tuple); } void container_free(resource_t * rsc) { container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return); get_container_variant_data(container_data, rsc); pe_rsc_trace(rsc, "Freeing %s", rsc->id); free(container_data->prefix); free(container_data->image); free(container_data->control_port); free(container_data->host_network); free(container_data->host_netmask); free(container_data->ip_range_start); free(container_data->docker_network); free(container_data->docker_run_options); free(container_data->docker_run_command); free(container_data->docker_host_options); g_list_free_full(container_data->tuples, (GDestroyNotify)tuple_free); g_list_free_full(container_data->mounts, (GDestroyNotify)mount_free); g_list_free_full(container_data->ports, (GDestroyNotify)port_free); g_list_free(rsc->children); if(container_data->child) { free_xml(container_data->child->xml); container_data->child->xml = NULL; container_data->child->fns->free(container_data->child); } common_free(rsc); } enum rsc_role_e container_resource_state(const resource_t * rsc, gboolean current) { enum rsc_role_e container_role = RSC_ROLE_UNKNOWN; return container_role; } /*! * \brief Get the number of configured replicas in a bundle * * \param[in] rsc Bundle resource * * \return Number of configured replicas, or 0 on error */ int pe_bundle_replicas(const resource_t *rsc) { if ((rsc == NULL) || (rsc->variant != pe_container)) { return 0; } else { container_variant_data_t *container_data = NULL; get_container_variant_data(container_data, rsc); return container_data->replicas; } } diff --git a/lib/pengine/failcounts.c b/lib/pengine/failcounts.c index ef9f0b9883..8f01c0770d 100644 --- a/lib/pengine/failcounts.c +++ b/lib/pengine/failcounts.c @@ -1,363 +1,375 @@ /* * Copyright 2008-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include static gboolean is_matched_failure(const char *rsc_id, xmlNode *conf_op_xml, xmlNode *lrm_op_xml) { gboolean matched = FALSE; const char *conf_op_name = NULL; const char *lrm_op_task = NULL; const char *conf_op_interval_spec = NULL; guint conf_op_interval_ms = 0; guint lrm_op_interval_ms = 0; const char *lrm_op_id = NULL; char *last_failure_key = NULL; if (rsc_id == NULL || conf_op_xml == NULL || lrm_op_xml == NULL) { return FALSE; } // Get name and interval from configured op conf_op_name = crm_element_value(conf_op_xml, "name"); conf_op_interval_spec = crm_element_value(conf_op_xml, XML_LRM_ATTR_INTERVAL); conf_op_interval_ms = crm_parse_interval_spec(conf_op_interval_spec); // Get name and interval from op history entry lrm_op_task = crm_element_value(lrm_op_xml, XML_LRM_ATTR_TASK); crm_element_value_ms(lrm_op_xml, XML_LRM_ATTR_INTERVAL_MS, &lrm_op_interval_ms); if ((conf_op_interval_ms != lrm_op_interval_ms) || safe_str_neq(conf_op_name, lrm_op_task)) { return FALSE; } lrm_op_id = ID(lrm_op_xml); last_failure_key = generate_op_key(rsc_id, "last_failure", 0); if (safe_str_eq(last_failure_key, lrm_op_id)) { matched = TRUE; } else { char *expected_op_key = generate_op_key(rsc_id, conf_op_name, conf_op_interval_ms); if (safe_str_eq(expected_op_key, lrm_op_id)) { int rc = 0; int target_rc = get_target_rc(lrm_op_xml); crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_RC, &rc); if (rc != target_rc) { matched = TRUE; } } free(expected_op_key); } free(last_failure_key); return matched; } static gboolean block_failure(node_t *node, resource_t *rsc, xmlNode *xml_op, pe_working_set_t *data_set) { char *xml_name = clone_strip(rsc->id); /* @TODO This xpath search occurs after template expansion, but it is unable * to properly detect on-fail in id-ref, operation meta-attributes, or * op_defaults, or evaluate rules. * * Also, on-fail defaults to block (in unpack_operation()) for stop actions * when stonith is disabled. * * Ideally, we'd unpack the operation before this point, and pass in a * meta-attributes table that takes all that into consideration. */ char *xpath = crm_strdup_printf("//primitive[@id='%s']//op[@on-fail='block']", xml_name); xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath); gboolean should_block = FALSE; free(xpath); if (xpathObj) { int max = numXpathResults(xpathObj); int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *pref = getXpathResult(xpathObj, lpc); if (xml_op) { should_block = is_matched_failure(xml_name, pref, xml_op); if (should_block) { break; } } else { const char *conf_op_name = NULL; const char *conf_op_interval_spec = NULL; guint conf_op_interval_ms = 0; char *lrm_op_xpath = NULL; xmlXPathObject *lrm_op_xpathObj = NULL; // Get name and interval from configured op conf_op_name = crm_element_value(pref, "name"); conf_op_interval_spec = crm_element_value(pref, XML_LRM_ATTR_INTERVAL); conf_op_interval_ms = crm_parse_interval_spec(conf_op_interval_spec); lrm_op_xpath = crm_strdup_printf("//node_state[@uname='%s']" "//lrm_resource[@id='%s']" "/lrm_rsc_op[@operation='%s'][@interval='%u']", node->details->uname, xml_name, conf_op_name, conf_op_interval_ms); lrm_op_xpathObj = xpath_search(data_set->input, lrm_op_xpath); free(lrm_op_xpath); if (lrm_op_xpathObj) { int max2 = numXpathResults(lrm_op_xpathObj); int lpc2 = 0; for (lpc2 = 0; lpc2 < max2; lpc2++) { xmlNode *lrm_op_xml = getXpathResult(lrm_op_xpathObj, lpc2); should_block = is_matched_failure(xml_name, pref, lrm_op_xml); if (should_block) { break; } } } freeXpathObject(lrm_op_xpathObj); if (should_block) { break; } } } } free(xml_name); freeXpathObject(xpathObj); return should_block; } /*! * \internal * \brief Get resource name as used in failure-related node attributes * * \param[in] rsc Resource to check * * \return Newly allocated string containing resource's fail name * \note The caller is responsible for freeing the result. */ static inline char * rsc_fail_name(resource_t *rsc) { const char *name = (rsc->clone_name? rsc->clone_name : rsc->id); return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); } /*! * \internal * \brief Compile regular expression to match a failure-related node attribute * * \param[in] prefix Attribute prefix to match * \param[in] rsc_name Resource name to match as used in failure attributes * \param[in] is_legacy Whether DC uses per-resource fail counts * \param[in] is_unique Whether the resource is a globally unique clone * \param[out] re Where to store resulting regular expression * * \note Fail attributes are named like PREFIX-RESOURCE#OP_INTERVAL. * The caller is responsible for freeing re with regfree(). */ static void generate_fail_regex(const char *prefix, const char *rsc_name, gboolean is_legacy, gboolean is_unique, regex_t *re) { char *pattern; /* @COMPAT DC < 1.1.17: Fail counts used to be per-resource rather than * per-operation. */ const char *op_pattern = (is_legacy? "" : "#.+_[0-9]+"); /* Ignore instance numbers for anything other than globally unique clones. * Anonymous clone fail counts could contain an instance number if the * clone was initially unique, failed, then was converted to anonymous. * @COMPAT Also, before 1.1.8, anonymous clone fail counts always contained * clone instance numbers. */ const char *instance_pattern = (is_unique? "" : "(:[0-9]+)?"); pattern = crm_strdup_printf("^%s-%s%s%s$", prefix, rsc_name, instance_pattern, op_pattern); CRM_LOG_ASSERT(regcomp(re, pattern, REG_EXTENDED|REG_NOSUB) == 0); free(pattern); } /*! * \internal * \brief Compile regular expressions to match failure-related node attributes * * \param[in] rsc Resource being checked for failures * \param[in] data_set Data set (for CRM feature set version) * \param[out] failcount_re Storage for regular expression for fail count * \param[out] lastfailure_re Storage for regular expression for last failure * * \note The caller is responsible for freeing the expressions with regfree(). */ static void generate_fail_regexes(resource_t *rsc, pe_working_set_t *data_set, regex_t *failcount_re, regex_t *lastfailure_re) { char *rsc_name = rsc_fail_name(rsc); const char *version = crm_element_value(data_set->input, XML_ATTR_CRM_VERSION); gboolean is_legacy = (compare_version(version, "3.0.13") < 0); generate_fail_regex(CRM_FAIL_COUNT_PREFIX, rsc_name, is_legacy, is_set(rsc->flags, pe_rsc_unique), failcount_re); generate_fail_regex(CRM_LAST_FAILURE_PREFIX, rsc_name, is_legacy, is_set(rsc->flags, pe_rsc_unique), lastfailure_re); free(rsc_name); } int pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set) { char *key = NULL; const char *value = NULL; regex_t failcount_re, lastfailure_re; int failcount = 0; time_t last = 0; GHashTableIter iter; generate_fail_regexes(rsc, data_set, &failcount_re, &lastfailure_re); /* Resource fail count is sum of all matching operation fail counts */ g_hash_table_iter_init(&iter, node->details->attrs); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { if (regexec(&failcount_re, key, 0, NULL, 0) == 0) { failcount = merge_weights(failcount, char2score(value)); } else if (regexec(&lastfailure_re, key, 0, NULL, 0) == 0) { last = QB_MAX(last, crm_int_helper(value, NULL)); } } regfree(&failcount_re); regfree(&lastfailure_re); if ((failcount > 0) && (last > 0) && (last_failure != NULL)) { *last_failure = last; } /* If failure blocks the resource, disregard any failure timeout */ if ((failcount > 0) && rsc->failure_timeout && block_failure(node, rsc, xml_op, data_set)) { pe_warn("Ignoring failure timeout %d for %s because it conflicts with on-fail=block", rsc->failure_timeout, rsc->id); rsc->failure_timeout = 0; } /* If all failures have expired, ignore fail count */ if (is_set(flags, pe_fc_effective) && (failcount > 0) && (last > 0) && rsc->failure_timeout) { time_t now = get_effective_time(data_set); if (now > (last + rsc->failure_timeout)) { crm_debug("Failcount for %s on %s expired after %ds", rsc->id, node->details->uname, rsc->failure_timeout); failcount = 0; } } - if (is_set(flags, pe_fc_fillers) && rsc->fillers) { + /* We never want the fail counts of a bundle container's fillers to + * count towards the container's fail count. + * + * Most importantly, a Pacemaker Remote connection to a bundle container + * is a filler of the container, but can reside on a different node than the + * container itself. Counting its fail count on its node towards the + * container's fail count on that node could lead to attempting to stop the + * container on the wrong node. + */ + + if (is_set(flags, pe_fc_fillers) && rsc->fillers + && !pe_rsc_is_bundled(rsc)) { + GListPtr gIter = NULL; for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { resource_t *filler = (resource_t *) gIter->data; time_t filler_last_failure = 0; failcount += pe_get_failcount(node, filler, &filler_last_failure, flags, xml_op, data_set); if (last_failure && filler_last_failure > *last_failure) { *last_failure = filler_last_failure; } } if (failcount > 0) { char *score = score2char(failcount); crm_info("Container %s and the resources within it have failed %s times on %s", rsc->id, score, node->details->uname); free(score); } } else if (failcount > 0) { char *score = score2char(failcount); crm_info("%s has failed %s times on %s", rsc->id, score, node->details->uname); free(score); } return failcount; } /*! * \brief Schedule a controller operation to clear a fail count * * \param[in] rsc Resource with failure * \param[in] node Node failure occurred on * \param[in] reason Readable description why needed (for logging) * \param[in] data_set Working set for cluster * * \return Scheduled action */ pe_action_t * pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_working_set_t *data_set) { char *key = NULL; action_t *clear = NULL; CRM_CHECK(rsc && node && reason && data_set, return NULL); key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); clear = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); add_hash_param(clear->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s", rsc->id, node->details->uname, reason, clear->uuid); return clear; } diff --git a/lib/pengine/remote.c b/lib/pengine/remote.c index 4e649a8529..a3d281e41b 100644 --- a/lib/pengine/remote.c +++ b/lib/pengine/remote.c @@ -1,202 +1,250 @@ /* - * Copyright (C) 2013 Andrew Beekhof + * Copyright 2013-2018 Andrew Beekhof * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ + #include #include #include #include #include gboolean is_rsc_baremetal_remote_node(resource_t *rsc, pe_working_set_t * data_set) { node_t *node; if (rsc == NULL) { return FALSE; } else if (rsc->is_remote_node == FALSE) { return FALSE; } node = pe_find_node(data_set->nodes, rsc->id); if (node == NULL) { return FALSE; } return is_baremetal_remote_node(node); } gboolean is_baremetal_remote_node(node_t *node) { if (is_remote_node(node) && (node->details->remote_rsc == NULL || node->details->remote_rsc->container == FALSE)) { return TRUE; } return FALSE; } gboolean is_container_remote_node(node_t *node) { if (is_remote_node(node) && (node->details->remote_rsc && node->details->remote_rsc->container)) { return TRUE; } return FALSE; } gboolean is_remote_node(node_t *node) { if (node && node->details->type == node_remote) { return TRUE; } return FALSE; } resource_t * rsc_contains_remote_node(pe_working_set_t * data_set, resource_t *rsc) { if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { return NULL; } if (rsc->fillers) { GListPtr gIter = NULL; for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { resource_t *filler = (resource_t *) gIter->data; if (filler->is_remote_node) { return filler; } } } return NULL; } gboolean xml_contains_remote_node(xmlNode *xml) { const char *class = crm_element_value(xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); const char *agent = crm_element_value(xml, XML_ATTR_TYPE); if (safe_str_eq(agent, "remote") && safe_str_eq(provider, "pacemaker") && safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF)) { return TRUE; } return FALSE; } /*! * \internal * \brief Execute a supplied function for each guest node running on a host * * \param[in] data_set Working set for cluster * \param[in] host Host node to check * \param[in] helper Function to call for each guest node * \param[in,out] user_data Pointer to pass to helper function */ void pe_foreach_guest_node(const pe_working_set_t *data_set, const node_t *host, void (*helper)(const node_t*, void*), void *user_data) { GListPtr iter; CRM_CHECK(data_set && host && host->details && helper, return); if (!is_set(data_set->flags, pe_flag_have_remote_nodes)) { return; } for (iter = host->details->running_rsc; iter != NULL; iter = iter->next) { resource_t *rsc = (resource_t *) iter->data; if (rsc->is_remote_node && (rsc->container != NULL)) { node_t *guest_node = pe_find_node(data_set->nodes, rsc->id); if (guest_node) { (*helper)(guest_node, user_data); } } } } /*! * \internal * \brief Create CIB XML for an implicit remote connection * * \param[in] parent If not NULL, use as parent XML element * \param[in] uname Name of Pacemaker Remote node * \param[in] container If not NULL, use this as connection container * \param[in] migrateable If not NULL, use as allow-migrate value * \param[in] is_managed If not NULL, use as is-managed value * \param[in] start_timeout If not NULL, use as remote connect timeout * \param[in] server If not NULL, use as remote server value * \param[in] port If not NULL, use as remote port value */ xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port) { xmlNode *remote; xmlNode *xml_sub; remote = create_xml_node(parent, XML_CIB_TAG_RESOURCE); // Add identity crm_xml_add(remote, XML_ATTR_ID, uname); crm_xml_add(remote, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF); crm_xml_add(remote, XML_AGENT_ATTR_PROVIDER, "pacemaker"); crm_xml_add(remote, XML_ATTR_TYPE, "remote"); // Add meta-attributes xml_sub = create_xml_node(remote, XML_TAG_META_SETS); crm_xml_set_id(xml_sub, "%s-%s", uname, XML_TAG_META_SETS); crm_create_nvpair_xml(xml_sub, NULL, XML_RSC_ATTR_INTERNAL_RSC, XML_BOOLEAN_TRUE); if (container_id) { crm_create_nvpair_xml(xml_sub, NULL, XML_RSC_ATTR_CONTAINER, container_id); } if (migrateable) { crm_create_nvpair_xml(xml_sub, NULL, XML_OP_ATTR_ALLOW_MIGRATE, migrateable); } if (is_managed) { crm_create_nvpair_xml(xml_sub, NULL, XML_RSC_ATTR_MANAGED, is_managed); } // Add instance attributes if (port || server) { xml_sub = create_xml_node(remote, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_sub, "%s-%s", uname, XML_TAG_ATTR_SETS); if (server) { crm_create_nvpair_xml(xml_sub, NULL, "addr", server); } if (port) { crm_create_nvpair_xml(xml_sub, NULL, "port", port); } } // Add operations xml_sub = create_xml_node(remote, "operations"); crm_create_op_xml(xml_sub, uname, "monitor", "30s", "30s"); if (start_timeout) { crm_create_op_xml(xml_sub, uname, "start", "0", start_timeout); } return remote; } + +// History entry to be checked for fail count clearing +struct check_op { + xmlNode *rsc_op; // History entry XML + pe_resource_t *rsc; // Known resource corresponding to history entry + pe_node_t *node; // Known node corresponding to history entry + enum pe_check_parameters check_type; // What needs checking +}; + +void +pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, + enum pe_check_parameters flag, pe_working_set_t *data_set) +{ + struct check_op *check_op = NULL; + + CRM_CHECK(data_set && rsc_op && rsc && node, return); + + check_op = calloc(1, sizeof(struct check_op)); + CRM_ASSERT(check_op != NULL); + + crm_trace("Deferring checks of %s until after allocation", ID(rsc_op)); + check_op->rsc_op = rsc_op; + check_op->rsc = rsc; + check_op->node = node; + check_op->check_type = flag; + data_set->param_check = g_list_prepend(data_set->param_check, check_op); +} + +/*! + * \internal + * \brief Call a function for each action to be checked for addr substitution + * + * \param[in] data_set Working set for cluster + * \param[in] cb Function to be called + */ +void +pe__foreach_param_check(pe_working_set_t *data_set, + void (*cb)(pe_resource_t*, pe_node_t*, xmlNode*, + enum pe_check_parameters, pe_working_set_t*)) +{ + CRM_CHECK(data_set && cb, return); + + for (GList *item = data_set->param_check; item != NULL; item = item->next) { + struct check_op *check_op = item->data; + + cb(check_op->rsc, check_op->node, check_op->rsc_op, + check_op->check_type, data_set); + } +} + +void +pe__free_param_checks(pe_working_set_t *data_set) +{ + if (data_set && data_set->param_check) { + g_list_free_full(data_set->param_check, free); + data_set->param_check = NULL; + } +} diff --git a/lib/pengine/status.c b/lib/pengine/status.c index c176b36efd..dcbdc16c4c 100644 --- a/lib/pengine/status.c +++ b/lib/pengine/status.c @@ -1,408 +1,410 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include /*! * \brief Create a new working set * * \return New, initialized working set on success, else NULL (and set errno) * \note Only pe_working_set_t objects created with this function (as opposed * to statically declared or directly allocated) should be used with the * functions in this library, to allow for future extensions to the * data type. The caller is responsible for freeing the memory with * pe_free_working_set() when the instance is no longer needed. */ pe_working_set_t * pe_new_working_set() { pe_working_set_t *data_set = calloc(1, sizeof(pe_working_set_t)); if (data_set != NULL) { set_working_set_defaults(data_set); } return data_set; } /*! * \brief Free a working set * * \param[in] data_set Working set to free */ void pe_free_working_set(pe_working_set_t *data_set) { if (data_set != NULL) { pe_reset_working_set(data_set); free(data_set); } } /* * Unpack everything * At the end you'll have: * - A list of nodes * - A list of resources (each with any dependencies on other resources) * - A list of constraints between resources and nodes * - A list of constraints between start/stop actions * - A list of nodes that need to be stonith'd * - A list of nodes that need to be shutdown * - A list of the possible stop/start actions (without dependencies) */ gboolean cluster_status(pe_working_set_t * data_set) { xmlNode *config = get_xpath_object("//"XML_CIB_TAG_CRMCONFIG, data_set->input, LOG_TRACE); xmlNode *cib_nodes = get_xpath_object("//"XML_CIB_TAG_NODES, data_set->input, LOG_TRACE); xmlNode *cib_resources = get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE); xmlNode *cib_status = get_xpath_object("//"XML_CIB_TAG_STATUS, data_set->input, LOG_TRACE); xmlNode *cib_tags = get_xpath_object("//"XML_CIB_TAG_TAGS, data_set->input, LOG_TRACE); const char *value = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); crm_trace("Beginning unpack"); /* reset remaining global variables */ data_set->failed = create_xml_node(NULL, "failed-ops"); if (data_set->input == NULL) { return FALSE; } if (data_set->now == NULL) { data_set->now = crm_time_new(NULL); } if (data_set->dc_uuid == NULL) { data_set->dc_uuid = crm_element_value_copy(data_set->input, XML_ATTR_DC_UUID); } clear_bit(data_set->flags, pe_flag_have_quorum); if (crm_is_true(value)) { set_bit(data_set->flags, pe_flag_have_quorum); } data_set->op_defaults = get_xpath_object("//"XML_CIB_TAG_OPCONFIG, data_set->input, LOG_TRACE); data_set->rsc_defaults = get_xpath_object("//"XML_CIB_TAG_RSCCONFIG, data_set->input, LOG_TRACE); unpack_config(config, data_set); if (is_not_set(data_set->flags, pe_flag_quick_location) && is_not_set(data_set->flags, pe_flag_have_quorum) && data_set->no_quorum_policy != no_quorum_ignore) { crm_warn("Fencing and resource management disabled due to lack of quorum"); } unpack_nodes(cib_nodes, data_set); if(is_not_set(data_set->flags, pe_flag_quick_location)) { unpack_remote_nodes(cib_resources, data_set); } unpack_resources(cib_resources, data_set); unpack_tags(cib_tags, data_set); if(is_not_set(data_set->flags, pe_flag_quick_location)) { unpack_status(cib_status, data_set); } set_bit(data_set->flags, pe_flag_have_status); return TRUE; } static void pe_free_resources(GListPtr resources) { resource_t *rsc = NULL; GListPtr iterator = resources; while (iterator != NULL) { rsc = (resource_t *) iterator->data; iterator = iterator->next; rsc->fns->free(rsc); } if (resources != NULL) { g_list_free(resources); } } static void pe_free_actions(GListPtr actions) { GListPtr iterator = actions; while (iterator != NULL) { pe_free_action(iterator->data); iterator = iterator->next; } if (actions != NULL) { g_list_free(actions); } } static void pe_free_nodes(GListPtr nodes) { GListPtr iterator = nodes; while (iterator != NULL) { node_t *node = (node_t *) iterator->data; struct pe_node_shared_s *details = node->details; iterator = iterator->next; crm_trace("deleting node"); print_node("delete", node, FALSE); if (details != NULL) { crm_trace("%s is being deleted", details->uname); if (details->attrs != NULL) { g_hash_table_destroy(details->attrs); } if (details->utilization != NULL) { g_hash_table_destroy(details->utilization); } if (details->digest_cache != NULL) { g_hash_table_destroy(details->digest_cache); } g_list_free(details->running_rsc); g_list_free(details->allocated_rsc); free(details); } free(node); } if (nodes != NULL) { g_list_free(nodes); } } static void pe__free_ordering(GListPtr constraints) { GListPtr iterator = constraints; while (iterator != NULL) { pe__ordering_t *order = iterator->data; iterator = iterator->next; free(order->lh_action_task); free(order->rh_action_task); free(order); } if (constraints != NULL) { g_list_free(constraints); } } static void pe__free_location(GListPtr constraints) { GListPtr iterator = constraints; while (iterator != NULL) { pe__location_t *cons = iterator->data; iterator = iterator->next; g_list_free_full(cons->node_list_rh, free); free(cons->id); free(cons); } if (constraints != NULL) { g_list_free(constraints); } } /*! * \brief Reset working set to default state without freeing it or constraints * * \param[in,out] data_set Working set to reset * * \deprecated This function is deprecated as part of the API; * pe_reset_working_set() should be used instead. */ void cleanup_calculations(pe_working_set_t * data_set) { if (data_set == NULL) { return; } clear_bit(data_set->flags, pe_flag_have_status); if (data_set->config_hash != NULL) { g_hash_table_destroy(data_set->config_hash); } if (data_set->singletons != NULL) { g_hash_table_destroy(data_set->singletons); } if (data_set->tickets) { g_hash_table_destroy(data_set->tickets); } if (data_set->template_rsc_sets) { g_hash_table_destroy(data_set->template_rsc_sets); } if (data_set->tags) { g_hash_table_destroy(data_set->tags); } free(data_set->dc_uuid); crm_trace("deleting resources"); pe_free_resources(data_set->resources); crm_trace("deleting actions"); pe_free_actions(data_set->actions); crm_trace("deleting nodes"); pe_free_nodes(data_set->nodes); + pe__free_param_checks(data_set); + g_list_free(data_set->stop_needed); free_xml(data_set->graph); crm_time_free(data_set->now); free_xml(data_set->input); free_xml(data_set->failed); set_working_set_defaults(data_set); CRM_CHECK(data_set->ordering_constraints == NULL,; ); CRM_CHECK(data_set->placement_constraints == NULL,; ); } /*! * \brief Reset a working set to default state without freeing it * * \param[in,out] data_set Working set to reset */ void pe_reset_working_set(pe_working_set_t *data_set) { if (data_set == NULL) { return; } crm_trace("Deleting %d ordering constraints", g_list_length(data_set->ordering_constraints)); pe__free_ordering(data_set->ordering_constraints); data_set->ordering_constraints = NULL; crm_trace("Deleting %d location constraints", g_list_length(data_set->placement_constraints)); pe__free_location(data_set->placement_constraints); data_set->placement_constraints = NULL; crm_trace("Deleting %d colocation constraints", g_list_length(data_set->colocation_constraints)); g_list_free_full(data_set->colocation_constraints, free); data_set->colocation_constraints = NULL; crm_trace("Deleting %d ticket constraints", g_list_length(data_set->ticket_constraints)); g_list_free_full(data_set->ticket_constraints, free); data_set->ticket_constraints = NULL; cleanup_calculations(data_set); } void set_working_set_defaults(pe_working_set_t * data_set) { memset(data_set, 0, sizeof(pe_working_set_t)); data_set->order_id = 1; data_set->action_id = 1; data_set->no_quorum_policy = no_quorum_freeze; data_set->flags = 0x0ULL; set_bit(data_set->flags, pe_flag_stop_rsc_orphans); set_bit(data_set->flags, pe_flag_symmetric_cluster); set_bit(data_set->flags, pe_flag_stop_action_orphans); } resource_t * pe_find_resource(GListPtr rsc_list, const char *id) { return pe_find_resource_with_flags(rsc_list, id, pe_find_renamed); } resource_t * pe_find_resource_with_flags(GListPtr rsc_list, const char *id, enum pe_find flags) { GListPtr rIter = NULL; for (rIter = rsc_list; id && rIter; rIter = rIter->next) { resource_t *parent = rIter->data; resource_t *match = parent->fns->find_rsc(parent, id, NULL, flags); if (match != NULL) { return match; } } crm_trace("No match for %s", id); return NULL; } node_t * pe_find_node_any(GListPtr nodes, const char *id, const char *uname) { node_t *match = pe_find_node_id(nodes, id); if (match) { return match; } crm_trace("Looking up %s via its uname instead", uname); return pe_find_node(nodes, uname); } node_t * pe_find_node_id(GListPtr nodes, const char *id) { GListPtr gIter = nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node && safe_str_eq(node->details->id, id)) { return node; } } /* error */ return NULL; } node_t * pe_find_node(GListPtr nodes, const char *uname) { GListPtr gIter = nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node && safe_str_eq(node->details->uname, uname)) { return node; } } /* error */ return NULL; } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 9e3f796aa3..ac27121ef2 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,3441 +1,3474 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit(data_set->flags, flag); \ } else { \ clear_bit(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response *failed, pe_working_set_t * data_set); static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node); // Bitmask for warnings we only want to print once uint32_t pe_wo = 0; static gboolean is_dangling_container_remote_node(node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (is_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) { return TRUE; } return FALSE; } /*! * \brief Schedule a fence action for a node * * \param[in,out] data_set Current working set of cluster * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed */ void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (is_container_remote_node(node)) { resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { if (!is_set(rsc->flags, pe_rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", node->details->uname, reason, rsc->id); } else { crm_warn("Guest node %s will be fenced " "(by recovering its guest resource %s): %s", node->details->uname, rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; set_bit(rsc->flags, pe_rsc_failed); } } } else if (is_dangling_container_remote_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", node->details->uname, reason); set_bit(node->details->remote_rsc->flags, pe_rsc_failed); } else if (is_baremetal_remote_node(node)) { resource_t *rsc = node->details->remote_rsc; if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", node->details->uname, reason); } else if(node->details->remote_requires_reset == FALSE) { node->details->remote_requires_reset = TRUE; crm_warn("Remote node %s %s: %s", node->details->uname, pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, data_set); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", node->details->uname, pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean", reason); } else { crm_warn("Cluster node %s %s: %s", node->details->uname, pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, data_set); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \ "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \ "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \ "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \ "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(unsigned long long flag, const char *xpath, pe_working_set_t *data_set) { xmlXPathObjectPtr result = NULL; if (is_not_set(data_set->flags, flag)) { result = xpath_search(data_set->input, xpath); if (result && (numXpathResults(result) > 0)) { set_bit(data_set->flags, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = crm_str_table_new(); data_set->config_hash = config_hash; unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if(is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_notice("Watchdog will be used via SBD if fencing is required"); set_bit(data_set->flags, pe_flag_have_stonith_resource); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); if (!strcmp(data_set->stonith_action, "poweroff")) { pe_warn_once(pe_wo_poweroff, "Support for stonith-action of 'poweroff' is deprecated " "and will be removed in a future release (use 'off' instead)"); data_set->stonith_action = "off"; } crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); crm_debug("Concurrent fencing is %s", is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled"); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if (safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if (safe_str_eq(value, "suicide")) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { int do_panic = 0; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) { data_set->no_quorum_policy = no_quorum_suicide; } else { crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount"); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing); } if (is_set(data_set->flags, pe_flag_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes"); } node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); return TRUE; } static void destroy_digest_cache(gpointer ptr) { op_digest_cache_t *data = ptr; free_xml(data->params_all); free_xml(data->params_secure); free_xml(data->params_restart); free(data->digest_all_calc); free(data->digest_restart_calc); free(data->digest_secure_calc); free(data); } node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } new_node = calloc(1, sizeof(node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct pe_node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->type = node_ping; if (safe_str_eq(type, "remote")) { new_node->details->type = node_remote; set_bit(data_set->flags, pe_flag_have_remote_nodes); } else if ((type == NULL) || safe_str_eq(type, "member")) { new_node->details->type = node_member; } new_node->details->attrs = crm_str_table_new(); if (is_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("cluster")); } new_node->details->utilization = crm_str_table_new(); new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_digest_cache); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname); return new_node; } bool remote_id_conflict(const char *remote_name, pe_working_set_t *data) { bool match = FALSE; #if 1 pe_find_resource(data->resources, remote_name); #else if (data->name_check == NULL) { data->name_check = g_hash_table_new(crm_str_hash, g_str_equal); for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) { const char *id = ID(xml_rsc); /* avoiding heap allocation here because we know the duration of this hashtable allows us to */ g_hash_table_insert(data->name_check, (char *) id, (char *) id); } } if (g_hash_table_lookup(data->name_check, remote_name)) { match = TRUE; } #endif if (match) { crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name); return NULL; } return match; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *container_managed = NULL; for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) { if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) { continue; } for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next_element(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) { remote_name = value; } else if (safe_str_eq(name, "remote-addr")) { remote_server = value; } else if (safe_str_eq(name, "remote-port")) { remote_port = value; } else if (safe_str_eq(name, "remote-connect-timeout")) { connect_timeout = value; } else if (safe_str_eq(name, "remote-allow-migrate")) { remote_allow_migrate=value; } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) { container_managed = value; } } } if (remote_name == NULL) { return NULL; } if (remote_id_conflict(remote_name, data)) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, container_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node) { if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (is_set(data_set->flags, pe_flag_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... new_node->details->unclean = FALSE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { crm_config_err("Must specify id tag in "); continue; } new_node = pe_create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL, new_node->details->utilization, NULL, FALSE, data_set->now); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); pe_create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void setup_container(resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; setup_container(child_rsc, data_set); } return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && safe_str_neq(container_id, rsc->id)) { resource_t *container = pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; set_bit(container->flags, pe_rsc_is_container); container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; /* generate remote nodes from resource config before unpacking resources */ for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { const char *new_node_id = NULL; /* first check if this is a bare metal remote node. Bare metal remote nodes * are defined as a resource primitive only. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found baremetal remote node %s in container resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Now check for guest remote nodes. * guest remote nodes are defined within a resource primitive. * Example1: a vm resource might be configured as a remote node. * Example2: a vm resource might be configured within a group to be a remote node. * Note: right now we only support guest remote nodes in as a standalone primitive * or a primitive within a group. No cloned primitives can be a guest remote node * right now */ if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) { /* expands a metadata defined remote resource into the xml config * as an actual rsc primitive to be unpacked later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest remote node %s in container resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } else if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) { xmlNode *xml_obj2 = NULL; /* search through a group to see if any of the primitive contain a remote node. */ for (xml_obj2 = __xml_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest remote node %s in container resource %s which is in group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the PE calculations. */ static void link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc) { node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } print_resource(LOG_TRACE, "Linking remote-node connection resource, ", new_rsc, FALSE); remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return;); remote_node->details->remote_rsc = new_rsc; /* If this is a baremetal remote-node (no container resource * associated with it) then we need to handle startup fencing the same way * as cluster nodes. */ if (new_rsc->container == NULL) { handle_startup_fencing(data_set, remote_node); } else { /* At this point we know if the remote node is a container or baremetal * remote node, update the #kind attribute if a container is involved */ g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("container")); } } static void destroy_tag(gpointer data) { tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } /*! * \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] data_set Where to put resource information * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when common_unpack() calls resource_location() */ gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GListPtr gIter = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_tag); for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { resource_t *new_rsc = NULL; if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append(data_set->resources, new_rsc); print_resource(LOG_TRACE, "Added ", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_set(data_set->flags, pe_flag_quick_location)) { /* Ignore */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) { xmlNode *xml_tag = NULL; data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_tag); for (xml_tag = __xml_first_child(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) { continue; } if (tag_id == NULL) { crm_config_err("Failed unpacking %s: %s should be specified", crm_element_name(xml_tag), XML_ATTR_ID); continue; } for (xml_obj_ref = __xml_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) { continue; } if (obj_ref == NULL) { crm_config_err("Failed unpacking %s for tag %s: %s should be specified", crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID); continue; } if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (ticket_id == NULL || strlen(ticket_id) == 0) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } static void unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set) { const char *resource_discovery_enabled = NULL; xmlNode *attrs = NULL; resource_t *rsc = NULL; const char *shutdown = NULL; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { return; } if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { return; } crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname); this_node->details->remote_maintenance = crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0"); rsc = this_node->details->remote_rsc; if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { crm_info("Node %s is shutting down", this_node->details->uname); this_node->details->shutdown = TRUE; if (rsc) { rsc->next_role = RSC_ROLE_STOPPED; } } if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) || (rsc && !is_set(rsc->flags, pe_rsc_managed))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } else { /* if we're here, this is either a baremetal node and fencing is enabled, * or this is a container node which we don't care if fencing is enabled * or not on. container nodes are 'fenced' by recovering the container resource * regardless of whether fencing is enabled. */ crm_info("Node %s has resource discovery disabled", this_node->details->uname); this_node->details->rsc_discovery_enabled = FALSE; } } } static bool unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) { bool changed = false; xmlNode *lrm_rsc = NULL; for (xmlNode *state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { const char *id = NULL; const char *uname = NULL; node_t *this_node = NULL; bool process = FALSE; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (this_node->details->unpacked) { crm_info("Node %s is already processed", id); continue; } else if (is_remote_node(this_node) == FALSE && is_set(data_set->flags, pe_flag_stonith_enabled)) { // A redundant test, but preserves the order for regression tests process = TRUE; } else if (is_remote_node(this_node)) { bool check = FALSE; resource_t *rsc = this_node->details->remote_rsc; if(fence) { check = TRUE; } else if(rsc == NULL) { /* Not ready yet */ } else if (is_container_remote_node(this_node) && rsc->role == RSC_ROLE_STARTED && rsc->container->role == RSC_ROLE_STARTED) { /* Both the connection and the underlying container * need to be known 'up' before we volunterily process * resources inside it */ check = TRUE; crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED); } else if (is_container_remote_node(this_node) == FALSE && rsc->role == RSC_ROLE_STARTED) { check = TRUE; crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED); } if (check) { determine_remote_online_status(data_set, this_node); unpack_handle_remote_attrs(this_node, state, data_set); process = TRUE; } } else if (this_node->details->online) { process = TRUE; } else if (fence) { process = TRUE; } if(process) { crm_trace("Processing lrm resource entries on %shealthy%s node: %s", fence?"un":"", is_remote_node(this_node)?" remote":"", this_node->details->uname); changed = TRUE; this_node->details->unpacked = TRUE; lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } return changed; } /* remove nodes that are down, stopping */ /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *state = NULL; node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_ticket); } for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) { unpack_tickets_state((xmlNode *) state, data_set); } else if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) { xmlNode *attrs = NULL; const char *resource_discovery_enabled = NULL; id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } else if (is_remote_node(this_node)) { /* online state for remote nodes is determined by the * rsc state after all the unpacking is done. we do however * need to mark whether or not the node has been fenced as this plays * a role during unpacking cluster node resource state */ this_node->details->remote_was_fenced = crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0"); continue; } crm_trace("Processing node id=%s, uname=%s", id, uname); /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } crm_trace("determining node state"); determine_online_status(state, this_node, data_set); if (is_not_set(data_set->flags, pe_flag_have_quorum) && this_node->details->online && (data_set->no_quorum_policy == no_quorum_suicide)) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ pe_fence_node(data_set, this_node, "cluster does not have quorum"); } } } while(unpack_node_loop(status, FALSE, data_set)) { crm_trace("Start another loop"); } // Now catch any nodes we didn't see unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set); + /* Now that we know where resources are, we can schedule stops of containers + * with failed bundle connections + */ + if (data_set->stop_needed != NULL) { + for (GList *item = data_set->stop_needed; item; item = item->next) { + pe_resource_t *container = item->data; + pe_node_t *node = pe__current_node(container); + + if (node) { + stop_action(container, node, FALSE); + } + } + g_list_free(data_set->stop_needed); + } + for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *this_node = gIter->data; if (this_node == NULL) { continue; } else if(is_remote_node(this_node) == FALSE) { continue; } else if(this_node->details->unpacked) { continue; } determine_remote_online_status(data_set, this_node); } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster)); } else if (safe_str_eq(is_peer, ONLINESTATUS)) { if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster)); crm_trace("\tis_peer=%s, join=%s, expected=%s", crm_str(is_peer), crm_str(join), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "peer is unexpectedly down"); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; bool crmd_online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = pe_node_attribute_raw(this_node, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate); online = crm_is_true(in_cluster); crmd_online = safe_str_eq(is_peer, ONLINESTATUS); if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", this_node->details->uname); /* Slightly different criteria since we can't shut down a dead peer */ online = crmd_online; } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "peer has not been seen by the cluster"); } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) { pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria"); } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) { if (crm_is_true(in_cluster) || crmd_online) { crm_info("- Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", this_node->details->uname); } } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN) && crm_is_true(in_cluster) == FALSE && !crmd_online) { crm_info("Node %s was just shot", this_node->details->uname); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { pe_fence_node(data_set, this_node, "peer is no longer part of the cluster"); } else if (!crmd_online) { pe_fence_node(data_set, this_node, "peer process is no longer available"); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "termination was requested"); } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is active", this_node->details->uname); } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING) || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "peer was in an unknown state"); crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown); } return online; } static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) { resource_t *rsc = this_node->details->remote_rsc; resource_t *container = NULL; pe_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will * be NULL. Consider it an offline remote node in that case. */ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; if (container && (g_list_length(rsc->running_on) == 1)) { host = rsc->running_on->data; } /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && is_set(container->flags, pe_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if(is_set(rsc->flags, pe_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if (rsc->role == RSC_ROLE_STOPPED || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); return this_node->details->online; } gboolean determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *shutdown = NULL; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("Node %s is not a pacemaker node", this_node->details->uname); } else if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } return online; } /*! * \internal * \brief Find the end of a resource's name, excluding any clone suffix * * \param[in] id Resource ID to check * * \return Pointer to last character of resource's base name */ const char * pe_base_name_end(const char *id) { if (!crm_strlen_zero(id)) { const char *end = id + strlen(id) - 1; for (const char *s = end; s > id; --s) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': return (s == end)? s : (s - 1); default: return end; } } return end; } return NULL; } /*! * \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_zero(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); size_t base_name_len = end - last_rsc_id + 1; char *zero = NULL; CRM_ASSERT(end); zero = calloc(base_name_len + 3, sizeof(char)); CRM_ASSERT(zero); memcpy(zero, last_rsc_id, base_name_len); zero[base_name_len] = ':'; zero[base_name_len + 1] = '0'; return zero; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pe_find_node(data_set->nodes, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set); } link_rsc2remotenode(data_set, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); set_bit(rsc->flags, pe_rsc_orphan_container_filler); } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } /*! * \internal * \brief Create orphan instance for anonymous clone resource history */ static pe_resource_t * create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, pe_node_t *node, pe_working_set_t *data_set) { pe_resource_t *top = pe__create_clone_child(parent, data_set); // find_rsc() because we might be a cloned group pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of clone-max instances); * (3) a newly created orphan (i.e. clone-max instances are already active). * * \param[in] data_set Cluster information * \param[in] node Node on which to check for instance * \param[in] parent Clone to check * \param[in] rsc_id Name of cloned resource in history (without instance) */ static resource_t * find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; pe_resource_t *rsc = NULL; pe_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); // Check for active (or partially active, for cloned groups) instance pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr locations = NULL; resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called * for a resource before the resource's individual operation history * entries are unpacked, locations will generally not contain the * desired node. * * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if globally-unique * was flipped from true to false); and * (3) when we re-run calculations on the same data set as part of a * simulation. */ child->fns->location(child, &locations, 2); if (locations) { /* We should never associate the same numbered anonymous clone * instance with multiple nodes, and clone instances can't migrate, * so there must be only one location, regardless of history. */ CRM_LOG_ASSERT(locations->next == NULL); if (((pe_node_t *)locations->data)->details == node->details) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can * happen if globally-unique is switched from true to * false), we want to consider the instances beyond the * first as orphans, even if there are inactive instance * numbers available. */ if (rsc->running_on) { crm_notice("Active (now-)anonymous clone %s has " "multiple (orphan) instance histories on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; } else { pe_rsc_trace(parent, "Resource %s, active", rsc->id); } } } g_list_free(locations); } else { pe_rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance && is_not_set(child->flags, pe_rsc_block)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); /* ... but don't use it if it was already associated with a * pending action on another node */ if (inactive_instance && inactive_instance->pending_node && (inactive_instance->pending_node->details != node->details)) { inactive_instance = NULL; } } } } if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); rsc = inactive_instance; } /* If the resource has "requires" set to "quorum" or "nothing", and we don't * have a clone instance for every node, we don't want to consume a valid * instance number for unclean nodes. Such instances may appear to be active * according to the history, but should be considered inactive, so we can * start an instance elsewhere. Treat such instances as orphans. * * An exception is instances running on guest nodes -- since guest node * "fencing" is actually just a resource stop, requires shouldn't apply. * * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !is_container_remote_node(node) && !pe__is_universal_clone(parent, data_set)) { rsc = NULL; } if (rsc == NULL) { rsc = create_anonymous_orphan(parent, rsc_id, node, data_set); pe_rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } static resource_t * unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id, xmlNode * rsc_entry) { resource_t *rsc = NULL; resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, * check it again as a clone instance. Even when clone-max=0, we create * a single :0 orphan to match against here. */ char *clone0_id = clone_zero(rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { crm_trace("%s is not known as %s either (orphan)", rsc_id, clone0_id); } free(clone0_id); } else if (rsc->variant > pe_native) { crm_trace("Resource history for %s is orphaned because it is no longer primitive", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (pe_rsc_is_anon_clone(parent)) { if (pe_rsc_is_bundled(parent)) { rsc = find_container_child(parent->parent, node); } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } } if (rsc && safe_str_neq(rsc_id, rsc->id) && safe_str_neq(rsc_id, rsc->clone_name)) { free(rsc->clone_name); rsc->clone_name = strdup(rsc_id); pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, (is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : "")); } return rsc; } static resource_t * process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { print_resource(LOG_TRACE, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t * rsc, node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { node_t *tmpnode = NULL; char *reason = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { node_t *n = node_copy(node); pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name, n->details->uname); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && node->details->maintenance == FALSE && is_set(rsc->flags, pe_rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (is_container_remote_node(node)) { set_bit(rsc->flags, pe_rsc_failed); should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { if (is_baremetal_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { /* setting unseen = true means that fencing of the remote node will * only occur if the connection resource is not going to start somewhere. * This allows connection resources on a failed cluster-node to move to * another node without requiring the baremetal remote nodes to be fenced * as well. */ node->details->unseen = TRUE; reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(data_set, node, reason); } free(reason); } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(data_set, node, reason); free(reason); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: set_bit(rsc->flags, pe_rsc_failed); - if (rsc->container) { + if (rsc->container && pe_rsc_is_bundled(rsc)) { + /* A bundle's remote connection can run on a different node than + * the bundle's container. We don't necessarily know where the + * container is running yet, so remember it and add a stop + * action for it later. + */ + data_set->stop_needed = g_list_prepend(data_set->stop_needed, + rsc->container); + } else if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; case action_fail_reset_remote: set_bit(rsc->flags, pe_rsc_failed); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); } if (tmpnode && is_baremetal_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* connection resource to baremetal resource failed in a way that * should result in fencing the remote-node. */ pe_fence_node(data_set, tmpnode, "remote connection is unrecoverable"); } } /* require the stop action regardless if fencing is occurring or not. */ if (rsc->role > RSC_ROLE_STOPPED) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_ms) { rsc->next_role = RSC_ROLE_STOPPED; } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. */ if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { crm_config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { crm_config_warn("Cluster configured not to stop active orphans." " %s must be stopped manually on %s", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); if (on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; stop->flags |= pe_action_optional; } g_list_free(possible_matches); free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t * node, resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_ms_s = NULL; counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS); interval_ms = crm_parse_ms(interval_ms_s); if (interval_ms == 0) { pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(status, "-1")) { pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = counter; } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { implied_monitor_start = counter; } } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } static resource_t * unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { resource_t *rsc; resource_t *container; const char *rsc_id; const char *container_id; if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } gboolean unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; gboolean found_orphaned_container_filler = FALSE; CRM_CHECK(node != NULL, return FALSE); crm_trace("Unpacking resources on %s", node->details->uname); for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set); if (!rsc) { continue; } if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = TRUE; } } } /* now that all the resource state has been unpacked for this node * we have to go back and map any orphaned container fillers to their * container resource */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(lrm_rsc_list, data_set); } return TRUE; } static void set_active(resource_t * rsc) { resource_t *top = uber_parent(rsc); if (top && is_set(top->flags, pe_rsc_promotable)) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, pe_working_set_t * data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? */ if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } CRM_LOG_ASSERT(offset > 0); return get_xpath_object(xpath, data_set->input, LOG_DEBUG); } static bool stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set); if (stop_op) { int stop_id = 0; int task_id = 0; crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); if (stop_id > task_id) { return TRUE; } } return FALSE; } static void unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { /* A successful migration sequence is: * migrate_to on source node * migrate_from on target node * stop on source node * * If a migrate_to is followed by a stop, the entire migration (successful * or failed) is complete, and we don't care what happened on the target. * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from failed, make sure the resource gets * stopped on both source and target (if up). * * If the migrate_to and migrate_from both succeeded (which also implies the * resource is no longer running on the source), but there is no stop, the * migration is considered to be "dangling". */ int from_rc = 0; int from_status = 0; const char *migrate_source = NULL; const char *migrate_target = NULL; pe_node_t *target = NULL; pe_node_t *source = NULL; xmlNode *migrate_from = NULL; if (stop_happened_after(rsc, node, xml_op, data_set)) { return; } // Clones are not allowed to migrate, so role can't be master rsc->role = RSC_ROLE_STARTED; migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); target = pe_find_node(data_set->nodes, migrate_target); source = pe_find_node(data_set->nodes, migrate_source); // Check whether there was a migrate_from action migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ID(migrate_from), migrate_target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_OCF_OK && from_status == PCMK_LRM_OP_DONE) { /* The migrate_to and migrate_from both succeeded, so mark the migration * as "dangling". This will be used to schedule a stop action on the * source without affecting the target. */ pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), migrate_source); rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); } } else { // Pending, or complete but erased if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); if (source && source->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ rsc->partial_migration_target = target; rsc->partial_migration_source = source; } } else { /* Consider it failed here - forces a restart, prevents migration */ set_bit(rsc->flags, pe_rsc_failed); clear_bit(rsc->flags, pe_rsc_allow_migrate); } } } static void unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); CRM_ASSERT(rsc); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *source = pe_find_node(data_set->nodes, migrate_source); if (source && source->details->online) { native_add_running(rsc, source, data_set); } } } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *target = pe_find_node(data_set->nodes, migrate_target); pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op, migrate_id); if (target && target->details->online) { native_add_running(rsc, target, data_set); } } else if (migrate_op == NULL) { /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } } static void record_failed_op(xmlNode *op, node_t* node, resource_t *rsc, pe_working_set_t * data_set) { xmlNode *xIter = NULL; const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY); if (node->details->online == FALSE) { return; } for (xIter = data_set->failed->children; xIter; xIter = xIter->next) { const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY); const char *uname = crm_element_value(xIter, XML_ATTR_UNAME); if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) { crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname); return; } } crm_trace("Adding entry %s on %s", op_key, node->details->uname); crm_xml_add(op, XML_ATTR_UNAME, node->details->uname); crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id); add_node_copy(data_set->failed, op); } static const char *get_op_key(xmlNode *xml_op) { const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if(key == NULL) { key = ID(xml_op); } return key; } static void unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { guint interval_ms = 0; bool is_probe = FALSE; action_t *action = NULL; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); CRM_ASSERT(rsc); *last_failure = xml_op; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; pe_rsc_trace(rsc, "is a probe: %s", key); } if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_warn("Processing failed %s of %s on %s: %s " CRM_XS " rc=%d", (is_probe? "probe" : task), rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); if (is_probe && (rc != PCMK_OCF_OK) && (rc != PCMK_OCF_NOT_RUNNING) && (rc != PCMK_OCF_RUNNING_MASTER)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the resource-discovery option for location constraints", rsc->id, node->details->uname); } record_failed_op(xml_op, node, rsc, data_set); } else { crm_trace("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); } action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) || (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) || (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) || (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) { pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), fail2text(action->on_fail), action->uuid, key); *on_fail = action->on_fail; } if (safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { unpack_rsc_migration_failure(rsc, node, xml_op, data_set); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; rsc->next_role = RSC_ROLE_STOPPED; } else if(rc == PCMK_OCF_NOT_RUNNING) { rsc->role = RSC_ROLE_STOPPED; } else { /* * Staying in master role would put the PE/TE into a loop. Setting * slave role is not dangerous because the resource will be stopped * as part of recovery, and any master promotion will be ordered * after that stop. */ rsc->role = RSC_ROLE_SLAVE; } } if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { /* leave stopped */ pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); rsc->role = RSC_ROLE_STOPPED; } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "Setting %s active", rsc->id); set_active(rsc); } pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; resource_t *fail_rsc = rsc; if (fail_rsc->parent) { resource_t *parent = uber_parent(fail_rsc); if (pe_rsc_is_clone(parent) && is_not_set(parent->flags, pe_rsc_unique)) { /* For clone resources, if a child fails on an operation * with on-fail = stop, all the resources fail. Do this by preventing * the parent from coming up again. */ fail_rsc = parent; } } crm_warn("Making sure %s doesn't come up again", fail_rsc->id); /* make sure it doesn't come up again */ if (fail_rsc->allowed_nodes != NULL) { g_hash_table_destroy(fail_rsc->allowed_nodes); } fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); } static int determine_op_status( resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { guint interval_ms = 0; int result = PCMK_LRM_OP_DONE; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); bool is_probe = FALSE; CRM_ASSERT(rsc); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if (target_rc >= 0 && target_rc != rc) { result = PCMK_LRM_OP_ERROR; pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); } /* we could clean this up significantly except for old LRMs and CRMs that * didn't include target_rc and liked to remap status */ switch (rc) { case PCMK_OCF_OK: if (is_probe && target_rc == 7) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Operation %s found resource %s active on %s", task, rsc->id, node->details->uname); } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) { result = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if (safe_str_neq(task, CRMD_ACTION_STOP)) { result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_RUNNING_MASTER: if (is_probe) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s", task, rsc->id, node->details->uname); } else if (target_rc == rc) { /* nothing to do */ } else if (target_rc >= 0) { result = PCMK_LRM_OP_ERROR; } rsc->role = RSC_ROLE_MASTER; break; case PCMK_OCF_DEGRADED_MASTER: case PCMK_OCF_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; result = PCMK_LRM_OP_ERROR; break; case PCMK_OCF_NOT_CONFIGURED: result = PCMK_LRM_OP_ERROR_FATAL; break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: case PCMK_OCF_UNIMPLEMENT_FEATURE: if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && (interval_ms > 0)) { result = PCMK_LRM_OP_NOTSUPPORTED; break; } else if (pe_can_fence(data_set, node) == FALSE && safe_str_eq(task, CRMD_ACTION_STOP)) { /* If a stop fails and we can't fence, there's nothing else we can do */ pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)", rsc->id, task, services_ocf_exitcode_str(rc), rc); clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); } result = PCMK_LRM_OP_ERROR_HARD; break; default: if (result == PCMK_LRM_OP_DONE) { crm_info("Treating %s (rc=%d) on %s as an ERROR", key, rc, node->details->uname); result = PCMK_LRM_OP_ERROR; } } return result; } static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set) { bool expired = FALSE; time_t last_failure = 0; guint interval_ms = 0; int failure_timeout = rsc->failure_timeout; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *clear_reason = NULL; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); /* clearing recurring monitor operation failures automatically * needs to be carefully considered */ if ((interval_ms != 0) && safe_str_eq(task, "monitor")) { /* TODO, in the future we should consider not clearing recurring monitor * op failures unless the last action for a resource was a "stop" action. * otherwise it is possible that clearing the monitor failure will result * in the resource being in an undeterministic state. * * For now we handle this potential undeterministic condition for remote * node connection resources by not clearing a recurring monitor op failure * until after the node has been fenced. */ if (is_set(data_set->flags, pe_flag_stonith_enabled) && rsc->remote_reconnect_ms) { node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); if (remote_node && remote_node->details->remote_was_fenced == 0) { if (strstr(ID(xml_op), "last_failure")) { crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id); } /* disabling failure timeout for this operation because we believe * fencing of the remote node should occur first. */ failure_timeout = 0; } } } if (failure_timeout > 0) { int last_run = 0; if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) { time_t now = get_effective_time(data_set); if (now > (last_run + failure_timeout)) { expired = TRUE; } } } if (expired) { if (pe_get_failcount(node, rsc, &last_failure, pe_fc_default, xml_op, data_set)) { // There is a fail count ignoring timeout if (pe_get_failcount(node, rsc, &last_failure, pe_fc_effective, xml_op, data_set) == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { expired = FALSE; } } else if (rsc->remote_reconnect_ms && strstr(ID(xml_op), "last_failure")) { // Always clear last failure when reconnect interval is set clear_reason = "reconnect interval is set"; } } else if (strstr(ID(xml_op), "last_failure") && ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) { - op_digest_cache_t *digest_data = NULL; - - digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); + if (container_fix_remote_addr(rsc)) { + /* We haven't allocated resources yet, so we can't reliably + * substitute addr parameters for the REMOTE_CONTAINER_HACK. + * When that's needed, defer the check until later. + */ + pe__add_param_check(xml_op, rsc, node, pe_check_last_failure, + data_set); - if (digest_data->rc == RSC_DIGEST_UNKNOWN) { - crm_trace("rsc op %s/%s on node %s does not have a op digest to compare against", rsc->id, - key, node->details->id); - } else if(container_fix_remote_addr(rsc) && digest_data->rc != RSC_DIGEST_MATCH) { - // We can't sanely check the changing 'addr' attribute. Yet - crm_trace("Ignoring rsc op %s/%s on node %s", rsc->id, key, node->details->id); + } else { + op_digest_cache_t *digest_data = NULL; - } else if (digest_data->rc != RSC_DIGEST_MATCH) { - clear_reason = "resource parameters have changed"; + digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); + switch (digest_data->rc) { + case RSC_DIGEST_UNKNOWN: + crm_trace("Resource %s history entry %s on %s has no digest to compare", + rsc->id, key, node->details->id); + break; + case RSC_DIGEST_MATCH: + break; + default: + clear_reason = "resource parameters have changed"; + break; + } } } if (clear_reason != NULL) { node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason, data_set); if (is_set(data_set->flags, pe_flag_stonith_enabled) && rsc->remote_reconnect_ms && remote_node && remote_node->details->unclean) { action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, data_set); crm_notice("Waiting for %s to complete before clearing %s failure for remote node %s", fence?fence->uuid:"nil", task, rsc->id); order_actions(fence, clear_op, pe_order_implies_then); } } if (expired && (interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) { switch(rc) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: /* Don't expire probes that return these values */ expired = FALSE; break; } } return expired; } int get_target_rc(xmlNode *xml_op) { int dummy = 0; int target_rc = 0; char *dummy_string = NULL; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); free(dummy_string); return target_rc; } static enum action_fail_response get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) { int result = action_fail_recover; action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); result = action->on_fail; pe_free_action(action); return result; } static void update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc, xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { gboolean clear_past_failure = FALSE; CRM_ASSERT(rsc); CRM_ASSERT(xml_op); if (rc == PCMK_OCF_NOT_RUNNING) { clear_past_failure = TRUE; } else if (rc == PCMK_OCF_NOT_INSTALLED) { rsc->role = RSC_ROLE_STOPPED; } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { if (last_failure) { const char *op_key = get_op_key(xml_op); const char *last_failure_key = get_op_key(last_failure); if (safe_str_eq(op_key, last_failure_key)) { clear_past_failure = TRUE; } } if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } } else if (safe_str_eq(task, CRMD_ACTION_START)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* Demote from Master does not clear an error */ rsc->role = RSC_ROLE_SLAVE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { unpack_rsc_migration(rsc, node, xml_op, data_set); } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_block: case action_fail_ignore: case action_fail_recover: case action_fail_restart_container: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; break; case action_fail_reset_remote: if (rsc->remote_reconnect_ms == 0) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and * completely stopped. (With a reconnect interval, we wait * for the failure to be cleared entirely before attempting * to reconnect.) */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; } } } gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int task_id = 0; const char *key = NULL; const char *task = NULL; const char *task_key = NULL; int rc = 0; int status = PCMK_LRM_OP_UNKNOWN; int target_rc = get_target_rc(xml_op); guint interval_ms = 0; gboolean expired = FALSE; resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); task_key = get_op_key(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE); CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE); if (safe_str_eq(task, CRMD_ACTION_NOTIFY) || safe_str_eq(task, CRMD_ACTION_METADATA)) { /* safe to ignore these */ return TRUE; } if (is_not_set(rsc->flags, pe_rsc_unique)) { parent = uber_parent(rsc); } pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role)); if (node->details->unclean) { pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribute", node->details->uname, rsc->id); } if(status != PCMK_LRM_OP_NOT_INSTALLED) { expired = check_operation_expiry(rsc, node, rc, xml_op, data_set); } /* Degraded results are informational only, re-map them to their error-free equivalents */ if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) { rc = PCMK_OCF_OK; /* Add them to the failed list to highlight them for the user */ if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK); record_failed_op(xml_op, node, rsc, data_set); } } else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) { rc = PCMK_OCF_RUNNING_MASTER; /* Add them to the failed list to highlight them for the user */ if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED_MASTER, PCMK_OCF_RUNNING_MASTER); record_failed_op(xml_op, node, rsc, data_set); } } if (expired && target_rc != rc) { const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); if (interval_ms == 0) { crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); goto done; } else if(node->details->online && node->details->unclean == FALSE) { crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); /* This is SO horrible, but we don't have access to CancelXmlOp() yet */ crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); goto done; } } if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) { status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set); } pe_rsc_trace(rsc, "Handling status: %d", status); switch (status) { case PCMK_LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Don't know what to do for cancelled ops yet"); break; case PCMK_LRM_OP_PENDING: if (safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { /* If a pending migrate_to action is out on a unclean node, * we have to force the stop action on the target. */ const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); if (target) { stop_action(rsc, target, FALSE); } } if (rsc->pending_task == NULL) { if (safe_str_eq(task, CRMD_ACTION_STATUS) && (interval_ms == 0)) { /* Pending probes are not printed, even if pending * operations are requested. If someone ever requests that * behavior, uncomment this and the corresponding part of * native.c:native_pending_task(). */ /*rsc->pending_task = strdup("probe");*/ /*rsc->pending_node = node;*/ } else { rsc->pending_task = strdup(task); rsc->pending_node = node; } } break; case PCMK_LRM_OP_DONE: pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname); update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_INSTALLED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if (failure_strategy == action_fail_ignore) { crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: " "Resource agent doesn't exist", task_key, status, rc, node->details->uname); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } resource_location(parent, node, -INFINITY, "hard-error", data_set); unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); break; case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_ERROR_HARD: case PCMK_LRM_OP_ERROR_FATAL: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container && safe_str_eq(task, CRMD_ACTION_STOP))) { crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded", task_key, rc, node->details->uname); update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); set_bit(rsc->flags, pe_rsc_failure_ignored); record_failed_op(xml_op, node, rsc, data_set); if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); if(status == PCMK_LRM_OP_ERROR_HARD) { do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)", parent->id, node->details->uname, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, node, -INFINITY, "hard-error", data_set); } else if(status == PCMK_LRM_OP_ERROR_FATAL) { crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)", parent->id, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); } } break; } done: pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role)); return TRUE; } gboolean add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set) { const char *cluster_name = NULL; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_UNAME), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID), strdup(node->details->id)); if (safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME), strdup(cluster_name)); } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) { const char *site_name = pe_node_attribute_raw(node, "site-name"); if (site_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(cluster_name)); } } return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (is_remote_node(this_node)) { determine_remote_online_status(data_set, this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next_element(lrm_rsc)) { if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; }