diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in
index 3dd5c9473a..268cc3dd3a 100644
--- a/cts/cts-scheduler.in
+++ b/cts/cts-scheduler.in
@@ -1,1595 +1,1601 @@
#!@PYTHON@
""" Regression tests for Pacemaker's scheduler
"""
__copyright__ = "Copyright 2004-2022 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
import io
import os
import re
import sys
import stat
import shlex
import shutil
import argparse
import subprocess
import platform
import tempfile
DESC = """Regression tests for Pacemaker's scheduler"""
# Each entry in TESTS is a group of tests, where each test consists of a
# test base name, test description, and additional test arguments.
# Test groups will be separated by newlines in output.
TESTS = [
[
[ "simple1", "Offline" ],
[ "simple2", "Start" ],
[ "simple3", "Start 2" ],
[ "simple4", "Start Failed" ],
[ "simple6", "Stop Start" ],
[ "simple7", "Shutdown" ],
#[ "simple8", "Stonith" ],
#[ "simple9", "Lower version" ],
#[ "simple10", "Higher version" ],
[ "simple11", "Priority (ne)" ],
[ "simple12", "Priority (eq)" ],
[ "simple8", "Stickiness" ],
],
[
[ "group1", "Group" ],
[ "group2", "Group + Native" ],
[ "group3", "Group + Group" ],
[ "group4", "Group + Native (nothing)" ],
[ "group5", "Group + Native (move)" ],
[ "group6", "Group + Group (move)" ],
[ "group7", "Group colocation" ],
[ "group13", "Group colocation (cant run)" ],
[ "group8", "Group anti-colocation" ],
[ "group9", "Group recovery" ],
[ "group10", "Group partial recovery" ],
[ "group11", "Group target_role" ],
[ "group14", "Group stop (graph terminated)" ],
[ "group15", "Negative group colocation" ],
[ "bug-1573", "Partial stop of a group with two children" ],
[ "bug-1718", "Mandatory group ordering - Stop group_FUN" ],
[ "bug-lf-2613", "Move group on failure" ],
[ "bug-lf-2619", "Move group on clone failure" ],
[ "group-fail", "Ensure stop order is preserved for partially active groups" ],
[ "group-unmanaged", "No need to restart r115 because r114 is unmanaged" ],
[ "group-unmanaged-stopped", "Make sure r115 is stopped when r114 fails" ],
[ "group-dependents", "Account for the location preferences of things colocated with a group" ],
[ "group-stop-ordering", "Ensure blocked group member stop does not force other member stops" ],
[ "colocate-unmanaged-group", "Respect mandatory colocations even if earlier group member is unmanaged" ],
],
[
[ "rsc_dep1", "Must not" ],
[ "rsc_dep3", "Must" ],
[ "rsc_dep5", "Must not 3" ],
[ "rsc_dep7", "Must 3" ],
[ "rsc_dep10", "Must (but cant)" ],
[ "rsc_dep2", "Must (running)" ],
[ "rsc_dep8", "Must (running : alt)" ],
[ "rsc_dep4", "Must (running + move)" ],
[ "asymmetric", "Asymmetric - require explicit location constraints" ],
],
[
[ "orphan-0", "Orphan ignore" ],
[ "orphan-1", "Orphan stop" ],
[ "orphan-2", "Orphan stop, remove failcount" ],
],
[
[ "params-0", "Params: No change" ],
[ "params-1", "Params: Changed" ],
[ "params-2", "Params: Resource definition" ],
[ "params-3", "Params: Restart instead of reload if start pending" ],
[ "params-4", "Params: Reload" ],
[ "params-5", "Params: Restart based on probe digest" ],
[ "novell-251689", "Resource definition change + target_role=stopped" ],
[ "bug-lf-2106", "Restart all anonymous clone instances after config change" ],
[ "params-6", "Params: Detect reload in previously migrated resource" ],
[ "nvpair-id-ref", "Support id-ref in nvpair with optional name" ],
[ "not-reschedule-unneeded-monitor",
"Do not reschedule unneeded monitors while resource definitions have changed" ],
[ "reload-becomes-restart", "Cancel reload if restart becomes required" ],
[ "restart-with-extra-op-params", "Restart if with extra operation parameters upon changes of any" ],
],
[
[ "target-0", "Target Role : baseline" ],
[ "target-1", "Target Role : promoted" ],
[ "target-2", "Target Role : invalid" ],
],
[
[ "base-score", "Set a node's default score for all nodes" ],
],
[
[ "date-1", "Dates", [ "-t", "2005-020" ] ],
[ "date-2", "Date Spec - Pass", [ "-t", "2005-020T12:30" ] ],
[ "date-3", "Date Spec - Fail", [ "-t", "2005-020T11:30" ] ],
[ "origin", "Timing of recurring operations", [ "-t", "2014-05-07 00:28:00" ] ],
[ "probe-0", "Probe (anon clone)" ],
[ "probe-1", "Pending Probe" ],
[ "probe-2", "Correctly re-probe cloned groups" ],
[ "probe-3", "Probe (pending node)" ],
[ "probe-4", "Probe (pending node + stopped resource)" ],
[ "probe-pending-node", "Probe (pending node + unmanaged resource)" ],
[ "failed-probe-primitive", "Maskable vs. unmaskable probe failures on primitive resources" ],
[ "failed-probe-clone", "Maskable vs. unmaskable probe failures on cloned resources" ],
[ "expired-failed-probe-primitive", "Maskable, expired probe failure on primitive resources" ],
[ "standby", "Standby" ],
[ "comments", "Comments" ],
],
[
[ "one-or-more-0", "Everything starts" ],
[ "one-or-more-1", "Nothing starts because of A" ],
[ "one-or-more-2", "D can start because of C" ],
[ "one-or-more-3", "D cannot start because of B and C" ],
[ "one-or-more-4", "D cannot start because of target-role" ],
[ "one-or-more-5", "Start A and F even though C and D are stopped" ],
[ "one-or-more-6", "Leave A running even though B is stopped" ],
[ "one-or-more-7", "Leave A running even though C is stopped" ],
[ "bug-5140-require-all-false", "Allow basegrp:0 to stop" ],
[ "clone-require-all-1", "clone B starts node 3 and 4" ],
[ "clone-require-all-2", "clone B remains stopped everywhere" ],
[ "clone-require-all-3", "clone B stops everywhere because A stops everywhere" ],
[ "clone-require-all-4", "clone B remains on node 3 and 4 with only one instance of A remaining" ],
[ "clone-require-all-5", "clone B starts on node 1 3 and 4" ],
[ "clone-require-all-6", "clone B remains active after shutting down instances of A" ],
[ "clone-require-all-7",
"clone A and B both start at the same time. all instances of A start before B" ],
[ "clone-require-all-no-interleave-1", "C starts everywhere after A and B" ],
[ "clone-require-all-no-interleave-2",
"C starts on nodes 1, 2, and 4 with only one active instance of B" ],
[ "clone-require-all-no-interleave-3",
"C remains active when instance of B is stopped on one node and started on another" ],
[ "one-or-more-unrunnable-instances", "Avoid dependencies on instances that won't ever be started" ],
],
[
[ "location-date-rules-1", "Use location constraints with ineffective date-based rules" ],
[ "location-date-rules-2", "Use location constraints with effective date-based rules" ],
[ "nvpair-date-rules-1", "Use nvpair blocks with a variety of date-based rules" ],
[ "value-source", "Use location constraints with node attribute expressions using value-source" ],
[ "rule-dbl-as-auto-number-match",
"Floating-point rule values default to number comparison: match" ],
[ "rule-dbl-as-auto-number-no-match",
"Floating-point rule values default to number comparison: no "
"match" ],
[ "rule-dbl-as-integer-match",
"Floating-point rule values set to integer comparison: match" ],
[ "rule-dbl-as-integer-no-match",
"Floating-point rule values set to integer comparison: no match" ],
[ "rule-dbl-as-number-match",
"Floating-point rule values set to number comparison: match" ],
[ "rule-dbl-as-number-no-match",
"Floating-point rule values set to number comparison: no match" ],
[ "rule-dbl-parse-fail-default-str-match",
"Floating-point rule values fail to parse, default to string "
"comparison: match" ],
[ "rule-dbl-parse-fail-default-str-no-match",
"Floating-point rule values fail to parse, default to string "
"comparison: no match" ],
[ "rule-int-as-auto-integer-match",
"Integer rule values default to integer comparison: match" ],
[ "rule-int-as-auto-integer-no-match",
"Integer rule values default to integer comparison: no match" ],
[ "rule-int-as-integer-match",
"Integer rule values set to integer comparison: match" ],
[ "rule-int-as-integer-no-match",
"Integer rule values set to integer comparison: no match" ],
[ "rule-int-as-number-match",
"Integer rule values set to number comparison: match" ],
[ "rule-int-as-number-no-match",
"Integer rule values set to number comparison: no match" ],
[ "rule-int-parse-fail-default-str-match",
"Integer rule values fail to parse, default to string "
"comparison: match" ],
[ "rule-int-parse-fail-default-str-no-match",
"Integer rule values fail to parse, default to string "
"comparison: no match" ],
],
[
[ "order1", "Order start 1" ],
[ "order2", "Order start 2" ],
[ "order3", "Order stop" ],
[ "order4", "Order (multiple)" ],
[ "order5", "Order (move)" ],
[ "order6", "Order (move w/ restart)" ],
[ "order7", "Order (mandatory)" ],
[ "order-optional", "Order (score=0)" ],
[ "order-required", "Order (score=INFINITY)" ],
[ "bug-lf-2171", "Prevent group start when clone is stopped" ],
[ "order-clone", "Clone ordering should be able to prevent startup of dependent clones" ],
[ "order-sets", "Ordering for resource sets" ],
[ "order-serialize", "Serialize resources without inhibiting migration" ],
[ "order-serialize-set", "Serialize a set of resources without inhibiting migration" ],
[ "clone-order-primitive", "Order clone start after a primitive" ],
[ "clone-order-16instances", "Verify ordering of 16 cloned resources" ],
[ "order-optional-keyword", "Order (optional keyword)" ],
[ "order-mandatory", "Order (mandatory keyword)" ],
[ "bug-lf-2493",
"Don't imply colocation requirements when applying ordering constraints with clones" ],
[ "ordered-set-basic-startup", "Constraint set with default order settings" ],
[ "ordered-set-natural", "Allow natural set ordering" ],
[ "order-wrong-kind", "Order (error)" ],
],
[
[ "coloc-loop", "Colocation - loop" ],
[ "coloc-many-one", "Colocation - many-to-one" ],
[ "coloc-list", "Colocation - many-to-one with list" ],
[ "coloc-group", "Colocation - groups" ],
[ "coloc-unpromoted-anti", "Anti-colocation with unpromoted shouldn't prevent promoted colocation" ],
[ "coloc-attr", "Colocation based on node attributes" ],
[ "coloc-negative-group", "Negative colocation with a group" ],
[ "coloc-intra-set", "Intra-set colocation" ],
[ "bug-lf-2435", "Colocation sets with a negative score" ],
[ "coloc-clone-stays-active",
"Ensure clones don't get stopped/demoted because a dependent must stop" ],
[ "coloc_fp_logic", "Verify floating point calculations in colocation are working" ],
[ "colo_promoted_w_native",
"cl#5070 - Verify promotion order is affected when colocating promoted with primitive" ],
[ "colo_unpromoted_w_native",
"cl#5070 - Verify promotion order is affected when colocating unpromoted with primitive" ],
[ "anti-colocation-order",
"cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node" ],
[ "anti-colocation-promoted", "Organize order of actions for promoted resources in anti-colocations" ],
[ "anti-colocation-unpromoted", "Organize order of actions for unpromoted resources in anti-colocations" ],
+ [ "group-anticolocation", "Group with failed last member anti-colocated with another group" ],
+ [ "group-colocation-failure",
+ "Group with sole member failed, colocated with another group"
+ ],
[ "enforce-colo1", "Always enforce B with A INFINITY" ],
[ "complex_enforce_colo", "Always enforce B with A INFINITY. (make sure heat-engine stops)" ],
[ "coloc-dependee-should-stay", "Stickiness outweighs group colocation" ],
[ "coloc-dependee-should-move", "Group colocation outweighs stickiness" ],
[ "colocation-influence", "Respect colocation influence" ],
+ [ "colocation-priority-group", "Apply group colocations in order of primary priority" ],
+ [ "colocation-vs-stickiness", "Group stickiness outweighs anti-colocation score" ],
],
[
[ "rsc-sets-seq-true", "Resource Sets - sequential=false" ],
[ "rsc-sets-seq-false", "Resource Sets - sequential=true" ],
[ "rsc-sets-clone", "Resource Sets - Clone" ],
[ "rsc-sets-promoted", "Resource Sets - Promoted" ],
[ "rsc-sets-clone-1", "Resource Sets - Clone (lf#2404)" ],
],
[
[ "attrs1", "string: eq (and)" ],
[ "attrs2", "string: lt / gt (and)" ],
[ "attrs3", "string: ne (or)" ],
[ "attrs4", "string: exists" ],
[ "attrs5", "string: not_exists" ],
[ "attrs6", "is_dc: true" ],
[ "attrs7", "is_dc: false" ],
[ "attrs8", "score_attribute" ],
[ "per-node-attrs", "Per node resource parameters" ],
],
[
[ "mon-rsc-1", "Schedule Monitor - start" ],
[ "mon-rsc-2", "Schedule Monitor - move" ],
[ "mon-rsc-3", "Schedule Monitor - pending start" ],
[ "mon-rsc-4", "Schedule Monitor - move/pending start" ],
],
[
[ "rec-rsc-0", "Resource Recover - no start" ],
[ "rec-rsc-1", "Resource Recover - start" ],
[ "rec-rsc-2", "Resource Recover - monitor" ],
[ "rec-rsc-3", "Resource Recover - stop - ignore" ],
[ "rec-rsc-4", "Resource Recover - stop - block" ],
[ "rec-rsc-5", "Resource Recover - stop - fence" ],
[ "rec-rsc-6", "Resource Recover - multiple - restart" ],
[ "rec-rsc-7", "Resource Recover - multiple - stop" ],
[ "rec-rsc-8", "Resource Recover - multiple - block" ],
[ "rec-rsc-9", "Resource Recover - group/group" ],
[ "stop-unexpected", "Recover multiply active group with stop_unexpected" ],
[ "stop-unexpected-2", "Resource multiply active primitve with stop_unexpected" ],
[ "monitor-recovery", "on-fail=block + resource recovery detected by recurring monitor" ],
[ "stop-failure-no-quorum", "Stop failure without quorum" ],
[ "stop-failure-no-fencing", "Stop failure without fencing available" ],
[ "stop-failure-with-fencing", "Stop failure with fencing available" ],
[ "multiple-active-block-group", "Support of multiple-active=block for resource groups" ],
[ "multiple-monitor-one-failed",
"Consider resource failed if any of the configured monitor operations failed" ],
],
[
[ "quorum-1", "No quorum - ignore" ],
[ "quorum-2", "No quorum - freeze" ],
[ "quorum-3", "No quorum - stop" ],
[ "quorum-4", "No quorum - start anyway" ],
[ "quorum-5", "No quorum - start anyway (group)" ],
[ "quorum-6", "No quorum - start anyway (clone)" ],
[ "bug-cl-5212", "No promotion with no-quorum-policy=freeze" ],
[ "suicide-needed-inquorate", "no-quorum-policy=suicide: suicide necessary" ],
[ "suicide-not-needed-initial-quorum",
"no-quorum-policy=suicide: suicide not necessary at initial quorum" ],
[ "suicide-not-needed-never-quorate",
"no-quorum-policy=suicide: suicide not necessary if never quorate" ],
[ "suicide-not-needed-quorate", "no-quorum-policy=suicide: suicide necessary if quorate" ],
],
[
[ "rec-node-1", "Node Recover - Startup - no fence" ],
[ "rec-node-2", "Node Recover - Startup - fence" ],
[ "rec-node-3", "Node Recover - HA down - no fence" ],
[ "rec-node-4", "Node Recover - HA down - fence" ],
[ "rec-node-5", "Node Recover - CRM down - no fence" ],
[ "rec-node-6", "Node Recover - CRM down - fence" ],
[ "rec-node-7", "Node Recover - no quorum - ignore" ],
[ "rec-node-8", "Node Recover - no quorum - freeze" ],
[ "rec-node-9", "Node Recover - no quorum - stop" ],
[ "rec-node-10", "Node Recover - no quorum - stop w/fence" ],
[ "rec-node-11", "Node Recover - CRM down w/ group - fence" ],
[ "rec-node-12", "Node Recover - nothing active - fence" ],
[ "rec-node-13", "Node Recover - failed resource + shutdown - fence" ],
[ "rec-node-15", "Node Recover - unknown lrm section" ],
[ "rec-node-14", "Serialize all stonith's" ],
],
[
[ "multi1", "Multiple Active (stop/start)" ],
],
[
[ "migrate-begin", "Normal migration" ],
[ "migrate-success", "Completed migration" ],
[ "migrate-partial-1", "Completed migration, missing stop on source" ],
[ "migrate-partial-2", "Successful migrate_to only" ],
[ "migrate-partial-3", "Successful migrate_to only, target down" ],
[ "migrate-partial-4", "Migrate from the correct host after migrate_to+migrate_from" ],
[ "bug-5186-partial-migrate", "Handle partial migration when src node loses membership" ],
[ "migrate-fail-2", "Failed migrate_from" ],
[ "migrate-fail-3", "Failed migrate_from + stop on source" ],
[ "migrate-fail-4",
"Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target" ],
[ "migrate-fail-5", "Failed migrate_from + stop on source and target" ],
[ "migrate-fail-6", "Failed migrate_to" ],
[ "migrate-fail-7", "Failed migrate_to + stop on source" ],
[ "migrate-fail-8",
"Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target" ],
[ "migrate-fail-9", "Failed migrate_to + stop on source and target" ],
[ "migration-ping-pong", "Old migrate_to failure + successful migrate_from on same node" ],
[ "migrate-stop", "Migration in a stopping stack" ],
[ "migrate-start", "Migration in a starting stack" ],
[ "migrate-stop_start", "Migration in a restarting stack" ],
[ "migrate-stop-complex", "Migration in a complex stopping stack" ],
[ "migrate-start-complex", "Migration in a complex starting stack" ],
[ "migrate-stop-start-complex", "Migration in a complex moving stack" ],
[ "migrate-shutdown", "Order the post-migration 'stop' before node shutdown" ],
[ "migrate-1", "Migrate (migrate)" ],
[ "migrate-2", "Migrate (stable)" ],
[ "migrate-3", "Migrate (failed migrate_to)" ],
[ "migrate-4", "Migrate (failed migrate_from)" ],
[ "novell-252693", "Migration in a stopping stack" ],
[ "novell-252693-2", "Migration in a starting stack" ],
[ "novell-252693-3", "Non-Migration in a starting and stopping stack" ],
[ "bug-1820", "Migration in a group" ],
[ "bug-1820-1", "Non-migration in a group" ],
[ "migrate-5", "Primitive migration with a clone" ],
[ "migrate-fencing", "Migration after Fencing" ],
[ "migrate-both-vms", "Migrate two VMs that have no colocation" ],
[ "migration-behind-migrating-remote", "Migrate resource behind migrating remote connection" ],
[ "1-a-then-bm-move-b", "Advanced migrate logic. A then B. migrate B" ],
[ "2-am-then-b-move-a", "Advanced migrate logic, A then B, migrate A without stopping B" ],
[ "3-am-then-bm-both-migrate", "Advanced migrate logic. A then B. migrate both" ],
[ "4-am-then-bm-b-not-migratable", "Advanced migrate logic, A then B, B not migratable" ],
[ "5-am-then-bm-a-not-migratable", "Advanced migrate logic. A then B. move both, a not migratable" ],
[ "6-migrate-group", "Advanced migrate logic, migrate a group" ],
[ "7-migrate-group-one-unmigratable",
"Advanced migrate logic, migrate group mixed with allow-migrate true/false" ],
[ "8-am-then-bm-a-migrating-b-stopping",
"Advanced migrate logic, A then B, A migrating, B stopping" ],
[ "9-am-then-bm-b-migrating-a-stopping",
"Advanced migrate logic, A then B, B migrate, A stopping" ],
[ "10-a-then-bm-b-move-a-clone",
"Advanced migrate logic, A clone then B, migrate B while stopping A" ],
[ "11-a-then-bm-b-move-a-clone-starting",
"Advanced migrate logic, A clone then B, B moving while A is start/stopping" ],
[ "a-promote-then-b-migrate", "A promote then B start. migrate B" ],
[ "a-demote-then-b-migrate", "A demote then B stop. migrate B" ],
[ "probe-target-of-failed-migrate_to-1", "Failed migrate_to, target rejoins" ],
[ "probe-target-of-failed-migrate_to-2", "Failed migrate_to, target rejoined and probed" ],
[ "partial-live-migration-multiple-active", "Prevent running on multiple nodes due to partial live migration" ],
[ "bug-lf-2422", "Dependency on partially active group - stop ocfs:*" ],
],
[
[ "clone-anon-probe-1", "Probe the correct (anonymous) clone instance for each node" ],
[ "clone-anon-probe-2", "Avoid needless re-probing of anonymous clones" ],
[ "clone-anon-failcount", "Merge failcounts for anonymous clones" ],
[ "force-anon-clone-max", "Update clone-max properly when forcing a clone to be anonymous" ],
[ "anon-instance-pending", "Assign anonymous clone instance numbers properly when action pending" ],
[ "inc0", "Incarnation start" ],
[ "inc1", "Incarnation start order" ],
[ "inc2", "Incarnation silent restart, stop, move" ],
[ "inc3", "Inter-incarnation ordering, silent restart, stop, move" ],
[ "inc4", "Inter-incarnation ordering, silent restart, stop, move (ordered)" ],
[ "inc5", "Inter-incarnation ordering, silent restart, stop, move (restart 1)" ],
[ "inc6", "Inter-incarnation ordering, silent restart, stop, move (restart 2)" ],
[ "inc7", "Clone colocation" ],
[ "inc8", "Clone anti-colocation" ],
[ "inc9", "Non-unique clone" ],
[ "inc10", "Non-unique clone (stop)" ],
[ "inc11", "Primitive colocation with clones" ],
[ "inc12", "Clone shutdown" ],
[ "cloned-group", "Make sure only the correct number of cloned groups are started" ],
[ "cloned-group-stop", "Ensure stopping qpidd also stops glance and cinder" ],
[ "clone-no-shuffle", "Don't prioritize allocation of instances that must be moved" ],
[ "clone-max-zero", "Orphan processing with clone-max=0" ],
[ "clone-anon-dup",
"Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node" ],
[ "bug-lf-2160", "Don't shuffle clones due to colocation" ],
[ "bug-lf-2213", "clone-node-max enforcement for cloned groups" ],
[ "bug-lf-2153", "Clone ordering constraints" ],
[ "bug-lf-2361", "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable" ],
[ "bug-lf-2317", "Avoid needless restart of primitive depending on a clone" ],
[ "bug-lf-2453", "Enforce mandatory clone ordering without colocation" ],
[ "bug-lf-2508", "Correctly reconstruct the status of anonymous cloned groups" ],
[ "bug-lf-2544", "Balanced clone placement" ],
[ "bug-lf-2445", "Redistribute clones with node-max > 1 and stickiness = 0" ],
[ "bug-lf-2574", "Avoid clone shuffle" ],
[ "bug-lf-2581", "Avoid group restart due to unrelated clone (re)start" ],
[ "bug-cl-5168", "Don't shuffle clones" ],
[ "bug-cl-5170", "Prevent clone from starting with on-fail=block" ],
[ "clone-fail-block-colocation", "Move colocated group when failed clone has on-fail=block" ],
[ "clone-interleave-1",
"Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)" ],
[ "clone-interleave-2", "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)" ],
[ "clone-interleave-3",
"Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)" ],
[ "rebalance-unique-clones", "Rebalance unique clone instances with no stickiness" ],
[ "clone-requires-quorum-recovery", "Clone with requires=quorum on failed node needing recovery" ],
[ "clone-requires-quorum",
"Clone with requires=quorum with presumed-inactive instance on failed node" ],
],
[
[ "cloned_start_one", "order first clone then clone... first clone_min=2" ],
[ "cloned_start_two", "order first clone then clone... first clone_min=2" ],
[ "cloned_stop_one", "order first clone then clone... first clone_min=2" ],
[ "cloned_stop_two", "order first clone then clone... first clone_min=2" ],
[ "clone_min_interleave_start_one",
"order first clone then clone... first clone_min=2 and then has interleave=true" ],
[ "clone_min_interleave_start_two",
"order first clone then clone... first clone_min=2 and then has interleave=true" ],
[ "clone_min_interleave_stop_one",
"order first clone then clone... first clone_min=2 and then has interleave=true" ],
[ "clone_min_interleave_stop_two",
"order first clone then clone... first clone_min=2 and then has interleave=true" ],
[ "clone_min_start_one", "order first clone then primitive... first clone_min=2" ],
[ "clone_min_start_two", "order first clone then primitive... first clone_min=2" ],
[ "clone_min_stop_all", "order first clone then primitive... first clone_min=2" ],
[ "clone_min_stop_one", "order first clone then primitive... first clone_min=2" ],
[ "clone_min_stop_two", "order first clone then primitive... first clone_min=2" ],
],
[
[ "unfence-startup", "Clean unfencing" ],
[ "unfence-definition", "Unfencing when the agent changes" ],
[ "unfence-parameters", "Unfencing when the agent parameters changes" ],
[ "unfence-device", "Unfencing when a cluster has only fence devices" ],
],
[
[ "promoted-0", "Stopped -> Unpromoted" ],
[ "promoted-1", "Stopped -> Promote" ],
[ "promoted-2", "Stopped -> Promote : notify" ],
[ "promoted-3", "Stopped -> Promote : promoted location" ],
[ "promoted-4", "Started -> Promote : promoted location" ],
[ "promoted-5", "Promoted -> Promoted" ],
[ "promoted-6", "Promoted -> Promoted (2)" ],
[ "promoted-7", "Promoted -> Fenced" ],
[ "promoted-8", "Promoted -> Fenced -> Moved" ],
[ "promoted-9", "Stopped + Promotable + No quorum" ],
[ "promoted-10", "Stopped -> Promotable : notify with monitor" ],
[ "promoted-11", "Stopped -> Promote : colocation" ],
[ "novell-239082", "Demote/Promote ordering" ],
[ "novell-239087", "Stable promoted placement" ],
[ "promoted-12", "Promotion based solely on rsc_location constraints" ],
[ "promoted-13", "Include preferences of colocated resources when placing promoted" ],
[ "promoted-demote", "Ordering when actions depends on demoting an unpromoted resource" ],
[ "promoted-ordering", "Prevent resources from starting that need a promoted" ],
[ "bug-1765", "Verify promoted-with-promoted colocation does not stop unpromoted instances" ],
[ "promoted-group", "Promotion of cloned groups" ],
[ "bug-lf-1852", "Don't shuffle promotable instances unnecessarily" ],
[ "promoted-failed-demote", "Don't retry failed demote actions" ],
[ "promoted-failed-demote-2", "Don't retry failed demote actions (notify=false)" ],
[ "promoted-depend",
"Ensure resources that depend on promoted instance don't get allocated until that does" ],
[ "promoted-reattach", "Re-attach to a running promoted" ],
[ "promoted-allow-start", "Don't include promoted score if it would prevent allocation" ],
[ "promoted-colocation",
"Allow promoted instances placemaker to be influenced by colocation constraints" ],
[ "promoted-pseudo", "Make sure promote/demote pseudo actions are created correctly" ],
[ "promoted-role", "Prevent target-role from promoting more than promoted-max instances" ],
[ "bug-lf-2358", "Anti-colocation of promoted instances" ],
[ "promoted-promotion-constraint", "Mandatory promoted colocation constraints" ],
[ "unmanaged-promoted", "Ensure role is preserved for unmanaged resources" ],
[ "promoted-unmanaged-monitor", "Start correct monitor for unmanaged promoted instances" ],
[ "promoted-demote-2", "Demote does not clear past failure" ],
[ "promoted-move", "Move promoted based on failure of colocated group" ],
[ "promoted-probed-score", "Observe the promotion score of probed resources" ],
[ "colocation_constraint_stops_promoted",
"cl#5054 - Ensure promoted is demoted when stopped by colocation constraint" ],
[ "colocation_constraint_stops_unpromoted",
"cl#5054 - Ensure unpromoted is not demoted when stopped by colocation constraint" ],
[ "order_constraint_stops_promoted",
"cl#5054 - Ensure promoted is demoted when stopped by order constraint" ],
[ "order_constraint_stops_unpromoted",
"cl#5054 - Ensure unpromoted is not demoted when stopped by order constraint" ],
[ "promoted_monitor_restart", "cl#5072 - Ensure promoted monitor operation will start after promotion" ],
[ "bug-rh-880249", "Handle replacement of an m/s resource with a primitive" ],
[ "bug-5143-ms-shuffle", "Prevent promoted instance shuffling due to promotion score" ],
[ "promoted-demote-block", "Block promotion if demote fails with on-fail=block" ],
[ "promoted-dependent-ban",
"Don't stop instances from being active because a dependent is banned from that host" ],
[ "promoted-stop", "Stop instances due to location constraint with role=Started" ],
[ "promoted-partially-demoted-group", "Allow partially demoted group to finish demoting" ],
[ "bug-cl-5213", "Ensure role colocation with -INFINITY is enforced" ],
[ "bug-cl-5219", "Allow unrelated resources with a common colocation target to remain promoted" ],
[ "promoted-asymmetrical-order",
"Fix the behaviors of multi-state resources with asymmetrical ordering" ],
[ "promoted-notify", "Promotion with notifications" ],
[ "promoted-score-startup", "Use permanent promoted scores without LRM history" ],
[ "failed-demote-recovery", "Recover resource in unpromoted role after demote fails" ],
[ "failed-demote-recovery-promoted", "Recover resource in promoted role after demote fails" ],
[ "on_fail_demote1", "Recovery with on-fail=\"demote\" on healthy cluster, remote, guest, and bundle nodes" ],
[ "on_fail_demote2", "Recovery with on-fail=\"demote\" with promotion on different node" ],
[ "on_fail_demote3", "Recovery with on-fail=\"demote\" with no promotion" ],
[ "on_fail_demote4", "Recovery with on-fail=\"demote\" on failed cluster, remote, guest, and bundle nodes" ],
[ "no_quorum_demote", "Promotable demotion and primitive stop with no-quorum-policy=\"demote\"" ],
[ "no-promote-on-unrunnable-guest", "Don't select bundle instance for promotion when container can't run" ],
],
[
[ "history-1", "Correctly parse stateful-1 resource state" ],
],
[
[ "managed-0", "Managed (reference)" ],
[ "managed-1", "Not managed - down" ],
[ "managed-2", "Not managed - up" ],
[ "bug-5028", "Shutdown should block if anything depends on an unmanaged resource" ],
[ "bug-5028-detach", "Ensure detach still works" ],
[ "bug-5028-bottom",
"Ensure shutdown still blocks if the blocked resource is at the bottom of the stack" ],
[ "unmanaged-stop-1",
"cl#5155 - Block the stop of resources if any depending resource is unmanaged" ],
[ "unmanaged-stop-2",
"cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged" ],
[ "unmanaged-stop-3",
"cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged" ],
[ "unmanaged-stop-4",
"cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged" ],
[ "unmanaged-block-restart",
"Block restart of resources if any dependent resource in a group is unmanaged" ],
],
[
[ "interleave-0", "Interleave (reference)" ],
[ "interleave-1", "coloc - not interleaved" ],
[ "interleave-2", "coloc - interleaved" ],
[ "interleave-3", "coloc - interleaved (2)" ],
[ "interleave-pseudo-stop", "Interleaved clone during stonith" ],
[ "interleave-stop", "Interleaved clone during stop" ],
[ "interleave-restart", "Interleaved clone during dependency restart" ],
],
[
[ "notify-0", "Notify reference" ],
[ "notify-1", "Notify simple" ],
[ "notify-2", "Notify simple, confirm" ],
[ "notify-3", "Notify move, confirm" ],
[ "novell-239079", "Notification priority" ],
#[ "notify-2", "Notify - 764" ],
[ "notifs-for-unrunnable", "Don't schedule notifications for an unrunnable action" ],
[ "route-remote-notify", "Route remote notify actions through correct cluster node" ],
[ "notify-behind-stopping-remote", "Don't schedule notifications behind stopped remote" ],
],
[
[ "594", "OSDL #594 - Unrunnable actions scheduled in transition" ],
[ "662", "OSDL #662 - Two resources start on one node when incarnation_node_max = 1" ],
[ "696", "OSDL #696 - CRM starts stonith RA without monitor" ],
[ "726", "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop" ],
[ "735", "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3" ],
[ "764", "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1" ],
[ "797", "OSDL #797 - Assert triggered: task_id_i > max_call_id" ],
[ "829", "OSDL #829" ],
[ "994",
"OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted" ],
[ "994-2", "OSDL #994 - with a dependent resource" ],
[ "1360", "OSDL #1360 - Clone stickiness" ],
[ "1484", "OSDL #1484 - on_fail=stop" ],
[ "1494", "OSDL #1494 - Clone stability" ],
[ "unrunnable-1", "Unrunnable" ],
[ "unrunnable-2", "Unrunnable 2" ],
[ "stonith-0", "Stonith loop - 1" ],
[ "stonith-1", "Stonith loop - 2" ],
[ "stonith-2", "Stonith loop - 3" ],
[ "stonith-3", "Stonith startup" ],
[ "stonith-4", "Stonith node state" ],
[ "dc-fence-ordering", "DC needs fencing while other nodes are shutting down" ],
[ "bug-1572-1", "Recovery of groups depending on promotable role" ],
[ "bug-1572-2", "Recovery of groups depending on promotable role when promoted is not re-promoted" ],
[ "bug-1685", "Depends-on-promoted ordering" ],
[ "bug-1822", "Don't promote partially active groups" ],
[ "bug-pm-11", "New resource added to a m/s group" ],
[ "bug-pm-12", "Recover only the failed portion of a cloned group" ],
[ "bug-n-387749", "Don't shuffle clone instances" ],
[ "bug-n-385265",
"Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped" ],
[ "bug-n-385265-2",
"Ensure groups are migrated instead of remaining partially active on the current node" ],
[ "bug-lf-1920", "Correctly handle probes that find active resources" ],
[ "bnc-515172", "Location constraint with multiple expressions" ],
[ "colocate-primitive-with-clone", "Optional colocation with a clone" ],
[ "use-after-free-merge", "Use-after-free in native_merge_weights" ],
[ "bug-lf-2551", "STONITH ordering for stop" ],
[ "bug-lf-2606", "Stonith implies demote" ],
[ "bug-lf-2474", "Ensure resource op timeout takes precedence over op_defaults" ],
[ "bug-suse-707150", "Prevent vm-01 from starting due to colocation/ordering" ],
[ "bug-5014-A-start-B-start", "Verify when A starts B starts using symmetrical=false" ],
[ "bug-5014-A-stop-B-started",
"Verify when A stops B does not stop if it has already started using symmetric=false" ],
[ "bug-5014-A-stopped-B-stopped",
"Verify when A is stopped and B has not started, B does not start before A using symmetric=false" ],
[ "bug-5014-CthenAthenB-C-stopped",
"Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts" ],
[ "bug-5014-CLONE-A-start-B-start",
"Verify when A starts B starts using clone resources with symmetric=false" ],
[ "bug-5014-CLONE-A-stop-B-started",
"Verify when A stops B does not stop if it has already started using clone resources with symmetric=false" ],
[ "bug-5014-GROUP-A-start-B-start",
"Verify when A starts B starts when using group resources with symmetric=false" ],
[ "bug-5014-GROUP-A-stopped-B-started",
"Verify when A stops B does not stop if it has already started using group resources with symmetric=false" ],
[ "bug-5014-GROUP-A-stopped-B-stopped",
"Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false" ],
[ "bug-5014-ordered-set-symmetrical-false",
"Verify ordered sets work with symmetrical=false" ],
[ "bug-5014-ordered-set-symmetrical-true",
"Verify ordered sets work with symmetrical=true" ],
[ "clbz5007-promotable-colocation",
"Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources" ],
[ "bug-5038", "Prevent restart of anonymous clones when clone-max decreases" ],
[ "bug-5025-1", "Automatically clean up failcount after resource config change with reload" ],
[ "bug-5025-2", "Make sure clear failcount action isn't set when config does not change" ],
[ "bug-5025-3", "Automatically clean up failcount after resource config change with restart" ],
[ "bug-5025-4", "Clear failcount when last failure is a start op and rsc attributes changed" ],
[ "failcount", "Ensure failcounts are correctly expired" ],
[ "failcount-block", "Ensure failcounts are not expired when on-fail=block is present" ],
[ "per-op-failcount", "Ensure per-operation failcount is handled and not passed to fence agent" ],
[ "on-fail-ignore", "Ensure on-fail=ignore works even beyond migration-threshold" ],
[ "monitor-onfail-restart", "bug-5058 - Monitor failure with on-fail set to restart" ],
[ "monitor-onfail-stop", "bug-5058 - Monitor failure wiht on-fail set to stop" ],
[ "bug-5059", "No need to restart p_stateful1:*" ],
[ "bug-5069-op-enabled", "Test on-fail=ignore with failure when monitor is enabled" ],
[ "bug-5069-op-disabled", "Test on-fail-ignore with failure when monitor is disabled" ],
[ "obsolete-lrm-resource", "cl#5115 - Do not use obsolete lrm_resource sections" ],
[ "expire-non-blocked-failure",
"Ignore failure-timeout only if the failed operation has on-fail=block" ],
[ "asymmetrical-order-move", "Respect asymmetrical ordering when trying to move resources" ],
[ "asymmetrical-order-restart", "Respect asymmetrical ordering when restarting dependent resource" ],
[ "start-then-stop-with-unfence", "Avoid graph loop with start-then-stop constraint plus unfencing" ],
[ "order-expired-failure", "Order failcount cleanup after remote fencing" ],
[ "ignore_stonith_rsc_order1",
"cl#5056- Ignore order constraint between stonith and non-stonith rsc" ],
[ "ignore_stonith_rsc_order2",
"cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith" ],
[ "ignore_stonith_rsc_order3", "cl#5056- Ignore order constraint, stonith clone and mixed group" ],
[ "ignore_stonith_rsc_order4",
"cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group" ],
[ "honor_stonith_rsc_order1",
"cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)" ],
[ "honor_stonith_rsc_order2",
"cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)" ],
[ "honor_stonith_rsc_order3",
"cl#5056- Honor order constraint, stonith clones with nested pure stonith group" ],
[ "honor_stonith_rsc_order4",
"cl#5056- Honor order constraint, between two native stonith rscs" ],
[ "multiply-active-stonith", "Multiply active stonith" ],
[ "probe-timeout", "cl#5099 - Default probe timeout" ],
[ "order-first-probes",
"cl#5301 - respect order constraints when relevant resources are being probed" ],
[ "concurrent-fencing", "Allow performing fencing operations in parallel" ],
[ "priority-fencing-delay", "Delay fencing targeting the more significant node" ],
],
[
[ "systemhealth1", "System Health () #1" ],
[ "systemhealth2", "System Health () #2" ],
[ "systemhealth3", "System Health () #3" ],
[ "systemhealthn1", "System Health (None) #1" ],
[ "systemhealthn2", "System Health (None) #2" ],
[ "systemhealthn3", "System Health (None) #3" ],
[ "systemhealthm1", "System Health (Migrate On Red) #1" ],
[ "systemhealthm2", "System Health (Migrate On Red) #2" ],
[ "systemhealthm3", "System Health (Migrate On Red) #3" ],
[ "systemhealtho1", "System Health (Only Green) #1" ],
[ "systemhealtho2", "System Health (Only Green) #2" ],
[ "systemhealtho3", "System Health (Only Green) #3" ],
[ "systemhealthp1", "System Health (Progessive) #1" ],
[ "systemhealthp2", "System Health (Progessive) #2" ],
[ "systemhealthp3", "System Health (Progessive) #3" ],
[ "allow-unhealthy-nodes", "System Health (migrate-on-red + allow-unhealth-nodes)" ],
],
[
[ "utilization", "Placement Strategy - utilization" ],
[ "minimal", "Placement Strategy - minimal" ],
[ "balanced", "Placement Strategy - balanced" ],
],
[
[ "placement-stickiness", "Optimized Placement Strategy - stickiness" ],
[ "placement-priority", "Optimized Placement Strategy - priority" ],
[ "placement-location", "Optimized Placement Strategy - location" ],
[ "placement-capacity", "Optimized Placement Strategy - capacity" ],
],
[
[ "utilization-order1", "Utilization Order - Simple" ],
[ "utilization-order2", "Utilization Order - Complex" ],
[ "utilization-order3", "Utilization Order - Migrate" ],
[ "utilization-order4", "Utilization Order - Live Migration (bnc#695440)" ],
[ "utilization-complex", "Utilization with complex relationships" ],
[ "utilization-shuffle",
"Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3" ],
[ "load-stopped-loop", "Avoid transition loop due to load_stopped (cl#5044)" ],
[ "load-stopped-loop-2",
"cl#5235 - Prevent graph loops that can be introduced by load_stopped -> migrate_to ordering" ],
],
[
[ "colocated-utilization-primitive-1", "Colocated Utilization - Primitive" ],
[ "colocated-utilization-primitive-2", "Colocated Utilization - Choose the most capable node" ],
[ "colocated-utilization-group", "Colocated Utilization - Group" ],
[ "colocated-utilization-clone", "Colocated Utilization - Clone" ],
[ "utilization-check-allowed-nodes",
"Only check the capacities of the nodes that can run the resource" ],
],
[
[ "reprobe-target_rc", "Ensure correct target_rc for reprobe of inactive resources" ],
[ "node-maintenance-1", "cl#5128 - Node maintenance" ],
[ "node-maintenance-2", "cl#5128 - Node maintenance (coming out of maintenance mode)" ],
[ "shutdown-maintenance-node", "Do not fence a maintenance node if it shuts down cleanly" ],
[ "rsc-maintenance", "Per-resource maintenance" ],
],
[
[ "not-installed-agent", "The resource agent is missing" ],
[ "not-installed-tools", "Something the resource agent needs is missing" ],
],
[
[ "stopped-monitor-00", "Stopped Monitor - initial start" ],
[ "stopped-monitor-01", "Stopped Monitor - failed started" ],
[ "stopped-monitor-02", "Stopped Monitor - started multi-up" ],
[ "stopped-monitor-03", "Stopped Monitor - stop started" ],
[ "stopped-monitor-04", "Stopped Monitor - failed stop" ],
[ "stopped-monitor-05", "Stopped Monitor - start unmanaged" ],
[ "stopped-monitor-06", "Stopped Monitor - unmanaged multi-up" ],
[ "stopped-monitor-07", "Stopped Monitor - start unmanaged multi-up" ],
[ "stopped-monitor-08", "Stopped Monitor - migrate" ],
[ "stopped-monitor-09", "Stopped Monitor - unmanage started" ],
[ "stopped-monitor-10", "Stopped Monitor - unmanaged started multi-up" ],
[ "stopped-monitor-11", "Stopped Monitor - stop unmanaged started" ],
[ "stopped-monitor-12", "Stopped Monitor - unmanaged started multi-up (target-role=Stopped)" ],
[ "stopped-monitor-20", "Stopped Monitor - initial stop" ],
[ "stopped-monitor-21", "Stopped Monitor - stopped single-up" ],
[ "stopped-monitor-22", "Stopped Monitor - stopped multi-up" ],
[ "stopped-monitor-23", "Stopped Monitor - start stopped" ],
[ "stopped-monitor-24", "Stopped Monitor - unmanage stopped" ],
[ "stopped-monitor-25", "Stopped Monitor - unmanaged stopped multi-up" ],
[ "stopped-monitor-26", "Stopped Monitor - start unmanaged stopped" ],
[ "stopped-monitor-27", "Stopped Monitor - unmanaged stopped multi-up (target-role=Started)" ],
[ "stopped-monitor-30", "Stopped Monitor - new node started" ],
[ "stopped-monitor-31", "Stopped Monitor - new node stopped" ],
],
[
# This is a combo test to check:
# - probe timeout defaults to the minimum-interval monitor's
# - duplicate recurring operations are ignored
# - if timeout spec is bad, the default timeout is used
# - failure is blocked with on-fail=block even if ISO8601 interval is specified
# - started/stopped role monitors are started/stopped on right nodes
[ "intervals", "Recurring monitor interval handling" ],
],
[
[ "ticket-primitive-1", "Ticket - Primitive (loss-policy=stop, initial)" ],
[ "ticket-primitive-2", "Ticket - Primitive (loss-policy=stop, granted)" ],
[ "ticket-primitive-3", "Ticket - Primitive (loss-policy-stop, revoked)" ],
[ "ticket-primitive-4", "Ticket - Primitive (loss-policy=demote, initial)" ],
[ "ticket-primitive-5", "Ticket - Primitive (loss-policy=demote, granted)" ],
[ "ticket-primitive-6", "Ticket - Primitive (loss-policy=demote, revoked)" ],
[ "ticket-primitive-7", "Ticket - Primitive (loss-policy=fence, initial)" ],
[ "ticket-primitive-8", "Ticket - Primitive (loss-policy=fence, granted)" ],
[ "ticket-primitive-9", "Ticket - Primitive (loss-policy=fence, revoked)" ],
[ "ticket-primitive-10", "Ticket - Primitive (loss-policy=freeze, initial)" ],
[ "ticket-primitive-11", "Ticket - Primitive (loss-policy=freeze, granted)" ],
[ "ticket-primitive-12", "Ticket - Primitive (loss-policy=freeze, revoked)" ],
[ "ticket-primitive-13", "Ticket - Primitive (loss-policy=stop, standby, granted)" ],
[ "ticket-primitive-14", "Ticket - Primitive (loss-policy=stop, granted, standby)" ],
[ "ticket-primitive-15", "Ticket - Primitive (loss-policy=stop, standby, revoked)" ],
[ "ticket-primitive-16", "Ticket - Primitive (loss-policy=demote, standby, granted)" ],
[ "ticket-primitive-17", "Ticket - Primitive (loss-policy=demote, granted, standby)" ],
[ "ticket-primitive-18", "Ticket - Primitive (loss-policy=demote, standby, revoked)" ],
[ "ticket-primitive-19", "Ticket - Primitive (loss-policy=fence, standby, granted)" ],
[ "ticket-primitive-20", "Ticket - Primitive (loss-policy=fence, granted, standby)" ],
[ "ticket-primitive-21", "Ticket - Primitive (loss-policy=fence, standby, revoked)" ],
[ "ticket-primitive-22", "Ticket - Primitive (loss-policy=freeze, standby, granted)" ],
[ "ticket-primitive-23", "Ticket - Primitive (loss-policy=freeze, granted, standby)" ],
[ "ticket-primitive-24", "Ticket - Primitive (loss-policy=freeze, standby, revoked)" ],
],
[
[ "ticket-group-1", "Ticket - Group (loss-policy=stop, initial)" ],
[ "ticket-group-2", "Ticket - Group (loss-policy=stop, granted)" ],
[ "ticket-group-3", "Ticket - Group (loss-policy-stop, revoked)" ],
[ "ticket-group-4", "Ticket - Group (loss-policy=demote, initial)" ],
[ "ticket-group-5", "Ticket - Group (loss-policy=demote, granted)" ],
[ "ticket-group-6", "Ticket - Group (loss-policy=demote, revoked)" ],
[ "ticket-group-7", "Ticket - Group (loss-policy=fence, initial)" ],
[ "ticket-group-8", "Ticket - Group (loss-policy=fence, granted)" ],
[ "ticket-group-9", "Ticket - Group (loss-policy=fence, revoked)" ],
[ "ticket-group-10", "Ticket - Group (loss-policy=freeze, initial)" ],
[ "ticket-group-11", "Ticket - Group (loss-policy=freeze, granted)" ],
[ "ticket-group-12", "Ticket - Group (loss-policy=freeze, revoked)" ],
[ "ticket-group-13", "Ticket - Group (loss-policy=stop, standby, granted)" ],
[ "ticket-group-14", "Ticket - Group (loss-policy=stop, granted, standby)" ],
[ "ticket-group-15", "Ticket - Group (loss-policy=stop, standby, revoked)" ],
[ "ticket-group-16", "Ticket - Group (loss-policy=demote, standby, granted)" ],
[ "ticket-group-17", "Ticket - Group (loss-policy=demote, granted, standby)" ],
[ "ticket-group-18", "Ticket - Group (loss-policy=demote, standby, revoked)" ],
[ "ticket-group-19", "Ticket - Group (loss-policy=fence, standby, granted)" ],
[ "ticket-group-20", "Ticket - Group (loss-policy=fence, granted, standby)" ],
[ "ticket-group-21", "Ticket - Group (loss-policy=fence, standby, revoked)" ],
[ "ticket-group-22", "Ticket - Group (loss-policy=freeze, standby, granted)" ],
[ "ticket-group-23", "Ticket - Group (loss-policy=freeze, granted, standby)" ],
[ "ticket-group-24", "Ticket - Group (loss-policy=freeze, standby, revoked)" ],
],
[
[ "ticket-clone-1", "Ticket - Clone (loss-policy=stop, initial)" ],
[ "ticket-clone-2", "Ticket - Clone (loss-policy=stop, granted)" ],
[ "ticket-clone-3", "Ticket - Clone (loss-policy-stop, revoked)" ],
[ "ticket-clone-4", "Ticket - Clone (loss-policy=demote, initial)" ],
[ "ticket-clone-5", "Ticket - Clone (loss-policy=demote, granted)" ],
[ "ticket-clone-6", "Ticket - Clone (loss-policy=demote, revoked)" ],
[ "ticket-clone-7", "Ticket - Clone (loss-policy=fence, initial)" ],
[ "ticket-clone-8", "Ticket - Clone (loss-policy=fence, granted)" ],
[ "ticket-clone-9", "Ticket - Clone (loss-policy=fence, revoked)" ],
[ "ticket-clone-10", "Ticket - Clone (loss-policy=freeze, initial)" ],
[ "ticket-clone-11", "Ticket - Clone (loss-policy=freeze, granted)" ],
[ "ticket-clone-12", "Ticket - Clone (loss-policy=freeze, revoked)" ],
[ "ticket-clone-13", "Ticket - Clone (loss-policy=stop, standby, granted)" ],
[ "ticket-clone-14", "Ticket - Clone (loss-policy=stop, granted, standby)" ],
[ "ticket-clone-15", "Ticket - Clone (loss-policy=stop, standby, revoked)" ],
[ "ticket-clone-16", "Ticket - Clone (loss-policy=demote, standby, granted)" ],
[ "ticket-clone-17", "Ticket - Clone (loss-policy=demote, granted, standby)" ],
[ "ticket-clone-18", "Ticket - Clone (loss-policy=demote, standby, revoked)" ],
[ "ticket-clone-19", "Ticket - Clone (loss-policy=fence, standby, granted)" ],
[ "ticket-clone-20", "Ticket - Clone (loss-policy=fence, granted, standby)" ],
[ "ticket-clone-21", "Ticket - Clone (loss-policy=fence, standby, revoked)" ],
[ "ticket-clone-22", "Ticket - Clone (loss-policy=freeze, standby, granted)" ],
[ "ticket-clone-23", "Ticket - Clone (loss-policy=freeze, granted, standby)" ],
[ "ticket-clone-24", "Ticket - Clone (loss-policy=freeze, standby, revoked)" ],
],
[
[ "ticket-promoted-1", "Ticket - Promoted (loss-policy=stop, initial)" ],
[ "ticket-promoted-2", "Ticket - Promoted (loss-policy=stop, granted)" ],
[ "ticket-promoted-3", "Ticket - Promoted (loss-policy-stop, revoked)" ],
[ "ticket-promoted-4", "Ticket - Promoted (loss-policy=demote, initial)" ],
[ "ticket-promoted-5", "Ticket - Promoted (loss-policy=demote, granted)" ],
[ "ticket-promoted-6", "Ticket - Promoted (loss-policy=demote, revoked)" ],
[ "ticket-promoted-7", "Ticket - Promoted (loss-policy=fence, initial)" ],
[ "ticket-promoted-8", "Ticket - Promoted (loss-policy=fence, granted)" ],
[ "ticket-promoted-9", "Ticket - Promoted (loss-policy=fence, revoked)" ],
[ "ticket-promoted-10", "Ticket - Promoted (loss-policy=freeze, initial)" ],
[ "ticket-promoted-11", "Ticket - Promoted (loss-policy=freeze, granted)" ],
[ "ticket-promoted-12", "Ticket - Promoted (loss-policy=freeze, revoked)" ],
[ "ticket-promoted-13", "Ticket - Promoted (loss-policy=stop, standby, granted)" ],
[ "ticket-promoted-14", "Ticket - Promoted (loss-policy=stop, granted, standby)" ],
[ "ticket-promoted-15", "Ticket - Promoted (loss-policy=stop, standby, revoked)" ],
[ "ticket-promoted-16", "Ticket - Promoted (loss-policy=demote, standby, granted)" ],
[ "ticket-promoted-17", "Ticket - Promoted (loss-policy=demote, granted, standby)" ],
[ "ticket-promoted-18", "Ticket - Promoted (loss-policy=demote, standby, revoked)" ],
[ "ticket-promoted-19", "Ticket - Promoted (loss-policy=fence, standby, granted)" ],
[ "ticket-promoted-20", "Ticket - Promoted (loss-policy=fence, granted, standby)" ],
[ "ticket-promoted-21", "Ticket - Promoted (loss-policy=fence, standby, revoked)" ],
[ "ticket-promoted-22", "Ticket - Promoted (loss-policy=freeze, standby, granted)" ],
[ "ticket-promoted-23", "Ticket - Promoted (loss-policy=freeze, granted, standby)" ],
[ "ticket-promoted-24", "Ticket - Promoted (loss-policy=freeze, standby, revoked)" ],
],
[
[ "ticket-rsc-sets-1", "Ticket - Resource sets (1 ticket, initial)" ],
[ "ticket-rsc-sets-2", "Ticket - Resource sets (1 ticket, granted)" ],
[ "ticket-rsc-sets-3", "Ticket - Resource sets (1 ticket, revoked)" ],
[ "ticket-rsc-sets-4", "Ticket - Resource sets (2 tickets, initial)" ],
[ "ticket-rsc-sets-5", "Ticket - Resource sets (2 tickets, granted)" ],
[ "ticket-rsc-sets-6", "Ticket - Resource sets (2 tickets, granted)" ],
[ "ticket-rsc-sets-7", "Ticket - Resource sets (2 tickets, revoked)" ],
[ "ticket-rsc-sets-8", "Ticket - Resource sets (1 ticket, standby, granted)" ],
[ "ticket-rsc-sets-9", "Ticket - Resource sets (1 ticket, granted, standby)" ],
[ "ticket-rsc-sets-10", "Ticket - Resource sets (1 ticket, standby, revoked)" ],
[ "ticket-rsc-sets-11", "Ticket - Resource sets (2 tickets, standby, granted)" ],
[ "ticket-rsc-sets-12", "Ticket - Resource sets (2 tickets, standby, granted)" ],
[ "ticket-rsc-sets-13", "Ticket - Resource sets (2 tickets, granted, standby)" ],
[ "ticket-rsc-sets-14", "Ticket - Resource sets (2 tickets, standby, revoked)" ],
[ "cluster-specific-params", "Cluster-specific instance attributes based on rules" ],
[ "site-specific-params", "Site-specific instance attributes based on rules" ],
],
[
[ "template-1", "Template - 1" ],
[ "template-2", "Template - 2" ],
[ "template-3", "Template - 3 (merge operations)" ],
[ "template-coloc-1", "Template - Colocation 1" ],
[ "template-coloc-2", "Template - Colocation 2" ],
[ "template-coloc-3", "Template - Colocation 3" ],
[ "template-order-1", "Template - Order 1" ],
[ "template-order-2", "Template - Order 2" ],
[ "template-order-3", "Template - Order 3" ],
[ "template-ticket", "Template - Ticket" ],
[ "template-rsc-sets-1", "Template - Resource Sets 1" ],
[ "template-rsc-sets-2", "Template - Resource Sets 2" ],
[ "template-rsc-sets-3", "Template - Resource Sets 3" ],
[ "template-rsc-sets-4", "Template - Resource Sets 4" ],
[ "template-clone-primitive", "Cloned primitive from template" ],
[ "template-clone-group", "Cloned group from template" ],
[ "location-sets-templates", "Resource sets and templates - Location" ],
[ "tags-coloc-order-1", "Tags - Colocation and Order (Simple)" ],
[ "tags-coloc-order-2", "Tags - Colocation and Order (Resource Sets with Templates)" ],
[ "tags-location", "Tags - Location" ],
[ "tags-ticket", "Tags - Ticket" ],
],
[
[ "container-1", "Container - initial" ],
[ "container-2", "Container - monitor failed" ],
[ "container-3", "Container - stop failed" ],
[ "container-4", "Container - reached migration-threshold" ],
[ "container-group-1", "Container in group - initial" ],
[ "container-group-2", "Container in group - monitor failed" ],
[ "container-group-3", "Container in group - stop failed" ],
[ "container-group-4", "Container in group - reached migration-threshold" ],
[ "container-is-remote-node", "Place resource within container when container is remote-node" ],
[ "bug-rh-1097457", "Kill user defined container/contents ordering" ],
[ "bug-cl-5247", "Graph loop when recovering m/s resource in a container" ],
[ "bundle-order-startup", "Bundle startup ordering" ],
[ "bundle-order-partial-start",
"Bundle startup ordering when some dependencies are already running" ],
[ "bundle-order-partial-start-2",
"Bundle startup ordering when some dependencies and the container are already running" ],
[ "bundle-order-stop", "Bundle stop ordering" ],
[ "bundle-order-partial-stop", "Bundle startup ordering when some dependencies are already stopped" ],
[ "bundle-order-stop-on-remote", "Stop nested resource after bringing up the connection" ],
[ "bundle-order-startup-clone", "Prevent startup because bundle isn't promoted" ],
[ "bundle-order-startup-clone-2", "Bundle startup with clones" ],
[ "bundle-order-stop-clone", "Stop bundle because clone is stopping" ],
[ "bundle-nested-colocation", "Colocation of nested connection resources" ],
[ "bundle-order-fencing",
"Order pseudo bundle fencing after parent node fencing if both are happening" ],
[ "bundle-probe-order-1", "order 1" ],
[ "bundle-probe-order-2", "order 2" ],
[ "bundle-probe-order-3", "order 3" ],
[ "bundle-probe-remotes", "Ensure remotes get probed too" ],
[ "bundle-replicas-change", "Change bundle from 1 replica to multiple" ],
[ "bundle-connection-with-container", "Don't move a container due to connection preferences" ],
[ "nested-remote-recovery", "Recover bundle's container hosted on remote node" ],
],
[
[ "whitebox-fail1", "Fail whitebox container rsc" ],
[ "whitebox-fail2", "Fail cluster connection to guest node" ],
[ "whitebox-fail3", "Failed containers should not run nested on remote nodes" ],
[ "whitebox-start", "Start whitebox container with resources assigned to it" ],
[ "whitebox-stop", "Stop whitebox container with resources assigned to it" ],
[ "whitebox-move", "Move whitebox container with resources assigned to it" ],
[ "whitebox-asymmetric", "Verify connection rsc opts-in based on container resource" ],
[ "whitebox-ms-ordering", "Verify promote/demote can not occur before connection is established" ],
[ "whitebox-ms-ordering-move", "Stop/Start cycle within a moving container" ],
[ "whitebox-orphaned", "Properly shutdown orphaned whitebox container" ],
[ "whitebox-orphan-ms", "Properly tear down orphan ms resources on remote-nodes" ],
[ "whitebox-unexpectedly-running", "Recover container nodes the cluster did not start" ],
[ "whitebox-migrate1", "Migrate both container and connection resource" ],
[ "whitebox-imply-stop-on-fence",
"imply stop action on container node rsc when host node is fenced" ],
[ "whitebox-nested-group", "Verify guest remote-node works nested in a group" ],
[ "guest-node-host-dies", "Verify guest node is recovered if host goes away" ],
[ "guest-node-cleanup", "Order guest node connection recovery after container probe" ],
[ "guest-host-not-fenceable", "Actions on guest node are unrunnable if host is unclean and cannot be fenced" ],
],
[
[ "remote-startup-probes", "Baremetal remote-node startup probes" ],
[ "remote-startup", "Startup a newly discovered remote-nodes with no status" ],
[ "remote-fence-unclean", "Fence unclean baremetal remote-node" ],
[ "remote-fence-unclean2",
"Fence baremetal remote-node after cluster node fails and connection can not be recovered" ],
[ "remote-fence-unclean-3", "Probe failed remote nodes (triggers fencing)" ],
[ "remote-move", "Move remote-node connection resource" ],
[ "remote-disable", "Disable a baremetal remote-node" ],
[ "remote-probe-disable", "Probe then stop a baremetal remote-node" ],
[ "remote-orphaned", "Properly shutdown orphaned connection resource" ],
[ "remote-orphaned2",
"verify we can handle orphaned remote connections with active resources on the remote" ],
[ "remote-recover", "Recover connection resource after cluster-node fails" ],
[ "remote-stale-node-entry",
"Make sure we properly handle leftover remote-node entries in the node section" ],
[ "remote-partial-migrate",
"Make sure partial migrations are handled before ops on the remote node" ],
[ "remote-partial-migrate2",
"Make sure partial migration target is prefered for remote connection" ],
[ "remote-recover-fail", "Make sure start failure causes fencing if rsc are active on remote" ],
[ "remote-start-fail",
"Make sure a start failure does not result in fencing if no active resources are on remote" ],
[ "remote-unclean2",
"Make monitor failure always results in fencing, even if no rsc are active on remote" ],
[ "remote-fence-before-reconnect", "Fence before clearing recurring monitor failure" ],
[ "remote-recovery", "Recover remote connections before attempting demotion" ],
[ "remote-recover-connection", "Optimistically recovery of only the connection" ],
[ "remote-recover-all", "Fencing when the connection has no home" ],
[ "remote-recover-no-resources", "Fencing when the connection has no home and no active resources" ],
[ "remote-recover-unknown",
"Fencing when the connection has no home and the remote has no operation history" ],
[ "remote-reconnect-delay", "Waiting for remote reconnect interval to expire" ],
[ "remote-connection-unrecoverable",
"Remote connection host must be fenced, with connection unrecoverable" ],
[ "remote-connection-shutdown", "Remote connection shutdown" ],
[ "cancel-behind-moving-remote",
"Route recurring monitor cancellations through original node of a moving remote connection" ],
],
[
[ "resource-discovery", "Exercises resource-discovery location constraint option" ],
[ "rsc-discovery-per-node", "Disable resource discovery per node" ],
[ "shutdown-lock", "Ensure shutdown lock works properly" ],
[ "shutdown-lock-expiration", "Ensure shutdown lock expiration works properly" ],
],
[
[ "op-defaults", "Test op_defaults conditional expressions" ],
[ "op-defaults-2", "Test op_defaults AND'ed conditional expressions" ],
[ "op-defaults-3", "Test op_defaults precedence" ],
[ "rsc-defaults", "Test rsc_defaults conditional expressions" ],
[ "rsc-defaults-2", "Test rsc_defaults conditional expressions without type" ],
],
[ [ "stop-all-resources", "Test stop-all-resources=true "],
],
[ [ "ocf_degraded-remap-ocf_ok", "Test degraded remapped to OK" ],
[ "ocf_degraded_promoted-remap-ocf_ok", "Test degraded promoted remapped to OK"],
],
]
TESTS_64BIT = [
[
[ "year-2038", "Check handling of timestamps beyond 2038-01-19 03:14:08 UTC" ],
],
]
# Constants substituted in the build process
class BuildVars(object):
SBINDIR = "@sbindir@"
BUILDDIR = "@abs_top_builddir@"
CRM_SCHEMA_DIRECTORY = "@CRM_SCHEMA_DIRECTORY@"
# These values must be kept in sync with crm_exit_t
class CrmExit(object):
OK = 0
ERROR = 1
NOT_INSTALLED = 5
NOINPUT = 66
CANTCREAT = 73
def is_executable(path):
""" Check whether a file at a given path is executable. """
try:
return os.stat(path)[stat.ST_MODE] & stat.S_IXUSR
except OSError:
return False
def diff(file1, file2, **kwargs):
""" Call diff on two files """
return subprocess.call([ "diff", "-u", "-N", "--ignore-all-space",
"--ignore-blank-lines", file1, file2 ], **kwargs)
def sort_file(filename):
""" Sort a file alphabetically """
with io.open(filename, "rt") as f:
lines = sorted(f)
with io.open(filename, "wt") as f:
f.writelines(lines)
def remove_files(filenames):
""" Remove a list of files """
for filename in filenames:
try:
os.remove(filename)
except OSError:
pass
def normalize(filename):
""" Remove text from a file that isn't important for comparison """
if not hasattr(normalize, "patterns"):
normalize.patterns = [
re.compile(r'crm_feature_set="[^"]*"'),
re.compile(r'batch-limit="[0-9]*"')
]
if os.path.isfile(filename):
with io.open(filename, "rt") as f:
lines = f.readlines()
with io.open(filename, "wt") as f:
for line in lines:
for pattern in normalize.patterns:
line = pattern.sub("", line)
f.write(line)
def cat(filename, dest=sys.stdout):
""" Copy a file to a destination file descriptor """
with io.open(filename, "rt") as f:
shutil.copyfileobj(f, dest)
class CtsScheduler(object):
""" Regression tests for Pacemaker's scheduler """
def _parse_args(self, argv):
""" Parse command-line arguments """
parser = argparse.ArgumentParser(description=DESC)
parser.add_argument('-V', '--verbose', action='count',
help='Display any differences from expected output')
parser.add_argument('--run', metavar='TEST',
help=('Run only single specified test (any further '
'arguments will be passed to crm_simulate)'))
parser.add_argument('--update', action='store_true',
help='Update expected results with actual results')
parser.add_argument('-b', '--binary', metavar='PATH',
help='Specify path to crm_simulate')
parser.add_argument('-i', '--io-dir', metavar='PATH',
help='Specify path to regression test data directory')
parser.add_argument('-o', '--out-dir', metavar='PATH',
help='Specify where intermediate and output files should go')
parser.add_argument('-v', '--valgrind', action='store_true',
help='Run all commands under valgrind')
parser.add_argument('--valgrind-dhat', action='store_true',
help='Run all commands under valgrind with heap analyzer')
parser.add_argument('--valgrind-skip-output', action='store_true',
help='If running under valgrind, do not display output')
parser.add_argument('--testcmd-options', metavar='OPTIONS', default='',
help='Additional options for command under test')
# argparse can't handle "everything after --run TEST", so grab that
self.single_test_args = []
narg = 0
for arg in argv:
narg = narg + 1
if arg == '--run':
(argv, self.single_test_args) = (argv[:narg+1], argv[narg+1:])
break
self.args = parser.parse_args(argv[1:])
def _error(self, s):
print(" * ERROR: %s" % s)
def _failed(self, s):
print(" * FAILED: %s" % s)
def _get_valgrind_cmd(self):
""" Return command arguments needed (or not) to run valgrind """
if self.args.valgrind:
os.environ['G_SLICE'] = "always-malloc"
return [
"valgrind",
"-q",
"--gen-suppressions=all",
"--time-stamp=yes",
"--trace-children=no",
"--show-reachable=no",
"--leak-check=full",
"--num-callers=20",
"--suppressions=%s/valgrind-pcmk.suppressions" % (self.test_home)
]
if self.args.valgrind_dhat:
os.environ['G_SLICE'] = "always-malloc"
return [
"valgrind",
"--tool=exp-dhat",
"--time-stamp=yes",
"--trace-children=no",
"--show-top-n=100",
"--num-callers=4"
]
return []
def _get_simulator_cmd(self):
""" Locate the simulation binary """
if self.args.binary is None:
self.args.binary = BuildVars.BUILDDIR + "/tools/crm_simulate"
if not is_executable(self.args.binary):
self.args.binary = BuildVars.SBINDIR + "/crm_simulate"
if not is_executable(self.args.binary):
# @TODO it would be more pythonic to raise an exception
self._error("Test binary " + self.args.binary + " not found")
sys.exit(CrmExit.NOT_INSTALLED)
return [ self.args.binary ] + shlex.split(self.args.testcmd_options)
def set_schema_env(self):
""" Ensure schema directory environment variable is set, if possible """
try:
return os.environ['PCMK_schema_directory']
except KeyError:
for d in [ os.path.join(BuildVars.BUILDDIR, "xml"),
BuildVars.CRM_SCHEMA_DIRECTORY ]:
if os.path.isdir(d):
os.environ['PCMK_schema_directory'] = d
return d
return None
def __init__(self, argv=sys.argv):
# Ensure all command output is in portable locale for comparison
os.environ['LC_ALL'] = "C"
self._parse_args(argv)
# Where this executable lives
self.test_home = os.path.dirname(os.path.realpath(argv[0]))
# Where test data resides
if self.args.io_dir is None:
self.args.io_dir = os.path.join(self.test_home, "scheduler")
self.xml_input_dir = os.path.join(self.args.io_dir, "xml")
self.expected_dir = os.path.join(self.args.io_dir, "exp")
self.dot_expected_dir = os.path.join(self.args.io_dir, "dot")
self.scores_dir = os.path.join(self.args.io_dir, "scores")
self.summary_dir = os.path.join(self.args.io_dir, "summary")
self.stderr_expected_dir = os.path.join(self.args.io_dir, "stderr")
# Create a temporary directory to store diff file
self.failed_dir = tempfile.mkdtemp(prefix='cts-scheduler_')
# Where to store generated files
if self.args.out_dir is None:
self.args.out_dir = self.args.io_dir
self.failed_filename = os.path.join(self.failed_dir, "test-output.diff")
else:
self.failed_filename = os.path.join(self.args.out_dir, "test-output.diff")
os.environ['CIB_shadow_dir'] = self.args.out_dir
self.failed_file = None
self.outfile_out_dir = os.path.join(self.args.out_dir, "out")
self.dot_out_dir = os.path.join(self.args.out_dir, "dot")
self.scores_out_dir = os.path.join(self.args.out_dir, "scores")
self.summary_out_dir = os.path.join(self.args.out_dir, "summary")
self.stderr_out_dir = os.path.join(self.args.out_dir, "stderr")
self.valgrind_out_dir = os.path.join(self.args.out_dir, "valgrind")
# Single test mode (if requested)
try:
# User can give test base name or file name of a test input
self.args.run = os.path.splitext(os.path.basename(self.args.run))[0]
except (AttributeError, TypeError):
pass # --run was not specified
self.set_schema_env()
# Arguments needed (or not) to run commands
self.valgrind_args = self._get_valgrind_cmd()
self.simulate_args = self._get_simulator_cmd()
# Test counters
self.num_failed = 0
self.num_tests = 0
# Ensure that the main output directory exists
# We don't want to create it with os.makedirs below
if not os.path.isdir(self.args.out_dir):
self._error("Output directory missing; can't create output files")
sys.exit(CrmExit.CANTCREAT)
# Create output subdirectories if they don't exist
try:
os.makedirs(self.outfile_out_dir, 0o755, True)
os.makedirs(self.dot_out_dir, 0o755, True)
os.makedirs(self.scores_out_dir, 0o755, True)
os.makedirs(self.summary_out_dir, 0o755, True)
os.makedirs(self.stderr_out_dir, 0o755, True)
if self.valgrind_args:
os.makedirs(self.valgrind_out_dir, 0o755, True)
except OSError as ex:
self._error("Unable to create output subdirectory: %s" % ex)
remove_files([
self.outfile_out_dir,
self.dot_out_dir,
self.scores_out_dir,
self.summary_out_dir,
self.stderr_out_dir,
])
sys.exit(CrmExit.CANTCREAT)
def _compare_files(self, filename1, filename2):
""" Add any file differences to failed results """
if diff(filename1, filename2, stdout=subprocess.DEVNULL) != 0:
diff(filename1, filename2, stdout=self.failed_file, stderr=subprocess.DEVNULL)
self.failed_file.write("\n")
return True
return False
def run_one(self, test_name, test_desc, test_args=[]):
""" Run one scheduler test """
print(" Test %-25s %s" % ((test_name + ":"), test_desc))
did_fail = False
self.num_tests = self.num_tests + 1
# Test inputs
input_filename = os.path.join(
self.xml_input_dir, "%s.xml" % test_name)
expected_filename = os.path.join(
self.expected_dir, "%s.exp" % test_name)
dot_expected_filename = os.path.join(
self.dot_expected_dir, "%s.dot" % test_name)
scores_filename = os.path.join(
self.scores_dir, "%s.scores" % test_name)
summary_filename = os.path.join(
self.summary_dir, "%s.summary" % test_name)
stderr_expected_filename = os.path.join(
self.stderr_expected_dir, "%s.stderr" % test_name)
# (Intermediate) test outputs
output_filename = os.path.join(
self.outfile_out_dir, "%s.out" % test_name)
dot_output_filename = os.path.join(
self.dot_out_dir, "%s.dot.pe" % test_name)
score_output_filename = os.path.join(
self.scores_out_dir, "%s.scores.pe" % test_name)
summary_output_filename = os.path.join(
self.summary_out_dir, "%s.summary.pe" % test_name)
stderr_output_filename = os.path.join(
self.stderr_out_dir, "%s.stderr.pe" % test_name)
valgrind_output_filename = os.path.join(
self.valgrind_out_dir, "%s.valgrind" % test_name)
# Common arguments for running test
test_cmd = []
if self.valgrind_args:
test_cmd = self.valgrind_args + [ "--log-file=%s" % valgrind_output_filename ]
test_cmd = test_cmd + self.simulate_args
# @TODO It would be more pythonic to raise exceptions for errors,
# then perhaps it would be nice to make a single-test class
# Ensure necessary test inputs exist
if not os.path.isfile(input_filename):
self._error("No input")
self.num_failed = self.num_failed + 1
return CrmExit.NOINPUT
if not self.args.update and not os.path.isfile(expected_filename):
self._error("no stored output")
return CrmExit.NOINPUT
# Run simulation to generate summary output
if self.args.run: # Single test mode
test_cmd_full = test_cmd + [ '-x', input_filename, '-S' ] + test_args
print(" ".join(test_cmd_full))
else:
# @TODO Why isn't test_args added here?
test_cmd_full = test_cmd + [ '-x', input_filename, '-S' ]
with io.open(summary_output_filename, "wt") as f:
simulation = subprocess.Popen(test_cmd_full, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
env=os.environ)
# This makes diff happy regardless of --enable-compat-2.0.
# Use sed -E to make Linux and BSD special characters more compatible.
sed = subprocess.Popen(["sed", "-E",
"-e", "s/ocf::/ocf:/g",
"-e", r"s/Masters:/Promoted:/",
"-e", r"s/Slaves:/Unpromoted:/",
"-e", r"s/ Master( |\[|$)/ Promoted\1/",
"-e", r"s/ Slave / Unpromoted /",
], stdin=simulation.stdout, stdout=f,
stderr=subprocess.STDOUT)
simulation.stdout.close()
sed.communicate()
if self.args.run:
cat(summary_output_filename)
# Re-run simulation to generate dot, graph, and scores
test_cmd_full = test_cmd + [
'-x', input_filename,
'-D', dot_output_filename,
'-G', output_filename,
'-sSQ' ] + test_args
with io.open(stderr_output_filename, "wt") as f_stderr, \
io.open(score_output_filename, "wt") as f_score:
rc = subprocess.call(test_cmd_full, stdout=f_score, stderr=f_stderr, env=os.environ)
# Check for test command failure
if rc != CrmExit.OK:
self._failed("Test returned: %d" % rc)
did_fail = True
print(" ".join(test_cmd_full))
# Check for valgrind errors
if self.valgrind_args and not self.args.valgrind_skip_output:
if os.stat(valgrind_output_filename).st_size > 0:
self._failed("Valgrind reported errors")
did_fail = True
cat(valgrind_output_filename)
remove_files([ valgrind_output_filename ])
# Check for core dump
if os.path.isfile("core"):
self._failed("Core-file detected: core." + test_name)
did_fail = True
os.rename("core", "%s/core.%s" % (self.test_home, test_name))
# Check any stderr output
if os.path.isfile(stderr_expected_filename):
if self._compare_files(stderr_expected_filename, stderr_output_filename):
self._failed("stderr changed")
did_fail = True
elif os.stat(stderr_output_filename).st_size > 0:
self._failed("Output was written to stderr")
did_fail = True
cat(stderr_output_filename)
remove_files([ stderr_output_filename ])
# Check whether output graph exists, and normalize it
if (not os.path.isfile(output_filename)
or os.stat(output_filename).st_size == 0):
self._error("No graph produced")
did_fail = True
self.num_failed = self.num_failed + 1
remove_files([ output_filename ])
return CrmExit.ERROR
normalize(output_filename)
# Check whether dot output exists, and sort it
if (not os.path.isfile(dot_output_filename) or
os.stat(dot_output_filename).st_size == 0):
self._error("No dot-file summary produced")
did_fail = True
self.num_failed = self.num_failed + 1
remove_files([ dot_output_filename, output_filename ])
return CrmExit.ERROR
with io.open(dot_output_filename, "rt") as f:
first_line = f.readline() # "digraph" line with opening brace
lines = f.readlines()
last_line = lines[-1] # closing brace
del lines[-1]
lines = sorted(set(lines)) # unique sort
with io.open(dot_output_filename, "wt") as f:
f.write(first_line)
f.writelines(lines)
f.write(last_line)
# Check whether score output exists, and sort it
if (not os.path.isfile(score_output_filename)
or os.stat(score_output_filename).st_size == 0):
self._error("No allocation scores produced")
did_fail = True
self.num_failed = self.num_failed + 1
remove_files([ score_output_filename, output_filename ])
return CrmExit.ERROR
else:
sort_file(score_output_filename)
if self.args.update:
shutil.copyfile(output_filename, expected_filename)
shutil.copyfile(dot_output_filename, dot_expected_filename)
shutil.copyfile(score_output_filename, scores_filename)
shutil.copyfile(summary_output_filename, summary_filename)
print(" Updated expected outputs")
if self._compare_files(summary_filename, summary_output_filename):
self._failed("summary changed")
did_fail = True
if self._compare_files(dot_expected_filename, dot_output_filename):
self._failed("dot-file summary changed")
did_fail = True
else:
remove_files([ dot_output_filename ])
if self._compare_files(expected_filename, output_filename):
self._failed("xml-file changed")
did_fail = True
if self._compare_files(scores_filename, score_output_filename):
self._failed("scores-file changed")
did_fail = True
remove_files([ output_filename,
dot_output_filename,
score_output_filename,
summary_output_filename])
if did_fail:
self.num_failed = self.num_failed + 1
return CrmExit.ERROR
return CrmExit.OK
def run_all(self):
""" Run all defined tests """
if platform.architecture()[0] == "64bit":
TESTS.extend(TESTS_64BIT)
for group in TESTS:
for test in group:
try:
args = test[2]
except IndexError:
args = []
self.run_one(test[0], test[1], args)
print()
def _print_summary(self):
""" Print a summary of parameters for this test run """
print("Test home is:\t" + self.test_home)
print("Test binary is:\t" + self.args.binary)
if 'PCMK_schema_directory' in os.environ:
print("Schema home is:\t" + os.environ['PCMK_schema_directory'])
if self.valgrind_args != []:
print("Activating memory testing with valgrind")
print()
def _test_results(self):
if self.num_failed == 0:
shutil.rmtree(self.failed_dir)
return CrmExit.OK
if os.path.isfile(self.failed_filename) and os.stat(self.failed_filename).st_size != 0:
if self.args.verbose:
self._error("Results of %d failed tests (out of %d):" %
(self.num_failed, self.num_tests))
cat(self.failed_filename)
else:
self._error("Results of %d failed tests (out of %d) are in %s" %
(self.num_failed, self.num_tests, self.failed_filename))
self._error("Use -V to display them after running the tests")
else:
self._error("%d (of %d) tests failed (no diff results)" %
(self.num_failed, self.num_tests))
if os.path.isfile(self.failed_filename):
shutil.rmtree(self.failed_dir)
return CrmExit.ERROR
def run(self):
""" Run test(s) as specified """
self._print_summary()
# Zero out the error log
self.failed_file = io.open(self.failed_filename, "wt")
if self.args.run is None:
print("Performing the following tests from " + self.args.io_dir)
print()
self.run_all()
print()
self.failed_file.close()
rc = self._test_results()
else:
rc = self.run_one(self.args.run, "Single shot", self.single_test_args)
self.failed_file.close()
if self.num_failed > 0:
print("\nFailures:\nThese have also been written to: " + self.failed_filename + "\n")
cat(self.failed_filename)
shutil.rmtree(self.failed_dir)
return rc
if __name__ == "__main__":
sys.exit(CtsScheduler().run())
# vim: set filetype=python expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=120:
diff --git a/cts/scheduler/dot/colocation-priority-group.dot b/cts/scheduler/dot/colocation-priority-group.dot
new file mode 100644
index 0000000000..6e88e6b14d
--- /dev/null
+++ b/cts/scheduler/dot/colocation-priority-group.dot
@@ -0,0 +1,48 @@
+ digraph "g" {
+"Fencing_monitor_120000 node2" [ style=bold color="green" fontcolor="black"]
+"Fencing_start_0 node2" -> "Fencing_monitor_120000 node2" [ style = bold]
+"Fencing_start_0 node2" [ style=bold color="green" fontcolor="black"]
+"Fencing_stop_0 node1" -> "Fencing_start_0 node2" [ style = bold]
+"Fencing_stop_0 node1" [ style=bold color="green" fontcolor="black"]
+"group1_running_0" [ style=bold color="green" fontcolor="orange"]
+"group1_start_0" -> "group1_running_0" [ style = bold]
+"group1_start_0" -> "member1a_start_0 node1" [ style = bold]
+"group1_start_0" -> "member1b_start_0 node1" [ style = bold]
+"group1_start_0" [ style=bold color="green" fontcolor="orange"]
+"load_stopped_node1 node1" -> "rsc3_start_0 node1" [ style = bold]
+"load_stopped_node1 node1" [ style=bold color="green" fontcolor="orange"]
+"load_stopped_node2 node2" [ style=bold color="green" fontcolor="orange"]
+"member1a_monitor_0 node1" -> "member1a_start_0 node1" [ style = bold]
+"member1a_monitor_0 node1" [ style=bold color="green" fontcolor="black"]
+"member1a_monitor_0 node2" -> "member1a_start_0 node1" [ style = bold]
+"member1a_monitor_0 node2" [ style=bold color="green" fontcolor="black"]
+"member1a_monitor_10000 node1" [ style=bold color="green" fontcolor="black"]
+"member1a_start_0 node1" -> "group1_running_0" [ style = bold]
+"member1a_start_0 node1" -> "member1a_monitor_10000 node1" [ style = bold]
+"member1a_start_0 node1" -> "member1b_start_0 node1" [ style = bold]
+"member1a_start_0 node1" [ style=bold color="green" fontcolor="black"]
+"member1b_monitor_0 node1" -> "member1b_start_0 node1" [ style = bold]
+"member1b_monitor_0 node1" [ style=bold color="green" fontcolor="black"]
+"member1b_monitor_0 node2" -> "member1b_start_0 node1" [ style = bold]
+"member1b_monitor_0 node2" [ style=bold color="green" fontcolor="black"]
+"member1b_monitor_10000 node1" [ style=bold color="green" fontcolor="black"]
+"member1b_start_0 node1" -> "group1_running_0" [ style = bold]
+"member1b_start_0 node1" -> "member1b_monitor_10000 node1" [ style = bold]
+"member1b_start_0 node1" [ style=bold color="green" fontcolor="black"]
+"rsc2_monitor_0 node1" [ style=bold color="green" fontcolor="black"]
+"rsc2_monitor_0 node2" [ style=bold color="green" fontcolor="black"]
+"rsc3_monitor_0 node1" -> "rsc3_start_0 node1" [ style = bold]
+"rsc3_monitor_0 node1" [ style=bold color="green" fontcolor="black"]
+"rsc3_monitor_0 node2" -> "rsc3_start_0 node1" [ style = bold]
+"rsc3_monitor_0 node2" [ style=bold color="green" fontcolor="black"]
+"rsc3_monitor_10000 node1" [ style=bold color="green" fontcolor="black"]
+"rsc3_start_0 node1" -> "rsc3_monitor_10000 node1" [ style = bold]
+"rsc3_start_0 node1" [ style=bold color="green" fontcolor="black"]
+"rsc4_monitor_0 node1" -> "rsc4_start_0 node2" [ style = bold]
+"rsc4_monitor_0 node1" [ style=bold color="green" fontcolor="black"]
+"rsc4_monitor_0 node2" -> "rsc4_start_0 node2" [ style = bold]
+"rsc4_monitor_0 node2" [ style=bold color="green" fontcolor="black"]
+"rsc4_monitor_10000 node2" [ style=bold color="green" fontcolor="black"]
+"rsc4_start_0 node2" -> "rsc4_monitor_10000 node2" [ style = bold]
+"rsc4_start_0 node2" [ style=bold color="green" fontcolor="black"]
+}
diff --git a/cts/scheduler/dot/colocation-vs-stickiness.dot b/cts/scheduler/dot/colocation-vs-stickiness.dot
new file mode 100644
index 0000000000..744da559cd
--- /dev/null
+++ b/cts/scheduler/dot/colocation-vs-stickiness.dot
@@ -0,0 +1,40 @@
+ digraph "g" {
+"dummy2a_monitor_10000 rhel7-4" [ style=bold color="green" fontcolor="black"]
+"dummy2a_start_0 rhel7-4" -> "dummy2a_monitor_10000 rhel7-4" [ style = bold]
+"dummy2a_start_0 rhel7-4" -> "dummy2b_start_0 rhel7-4" [ style = bold]
+"dummy2a_start_0 rhel7-4" -> "group2_running_0" [ style = bold]
+"dummy2a_start_0 rhel7-4" [ style=bold color="green" fontcolor="black"]
+"dummy2a_stop_0 rhel7-5" -> "dummy2a_start_0 rhel7-4" [ style = bold]
+"dummy2a_stop_0 rhel7-5" -> "group2_stopped_0" [ style = bold]
+"dummy2a_stop_0 rhel7-5" [ style=bold color="green" fontcolor="black"]
+"dummy2b_monitor_10000 rhel7-4" [ style=bold color="green" fontcolor="black"]
+"dummy2b_start_0 rhel7-4" -> "dummy2b_monitor_10000 rhel7-4" [ style = bold]
+"dummy2b_start_0 rhel7-4" -> "dummy2c_start_0 rhel7-4" [ style = bold]
+"dummy2b_start_0 rhel7-4" -> "group2_running_0" [ style = bold]
+"dummy2b_start_0 rhel7-4" [ style=bold color="green" fontcolor="black"]
+"dummy2b_stop_0 rhel7-5" -> "dummy2a_stop_0 rhel7-5" [ style = bold]
+"dummy2b_stop_0 rhel7-5" -> "dummy2b_start_0 rhel7-4" [ style = bold]
+"dummy2b_stop_0 rhel7-5" -> "group2_stopped_0" [ style = bold]
+"dummy2b_stop_0 rhel7-5" [ style=bold color="green" fontcolor="black"]
+"dummy2c_monitor_10000 rhel7-4" [ style=bold color="green" fontcolor="black"]
+"dummy2c_start_0 rhel7-4" -> "dummy2c_monitor_10000 rhel7-4" [ style = bold]
+"dummy2c_start_0 rhel7-4" -> "group2_running_0" [ style = bold]
+"dummy2c_start_0 rhel7-4" [ style=bold color="green" fontcolor="black"]
+"dummy2c_stop_0 rhel7-5" -> "dummy2b_stop_0 rhel7-5" [ style = bold]
+"dummy2c_stop_0 rhel7-5" -> "dummy2c_start_0 rhel7-4" [ style = bold]
+"dummy2c_stop_0 rhel7-5" -> "group2_stopped_0" [ style = bold]
+"dummy2c_stop_0 rhel7-5" [ style=bold color="green" fontcolor="black"]
+"group2_running_0" [ style=bold color="green" fontcolor="orange"]
+"group2_start_0" -> "dummy2a_start_0 rhel7-4" [ style = bold]
+"group2_start_0" -> "dummy2b_start_0 rhel7-4" [ style = bold]
+"group2_start_0" -> "dummy2c_start_0 rhel7-4" [ style = bold]
+"group2_start_0" -> "group2_running_0" [ style = bold]
+"group2_start_0" [ style=bold color="green" fontcolor="orange"]
+"group2_stop_0" -> "dummy2a_stop_0 rhel7-5" [ style = bold]
+"group2_stop_0" -> "dummy2b_stop_0 rhel7-5" [ style = bold]
+"group2_stop_0" -> "dummy2c_stop_0 rhel7-5" [ style = bold]
+"group2_stop_0" -> "group2_stopped_0" [ style = bold]
+"group2_stop_0" [ style=bold color="green" fontcolor="orange"]
+"group2_stopped_0" -> "group2_start_0" [ style = bold]
+"group2_stopped_0" [ style=bold color="green" fontcolor="orange"]
+}
diff --git a/cts/scheduler/dot/group-anticolocation.dot b/cts/scheduler/dot/group-anticolocation.dot
new file mode 100644
index 0000000000..4886650df6
--- /dev/null
+++ b/cts/scheduler/dot/group-anticolocation.dot
@@ -0,0 +1,8 @@
+ digraph "g" {
+"group2_stop_0" -> "group2_stopped_0" [ style = bold]
+"group2_stop_0" -> "member2b_stop_0 node1" [ style = bold]
+"group2_stop_0" [ style=bold color="green" fontcolor="orange"]
+"group2_stopped_0" [ style=bold color="green" fontcolor="orange"]
+"member2b_stop_0 node1" -> "group2_stopped_0" [ style = bold]
+"member2b_stop_0 node1" [ style=bold color="green" fontcolor="black"]
+}
diff --git a/cts/scheduler/dot/group-colocation-failure.dot b/cts/scheduler/dot/group-colocation-failure.dot
new file mode 100644
index 0000000000..a2aac2854d
--- /dev/null
+++ b/cts/scheduler/dot/group-colocation-failure.dot
@@ -0,0 +1,45 @@
+ digraph "g" {
+"group1_running_0" [ style=bold color="green" fontcolor="orange"]
+"group1_start_0" -> "group1_running_0" [ style = bold]
+"group1_start_0" -> "member1a_start_0 node1" [ style = bold]
+"group1_start_0" -> "member1b_start_0 node1" [ style = bold]
+"group1_start_0" [ style=bold color="green" fontcolor="orange"]
+"group1_stop_0" -> "group1_stopped_0" [ style = bold]
+"group1_stop_0" -> "member1a_stop_0 node2" [ style = bold]
+"group1_stop_0" -> "member1b_stop_0 node2" [ style = bold]
+"group1_stop_0" [ style=bold color="green" fontcolor="orange"]
+"group1_stopped_0" -> "group1_start_0" [ style = bold]
+"group1_stopped_0" [ style=bold color="green" fontcolor="orange"]
+"group2_running_0" [ style=bold color="green" fontcolor="orange"]
+"group2_start_0" -> "group2_running_0" [ style = bold]
+"group2_start_0" -> "member2a_start_0 node1" [ style = bold]
+"group2_start_0" [ style=bold color="green" fontcolor="orange"]
+"group2_stop_0" -> "group2_stopped_0" [ style = bold]
+"group2_stop_0" -> "member2a_stop_0 node2" [ style = bold]
+"group2_stop_0" [ style=bold color="green" fontcolor="orange"]
+"group2_stopped_0" -> "group2_start_0" [ style = bold]
+"group2_stopped_0" [ style=bold color="green" fontcolor="orange"]
+"member1a_monitor_10000 node1" [ style=bold color="green" fontcolor="black"]
+"member1a_start_0 node1" -> "group1_running_0" [ style = bold]
+"member1a_start_0 node1" -> "member1a_monitor_10000 node1" [ style = bold]
+"member1a_start_0 node1" -> "member1b_start_0 node1" [ style = bold]
+"member1a_start_0 node1" [ style=bold color="green" fontcolor="black"]
+"member1a_stop_0 node2" -> "group1_stopped_0" [ style = bold]
+"member1a_stop_0 node2" -> "member1a_start_0 node1" [ style = bold]
+"member1a_stop_0 node2" [ style=bold color="green" fontcolor="black"]
+"member1b_monitor_10000 node1" [ style=bold color="green" fontcolor="black"]
+"member1b_start_0 node1" -> "group1_running_0" [ style = bold]
+"member1b_start_0 node1" -> "member1b_monitor_10000 node1" [ style = bold]
+"member1b_start_0 node1" [ style=bold color="green" fontcolor="black"]
+"member1b_stop_0 node2" -> "group1_stopped_0" [ style = bold]
+"member1b_stop_0 node2" -> "member1a_stop_0 node2" [ style = bold]
+"member1b_stop_0 node2" -> "member1b_start_0 node1" [ style = bold]
+"member1b_stop_0 node2" [ style=bold color="green" fontcolor="black"]
+"member2a_monitor_10000 node1" [ style=bold color="green" fontcolor="black"]
+"member2a_start_0 node1" -> "group2_running_0" [ style = bold]
+"member2a_start_0 node1" -> "member2a_monitor_10000 node1" [ style = bold]
+"member2a_start_0 node1" [ style=bold color="green" fontcolor="black"]
+"member2a_stop_0 node2" -> "group2_stopped_0" [ style = bold]
+"member2a_stop_0 node2" -> "member2a_start_0 node1" [ style = bold]
+"member2a_stop_0 node2" [ style=bold color="green" fontcolor="black"]
+}
diff --git a/cts/scheduler/exp/colocation-priority-group.exp b/cts/scheduler/exp/colocation-priority-group.exp
new file mode 100644
index 0000000000..ae79d83613
--- /dev/null
+++ b/cts/scheduler/exp/colocation-priority-group.exp
@@ -0,0 +1,297 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/cts/scheduler/exp/colocation-vs-stickiness.exp b/cts/scheduler/exp/colocation-vs-stickiness.exp
new file mode 100644
index 0000000000..be1de178bf
--- /dev/null
+++ b/cts/scheduler/exp/colocation-vs-stickiness.exp
@@ -0,0 +1,202 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/cts/scheduler/exp/group-anticolocation.exp b/cts/scheduler/exp/group-anticolocation.exp
new file mode 100644
index 0000000000..066b3bd5f4
--- /dev/null
+++ b/cts/scheduler/exp/group-anticolocation.exp
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/cts/scheduler/exp/group-colocation-failure.exp b/cts/scheduler/exp/group-colocation-failure.exp
new file mode 100644
index 0000000000..c57ba8bf6b
--- /dev/null
+++ b/cts/scheduler/exp/group-colocation-failure.exp
@@ -0,0 +1,240 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/cts/scheduler/scores/colocation-priority-group.scores b/cts/scheduler/scores/colocation-priority-group.scores
new file mode 100644
index 0000000000..89028f6046
--- /dev/null
+++ b/cts/scheduler/scores/colocation-priority-group.scores
@@ -0,0 +1,19 @@
+
+pcmk__group_assign: group1 allocation score on node1: 0
+pcmk__group_assign: group1 allocation score on node2: 0
+pcmk__group_assign: member1a allocation score on node1: 0
+pcmk__group_assign: member1a allocation score on node2: 0
+pcmk__group_assign: member1b allocation score on node1: 0
+pcmk__group_assign: member1b allocation score on node2: 0
+pcmk__primitive_assign: Fencing allocation score on node1: 0
+pcmk__primitive_assign: Fencing allocation score on node2: 0
+pcmk__primitive_assign: member1a allocation score on node1: 0
+pcmk__primitive_assign: member1a allocation score on node2: -4000
+pcmk__primitive_assign: member1b allocation score on node1: 0
+pcmk__primitive_assign: member1b allocation score on node2: -INFINITY
+pcmk__primitive_assign: rsc2 allocation score on node1: 16
+pcmk__primitive_assign: rsc2 allocation score on node2: 0
+pcmk__primitive_assign: rsc3 allocation score on node1: 0
+pcmk__primitive_assign: rsc3 allocation score on node2: 0
+pcmk__primitive_assign: rsc4 allocation score on node1: 0
+pcmk__primitive_assign: rsc4 allocation score on node2: 0
diff --git a/cts/scheduler/scores/colocation-vs-stickiness.scores b/cts/scheduler/scores/colocation-vs-stickiness.scores
new file mode 100644
index 0000000000..a5efe0caf2
--- /dev/null
+++ b/cts/scheduler/scores/colocation-vs-stickiness.scores
@@ -0,0 +1,86 @@
+
+pcmk__group_assign: dummy1 allocation score on rhel7-1: 0
+pcmk__group_assign: dummy1 allocation score on rhel7-2: 0
+pcmk__group_assign: dummy1 allocation score on rhel7-3: 0
+pcmk__group_assign: dummy1 allocation score on rhel7-4: 0
+pcmk__group_assign: dummy1 allocation score on rhel7-5: INFINITY
+pcmk__group_assign: dummy1b allocation score on rhel7-1: 0
+pcmk__group_assign: dummy1b allocation score on rhel7-2: 0
+pcmk__group_assign: dummy1b allocation score on rhel7-3: 0
+pcmk__group_assign: dummy1b allocation score on rhel7-4: 0
+pcmk__group_assign: dummy1b allocation score on rhel7-5: INFINITY
+pcmk__group_assign: dummy1c allocation score on rhel7-1: 0
+pcmk__group_assign: dummy1c allocation score on rhel7-2: 0
+pcmk__group_assign: dummy1c allocation score on rhel7-3: 0
+pcmk__group_assign: dummy1c allocation score on rhel7-4: 0
+pcmk__group_assign: dummy1c allocation score on rhel7-5: INFINITY
+pcmk__group_assign: dummy2a allocation score on rhel7-1: 0
+pcmk__group_assign: dummy2a allocation score on rhel7-2: 0
+pcmk__group_assign: dummy2a allocation score on rhel7-3: 0
+pcmk__group_assign: dummy2a allocation score on rhel7-4: 0
+pcmk__group_assign: dummy2a allocation score on rhel7-5: INFINITY
+pcmk__group_assign: dummy2b allocation score on rhel7-1: 0
+pcmk__group_assign: dummy2b allocation score on rhel7-2: 0
+pcmk__group_assign: dummy2b allocation score on rhel7-3: 0
+pcmk__group_assign: dummy2b allocation score on rhel7-4: 0
+pcmk__group_assign: dummy2b allocation score on rhel7-5: INFINITY
+pcmk__group_assign: dummy2c allocation score on rhel7-1: 0
+pcmk__group_assign: dummy2c allocation score on rhel7-2: 0
+pcmk__group_assign: dummy2c allocation score on rhel7-3: 0
+pcmk__group_assign: dummy2c allocation score on rhel7-4: 0
+pcmk__group_assign: dummy2c allocation score on rhel7-5: INFINITY
+pcmk__group_assign: group1 allocation score on rhel7-1: 0
+pcmk__group_assign: group1 allocation score on rhel7-2: 0
+pcmk__group_assign: group1 allocation score on rhel7-3: 0
+pcmk__group_assign: group1 allocation score on rhel7-4: 0
+pcmk__group_assign: group1 allocation score on rhel7-5: 0
+pcmk__group_assign: group2 allocation score on rhel7-1: 0
+pcmk__group_assign: group2 allocation score on rhel7-2: 0
+pcmk__group_assign: group2 allocation score on rhel7-3: 0
+pcmk__group_assign: group2 allocation score on rhel7-4: 0
+pcmk__group_assign: group2 allocation score on rhel7-5: 0
+pcmk__primitive_assign: Fencing allocation score on rhel7-1: 0
+pcmk__primitive_assign: Fencing allocation score on rhel7-2: 0
+pcmk__primitive_assign: Fencing allocation score on rhel7-3: 0
+pcmk__primitive_assign: Fencing allocation score on rhel7-4: 0
+pcmk__primitive_assign: Fencing allocation score on rhel7-5: INFINITY
+pcmk__primitive_assign: FencingFail allocation score on rhel7-1: 0
+pcmk__primitive_assign: FencingFail allocation score on rhel7-2: 0
+pcmk__primitive_assign: FencingFail allocation score on rhel7-3: 0
+pcmk__primitive_assign: FencingFail allocation score on rhel7-4: 0
+pcmk__primitive_assign: FencingFail allocation score on rhel7-5: INFINITY
+pcmk__primitive_assign: FencingPass allocation score on rhel7-1: 0
+pcmk__primitive_assign: FencingPass allocation score on rhel7-2: 0
+pcmk__primitive_assign: FencingPass allocation score on rhel7-3: 0
+pcmk__primitive_assign: FencingPass allocation score on rhel7-4: 0
+pcmk__primitive_assign: FencingPass allocation score on rhel7-5: INFINITY
+pcmk__primitive_assign: dummy1 allocation score on rhel7-1: -INFINITY
+pcmk__primitive_assign: dummy1 allocation score on rhel7-2: -INFINITY
+pcmk__primitive_assign: dummy1 allocation score on rhel7-3: -INFINITY
+pcmk__primitive_assign: dummy1 allocation score on rhel7-4: -500
+pcmk__primitive_assign: dummy1 allocation score on rhel7-5: INFINITY
+pcmk__primitive_assign: dummy1b allocation score on rhel7-1: -INFINITY
+pcmk__primitive_assign: dummy1b allocation score on rhel7-2: -INFINITY
+pcmk__primitive_assign: dummy1b allocation score on rhel7-3: -INFINITY
+pcmk__primitive_assign: dummy1b allocation score on rhel7-4: -INFINITY
+pcmk__primitive_assign: dummy1b allocation score on rhel7-5: INFINITY
+pcmk__primitive_assign: dummy1c allocation score on rhel7-1: -INFINITY
+pcmk__primitive_assign: dummy1c allocation score on rhel7-2: -INFINITY
+pcmk__primitive_assign: dummy1c allocation score on rhel7-3: -INFINITY
+pcmk__primitive_assign: dummy1c allocation score on rhel7-4: -INFINITY
+pcmk__primitive_assign: dummy1c allocation score on rhel7-5: INFINITY
+pcmk__primitive_assign: dummy2a allocation score on rhel7-1: -INFINITY
+pcmk__primitive_assign: dummy2a allocation score on rhel7-2: -INFINITY
+pcmk__primitive_assign: dummy2a allocation score on rhel7-3: -INFINITY
+pcmk__primitive_assign: dummy2a allocation score on rhel7-4: 0
+pcmk__primitive_assign: dummy2a allocation score on rhel7-5: -INFINITY
+pcmk__primitive_assign: dummy2b allocation score on rhel7-1: -INFINITY
+pcmk__primitive_assign: dummy2b allocation score on rhel7-2: -INFINITY
+pcmk__primitive_assign: dummy2b allocation score on rhel7-3: -INFINITY
+pcmk__primitive_assign: dummy2b allocation score on rhel7-4: 0
+pcmk__primitive_assign: dummy2b allocation score on rhel7-5: -INFINITY
+pcmk__primitive_assign: dummy2c allocation score on rhel7-1: -INFINITY
+pcmk__primitive_assign: dummy2c allocation score on rhel7-2: -INFINITY
+pcmk__primitive_assign: dummy2c allocation score on rhel7-3: -INFINITY
+pcmk__primitive_assign: dummy2c allocation score on rhel7-4: 0
+pcmk__primitive_assign: dummy2c allocation score on rhel7-5: -INFINITY
diff --git a/cts/scheduler/scores/group-anticolocation.scores b/cts/scheduler/scores/group-anticolocation.scores
new file mode 100644
index 0000000000..4ea6cf61a9
--- /dev/null
+++ b/cts/scheduler/scores/group-anticolocation.scores
@@ -0,0 +1,23 @@
+
+pcmk__group_assign: group1 allocation score on node1: 0
+pcmk__group_assign: group1 allocation score on node2: 0
+pcmk__group_assign: group2 allocation score on node1: 0
+pcmk__group_assign: group2 allocation score on node2: 0
+pcmk__group_assign: member1a allocation score on node1: 0
+pcmk__group_assign: member1a allocation score on node2: 0
+pcmk__group_assign: member1b allocation score on node1: 0
+pcmk__group_assign: member1b allocation score on node2: 0
+pcmk__group_assign: member2a allocation score on node1: 0
+pcmk__group_assign: member2a allocation score on node2: 0
+pcmk__group_assign: member2b allocation score on node1: -INFINITY
+pcmk__group_assign: member2b allocation score on node2: 0
+pcmk__primitive_assign: Fencing allocation score on node1: 0
+pcmk__primitive_assign: Fencing allocation score on node2: 0
+pcmk__primitive_assign: member1a allocation score on node1: 0
+pcmk__primitive_assign: member1a allocation score on node2: 0
+pcmk__primitive_assign: member1b allocation score on node1: -INFINITY
+pcmk__primitive_assign: member1b allocation score on node2: 0
+pcmk__primitive_assign: member2a allocation score on node1: 0
+pcmk__primitive_assign: member2a allocation score on node2: -5000
+pcmk__primitive_assign: member2b allocation score on node1: -INFINITY
+pcmk__primitive_assign: member2b allocation score on node2: -INFINITY
diff --git a/cts/scheduler/scores/group-colocation-failure.scores b/cts/scheduler/scores/group-colocation-failure.scores
new file mode 100644
index 0000000000..8466e127ca
--- /dev/null
+++ b/cts/scheduler/scores/group-colocation-failure.scores
@@ -0,0 +1,19 @@
+
+pcmk__group_assign: group1 allocation score on node1: 0
+pcmk__group_assign: group1 allocation score on node2: 0
+pcmk__group_assign: group2 allocation score on node1: 0
+pcmk__group_assign: group2 allocation score on node2: 0
+pcmk__group_assign: member1a allocation score on node1: 0
+pcmk__group_assign: member1a allocation score on node2: 0
+pcmk__group_assign: member1b allocation score on node1: 0
+pcmk__group_assign: member1b allocation score on node2: 0
+pcmk__group_assign: member2a allocation score on node1: 0
+pcmk__group_assign: member2a allocation score on node2: -INFINITY
+pcmk__primitive_assign: Fencing allocation score on node1: 0
+pcmk__primitive_assign: Fencing allocation score on node2: 0
+pcmk__primitive_assign: member1a allocation score on node1: 0
+pcmk__primitive_assign: member1a allocation score on node2: -INFINITY
+pcmk__primitive_assign: member1b allocation score on node1: 0
+pcmk__primitive_assign: member1b allocation score on node2: -INFINITY
+pcmk__primitive_assign: member2a allocation score on node1: 0
+pcmk__primitive_assign: member2a allocation score on node2: -INFINITY
diff --git a/cts/scheduler/scores/group-dependents.scores b/cts/scheduler/scores/group-dependents.scores
index a54d986373..f7ff5da336 100644
--- a/cts/scheduler/scores/group-dependents.scores
+++ b/cts/scheduler/scores/group-dependents.scores
@@ -1,99 +1,99 @@
drbd:0 promotion score on asttest1: 1
drbd:1 promotion score on asttest2: 9
pcmk__clone_allocate: cl_route allocation score on asttest1: 0
pcmk__clone_allocate: cl_route allocation score on asttest2: 0
pcmk__clone_allocate: drbd:0 allocation score on asttest1: 6
pcmk__clone_allocate: drbd:0 allocation score on asttest2: 0
pcmk__clone_allocate: drbd:1 allocation score on asttest1: 0
pcmk__clone_allocate: drbd:1 allocation score on asttest2: 6
pcmk__clone_allocate: ip_voip_route_default:0 allocation score on asttest1: 1
pcmk__clone_allocate: ip_voip_route_default:0 allocation score on asttest2: 0
pcmk__clone_allocate: ip_voip_route_default:1 allocation score on asttest1: 0
pcmk__clone_allocate: ip_voip_route_default:1 allocation score on asttest2: 1
pcmk__clone_allocate: ms_drbd allocation score on asttest1: 1
pcmk__clone_allocate: ms_drbd allocation score on asttest2: 2
pcmk__group_assign: asterisk allocation score on asttest1: 0
pcmk__group_assign: asterisk allocation score on asttest2: 0
pcmk__group_assign: dahdi allocation score on asttest1: 0
pcmk__group_assign: dahdi allocation score on asttest2: 0
pcmk__group_assign: fonulator allocation score on asttest1: -INFINITY
pcmk__group_assign: fonulator allocation score on asttest2: 0
pcmk__group_assign: httpd allocation score on asttest1: 0
pcmk__group_assign: httpd allocation score on asttest2: 0
pcmk__group_assign: iax2_mon allocation score on asttest1: 0
pcmk__group_assign: iax2_mon allocation score on asttest2: 0
pcmk__group_assign: ip_voip_addresses_p allocation score on asttest1: 0
pcmk__group_assign: ip_voip_addresses_p allocation score on asttest2: 0
pcmk__group_assign: ip_voip_route_test1 allocation score on asttest1: 0
pcmk__group_assign: ip_voip_route_test1 allocation score on asttest2: 0
pcmk__group_assign: ip_voip_route_test2 allocation score on asttest1: 0
pcmk__group_assign: ip_voip_route_test2 allocation score on asttest2: 0
pcmk__group_assign: ip_voip_routes allocation score on asttest1: 0
pcmk__group_assign: ip_voip_routes allocation score on asttest2: 0
pcmk__group_assign: ip_voip_vlan850 allocation score on asttest1: 0
pcmk__group_assign: ip_voip_vlan850 allocation score on asttest2: 0
pcmk__group_assign: ip_voip_vlan851 allocation score on asttest1: 0
pcmk__group_assign: ip_voip_vlan851 allocation score on asttest2: 0
pcmk__group_assign: ip_voip_vlan852 allocation score on asttest1: 0
pcmk__group_assign: ip_voip_vlan852 allocation score on asttest2: 0
pcmk__group_assign: ip_voip_vlan853 allocation score on asttest1: 0
pcmk__group_assign: ip_voip_vlan853 allocation score on asttest2: 0
pcmk__group_assign: ip_voip_vlan854 allocation score on asttest1: 0
pcmk__group_assign: ip_voip_vlan854 allocation score on asttest2: 0
pcmk__group_assign: ip_voip_vlan855 allocation score on asttest1: 0
pcmk__group_assign: ip_voip_vlan855 allocation score on asttest2: 0
pcmk__group_assign: ip_voip_vlan856 allocation score on asttest1: 0
pcmk__group_assign: ip_voip_vlan856 allocation score on asttest2: 0
pcmk__group_assign: ip_voip_vlan998 allocation score on asttest1: 0
pcmk__group_assign: ip_voip_vlan998 allocation score on asttest2: 0
pcmk__group_assign: mysqld allocation score on asttest1: 2
pcmk__group_assign: mysqld allocation score on asttest2: 1
pcmk__group_assign: tftp allocation score on asttest1: 0
pcmk__group_assign: tftp allocation score on asttest2: 0
pcmk__group_assign: voip allocation score on asttest1: 2
pcmk__group_assign: voip allocation score on asttest2: 1
pcmk__primitive_assign: asterisk allocation score on asttest1: -INFINITY
pcmk__primitive_assign: asterisk allocation score on asttest2: 0
pcmk__primitive_assign: dahdi allocation score on asttest1: -INFINITY
pcmk__primitive_assign: dahdi allocation score on asttest2: 0
pcmk__primitive_assign: drbd:0 allocation score on asttest1: 6
pcmk__primitive_assign: drbd:0 allocation score on asttest2: 0
pcmk__primitive_assign: drbd:1 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: drbd:1 allocation score on asttest2: 6
pcmk__primitive_assign: fonulator allocation score on asttest1: -INFINITY
pcmk__primitive_assign: fonulator allocation score on asttest2: 0
pcmk__primitive_assign: fs_drbd allocation score on asttest1: -INFINITY
-pcmk__primitive_assign: fs_drbd allocation score on asttest2: 7
+pcmk__primitive_assign: fs_drbd allocation score on asttest2: 8
pcmk__primitive_assign: httpd allocation score on asttest1: -INFINITY
pcmk__primitive_assign: httpd allocation score on asttest2: 0
pcmk__primitive_assign: iax2_mon allocation score on asttest1: -INFINITY
pcmk__primitive_assign: iax2_mon allocation score on asttest2: 0
pcmk__primitive_assign: ip_voip_route_default:0 allocation score on asttest1: 1
pcmk__primitive_assign: ip_voip_route_default:0 allocation score on asttest2: 0
pcmk__primitive_assign: ip_voip_route_default:1 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_route_default:1 allocation score on asttest2: 1
pcmk__primitive_assign: ip_voip_route_test1 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_route_test1 allocation score on asttest2: 0
pcmk__primitive_assign: ip_voip_route_test2 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_route_test2 allocation score on asttest2: 0
pcmk__primitive_assign: ip_voip_vlan850 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_vlan850 allocation score on asttest2: 1
pcmk__primitive_assign: ip_voip_vlan851 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_vlan851 allocation score on asttest2: 1
pcmk__primitive_assign: ip_voip_vlan852 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_vlan852 allocation score on asttest2: 1
pcmk__primitive_assign: ip_voip_vlan853 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_vlan853 allocation score on asttest2: 1
pcmk__primitive_assign: ip_voip_vlan854 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_vlan854 allocation score on asttest2: 1
pcmk__primitive_assign: ip_voip_vlan855 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_vlan855 allocation score on asttest2: 1
pcmk__primitive_assign: ip_voip_vlan856 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_vlan856 allocation score on asttest2: 1
pcmk__primitive_assign: ip_voip_vlan998 allocation score on asttest1: -INFINITY
pcmk__primitive_assign: ip_voip_vlan998 allocation score on asttest2: 1
pcmk__primitive_assign: mysqld allocation score on asttest1: -INFINITY
pcmk__primitive_assign: mysqld allocation score on asttest2: 1
pcmk__primitive_assign: tftp allocation score on asttest1: -INFINITY
pcmk__primitive_assign: tftp allocation score on asttest2: 0
diff --git a/cts/scheduler/scores/promoted-partially-demoted-group.scores b/cts/scheduler/scores/promoted-partially-demoted-group.scores
index ad91702fd0..dece7ab097 100644
--- a/cts/scheduler/scores/promoted-partially-demoted-group.scores
+++ b/cts/scheduler/scores/promoted-partially-demoted-group.scores
@@ -1,93 +1,93 @@
-cdev-pool-0-drbd:0 promotion score on sd01-1: 10800
+cdev-pool-0-drbd:0 promotion score on sd01-1: 11200
cdev-pool-0-drbd:1 promotion score on sd01-0: INFINITY
cdev-pool-0-iscsi-vips-fw:0 promotion score on sd01-1: -INFINITY
cdev-pool-0-iscsi-vips-fw:1 promotion score on sd01-0: 2000
pcmk__clone_allocate: cdev-pool-0-drbd:0 allocation score on sd01-0: 0
pcmk__clone_allocate: cdev-pool-0-drbd:0 allocation score on sd01-1: 10100
pcmk__clone_allocate: cdev-pool-0-drbd:1 allocation score on sd01-0: 10100
pcmk__clone_allocate: cdev-pool-0-drbd:1 allocation score on sd01-1: 0
pcmk__clone_allocate: cdev-pool-0-iscsi-vips-fw:0 allocation score on sd01-0: 0
pcmk__clone_allocate: cdev-pool-0-iscsi-vips-fw:0 allocation score on sd01-1: 2000
pcmk__clone_allocate: cdev-pool-0-iscsi-vips-fw:1 allocation score on sd01-0: 2000
pcmk__clone_allocate: cdev-pool-0-iscsi-vips-fw:1 allocation score on sd01-1: 0
pcmk__clone_allocate: cl-ietd allocation score on sd01-0: INFINITY
-pcmk__clone_allocate: cl-ietd allocation score on sd01-1: 300
+pcmk__clone_allocate: cl-ietd allocation score on sd01-1: 500
pcmk__clone_allocate: cl-vlan1-net allocation score on sd01-0: 0
pcmk__clone_allocate: cl-vlan1-net allocation score on sd01-1: 0
pcmk__clone_allocate: ietd:0 allocation score on sd01-0: 0
pcmk__clone_allocate: ietd:0 allocation score on sd01-1: 100
pcmk__clone_allocate: ietd:1 allocation score on sd01-0: 100
pcmk__clone_allocate: ietd:1 allocation score on sd01-1: 0
pcmk__clone_allocate: ms-cdev-pool-0-drbd allocation score on sd01-0: INFINITY
-pcmk__clone_allocate: ms-cdev-pool-0-drbd allocation score on sd01-1: 400
+pcmk__clone_allocate: ms-cdev-pool-0-drbd allocation score on sd01-1: 600
pcmk__clone_allocate: ms-cdev-pool-0-iscsi-vips-fw allocation score on sd01-0: 0
pcmk__clone_allocate: ms-cdev-pool-0-iscsi-vips-fw allocation score on sd01-1: 0
pcmk__clone_allocate: vip-164-fw:0 allocation score on sd01-0: 0
pcmk__clone_allocate: vip-164-fw:0 allocation score on sd01-1: 100
pcmk__clone_allocate: vip-164-fw:1 allocation score on sd01-0: 100
pcmk__clone_allocate: vip-164-fw:1 allocation score on sd01-1: 0
pcmk__clone_allocate: vip-165-fw:0 allocation score on sd01-0: 0
pcmk__clone_allocate: vip-165-fw:0 allocation score on sd01-1: 100
pcmk__clone_allocate: vip-165-fw:1 allocation score on sd01-0: 100
pcmk__clone_allocate: vip-165-fw:1 allocation score on sd01-1: 0
pcmk__clone_allocate: vlan1-net:0 allocation score on sd01-0: 0
pcmk__clone_allocate: vlan1-net:0 allocation score on sd01-1: 100
pcmk__clone_allocate: vlan1-net:1 allocation score on sd01-0: 100
pcmk__clone_allocate: vlan1-net:1 allocation score on sd01-1: 0
pcmk__group_assign: cdev-pool-0-iscsi-export allocation score on sd01-0: INFINITY
pcmk__group_assign: cdev-pool-0-iscsi-export allocation score on sd01-1: 0
pcmk__group_assign: cdev-pool-0-iscsi-lun-1 allocation score on sd01-0: 0
pcmk__group_assign: cdev-pool-0-iscsi-lun-1 allocation score on sd01-1: 100
pcmk__group_assign: cdev-pool-0-iscsi-target allocation score on sd01-0: INFINITY
pcmk__group_assign: cdev-pool-0-iscsi-target allocation score on sd01-1: 100
pcmk__group_assign: cdev-pool-0-iscsi-vips allocation score on sd01-0: 0
pcmk__group_assign: cdev-pool-0-iscsi-vips allocation score on sd01-1: 0
pcmk__group_assign: cdev-pool-0-iscsi-vips-fw:0 allocation score on sd01-0: -INFINITY
pcmk__group_assign: cdev-pool-0-iscsi-vips-fw:0 allocation score on sd01-1: 2000
pcmk__group_assign: cdev-pool-0-iscsi-vips-fw:1 allocation score on sd01-0: 2000
pcmk__group_assign: cdev-pool-0-iscsi-vips-fw:1 allocation score on sd01-1: 0
pcmk__group_assign: vip-164 allocation score on sd01-0: 0
pcmk__group_assign: vip-164 allocation score on sd01-1: 100
pcmk__group_assign: vip-164-fw:0 allocation score on sd01-0: -INFINITY
pcmk__group_assign: vip-164-fw:0 allocation score on sd01-1: 100
pcmk__group_assign: vip-164-fw:1 allocation score on sd01-0: 100
pcmk__group_assign: vip-164-fw:1 allocation score on sd01-1: 0
pcmk__group_assign: vip-165 allocation score on sd01-0: 0
pcmk__group_assign: vip-165 allocation score on sd01-1: 100
pcmk__group_assign: vip-165-fw:0 allocation score on sd01-0: -INFINITY
pcmk__group_assign: vip-165-fw:0 allocation score on sd01-1: 100
pcmk__group_assign: vip-165-fw:1 allocation score on sd01-0: 100
pcmk__group_assign: vip-165-fw:1 allocation score on sd01-1: 0
pcmk__primitive_assign: cdev-pool-0-drbd:0 allocation score on sd01-0: -INFINITY
pcmk__primitive_assign: cdev-pool-0-drbd:0 allocation score on sd01-1: 10100
pcmk__primitive_assign: cdev-pool-0-drbd:1 allocation score on sd01-0: 10100
pcmk__primitive_assign: cdev-pool-0-drbd:1 allocation score on sd01-1: 0
pcmk__primitive_assign: cdev-pool-0-iscsi-lun-1 allocation score on sd01-0: 0
pcmk__primitive_assign: cdev-pool-0-iscsi-lun-1 allocation score on sd01-1: -INFINITY
pcmk__primitive_assign: cdev-pool-0-iscsi-target allocation score on sd01-0: INFINITY
pcmk__primitive_assign: cdev-pool-0-iscsi-target allocation score on sd01-1: -INFINITY
pcmk__primitive_assign: ietd:0 allocation score on sd01-0: -INFINITY
pcmk__primitive_assign: ietd:0 allocation score on sd01-1: 100
pcmk__primitive_assign: ietd:1 allocation score on sd01-0: 100
pcmk__primitive_assign: ietd:1 allocation score on sd01-1: 0
pcmk__primitive_assign: stonith-xvm-sd01-0 allocation score on sd01-0: -INFINITY
pcmk__primitive_assign: stonith-xvm-sd01-0 allocation score on sd01-1: 100
pcmk__primitive_assign: stonith-xvm-sd01-1 allocation score on sd01-0: 100
pcmk__primitive_assign: stonith-xvm-sd01-1 allocation score on sd01-1: -INFINITY
pcmk__primitive_assign: vip-164 allocation score on sd01-0: 0
pcmk__primitive_assign: vip-164 allocation score on sd01-1: -INFINITY
pcmk__primitive_assign: vip-164-fw:0 allocation score on sd01-0: -INFINITY
pcmk__primitive_assign: vip-164-fw:0 allocation score on sd01-1: 200
pcmk__primitive_assign: vip-164-fw:1 allocation score on sd01-0: 200
pcmk__primitive_assign: vip-164-fw:1 allocation score on sd01-1: 0
pcmk__primitive_assign: vip-165 allocation score on sd01-0: 0
pcmk__primitive_assign: vip-165 allocation score on sd01-1: -INFINITY
pcmk__primitive_assign: vip-165-fw:0 allocation score on sd01-0: -INFINITY
pcmk__primitive_assign: vip-165-fw:0 allocation score on sd01-1: 100
pcmk__primitive_assign: vip-165-fw:1 allocation score on sd01-0: 100
pcmk__primitive_assign: vip-165-fw:1 allocation score on sd01-1: -INFINITY
pcmk__primitive_assign: vlan1-net:0 allocation score on sd01-0: 0
pcmk__primitive_assign: vlan1-net:0 allocation score on sd01-1: 100
pcmk__primitive_assign: vlan1-net:1 allocation score on sd01-0: 100
pcmk__primitive_assign: vlan1-net:1 allocation score on sd01-1: -INFINITY
diff --git a/cts/scheduler/summary/colocation-priority-group.summary b/cts/scheduler/summary/colocation-priority-group.summary
new file mode 100644
index 0000000000..3a7cf2aee5
--- /dev/null
+++ b/cts/scheduler/summary/colocation-priority-group.summary
@@ -0,0 +1,59 @@
+Current cluster status:
+ * Node List:
+ * Online: [ node1 node2 ]
+
+ * Full List of Resources:
+ * Resource Group: group1:
+ * member1a (ocf:pacemaker:Dummy): Stopped
+ * member1b (ocf:pacemaker:Dummy): Stopped
+ * rsc3 (ocf:pacemaker:Dummy): Stopped
+ * rsc2 (ocf:pacemaker:Dummy): Stopped
+ * Fencing (stonith:fence_xvm): Started node1
+ * rsc4 (ocf:pacemaker:Dummy): Stopped
+
+Transition Summary:
+ * Start member1a ( node1 )
+ * Start member1b ( node1 )
+ * Start rsc3 ( node1 )
+ * Move Fencing ( node1 -> node2 )
+ * Start rsc4 ( node2 )
+
+Executing Cluster Transition:
+ * Pseudo action: group1_start_0
+ * Resource action: member1a monitor on node2
+ * Resource action: member1a monitor on node1
+ * Resource action: member1b monitor on node2
+ * Resource action: member1b monitor on node1
+ * Resource action: rsc3 monitor on node2
+ * Resource action: rsc3 monitor on node1
+ * Resource action: rsc2 monitor on node2
+ * Resource action: rsc2 monitor on node1
+ * Resource action: Fencing stop on node1
+ * Resource action: rsc4 monitor on node2
+ * Resource action: rsc4 monitor on node1
+ * Pseudo action: load_stopped_node2
+ * Pseudo action: load_stopped_node1
+ * Resource action: member1a start on node1
+ * Resource action: member1b start on node1
+ * Resource action: rsc3 start on node1
+ * Resource action: Fencing start on node2
+ * Resource action: rsc4 start on node2
+ * Pseudo action: group1_running_0
+ * Resource action: member1a monitor=10000 on node1
+ * Resource action: member1b monitor=10000 on node1
+ * Resource action: rsc3 monitor=10000 on node1
+ * Resource action: Fencing monitor=120000 on node2
+ * Resource action: rsc4 monitor=10000 on node2
+
+Revised Cluster Status:
+ * Node List:
+ * Online: [ node1 node2 ]
+
+ * Full List of Resources:
+ * Resource Group: group1:
+ * member1a (ocf:pacemaker:Dummy): Started node1
+ * member1b (ocf:pacemaker:Dummy): Started node1
+ * rsc3 (ocf:pacemaker:Dummy): Started node1
+ * rsc2 (ocf:pacemaker:Dummy): Stopped
+ * Fencing (stonith:fence_xvm): Started node2
+ * rsc4 (ocf:pacemaker:Dummy): Started node2
diff --git a/cts/scheduler/summary/colocation-vs-stickiness.summary b/cts/scheduler/summary/colocation-vs-stickiness.summary
new file mode 100644
index 0000000000..4e5407ac8c
--- /dev/null
+++ b/cts/scheduler/summary/colocation-vs-stickiness.summary
@@ -0,0 +1,61 @@
+Using the original execution date of: 2018-09-26 16:40:38Z
+Current cluster status:
+ * Node List:
+ * Node rhel7-1: standby
+ * Node rhel7-2: standby
+ * Node rhel7-3: standby
+ * Online: [ rhel7-4 rhel7-5 ]
+
+ * Full List of Resources:
+ * Fencing (stonith:fence_xvm): Started rhel7-5
+ * FencingPass (stonith:fence_dummy): Started rhel7-5
+ * FencingFail (stonith:fence_dummy): Started rhel7-5
+ * Resource Group: group1:
+ * dummy1 (ocf:pacemaker:Dummy): Started rhel7-5
+ * dummy1b (ocf:pacemaker:Dummy): Started rhel7-5
+ * dummy1c (ocf:pacemaker:Dummy): Started rhel7-5
+ * Resource Group: group2:
+ * dummy2a (ocf:pacemaker:Dummy): Started rhel7-5
+ * dummy2b (ocf:pacemaker:Dummy): Started rhel7-5
+ * dummy2c (ocf:pacemaker:Dummy): Started rhel7-5
+
+Transition Summary:
+ * Move dummy2a ( rhel7-5 -> rhel7-4 )
+ * Move dummy2b ( rhel7-5 -> rhel7-4 )
+ * Move dummy2c ( rhel7-5 -> rhel7-4 )
+
+Executing Cluster Transition:
+ * Pseudo action: group2_stop_0
+ * Resource action: dummy2c stop on rhel7-5
+ * Resource action: dummy2b stop on rhel7-5
+ * Resource action: dummy2a stop on rhel7-5
+ * Pseudo action: group2_stopped_0
+ * Pseudo action: group2_start_0
+ * Resource action: dummy2a start on rhel7-4
+ * Resource action: dummy2b start on rhel7-4
+ * Resource action: dummy2c start on rhel7-4
+ * Pseudo action: group2_running_0
+ * Resource action: dummy2a monitor=10000 on rhel7-4
+ * Resource action: dummy2b monitor=10000 on rhel7-4
+ * Resource action: dummy2c monitor=10000 on rhel7-4
+Using the original execution date of: 2018-09-26 16:40:38Z
+
+Revised Cluster Status:
+ * Node List:
+ * Node rhel7-1: standby
+ * Node rhel7-2: standby
+ * Node rhel7-3: standby
+ * Online: [ rhel7-4 rhel7-5 ]
+
+ * Full List of Resources:
+ * Fencing (stonith:fence_xvm): Started rhel7-5
+ * FencingPass (stonith:fence_dummy): Started rhel7-5
+ * FencingFail (stonith:fence_dummy): Started rhel7-5
+ * Resource Group: group1:
+ * dummy1 (ocf:pacemaker:Dummy): Started rhel7-5
+ * dummy1b (ocf:pacemaker:Dummy): Started rhel7-5
+ * dummy1c (ocf:pacemaker:Dummy): Started rhel7-5
+ * Resource Group: group2:
+ * dummy2a (ocf:pacemaker:Dummy): Started rhel7-4
+ * dummy2b (ocf:pacemaker:Dummy): Started rhel7-4
+ * dummy2c (ocf:pacemaker:Dummy): Started rhel7-4
diff --git a/cts/scheduler/summary/group-anticolocation.summary b/cts/scheduler/summary/group-anticolocation.summary
new file mode 100644
index 0000000000..c9d4321330
--- /dev/null
+++ b/cts/scheduler/summary/group-anticolocation.summary
@@ -0,0 +1,33 @@
+Current cluster status:
+ * Node List:
+ * Online: [ node1 node2 ]
+
+ * Full List of Resources:
+ * Fencing (stonith:fence_xvm): Started node1
+ * Resource Group: group1:
+ * member1a (ocf:pacemaker:Dummy): Started node2
+ * member1b (ocf:pacemaker:Dummy): Started node2
+ * Resource Group: group2:
+ * member2a (ocf:pacemaker:Dummy): Started node1
+ * member2b (ocf:pacemaker:Dummy): FAILED node1
+
+Transition Summary:
+ * Stop member2b ( node1 ) due to node availability
+
+Executing Cluster Transition:
+ * Pseudo action: group2_stop_0
+ * Resource action: member2b stop on node1
+ * Pseudo action: group2_stopped_0
+
+Revised Cluster Status:
+ * Node List:
+ * Online: [ node1 node2 ]
+
+ * Full List of Resources:
+ * Fencing (stonith:fence_xvm): Started node1
+ * Resource Group: group1:
+ * member1a (ocf:pacemaker:Dummy): Started node2
+ * member1b (ocf:pacemaker:Dummy): Started node2
+ * Resource Group: group2:
+ * member2a (ocf:pacemaker:Dummy): Started node1
+ * member2b (ocf:pacemaker:Dummy): Stopped
diff --git a/cts/scheduler/summary/group-colocation-failure.summary b/cts/scheduler/summary/group-colocation-failure.summary
new file mode 100644
index 0000000000..fed71c80b6
--- /dev/null
+++ b/cts/scheduler/summary/group-colocation-failure.summary
@@ -0,0 +1,47 @@
+Current cluster status:
+ * Node List:
+ * Online: [ node1 node2 ]
+
+ * Full List of Resources:
+ * Fencing (stonith:fence_xvm): Started node1
+ * Resource Group: group1:
+ * member1a (ocf:pacemaker:Dummy): Started node2
+ * member1b (ocf:pacemaker:Dummy): Started node2
+ * Resource Group: group2:
+ * member2a (ocf:pacemaker:Dummy): FAILED node2
+
+Transition Summary:
+ * Move member1a ( node2 -> node1 )
+ * Move member1b ( node2 -> node1 )
+ * Recover member2a ( node2 -> node1 )
+
+Executing Cluster Transition:
+ * Pseudo action: group1_stop_0
+ * Resource action: member1b stop on node2
+ * Pseudo action: group2_stop_0
+ * Resource action: member2a stop on node2
+ * Resource action: member1a stop on node2
+ * Pseudo action: group2_stopped_0
+ * Pseudo action: group2_start_0
+ * Resource action: member2a start on node1
+ * Pseudo action: group1_stopped_0
+ * Pseudo action: group1_start_0
+ * Resource action: member1a start on node1
+ * Resource action: member1b start on node1
+ * Pseudo action: group2_running_0
+ * Resource action: member2a monitor=10000 on node1
+ * Pseudo action: group1_running_0
+ * Resource action: member1a monitor=10000 on node1
+ * Resource action: member1b monitor=10000 on node1
+
+Revised Cluster Status:
+ * Node List:
+ * Online: [ node1 node2 ]
+
+ * Full List of Resources:
+ * Fencing (stonith:fence_xvm): Started node1
+ * Resource Group: group1:
+ * member1a (ocf:pacemaker:Dummy): Started node1
+ * member1b (ocf:pacemaker:Dummy): Started node1
+ * Resource Group: group2:
+ * member2a (ocf:pacemaker:Dummy): Started node1
diff --git a/cts/scheduler/xml/colocation-priority-group.xml b/cts/scheduler/xml/colocation-priority-group.xml
new file mode 100644
index 0000000000..6c598971f7
--- /dev/null
+++ b/cts/scheduler/xml/colocation-priority-group.xml
@@ -0,0 +1,155 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/cts/scheduler/xml/colocation-vs-stickiness.xml b/cts/scheduler/xml/colocation-vs-stickiness.xml
new file mode 100644
index 0000000000..95e25a2ba2
--- /dev/null
+++ b/cts/scheduler/xml/colocation-vs-stickiness.xml
@@ -0,0 +1,372 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/cts/scheduler/xml/group-anticolocation.xml b/cts/scheduler/xml/group-anticolocation.xml
new file mode 100644
index 0000000000..0d244ff4a7
--- /dev/null
+++ b/cts/scheduler/xml/group-anticolocation.xml
@@ -0,0 +1,151 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/cts/scheduler/xml/group-colocation-failure.xml b/cts/scheduler/xml/group-colocation-failure.xml
new file mode 100644
index 0000000000..a6df2a02a2
--- /dev/null
+++ b/cts/scheduler/xml/group-colocation-failure.xml
@@ -0,0 +1,134 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/sphinx/Pacemaker_Explained/constraints.rst b/doc/sphinx/Pacemaker_Explained/constraints.rst
index 356054761d..333e4b5431 100644
--- a/doc/sphinx/Pacemaker_Explained/constraints.rst
+++ b/doc/sphinx/Pacemaker_Explained/constraints.rst
@@ -1,1069 +1,1067 @@
.. index::
single: constraint
single: resource; constraint
.. _constraints:
Resource Constraints
--------------------
.. index::
single: resource; score
single: node; score
Scores
######
Scores of all kinds are integral to how the cluster works.
Practically everything from moving a resource to deciding which
resource to stop in a degraded cluster is achieved by manipulating
scores in some way.
Scores are calculated per resource and node. Any node with a
negative score for a resource can't run that resource. The cluster
places a resource on the node with the highest score for it.
Infinity Math
_____________
Pacemaker implements **INFINITY** (or equivalently, **+INFINITY**) internally as a
score of 1,000,000. Addition and subtraction with it follow these three basic
rules:
* Any value + **INFINITY** = **INFINITY**
* Any value - **INFINITY** = -**INFINITY**
* **INFINITY** - **INFINITY** = **-INFINITY**
.. note::
What if you want to use a score higher than 1,000,000? Typically this possibility
arises when someone wants to base the score on some external metric that might
go above 1,000,000.
The short answer is you can't.
The long answer is it is sometimes possible work around this limitation
creatively. You may be able to set the score to some computed value based on
the external metric rather than use the metric directly. For nodes, you can
store the metric as a node attribute, and query the attribute when computing
the score (possibly as part of a custom resource agent).
.. _location-constraint:
.. index::
single: location constraint
single: constraint; location
Deciding Which Nodes a Resource Can Run On
##########################################
*Location constraints* tell the cluster which nodes a resource can run on.
There are two alternative strategies. One way is to say that, by default,
resources can run anywhere, and then the location constraints specify nodes
that are not allowed (an *opt-out* cluster). The other way is to start with
nothing able to run anywhere, and use location constraints to selectively
enable allowed nodes (an *opt-in* cluster).
Whether you should choose opt-in or opt-out depends on your
personal preference and the make-up of your cluster. If most of your
resources can run on most of the nodes, then an opt-out arrangement is
likely to result in a simpler configuration. On the other-hand, if
most resources can only run on a small subset of nodes, an opt-in
configuration might be simpler.
.. index::
pair: XML element; rsc_location
single: constraint; rsc_location
Location Properties
___________________
.. table:: **Attributes of a rsc_location Element**
:class: longtable
:widths: 1 1 4
+--------------------+---------+----------------------------------------------------------------------------------------------+
| Attribute | Default | Description |
+====================+=========+==============================================================================================+
| id | | .. index:: |
| | | single: rsc_location; attribute, id |
| | | single: attribute; id (rsc_location) |
| | | single: id; rsc_location attribute |
| | | |
| | | A unique name for the constraint (required) |
+--------------------+---------+----------------------------------------------------------------------------------------------+
| rsc | | .. index:: |
| | | single: rsc_location; attribute, rsc |
| | | single: attribute; rsc (rsc_location) |
| | | single: rsc; rsc_location attribute |
| | | |
| | | The name of the resource to which this constraint |
| | | applies. A location constraint must either have a |
| | | ``rsc``, have a ``rsc-pattern``, or contain at |
| | | least one resource set. |
+--------------------+---------+----------------------------------------------------------------------------------------------+
| rsc-pattern | | .. index:: |
| | | single: rsc_location; attribute, rsc-pattern |
| | | single: attribute; rsc-pattern (rsc_location) |
| | | single: rsc-pattern; rsc_location attribute |
| | | |
| | | A pattern matching the names of resources to which |
| | | this constraint applies. The syntax is the same as |
| | | `POSIX `_ |
| | | extended regular expressions, with the addition of an |
| | | initial *!* indicating that resources *not* matching |
| | | the pattern are selected. If the regular expression |
| | | contains submatches, and the constraint is governed by |
| | | a :ref:`rule `, the submatches can be |
| | | referenced as **%1** through **%9** in the rule's |
| | | ``score-attribute`` or a rule expression's ``attribute``. |
| | | A location constraint must either have a ``rsc``, have a |
| | | ``rsc-pattern``, or contain at least one resource set. |
+--------------------+---------+----------------------------------------------------------------------------------------------+
| node | | .. index:: |
| | | single: rsc_location; attribute, node |
| | | single: attribute; node (rsc_location) |
| | | single: node; rsc_location attribute |
| | | |
| | | The name of the node to which this constraint applies. |
| | | A location constraint must either have a ``node`` and |
| | | ``score``, or contain at least one rule. |
+--------------------+---------+----------------------------------------------------------------------------------------------+
| score | | .. index:: |
| | | single: rsc_location; attribute, score |
| | | single: attribute; score (rsc_location) |
| | | single: score; rsc_location attribute |
| | | |
| | | Positive values indicate a preference for running the |
| | | affected resource(s) on ``node`` -- the higher the value, |
| | | the stronger the preference. Negative values indicate |
| | | the resource(s) should avoid this node (a value of |
| | | **-INFINITY** changes "should" to "must"). A location |
| | | constraint must either have a ``node`` and ``score``, |
| | | or contain at least one rule. |
+--------------------+---------+----------------------------------------------------------------------------------------------+
| resource-discovery | always | .. index:: |
| | | single: rsc_location; attribute, resource-discovery |
| | | single: attribute; resource-discovery (rsc_location) |
| | | single: resource-discovery; rsc_location attribute |
| | | |
| | | Whether Pacemaker should perform resource discovery |
| | | (that is, check whether the resource is already running) |
| | | for this resource on this node. This should normally be |
| | | left as the default, so that rogue instances of a |
| | | service can be stopped when they are running where they |
| | | are not supposed to be. However, there are two |
| | | situations where disabling resource discovery is a good |
| | | idea: when a service is not installed on a node, |
| | | discovery might return an error (properly written OCF |
| | | agents will not, so this is usually only seen with other |
| | | agent types); and when Pacemaker Remote is used to scale |
| | | a cluster to hundreds of nodes, limiting resource |
| | | discovery to allowed nodes can significantly boost |
| | | performance. |
| | | |
| | | * ``always:`` Always perform resource discovery for |
| | | the specified resource on this node. |
| | | |
| | | * ``never:`` Never perform resource discovery for the |
| | | specified resource on this node. This option should |
| | | generally be used with a -INFINITY score, although |
| | | that is not strictly required. |
| | | |
| | | * ``exclusive:`` Perform resource discovery for the |
| | | specified resource only on this node (and other nodes |
| | | similarly marked as ``exclusive``). Multiple location |
| | | constraints using ``exclusive`` discovery for the |
| | | same resource across different nodes creates a subset |
| | | of nodes resource-discovery is exclusive to. If a |
| | | resource is marked for ``exclusive`` discovery on one |
| | | or more nodes, that resource is only allowed to be |
| | | placed within that subset of nodes. |
+--------------------+---------+----------------------------------------------------------------------------------------------+
.. warning::
Setting ``resource-discovery`` to ``never`` or ``exclusive`` removes Pacemaker's
ability to detect and stop unwanted instances of a service running
where it's not supposed to be. It is up to the system administrator (you!)
to make sure that the service can *never* be active on nodes without
``resource-discovery`` (such as by leaving the relevant software uninstalled).
.. index::
single: Asymmetrical Clusters
single: Opt-In Clusters
Asymmetrical "Opt-In" Clusters
______________________________
To create an opt-in cluster, start by preventing resources from running anywhere
by default:
.. code-block:: none
# crm_attribute --name symmetric-cluster --update false
Then start enabling nodes. The following fragment says that the web
server prefers **sles-1**, the database prefers **sles-2** and both can
fail over to **sles-3** if their most preferred node fails.
.. topic:: Opt-in location constraints for two resources
.. code-block:: xml
.. index::
single: Symmetrical Clusters
single: Opt-Out Clusters
Symmetrical "Opt-Out" Clusters
______________________________
To create an opt-out cluster, start by allowing resources to run
anywhere by default:
.. code-block:: none
# crm_attribute --name symmetric-cluster --update true
Then start disabling nodes. The following fragment is the equivalent
of the above opt-in configuration.
.. topic:: Opt-out location constraints for two resources
.. code-block:: xml
.. _node-score-equal:
What if Two Nodes Have the Same Score
_____________________________________
If two nodes have the same score, then the cluster will choose one.
This choice may seem random and may not be what was intended, however
the cluster was not given enough information to know any better.
.. topic:: Constraints where a resource prefers two nodes equally
.. code-block:: xml
In the example above, assuming no other constraints and an inactive
cluster, **Webserver** would probably be placed on **sles-1** and **Database** on
**sles-2**. It would likely have placed **Webserver** based on the node's
uname and **Database** based on the desire to spread the resource load
evenly across the cluster. However other factors can also be involved
in more complex configurations.
.. index::
single: constraint; ordering
single: resource; start order
.. _s-resource-ordering:
Specifying the Order in which Resources Should Start/Stop
#########################################################
*Ordering constraints* tell the cluster the order in which certain
resource actions should occur.
.. important::
Ordering constraints affect *only* the ordering of resource actions;
they do *not* require that the resources be placed on the
same node. If you want resources to be started on the same node
*and* in a specific order, you need both an ordering constraint *and*
a colocation constraint (see :ref:`s-resource-colocation`), or
alternatively, a group (see :ref:`group-resources`).
.. index::
pair: XML element; rsc_order
pair: constraint; rsc_order
Ordering Properties
___________________
.. table:: **Attributes of a rsc_order Element**
:class: longtable
:widths: 1 2 4
+--------------+----------------------------+-------------------------------------------------------------------+
| Field | Default | Description |
+==============+============================+===================================================================+
| id | | .. index:: |
| | | single: rsc_order; attribute, id |
| | | single: attribute; id (rsc_order) |
| | | single: id; rsc_order attribute |
| | | |
| | | A unique name for the constraint |
+--------------+----------------------------+-------------------------------------------------------------------+
| first | | .. index:: |
| | | single: rsc_order; attribute, first |
| | | single: attribute; first (rsc_order) |
| | | single: first; rsc_order attribute |
| | | |
| | | Name of the resource that the ``then`` resource |
| | | depends on |
+--------------+----------------------------+-------------------------------------------------------------------+
| then | | .. index:: |
| | | single: rsc_order; attribute, then |
| | | single: attribute; then (rsc_order) |
| | | single: then; rsc_order attribute |
| | | |
| | | Name of the dependent resource |
+--------------+----------------------------+-------------------------------------------------------------------+
| first-action | start | .. index:: |
| | | single: rsc_order; attribute, first-action |
| | | single: attribute; first-action (rsc_order) |
| | | single: first-action; rsc_order attribute |
| | | |
| | | The action that the ``first`` resource must complete |
| | | before ``then-action`` can be initiated for the ``then`` |
| | | resource. Allowed values: ``start``, ``stop``, |
| | | ``promote``, ``demote``. |
+--------------+----------------------------+-------------------------------------------------------------------+
| then-action | value of ``first-action`` | .. index:: |
| | | single: rsc_order; attribute, then-action |
| | | single: attribute; then-action (rsc_order) |
| | | single: first-action; rsc_order attribute |
| | | |
| | | The action that the ``then`` resource can execute only |
| | | after the ``first-action`` on the ``first`` resource has |
| | | completed. Allowed values: ``start``, ``stop``, |
| | | ``promote``, ``demote``. |
+--------------+----------------------------+-------------------------------------------------------------------+
| kind | Mandatory | .. index:: |
| | | single: rsc_order; attribute, kind |
| | | single: attribute; kind (rsc_order) |
| | | single: kind; rsc_order attribute |
| | | |
| | | How to enforce the constraint. Allowed values: |
| | | |
| | | * ``Mandatory:`` ``then-action`` will never be initiated |
| | | for the ``then`` resource unless and until ``first-action`` |
| | | successfully completes for the ``first`` resource. |
| | | |
| | | * ``Optional:`` The constraint applies only if both specified |
| | | resource actions are scheduled in the same transition |
| | | (that is, in response to the same cluster state). This |
| | | means that ``then-action`` is allowed on the ``then`` |
| | | resource regardless of the state of the ``first`` resource, |
| | | but if both actions happen to be scheduled at the same time, |
| | | they will be ordered. |
| | | |
| | | * ``Serialize:`` Ensure that the specified actions are never |
| | | performed concurrently for the specified resources. |
| | | ``First-action`` and ``then-action`` can be executed in either |
| | | order, but one must complete before the other can be initiated. |
| | | An example use case is when resource start-up puts a high load |
| | | on the host. |
+--------------+----------------------------+-------------------------------------------------------------------+
| symmetrical | TRUE for ``Mandatory`` and | .. index:: |
| | ``Optional`` kinds. FALSE | single: rsc_order; attribute, symmetrical |
| | for ``Serialize`` kind. | single: attribute; symmetrical (rsc)order) |
| | | single: symmetrical; rsc_order attribute |
| | | |
| | | If true, the reverse of the constraint applies for the |
| | | opposite action (for example, if B starts after A starts, |
| | | then B stops before A stops). ``Serialize`` orders cannot |
| | | be symmetrical. |
+--------------+----------------------------+-------------------------------------------------------------------+
``Promote`` and ``demote`` apply to :ref:`promotable `
clone resources.
Optional and mandatory ordering
_______________________________
Here is an example of ordering constraints where **Database** *must* start before
**Webserver**, and **IP** *should* start before **Webserver** if they both need to be
started:
.. topic:: Optional and mandatory ordering constraints
.. code-block:: xml
Because the above example lets ``symmetrical`` default to TRUE, **Webserver**
must be stopped before **Database** can be stopped, and **Webserver** should be
stopped before **IP** if they both need to be stopped.
.. index::
single: colocation
single: constraint; colocation
single: resource; location relative to other resources
.. _s-resource-colocation:
Placing Resources Relative to other Resources
#############################################
*Colocation constraints* tell the cluster that the location of one resource
depends on the location of another one.
Colocation has an important side-effect: it affects the order in which
resources are assigned to a node. Think about it: You can't place A relative to
B unless you know where B is [#]_.
So when you are creating colocation constraints, it is important to
consider whether you should colocate A with B, or B with A.
.. important::
Colocation constraints affect *only* the placement of resources; they do *not*
require that the resources be started in a particular order. If you want
resources to be started on the same node *and* in a specific order, you need
both an ordering constraint (see :ref:`s-resource-ordering`) *and* a colocation
constraint, or alternatively, a group (see :ref:`group-resources`).
.. index::
pair: XML element; rsc_colocation
single: constraint; rsc_colocation
Colocation Properties
_____________________
.. table:: **Attributes of a rsc_colocation Constraint**
:class: longtable
:widths: 2 2 5
+----------------+----------------+--------------------------------------------------------+
| Field | Default | Description |
+================+================+========================================================+
| id | | .. index:: |
| | | single: rsc_colocation; attribute, id |
| | | single: attribute; id (rsc_colocation) |
| | | single: id; rsc_colocation attribute |
| | | |
| | | A unique name for the constraint (required). |
+----------------+----------------+--------------------------------------------------------+
| rsc | | .. index:: |
| | | single: rsc_colocation; attribute, rsc |
| | | single: attribute; rsc (rsc_colocation) |
| | | single: rsc; rsc_colocation attribute |
| | | |
| | | The name of a resource that should be located |
| | | relative to ``with-rsc``. A colocation constraint must |
| | | either contain at least one |
| | | :ref:`resource set `, or specify both |
| | | ``rsc`` and ``with-rsc``. |
+----------------+----------------+--------------------------------------------------------+
| with-rsc | | .. index:: |
| | | single: rsc_colocation; attribute, with-rsc |
| | | single: attribute; with-rsc (rsc_colocation) |
| | | single: with-rsc; rsc_colocation attribute |
| | | |
| | | The name of the resource used as the colocation |
| | | target. The cluster will decide where to put this |
| | | resource first and then decide where to put ``rsc``. |
| | | A colocation constraint must either contain at least |
| | | one :ref:`resource set `, or specify |
| | | both ``rsc`` and ``with-rsc``. |
+----------------+----------------+--------------------------------------------------------+
| node-attribute | #uname | .. index:: |
| | | single: rsc_colocation; attribute, node-attribute |
| | | single: attribute; node-attribute (rsc_colocation) |
| | | single: node-attribute; rsc_colocation attribute |
| | | |
| | | If ``rsc`` and ``with-rsc`` are specified, this node |
| | | attribute must be the same on the node running ``rsc`` |
| | | and the node running ``with-rsc`` for the constraint |
| | | to be satisfied. (For details, see |
| | | :ref:`s-coloc-attribute`.) |
+----------------+----------------+--------------------------------------------------------+
| score | 0 | .. index:: |
| | | single: rsc_colocation; attribute, score |
| | | single: attribute; score (rsc_colocation) |
| | | single: score; rsc_colocation attribute |
| | | |
| | | Positive values indicate the resources should run on |
| | | the same node. Negative values indicate the resources |
| | | should run on different nodes. Values of |
| | | +/- ``INFINITY`` change "should" to "must". |
+----------------+----------------+--------------------------------------------------------+
| rsc-role | Started | .. index:: |
| | | single: clone; ordering constraint, rsc-role |
| | | single: ordering constraint; rsc-role (clone) |
| | | single: rsc-role; clone ordering constraint |
| | | |
| | | If ``rsc`` and ``with-rsc`` are specified, and ``rsc`` |
| | | is a :ref:`promotable clone `, |
| | | the constraint applies only to ``rsc`` instances in |
| | | this role. Allowed values: ``Started``, ``Promoted``, |
| | | ``Unpromoted``. For details, see |
| | | :ref:`promotable-clone-constraints`. |
+----------------+----------------+--------------------------------------------------------+
| with-rsc-role | Started | .. index:: |
| | | single: clone; ordering constraint, with-rsc-role |
| | | single: ordering constraint; with-rsc-role (clone) |
| | | single: with-rsc-role; clone ordering constraint |
| | | |
| | | If ``rsc`` and ``with-rsc`` are specified, and |
| | | ``with-rsc`` is a |
| | | :ref:`promotable clone `, the |
| | | constraint applies only to ``with-rsc`` instances in |
| | | this role. Allowed values: ``Started``, ``Promoted``, |
| | | ``Unpromoted``. For details, see |
| | | :ref:`promotable-clone-constraints`. |
+----------------+----------------+--------------------------------------------------------+
| influence | value of | .. index:: |
| | ``critical`` | single: rsc_colocation; attribute, influence |
| | meta-attribute | single: attribute; influence (rsc_colocation) |
| | for ``rsc`` | single: influence; rsc_colocation attribute |
| | | |
| | | Whether to consider the location preferences of |
| | | ``rsc`` when ``with-rsc`` is already active. Allowed |
| | | values: ``true``, ``false``. For details, see |
| | | :ref:`s-coloc-influence`. *(since 2.1.0)* |
+----------------+----------------+--------------------------------------------------------+
Mandatory Placement
___________________
Mandatory placement occurs when the constraint's score is
**+INFINITY** or **-INFINITY**. In such cases, if the constraint can't be
satisfied, then the **rsc** resource is not permitted to run. For
``score=INFINITY``, this includes cases where the ``with-rsc`` resource is
not active.
If you need resource **A** to always run on the same machine as
resource **B**, you would add the following constraint:
.. topic:: Mandatory colocation constraint for two resources
.. code-block:: xml
Remember, because **INFINITY** was used, if **B** can't run on any
of the cluster nodes (for whatever reason) then **A** will not
be allowed to run. Whether **A** is running or not has no effect on **B**.
Alternatively, you may want the opposite -- that **A** *cannot*
run on the same machine as **B**. In this case, use ``score="-INFINITY"``.
.. topic:: Mandatory anti-colocation constraint for two resources
.. code-block:: xml
Again, by specifying **-INFINITY**, the constraint is binding. So if the
only place left to run is where **B** already is, then **A** may not run anywhere.
As with **INFINITY**, **B** can run even if **A** is stopped. However, in this
case **A** also can run if **B** is stopped, because it still meets the
constraint of **A** and **B** not running on the same node.
Advisory Placement
__________________
If mandatory placement is about "must" and "must not", then advisory
-placement is the "I'd prefer if" alternative. For constraints with
-scores greater than **-INFINITY** and less than **INFINITY**, the cluster
-will try to accommodate your wishes but may ignore them if the
-alternative is to stop some of the cluster resources.
-
-As in life, where if enough people prefer something it effectively
-becomes mandatory, advisory colocation constraints can combine with
-other elements of the configuration to behave as if they were
-mandatory.
+placement is the "I'd prefer if" alternative.
+
+For colocation constraints with scores greater than **-INFINITY** and less than
+**INFINITY**, the cluster will try to accommodate your wishes, but may ignore
+them if other factors outweigh the colocation score. Those factors might
+include other constraints, resource stickiness, failure thresholds, whether
+other resources would be prevented from being active, etc.
.. topic:: Advisory colocation constraint for two resources
.. code-block:: xml
.. _s-coloc-attribute:
Colocation by Node Attribute
____________________________
The ``node-attribute`` property of a colocation constraints allows you to express
the requirement, "these resources must be on similar nodes".
As an example, imagine that you have two Storage Area Networks (SANs) that are
not controlled by the cluster, and each node is connected to one or the other.
You may have two resources **r1** and **r2** such that **r2** needs to use the same
SAN as **r1**, but doesn't necessarily have to be on the same exact node.
In such a case, you could define a :ref:`node attribute ` named
**san**, with the value **san1** or **san2** on each node as appropriate. Then, you
could colocate **r2** with **r1** using ``node-attribute`` set to **san**.
.. _s-coloc-influence:
Colocation Influence
____________________
By default, if A is colocated with B, the cluster will take into account A's
preferences when deciding where to place B, to maximize the chance that both
resources can run.
For a detailed look at exactly how this occurs, see
`Colocation Explained `_.
However, if ``influence`` is set to ``false`` in the colocation constraint,
this will happen only if B is inactive and needing to be started. If B is
already active, A's preferences will have no effect on placing B.
An example of what effect this would have and when it would be desirable would
be a nonessential reporting tool colocated with a resource-intensive service
that takes a long time to start. If the reporting tool fails enough times to
reach its migration threshold, by default the cluster will want to move both
resources to another node if possible. Setting ``influence`` to ``false`` on
the colocation constraint would mean that the reporting tool would be stopped
in this situation instead, to avoid forcing the service to move.
The ``critical`` resource meta-attribute is a convenient way to specify the
default for all colocation constraints and groups involving a particular
resource.
.. note::
If a noncritical resource is a member of a group, all later members of the
group will be treated as noncritical, even if they are marked as (or left to
default to) critical.
.. _s-resource-sets:
Resource Sets
#############
.. index::
single: constraint; resource set
single: resource; resource set
*Resource sets* allow multiple resources to be affected by a single constraint.
.. topic:: A set of 3 resources
.. code-block:: xml
Resource sets are valid inside ``rsc_location``, ``rsc_order``
(see :ref:`s-resource-sets-ordering`), ``rsc_colocation``
(see :ref:`s-resource-sets-colocation`), and ``rsc_ticket``
(see :ref:`ticket-constraints`) constraints.
A resource set has a number of properties that can be set, though not all
have an effect in all contexts.
.. index::
pair: XML element; resource_set
.. table:: **Attributes of a resource_set Element**
:class: longtable
:widths: 2 2 5
+-------------+------------------+--------------------------------------------------------+
| Field | Default | Description |
+=============+==================+========================================================+
| id | | .. index:: |
| | | single: resource_set; attribute, id |
| | | single: attribute; id (resource_set) |
| | | single: id; resource_set attribute |
| | | |
| | | A unique name for the set (required) |
+-------------+------------------+--------------------------------------------------------+
| sequential | true | .. index:: |
| | | single: resource_set; attribute, sequential |
| | | single: attribute; sequential (resource_set) |
| | | single: sequential; resource_set attribute |
| | | |
| | | Whether the members of the set must be acted on in |
| | | order. Meaningful within ``rsc_order`` and |
| | | ``rsc_colocation``. |
+-------------+------------------+--------------------------------------------------------+
| require-all | true | .. index:: |
| | | single: resource_set; attribute, require-all |
| | | single: attribute; require-all (resource_set) |
| | | single: require-all; resource_set attribute |
| | | |
| | | Whether all members of the set must be active before |
| | | continuing. With the current implementation, the |
| | | cluster may continue even if only one member of the |
| | | set is started, but if more than one member of the set |
| | | is starting at the same time, the cluster will still |
| | | wait until all of those have started before continuing |
| | | (this may change in future versions). Meaningful |
| | | within ``rsc_order``. |
+-------------+------------------+--------------------------------------------------------+
| role | | .. index:: |
| | | single: resource_set; attribute, role |
| | | single: attribute; role (resource_set) |
| | | single: role; resource_set attribute |
| | | |
| | | The constraint applies only to resource set members |
| | | that are :ref:`s-resource-promotable` in this |
| | | role. Meaningful within ``rsc_location``, |
| | | ``rsc_colocation`` and ``rsc_ticket``. |
| | | Allowed values: ``Started``, ``Promoted``, |
| | | ``Unpromoted``. For details, see |
| | | :ref:`promotable-clone-constraints`. |
+-------------+------------------+--------------------------------------------------------+
| action | value of | .. index:: |
| | ``first-action`` | single: resource_set; attribute, action |
| | in the enclosing | single: attribute; action (resource_set) |
| | ordering | single: action; resource_set attribute |
| | constraint | |
| | | The action that applies to *all members* of the set. |
| | | Meaningful within ``rsc_order``. Allowed values: |
| | | ``start``, ``stop``, ``promote``, ``demote``. |
+-------------+------------------+--------------------------------------------------------+
| score | | .. index:: |
| | | single: resource_set; attribute, score |
| | | single: attribute; score (resource_set) |
| | | single: score; resource_set attribute |
| | | |
| | | *Advanced use only.* Use a specific score for this |
| | | set within the constraint. |
+-------------+------------------+--------------------------------------------------------+
.. _s-resource-sets-ordering:
Ordering Sets of Resources
##########################
A common situation is for an administrator to create a chain of ordered
resources, such as:
.. topic:: A chain of ordered resources
.. code-block:: xml
.. topic:: Visual representation of the four resources' start order for the above constraints
.. image:: images/resource-set.png
:alt: Ordered set
Ordered Set
___________
To simplify this situation, :ref:`s-resource-sets` can be used within ordering
constraints:
.. topic:: A chain of ordered resources expressed as a set
.. code-block:: xml
While the set-based format is not less verbose, it is significantly easier to
get right and maintain.
.. important::
If you use a higher-level tool, pay attention to how it exposes this
functionality. Depending on the tool, creating a set **A B** may be equivalent to
**A then B**, or **B then A**.
Ordering Multiple Sets
______________________
The syntax can be expanded to allow sets of resources to be ordered relative to
each other, where the members of each individual set may be ordered or
unordered (controlled by the ``sequential`` property). In the example below, **A**
and **B** can both start in parallel, as can **C** and **D**, however **C** and
**D** can only start once *both* **A** *and* **B** are active.
.. topic:: Ordered sets of unordered resources
.. code-block:: xml
.. topic:: Visual representation of the start order for two ordered sets of
unordered resources
.. image:: images/two-sets.png
:alt: Two ordered sets
Of course either set -- or both sets -- of resources can also be internally
ordered (by setting ``sequential="true"``) and there is no limit to the number
of sets that can be specified.
.. topic:: Advanced use of set ordering - Three ordered sets, two of which are
internally unordered
.. code-block:: xml
.. topic:: Visual representation of the start order for the three sets defined above
.. image:: images/three-sets.png
:alt: Three ordered sets
.. important::
An ordered set with ``sequential=false`` makes sense only if there is another
set in the constraint. Otherwise, the constraint has no effect.
Resource Set OR Logic
_____________________
The unordered set logic discussed so far has all been "AND" logic. To illustrate
this take the 3 resource set figure in the previous section. Those sets can be
expressed, **(A and B) then (C) then (D) then (E and F)**.
Say for example we want to change the first set, **(A and B)**, to use "OR" logic
so the sets look like this: **(A or B) then (C) then (D) then (E and F)**. This
functionality can be achieved through the use of the ``require-all`` option.
This option defaults to TRUE which is why the "AND" logic is used by default.
Setting ``require-all=false`` means only one resource in the set needs to be
started before continuing on to the next set.
.. topic:: Resource Set "OR" logic: Three ordered sets, where the first set is
internally unordered with "OR" logic
.. code-block:: xml
.. important::
An ordered set with ``require-all=false`` makes sense only in conjunction with
``sequential=false``. Think of it like this: ``sequential=false`` modifies the set
to be an unordered set using "AND" logic by default, and adding
``require-all=false`` flips the unordered set's "AND" logic to "OR" logic.
.. _s-resource-sets-colocation:
Colocating Sets of Resources
############################
Another common situation is for an administrator to create a set of
colocated resources.
The simplest way to do this is to define a resource group (see
:ref:`group-resources`), but that cannot always accurately express the desired
relationships. For example, maybe the resources do not need to be ordered.
Another way would be to define each relationship as an individual constraint,
but that causes a difficult-to-follow constraint explosion as the number of
resources and combinations grow.
.. topic:: Colocation chain as individual constraints, where A is placed first,
then B, then C, then D
.. code-block:: xml
To express complicated relationships with a simplified syntax [#]_,
:ref:`resource sets ` can be used within colocation constraints.
.. topic:: Equivalent colocation chain expressed using **resource_set**
.. code-block:: xml
.. note::
Within a ``resource_set``, the resources are listed in the order they are
*placed*, which is the reverse of the order in which they are *colocated*.
In the above example, resource **A** is placed before resource **B**, which is
the same as saying resource **B** is colocated with resource **A**.
As with individual constraints, a resource that can't be active prevents any
resource that must be colocated with it from being active. In both of the two
previous examples, if **B** is unable to run, then both **C** and by inference **D**
must remain stopped.
.. important::
If you use a higher-level tool, pay attention to how it exposes this
functionality. Depending on the tool, creating a set **A B** may be equivalent to
**A with B**, or **B with A**.
Resource sets can also be used to tell the cluster that entire *sets* of
resources must be colocated relative to each other, while the individual
members within any one set may or may not be colocated relative to each other
(determined by the set's ``sequential`` property).
In the following example, resources **B**, **C**, and **D** will each be colocated
with **A** (which will be placed first). **A** must be able to run in order for any
of the resources to run, but any of **B**, **C**, or **D** may be stopped without
affecting any of the others.
.. topic:: Using colocated sets to specify a shared dependency
.. code-block:: xml
.. note::
Pay close attention to the order in which resources and sets are listed.
While the members of any one sequential set are placed first to last (i.e., the
colocation dependency is last with first), multiple sets are placed last to
first (i.e. the colocation dependency is first with last).
.. important::
A colocated set with ``sequential="false"`` makes sense only if there is
another set in the constraint. Otherwise, the constraint has no effect.
There is no inherent limit to the number and size of the sets used.
The only thing that matters is that in order for any member of one set
in the constraint to be active, all members of sets listed after it must also
be active (and naturally on the same node); and if a set has ``sequential="true"``,
then in order for one member of that set to be active, all members listed
before it must also be active.
If desired, you can restrict the dependency to instances of promotable clone
resources that are in a specific role, using the set's ``role`` property.
.. topic:: Colocation in which the members of the middle set have no
interdependencies, and the last set listed applies only to promoted
instances
.. code-block:: xml
.. topic:: Visual representation of the above example (resources are placed from
left to right)
.. image:: ../shared/images/pcmk-colocated-sets.png
:alt: Colocation chain
.. note::
Unlike ordered sets, colocated sets do not use the ``require-all`` option.
.. [#] While the human brain is sophisticated enough to read the constraint
in any order and choose the correct one depending on the situation,
the cluster is not quite so smart. Yet.
.. [#] which is not the same as saying easy to follow
diff --git a/lib/common/operations.c b/lib/common/operations.c
index db82953ac4..bdd5410a35 100644
--- a/lib/common/operations.c
+++ b/lib/common/operations.c
@@ -1,577 +1,576 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
static regex_t *notify_migrate_re = NULL;
/*!
* \brief Generate an operation key (RESOURCE_ACTION_INTERVAL)
*
* \param[in] rsc_id ID of resource being operated on
* \param[in] op_type Operation name
* \param[in] interval_ms Operation interval
*
* \return Newly allocated memory containing operation key as string
*
* \note This function asserts on errors, so it will never return NULL.
* The caller is responsible for freeing the result with free().
*/
char *
pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
{
CRM_ASSERT(rsc_id != NULL);
CRM_ASSERT(op_type != NULL);
return crm_strdup_printf(PCMK__OP_FMT, rsc_id, op_type, interval_ms);
}
static inline gboolean
convert_interval(const char *s, guint *interval_ms)
{
unsigned long l;
errno = 0;
l = strtoul(s, NULL, 10);
if (errno != 0) {
return FALSE;
}
*interval_ms = (guint) l;
return TRUE;
}
static gboolean
try_fast_match(const char *key, const char *underbar1, const char *underbar2,
char **rsc_id, char **op_type, guint *interval_ms)
{
if (interval_ms) {
if (!convert_interval(underbar2+1, interval_ms)) {
return FALSE;
}
}
if (rsc_id) {
*rsc_id = strndup(key, underbar1-key);
}
if (op_type) {
*op_type = strndup(underbar1+1, underbar2-underbar1-1);
}
return TRUE;
}
static gboolean
try_basic_match(const char *key, char **rsc_id, char **op_type, guint *interval_ms)
{
char *interval_sep = NULL;
char *type_sep = NULL;
// Parse interval at end of string
interval_sep = strrchr(key, '_');
if (interval_sep == NULL) {
return FALSE;
}
if (interval_ms) {
if (!convert_interval(interval_sep+1, interval_ms)) {
return FALSE;
}
}
type_sep = interval_sep-1;
while (1) {
if (*type_sep == '_') {
break;
} else if (type_sep == key) {
if (interval_ms) {
*interval_ms = 0;
}
return FALSE;
}
type_sep--;
}
if (op_type) {
// Add one here to skip the leading underscore we landed on in the
// while loop.
*op_type = strndup(type_sep+1, interval_sep-type_sep-1);
}
// Everything else is the name of the resource.
if (rsc_id) {
*rsc_id = strndup(key, type_sep-key);
}
return TRUE;
}
static gboolean
try_migrate_notify_match(const char *key, char **rsc_id, char **op_type, guint *interval_ms)
{
int rc = 0;
size_t nmatch = 8;
regmatch_t pmatch[nmatch];
if (notify_migrate_re == NULL) {
// cppcheck-suppress memleak
notify_migrate_re = calloc(1, sizeof(regex_t));
rc = regcomp(notify_migrate_re, "^(.*)_(migrate_(from|to)|(pre|post)_notify_([a-z]+|migrate_(from|to)))_([0-9]+)$",
REG_EXTENDED);
CRM_ASSERT(rc == 0);
}
rc = regexec(notify_migrate_re, key, nmatch, pmatch, 0);
if (rc == REG_NOMATCH) {
return FALSE;
}
if (rsc_id) {
*rsc_id = strndup(key+pmatch[1].rm_so, pmatch[1].rm_eo-pmatch[1].rm_so);
}
if (op_type) {
*op_type = strndup(key+pmatch[2].rm_so, pmatch[2].rm_eo-pmatch[2].rm_so);
}
if (interval_ms) {
if (!convert_interval(key+pmatch[7].rm_so, interval_ms)) {
if (rsc_id) {
free(*rsc_id);
*rsc_id = NULL;
}
if (op_type) {
free(*op_type);
*op_type = NULL;
}
return FALSE;
}
}
return TRUE;
}
gboolean
parse_op_key(const char *key, char **rsc_id, char **op_type, guint *interval_ms)
{
char *underbar1 = NULL;
char *underbar2 = NULL;
char *underbar3 = NULL;
// Initialize output variables in case of early return
if (rsc_id) {
*rsc_id = NULL;
}
if (op_type) {
*op_type = NULL;
}
if (interval_ms) {
*interval_ms = 0;
}
CRM_CHECK(key && *key, return FALSE);
underbar1 = strchr(key, '_');
if (!underbar1) {
return FALSE;
}
underbar2 = strchr(underbar1+1, '_');
if (!underbar2) {
return FALSE;
}
underbar3 = strchr(underbar2+1, '_');
if (!underbar3) {
return try_fast_match(key, underbar1, underbar2,
rsc_id, op_type, interval_ms);
} else if (try_migrate_notify_match(key, rsc_id, op_type, interval_ms)) {
return TRUE;
} else {
return try_basic_match(key, rsc_id, op_type, interval_ms);
}
}
char *
pcmk__notify_key(const char *rsc_id, const char *notify_type,
const char *op_type)
{
CRM_CHECK(rsc_id != NULL, return NULL);
CRM_CHECK(op_type != NULL, return NULL);
CRM_CHECK(notify_type != NULL, return NULL);
return crm_strdup_printf("%s_%s_notify_%s_0",
rsc_id, notify_type, op_type);
}
/*!
* \brief Parse a transition magic string into its constituent parts
*
* \param[in] magic Magic string to parse (must be non-NULL)
* \param[out] uuid If non-NULL, where to store copy of parsed UUID
* \param[out] transition_id If non-NULL, where to store parsed transition ID
* \param[out] action_id If non-NULL, where to store parsed action ID
* \param[out] op_status If non-NULL, where to store parsed result status
* \param[out] op_rc If non-NULL, where to store parsed actual rc
* \param[out] target_rc If non-NULL, where to stored parsed target rc
*
* \return TRUE if key was valid, FALSE otherwise
* \note If uuid is supplied and this returns TRUE, the caller is responsible
* for freeing the memory for *uuid using free().
*/
gboolean
decode_transition_magic(const char *magic, char **uuid, int *transition_id, int *action_id,
int *op_status, int *op_rc, int *target_rc)
{
int res = 0;
char *key = NULL;
gboolean result = TRUE;
int local_op_status = -1;
int local_op_rc = -1;
CRM_CHECK(magic != NULL, return FALSE);
#ifdef HAVE_SSCANF_M
res = sscanf(magic, "%d:%d;%ms", &local_op_status, &local_op_rc, &key);
#else
key = calloc(1, strlen(magic) - 3); // magic must have >=4 other characters
CRM_ASSERT(key);
res = sscanf(magic, "%d:%d;%s", &local_op_status, &local_op_rc, key);
#endif
if (res == EOF) {
crm_err("Could not decode transition information '%s': %s",
magic, pcmk_rc_str(errno));
result = FALSE;
} else if (res < 3) {
crm_warn("Transition information '%s' incomplete (%d of 3 expected items)",
magic, res);
result = FALSE;
} else {
if (op_status) {
*op_status = local_op_status;
}
if (op_rc) {
*op_rc = local_op_rc;
}
result = decode_transition_key(key, uuid, transition_id, action_id,
target_rc);
}
free(key);
return result;
}
char *
pcmk__transition_key(int transition_id, int action_id, int target_rc,
const char *node)
{
CRM_CHECK(node != NULL, return NULL);
return crm_strdup_printf("%d:%d:%d:%-*s",
action_id, transition_id, target_rc, 36, node);
}
/*!
* \brief Parse a transition key into its constituent parts
*
* \param[in] key Transition key to parse (must be non-NULL)
* \param[out] uuid If non-NULL, where to store copy of parsed UUID
* \param[out] transition_id If non-NULL, where to store parsed transition ID
* \param[out] action_id If non-NULL, where to store parsed action ID
* \param[out] target_rc If non-NULL, where to stored parsed target rc
*
* \return TRUE if key was valid, FALSE otherwise
* \note If uuid is supplied and this returns TRUE, the caller is responsible
* for freeing the memory for *uuid using free().
*/
gboolean
decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id,
int *target_rc)
{
int local_transition_id = -1;
int local_action_id = -1;
int local_target_rc = -1;
char local_uuid[37] = { '\0' };
// Initialize any supplied output arguments
if (uuid) {
*uuid = NULL;
}
if (transition_id) {
*transition_id = -1;
}
if (action_id) {
*action_id = -1;
}
if (target_rc) {
*target_rc = -1;
}
CRM_CHECK(key != NULL, return FALSE);
if (sscanf(key, "%d:%d:%d:%36s", &local_action_id, &local_transition_id,
&local_target_rc, local_uuid) != 4) {
crm_err("Invalid transition key '%s'", key);
return FALSE;
}
if (strlen(local_uuid) != 36) {
crm_warn("Invalid UUID '%s' in transition key '%s'", local_uuid, key);
}
if (uuid) {
*uuid = strdup(local_uuid);
CRM_ASSERT(*uuid);
}
if (transition_id) {
*transition_id = local_transition_id;
}
if (action_id) {
*action_id = local_action_id;
}
if (target_rc) {
*target_rc = local_target_rc;
}
return TRUE;
}
// Return true if a is an attribute that should be filtered
static bool
should_filter_for_digest(xmlAttrPtr a, void *user_data)
{
if (strncmp((const char *) a->name, CRM_META "_",
sizeof(CRM_META " ") - 1) == 0) {
return true;
}
return pcmk__str_any_of((const char *) a->name,
XML_ATTR_ID,
XML_ATTR_CRM_VERSION,
XML_LRM_ATTR_OP_DIGEST,
XML_LRM_ATTR_TARGET,
XML_LRM_ATTR_TARGET_UUID,
"pcmk_external_ip",
NULL);
}
/*!
* \internal
* \brief Remove XML attributes not needed for operation digest
*
* \param[in,out] param_set XML with operation parameters
*/
void
pcmk__filter_op_for_digest(xmlNode *param_set)
{
char *key = NULL;
char *timeout = NULL;
guint interval_ms = 0;
if (param_set == NULL) {
return;
}
/* Timeout is useful for recurring operation digests, so grab it before
* removing meta-attributes
*/
key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
if (crm_element_value_ms(param_set, key, &interval_ms) != pcmk_ok) {
interval_ms = 0;
}
free(key);
key = NULL;
if (interval_ms != 0) {
key = crm_meta_name(XML_ATTR_TIMEOUT);
timeout = crm_element_value_copy(param_set, key);
}
// Remove all CRM_meta_* attributes and certain other attributes
pcmk__xe_remove_matching_attrs(param_set, should_filter_for_digest, NULL);
// Add timeout back for recurring operation digests
if (timeout != NULL) {
crm_xml_add(param_set, key, timeout);
}
free(timeout);
free(key);
}
int
rsc_op_expected_rc(lrmd_event_data_t * op)
{
int rc = 0;
if (op && op->user_data) {
decode_transition_key(op->user_data, NULL, NULL, NULL, &rc);
}
return rc;
}
gboolean
did_rsc_op_fail(lrmd_event_data_t * op, int target_rc)
{
switch (op->op_status) {
case PCMK_EXEC_CANCELLED:
case PCMK_EXEC_PENDING:
return FALSE;
case PCMK_EXEC_NOT_SUPPORTED:
case PCMK_EXEC_TIMEOUT:
case PCMK_EXEC_ERROR:
case PCMK_EXEC_NOT_CONNECTED:
case PCMK_EXEC_NO_FENCE_DEVICE:
case PCMK_EXEC_NO_SECRETS:
case PCMK_EXEC_INVALID:
return TRUE;
default:
if (target_rc != op->rc) {
return TRUE;
}
}
return FALSE;
}
/*!
* \brief Create a CIB XML element for an operation
*
* \param[in,out] parent If not NULL, make new XML node a child of this
* \param[in] prefix Generate an ID using this prefix
* \param[in] task Operation task to set
* \param[in] interval_spec Operation interval to set
* \param[in] timeout If not NULL, operation timeout to set
*
* \return New XML object on success, NULL otherwise
*/
xmlNode *
crm_create_op_xml(xmlNode *parent, const char *prefix, const char *task,
const char *interval_spec, const char *timeout)
{
xmlNode *xml_op;
CRM_CHECK(prefix && task && interval_spec, return NULL);
xml_op = create_xml_node(parent, XML_ATTR_OP);
crm_xml_set_id(xml_op, "%s-%s-%s", prefix, task, interval_spec);
crm_xml_add(xml_op, XML_LRM_ATTR_INTERVAL, interval_spec);
crm_xml_add(xml_op, "name", task);
if (timeout) {
crm_xml_add(xml_op, XML_ATTR_TIMEOUT, timeout);
}
return xml_op;
}
/*!
* \brief Check whether an operation requires resource agent meta-data
*
* \param[in] rsc_class Resource agent class (or NULL to skip class check)
* \param[in] op Operation action (or NULL to skip op check)
*
- * \return TRUE if operation needs meta-data, FALSE otherwise
+ * \return true if operation needs meta-data, false otherwise
* \note At least one of rsc_class and op must be specified.
*/
bool
crm_op_needs_metadata(const char *rsc_class, const char *op)
{
- /* Agent meta-data is used to determine whether an agent reload is possible,
- * and to evaluate versioned parameters -- so if this op is not relevant to
- * those features, we don't need the meta-data.
+ /* Agent metadata is used to determine whether an agent reload is possible,
+ * so if this op is not relevant to that feature, we don't need metadata.
*/
CRM_CHECK((rsc_class != NULL) || (op != NULL), return false);
if ((rsc_class != NULL)
&& !pcmk_is_set(pcmk_get_ra_caps(rsc_class), pcmk_ra_cap_params)) {
- /* Meta-data is only needed for resource classes that use parameters */
+ // Metadata is needed only for resource classes that use parameters
return false;
}
if (op == NULL) {
return true;
}
- /* Meta-data is only needed for these actions */
+ // Metadata is needed only for these actions
return pcmk__str_any_of(op, CRMD_ACTION_START, CRMD_ACTION_STATUS,
CRMD_ACTION_PROMOTE, CRMD_ACTION_DEMOTE,
CRMD_ACTION_RELOAD, CRMD_ACTION_RELOAD_AGENT,
CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED,
CRMD_ACTION_NOTIFY, NULL);
}
/*!
* \internal
* \brief Check whether an action name is for a fencing action
*
* \param[in] action Action name to check
*
* \return true if \p action is "off", "reboot", or "poweroff", otherwise false
*/
bool
pcmk__is_fencing_action(const char *action)
{
return pcmk__str_any_of(action, "off", "reboot", "poweroff", NULL);
}
bool
pcmk_is_probe(const char *task, guint interval)
{
if (task == NULL) {
return false;
}
return (interval == 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_none);
}
bool
pcmk_xe_is_probe(xmlNode *xml_op)
{
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS);
int interval_ms;
pcmk__scan_min_int(interval_ms_s, &interval_ms, 0);
return pcmk_is_probe(task, interval_ms);
}
bool
pcmk_xe_mask_probe_failure(xmlNode *xml_op)
{
int status = PCMK_EXEC_UNKNOWN;
int rc = PCMK_OCF_OK;
if (!pcmk_xe_is_probe(xml_op)) {
return false;
}
crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
return rc == PCMK_OCF_NOT_INSTALLED || rc == PCMK_OCF_INVALID_PARAM ||
status == PCMK_EXEC_NOT_INSTALLED;
}
diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h
index caafe19130..38ce24513b 100644
--- a/lib/pacemaker/libpacemaker_private.h
+++ b/lib/pacemaker/libpacemaker_private.h
@@ -1,845 +1,823 @@
/*
* Copyright 2021-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__LIBPACEMAKER_PRIVATE__H
# define PCMK__LIBPACEMAKER_PRIVATE__H
/* This header is for the sole use of libpacemaker, so that functions can be
* declared with G_GNUC_INTERNAL for efficiency.
*/
#include // pe_action_t, pe_node_t, pe_working_set_t
-// Flags to modify the behavior of the add_colocated_node_scores() method
+// Flags to modify the behavior of pcmk__add_colocated_node_scores()
enum pcmk__coloc_select {
// With no other flags, apply all "with this" colocations
pcmk__coloc_select_default = 0,
// Apply "this with" colocations instead of "with this" colocations
pcmk__coloc_select_this_with = (1 << 0),
// Apply only colocations with non-negative scores
pcmk__coloc_select_nonnegative = (1 << 1),
// Apply only colocations with at least one matching node
pcmk__coloc_select_active = (1 << 2),
};
// Flags the update_ordered_actions() method can return
enum pcmk__updated {
pcmk__updated_none = 0, // Nothing changed
pcmk__updated_first = (1 << 0), // First action was updated
pcmk__updated_then = (1 << 1), // Then action was updated
};
#define pcmk__set_updated_flags(au_flags, action, flags_to_set) do { \
au_flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Action update", \
(action)->uuid, au_flags, \
(flags_to_set), #flags_to_set); \
} while (0)
#define pcmk__clear_updated_flags(au_flags, action, flags_to_clear) do { \
au_flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Action update", \
(action)->uuid, au_flags, \
(flags_to_clear), #flags_to_clear); \
} while (0)
// Resource allocation methods
struct resource_alloc_functions_s {
/*!
* \internal
* \brief Assign a resource to a node
*
* \param[in,out] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*/
pe_node_t *(*assign)(pe_resource_t *rsc, const pe_node_t *prefer);
/*!
* \internal
* \brief Create all actions needed for a given resource
*
* \param[in,out] rsc Resource to create actions for
*/
void (*create_actions)(pe_resource_t *rsc);
/*!
* \internal
* \brief Schedule any probes needed for a resource on a node
*
* \param[in] rsc Resource to create probe for
* \param[in] node Node to create probe on
*
* \return true if any probe was created, otherwise false
*/
bool (*create_probe)(pe_resource_t *rsc, pe_node_t *node);
/*!
* \internal
* \brief Create implicit constraints needed for a resource
*
* \param[in,out] rsc Resource to create implicit constraints for
*/
void (*internal_constraints)(pe_resource_t *rsc);
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node weights (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void (*apply_coloc_score) (pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent);
- /*!
- * \internal
- * \brief Update nodes with scores of colocated resources' nodes
- *
- * Given a table of nodes and a resource, update the nodes' scores with the
- * scores of the best nodes matching the attribute used for each of the
- * resource's relevant colocations.
- *
- * \param[in,out] rsc Resource to check colocations for
- * \param[in] log_id Resource ID to use in logs (if NULL, use rsc ID)
- * \param[in,out] nodes Nodes to update
- * \param[in] attr Colocation attribute (NULL to use default)
- * \param[in] factor Incorporate scores multiplied by this factor
- * \param[in] flags Bitmask of enum pcmk__coloc_select values
- *
- * \note The caller remains responsible for freeing \p *nodes.
- */
- void (*add_colocated_node_scores)(pe_resource_t *rsc, const char *log_id,
- GHashTable **nodes, const char *attr,
- float factor,
- enum pcmk__coloc_select flags);
-
/*!
* \internal
* \brief Create list of all resources in colocations with a given resource
*
* Given a resource, create a list of all resources involved in mandatory
* colocations with it, whether directly or indirectly via chained colocations.
*
* \param[in] rsc Resource to add to colocated list
* \param[in] orig_rsc Resource originally requested
* \param[in] colocated_rscs Existing list
*
* \return List of given resource and all resources involved in colocations
*
* \note This function is recursive; top-level callers should pass NULL as
* \p colocated_rscs and \p orig_rsc, and the desired resource as
* \p rsc. The recursive calls will use other values.
*/
GList *(*colocated_resources)(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs);
/*!
* \internal
* \brief Apply a location constraint to a resource's allowed node scores
*
* \param[in,out] rsc Resource to apply constraint to
* \param[in,out] location Location constraint to apply
*/
void (*apply_location)(pe_resource_t *rsc, pe__location_t *location);
/*!
* \internal
* \brief Return action flags for a given resource action
*
* \param[in,out] action Action to get flags for
* \param[in] node If not NULL, limit effects to this node
*
* \return Flags appropriate to \p action on \p node
* \note For primitives, this will be the same as action->flags regardless
* of node. For collective resources, the flags can differ due to
* multiple instances possibly being involved.
*/
enum pe_action_flags (*action_flags)(pe_action_t *action,
const pe_node_t *node);
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions'
* flags (and runnable_before members if appropriate) as appropriate for the
* ordering. In some cases, the ordering could be disabled as well.
*
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this
* node (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates
* (may include pe_action_optional to affect only
* mandatory actions, and pe_action_runnable to
* affect only runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t (*update_ordered_actions)(pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set);
void (*output_actions)(pe_resource_t *rsc);
/*!
* \internal
* \brief Add a resource's actions to the transition graph
*
* \param[in] rsc Resource whose actions should be added
*/
void (*add_actions_to_graph)(pe_resource_t *rsc);
/*!
* \internal
* \brief Add meta-attributes relevant to transition graph actions to XML
*
* If a given resource supports variant-specific meta-attributes that are
* needed for transition graph actions, add them to a given XML element.
*
* \param[in] rsc Resource whose meta-attributes should be added
* \param[in,out] xml Transition graph action attributes XML to add to
*/
void (*add_graph_meta)(pe_resource_t *rsc, xmlNode *xml);
/*!
* \internal
* \brief Add a resource's utilization to a table of utilization values
*
* This function is used when summing the utilization of a resource and all
* resources colocated with it, to determine whether a node has sufficient
* capacity. Given a resource and a table of utilization values, it will add
* the resource's utilization to the existing values, if the resource has
* not yet been allocated to a node.
*
* \param[in] rsc Resource with utilization to add
* \param[in] orig_rsc Resource being allocated (for logging only)
* \param[in] all_rscs List of all resources that will be summed
* \param[in,out] utilization Table of utilization values to add to
*/
void (*add_utilization)(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization);
/*!
* \internal
* \brief Apply a shutdown lock for a resource, if appropriate
*
* \param[in] rsc Resource to check for shutdown lock
*/
void (*shutdown_lock)(pe_resource_t *rsc);
};
// Actions (pcmk_sched_actions.c)
G_GNUC_INTERNAL
void pcmk__update_action_for_orderings(pe_action_t *action,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
uint32_t pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__log_action(const char *pre_text, pe_action_t *action, bool details);
G_GNUC_INTERNAL
pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name,
guint interval_ms, const pe_node_t *node);
G_GNUC_INTERNAL
pe_action_t *pcmk__new_shutdown_action(pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__action_locks_rsc_to_node(const pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__deduplicate_action_inputs(pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__output_actions(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node,
xmlNode *xml_op);
G_GNUC_INTERNAL
void pcmk__handle_rsc_config_changes(pe_working_set_t *data_set);
// Recurring actions (pcmk_sched_recurring.c)
G_GNUC_INTERNAL
void pcmk__create_recurring_actions(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id,
const char *task, guint interval_ms,
const pe_node_t *node, const char *reason);
G_GNUC_INTERNAL
void pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task,
guint interval_ms, pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__action_is_recurring(const pe_action_t *action);
// Producing transition graphs (pcmk_graph_producer.c)
G_GNUC_INTERNAL
bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action,
pe_action_wrapper_t *input);
G_GNUC_INTERNAL
void pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__create_graph(pe_working_set_t *data_set);
// Fencing (pcmk_sched_fencing.c)
G_GNUC_INTERNAL
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *action, enum pe_ordering order);
G_GNUC_INTERNAL
void pcmk__fence_guest(pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__node_unfenced(pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data);
// Injected scheduler inputs (pcmk_sched_injections.c)
void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib,
pcmk_injections_t *injections);
// Constraints of any type (pcmk_sched_constraints.c)
G_GNUC_INTERNAL
pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id);
G_GNUC_INTERNAL
xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id,
pe_resource_t **rsc, pe_tag_t **tag);
G_GNUC_INTERNAL
bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr,
bool convert_rsc, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__create_internal_constraints(pe_working_set_t *data_set);
// Location constraints
G_GNUC_INTERNAL
void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc,
int node_weight, const char *discover_mode,
pe_node_t *foo_node,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_locations(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_location(pe_resource_t *rsc, pe__location_t *constraint);
// Colocation constraints (pcmk_sched_colocation.c)
enum pcmk__coloc_affects {
pcmk__coloc_affects_nothing = 0,
pcmk__coloc_affects_location,
pcmk__coloc_affects_role,
};
G_GNUC_INTERNAL
enum pcmk__coloc_affects pcmk__colocation_affects(const pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool preview);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_weights(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation);
G_GNUC_INTERNAL
void pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
GHashTable **nodes, const char *attr,
float factor, uint32_t flags);
G_GNUC_INTERNAL
void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set);
+G_GNUC_INTERNAL
+void pcmk__add_this_with(pe_resource_t *rsc, pcmk__colocation_t *colocation);
+
+G_GNUC_INTERNAL
+void pcmk__add_with_this(pe_resource_t *rsc, pcmk__colocation_t *colocation);
+
G_GNUC_INTERNAL
void pcmk__new_colocation(const char *id, const char *node_attr, int score,
pe_resource_t *dependent, pe_resource_t *primary,
const char *dependent_role, const char *primary_role,
bool influence, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__block_colocated_starts(pe_action_t *action,
pe_working_set_t *data_set);
/*!
* \internal
* \brief Check whether colocation's dependent preferences should be considered
*
* \param[in] colocation Colocation constraint
* \param[in] rsc Primary instance (normally this will be
* colocation->primary, which NULL will be treated as,
* but for clones or bundles with multiple instances
* this can be a particular instance)
*
* \return true if colocation influence should be effective, otherwise false
*/
static inline bool
pcmk__colocation_has_influence(const pcmk__colocation_t *colocation,
const pe_resource_t *rsc)
{
if (rsc == NULL) {
rsc = colocation->primary;
}
/* A bundle replica colocates its remote connection with its container,
* using a finite score so that the container can run on Pacemaker Remote
* nodes.
*
* Moving a connection is lightweight and does not interrupt the service,
* while moving a container is heavyweight and does interrupt the service,
* so don't move a clean, active container based solely on the preferences
* of its connection.
*
* This also avoids problematic scenarios where two containers want to
* perpetually swap places.
*/
if (pcmk_is_set(colocation->dependent->flags, pe_rsc_allow_remote_remotes)
&& !pcmk_is_set(rsc->flags, pe_rsc_failed)
&& pcmk__list_of_1(rsc->running_on)) {
return false;
}
/* The dependent in a colocation influences the primary's location
* if the influence option is true or the primary is not yet active.
*/
return colocation->influence || (rsc->running_on == NULL);
}
// Ordering constraints (pcmk_sched_ordering.c)
G_GNUC_INTERNAL
void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task,
pe_action_t *first_action, pe_resource_t *then_rsc,
char *then_task, pe_action_t *then_action,
uint32_t flags, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__disable_invalid_orderings(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_stops_before_shutdown(pe_node_t *node,
pe_action_t *shutdown_op);
G_GNUC_INTERNAL
void pcmk__apply_orderings(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_after_each(pe_action_t *after, GList *list);
/*!
* \internal
* \brief Create a new ordering between two resource actions
*
* \param[in] first_rsc Resource for 'first' action
* \param[in] then_rsc Resource for 'then' action
* \param[in] first_task Action key for 'first' action
* \param[in] then_task Action key for 'then' action
* \param[in] flags Bitmask of enum pe_ordering flags
* \param[in] data_set Cluster working set to add ordering to
*/
#define pcmk__order_resource_actions(first_rsc, first_task, \
then_rsc, then_task, flags) \
pcmk__new_ordering((first_rsc), \
pcmk__op_key((first_rsc)->id, (first_task), 0), \
NULL, \
(then_rsc), \
pcmk__op_key((then_rsc)->id, (then_task), 0), \
NULL, (flags), (first_rsc)->cluster)
#define pcmk__order_starts(rsc1, rsc2, flags) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \
(rsc2), CRMD_ACTION_START, (flags))
#define pcmk__order_stops(rsc1, rsc2, flags) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \
(rsc2), CRMD_ACTION_STOP, (flags))
// Ticket constraints (pcmk_sched_tickets.c)
G_GNUC_INTERNAL
void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set);
// Promotable clone resources (pcmk_sched_promotable.c)
G_GNUC_INTERNAL
void pcmk__add_promotion_scores(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__require_promotion_tickets(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__set_instance_roles(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__create_promotable_actions(pe_resource_t *clone);
G_GNUC_INTERNAL
void pcmk__promotable_restart_ordering(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__order_promotable_instances(pe_resource_t *clone);
G_GNUC_INTERNAL
void pcmk__update_dependent_with_promotable(const pe_resource_t *primary,
pe_resource_t *dependent,
const pcmk__colocation_t *colocation);
G_GNUC_INTERNAL
void pcmk__update_promotable_dependent_priority(const pe_resource_t *primary,
pe_resource_t *dependent,
const pcmk__colocation_t *colocation);
// Pacemaker Remote nodes (pcmk_sched_remote.c)
G_GNUC_INTERNAL
bool pcmk__is_failed_remote_node(pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_remote_connection_actions(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node);
G_GNUC_INTERNAL
pe_node_t *pcmk__connection_host_for_action(pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params);
G_GNUC_INTERNAL
void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action);
// Primitives (pcmk_sched_primitive.c)
G_GNUC_INTERNAL
pe_node_t *pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer);
G_GNUC_INTERNAL
void pcmk__primitive_create_actions(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__primitive_internal_constraints(pe_resource_t *rsc);
G_GNUC_INTERNAL
enum pe_action_flags pcmk__primitive_action_flags(pe_action_t *action,
const pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node,
bool optional);
G_GNUC_INTERNAL
void pcmk__primitive_add_graph_meta(pe_resource_t *rsc, xmlNode *xml);
G_GNUC_INTERNAL
void pcmk__primitive_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
G_GNUC_INTERNAL
void pcmk__primitive_shutdown_lock(pe_resource_t *rsc);
// Groups (pcmk_sched_group.c)
G_GNUC_INTERNAL
pe_node_t *pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer);
G_GNUC_INTERNAL
void pcmk__group_create_actions(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__group_internal_constraints(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__group_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__group_apply_location(pe_resource_t *rsc, pe__location_t *location);
G_GNUC_INTERNAL
enum pe_action_flags pcmk__group_action_flags(pe_action_t *action,
const pe_node_t *node);
G_GNUC_INTERNAL
uint32_t pcmk__group_update_ordered_actions(pe_action_t *first,
pe_action_t *then,
const pe_node_t *node,
uint32_t flags, uint32_t filter,
uint32_t type,
pe_working_set_t *data_set);
-G_GNUC_INTERNAL
-void pcmk__group_add_colocated_node_scores(pe_resource_t *rsc,
- const char *log_id,
- GHashTable **nodes, const char *attr,
- float factor, uint32_t flags);
-
G_GNUC_INTERNAL
GList *pcmk__group_colocated_resources(pe_resource_t *rsc,
pe_resource_t *orig_rsc,
GList *colocated_rscs);
G_GNUC_INTERNAL
void pcmk__group_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization);
G_GNUC_INTERNAL
void pcmk__group_shutdown_lock(pe_resource_t *rsc);
// Clones (pcmk_sched_clone.c)
G_GNUC_INTERNAL
void pcmk__clone_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent);
// Bundles (pcmk_sched_bundle.c)
G_GNUC_INTERNAL
void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__output_bundle_actions(pe_resource_t *rsc);
// Injections (pcmk_injections.c)
G_GNUC_INTERNAL
xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node,
bool up);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node,
const char *resource,
const char *lrm_name,
const char *rclass,
const char *rtype,
const char *rprovider);
G_GNUC_INTERNAL
void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node,
const char *resource, const char *task,
guint interval_ms, int rc);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_action_result(xmlNode *cib_resource,
lrmd_event_data_t *op, int target_rc);
// Nodes (pcmk_sched_nodes.c)
G_GNUC_INTERNAL
bool pcmk__node_available(const pe_node_t *node, bool consider_score,
bool consider_guest);
G_GNUC_INTERNAL
bool pcmk__any_node_available(GHashTable *nodes);
G_GNUC_INTERNAL
GHashTable *pcmk__copy_node_table(GHashTable *nodes);
G_GNUC_INTERNAL
GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node);
G_GNUC_INTERNAL
void pcmk__apply_node_health(pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe_node_t *pcmk__top_allowed_node(const pe_resource_t *rsc,
const pe_node_t *node);
// Functions applying to more than one variant (pcmk_sched_resource.c)
G_GNUC_INTERNAL
void pcmk__set_allocation_methods(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *rsc_entry, bool active_on_node);
G_GNUC_INTERNAL
GList *pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set);
G_GNUC_INTERNAL
GList *pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs);
G_GNUC_INTERNAL
void pcmk__noop_add_graph_meta(pe_resource_t *rsc, xmlNode *xml);
G_GNUC_INTERNAL
void pcmk__output_resource_actions(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen,
bool force);
G_GNUC_INTERNAL
bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force);
G_GNUC_INTERNAL
void pcmk__unassign_resource(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
pe_resource_t **failed);
G_GNUC_INTERNAL
void pcmk__sort_resources(pe_working_set_t *data_set);
G_GNUC_INTERNAL
gint pcmk__cmp_instance(gconstpointer a, gconstpointer b);
G_GNUC_INTERNAL
gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b);
// Functions related to probes (pcmk_sched_probes.c)
G_GNUC_INTERNAL
bool pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_probes(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__probe_resource_list(GList *rscs, pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__schedule_probes(pe_working_set_t *data_set);
// Functions related to live migration (pcmk_sched_migration.c)
void pcmk__create_migration_actions(pe_resource_t *rsc,
const pe_node_t *current);
void pcmk__abort_dangling_migration(void *data, void *user_data);
bool pcmk__rsc_can_migrate(const pe_resource_t *rsc, const pe_node_t *current);
void pcmk__order_migration_equivalents(pe__ordering_t *order);
// Functions related to node utilization (pcmk_sched_utilization.c)
G_GNUC_INTERNAL
int pcmk__compare_node_capacities(const pe_node_t *node1,
const pe_node_t *node2);
G_GNUC_INTERNAL
void pcmk__consume_node_capacity(GHashTable *current_utilization,
pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__release_node_capacity(GHashTable *current_utilization,
const pe_resource_t *rsc);
G_GNUC_INTERNAL
const pe_node_t *pcmk__ban_insufficient_capacity(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__create_utilization_constraints(pe_resource_t *rsc,
GList *allowed_nodes);
G_GNUC_INTERNAL
void pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set);
#endif // PCMK__LIBPACEMAKER_PRIVATE__H
diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c
index 9a44b33d34..6874b04b78 100644
--- a/lib/pacemaker/pcmk_sched_clone.c
+++ b/lib/pacemaker/pcmk_sched_clone.c
@@ -1,1195 +1,1195 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include "libpacemaker_private.h"
#define VARIANT_CLONE 1
#include
static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all);
static pe_node_t *
can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit)
{
pe_node_t *local_node = NULL;
if (node == NULL && rsc->allowed_nodes) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) {
can_run_instance(rsc, local_node, limit);
}
return NULL;
}
if (!node) {
/* make clang analyzer happy */
goto bail;
} else if (!pcmk__node_available(node, false, false)) {
goto bail;
} else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
goto bail;
}
local_node = pcmk__top_allowed_node(rsc, node);
if (local_node == NULL) {
crm_warn("%s cannot run on %s: node not allowed",
rsc->id, pe__node_name(node));
goto bail;
} else if (local_node->weight < 0) {
common_update_score(rsc, node->details->id, local_node->weight);
pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.",
rsc->id, pe__node_name(node));
} else if (local_node->count < limit) {
pe_rsc_trace(rsc, "%s can run on %s (already running %d)",
rsc->id, pe__node_name(node), local_node->count);
return local_node;
} else {
pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)",
rsc->id, pe__node_name(node), local_node->count, limit);
}
bail:
if (node) {
common_update_score(rsc, node->details->id, -INFINITY);
}
return NULL;
}
static pe_node_t *
allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc,
int limit, pe_working_set_t *data_set)
{
pe_node_t *chosen = NULL;
GHashTable *backup = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)",
rsc->id, (prefer? prefer->details->uname: "none"),
(all_coloc? "all" : "some"));
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->fns->location(rsc, NULL, FALSE);
} else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
/* Only include positive colocation preferences of dependent resources
* if not every node will get a copy of the clone
*/
append_parent_colocation(rsc->parent, rsc, all_coloc);
if (prefer) {
pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (local_prefer == NULL || local_prefer->weight < 0) {
pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id,
pe__node_name(prefer));
return NULL;
}
}
can_run_instance(rsc, NULL, limit);
backup = pcmk__copy_node_table(rsc->allowed_nodes);
pe_rsc_trace(rsc, "Allocating instance %s", rsc->id);
chosen = rsc->cmds->assign(rsc, prefer);
if (chosen && prefer && (chosen->details != prefer->details)) {
crm_info("Not pre-allocating %s to %s because %s is better",
rsc->id, pe__node_name(prefer), pe__node_name(chosen));
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = backup;
pcmk__unassign_resource(rsc);
chosen = NULL;
backup = NULL;
}
if (chosen) {
pe_node_t *local_node = pcmk__top_allowed_node(rsc, chosen);
if (local_node) {
local_node->count++;
} else if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
/* what to do? we can't enforce per-node limits in this case */
pcmk__config_err("%s not found in %s (list of %d)",
chosen->details->id, rsc->parent->id,
g_hash_table_size(rsc->parent->allowed_nodes));
}
}
if(backup) {
g_hash_table_destroy(backup);
}
return chosen;
}
static void
append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all)
{
GList *gIter = NULL;
gIter = rsc->rsc_cons;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data;
if (all || cons->score < 0 || cons->score == INFINITY) {
- child->rsc_cons = g_list_prepend(child->rsc_cons, cons);
+ pcmk__add_this_with(child, cons);
}
}
gIter = rsc->rsc_cons_lhs;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(cons, child)) {
continue;
}
if (all || cons->score < 0) {
- child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons);
+ pcmk__add_with_this(child, cons);
}
}
}
void
distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set);
void
distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set)
{
int loop_max = 0;
int allocated = 0;
int available_nodes = 0;
bool all_coloc = false;
/* count now tracks the number of clones currently allocated */
for(GList *nIter = nodes; nIter != NULL; nIter = nIter->next) {
pe_node_t *node = nIter->data;
node->count = 0;
if (pcmk__node_available(node, false, false)) {
available_nodes++;
}
}
all_coloc = (max < available_nodes) ? true : false;
if(available_nodes) {
loop_max = max / available_nodes;
}
if (loop_max < 1) {
loop_max = 1;
}
pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)",
max, rsc->id, available_nodes, per_host_max, loop_max);
/* Pre-allocate as many instances as we can to their current location */
for (GList *gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe_node_t *child_node = NULL;
pe_node_t *local_node = NULL;
if ((child->running_on == NULL)
|| !pcmk_is_set(child->flags, pe_rsc_provisional)
|| pcmk_is_set(child->flags, pe_rsc_failed)) {
continue;
}
child_node = pe__current_node(child);
local_node = pcmk__top_allowed_node(child, child_node);
pe_rsc_trace(rsc,
"Checking pre-allocation of %s to %s (%d remaining of %d)",
child->id, pe__node_name(child_node), max - allocated,
max);
if (!pcmk__node_available(child_node, true, false)) {
pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s",
pe__node_name(child_node), child->id);
continue;
}
if ((local_node != NULL) && (local_node->count >= loop_max)) {
pe_rsc_trace(rsc,
"Not pre-allocating because %s already allocated "
"optimal instances", pe__node_name(child_node));
continue;
}
if (allocate_instance(child, child_node, all_coloc, per_host_max,
data_set)) {
pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id,
pe__node_name(child_node));
allocated++;
}
}
pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max);
for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
if (child->running_on != NULL) {
pe_node_t *child_node = pe__current_node(child);
pe_node_t *local_node = pcmk__top_allowed_node(child, child_node);
if (local_node == NULL) {
crm_err("%s is running on %s which isn't allowed",
child->id, pe__node_name(child_node));
}
}
if (!pcmk_is_set(child->flags, pe_rsc_provisional)) {
} else if (allocated >= max) {
pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max);
resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set);
} else {
if (allocate_instance(child, NULL, all_coloc, per_host_max,
data_set)) {
allocated++;
}
}
}
pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d",
allocated, rsc->id, max);
}
/*!
* \internal
* \brief Assign a clone resource to a node
*
* \param[in,out] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*/
pe_node_t *
pcmk__clone_allocate(pe_resource_t *rsc, const pe_node_t *prefer)
{
GList *nodes = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return NULL;
} else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__add_promotion_scores(rsc);
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
/* This information is used by pcmk__cmp_instance() when deciding the order
* in which to assign clone instances to nodes.
*/
for (GList *gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_rsc_trace(rsc, "%s: Allocating %s first",
rsc->id, constraint->primary->id);
constraint->primary->cmds->assign(constraint->primary, prefer);
}
for (GList *gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (pcmk__colocation_has_influence(constraint, NULL)) {
pe_resource_t *dependent = constraint->dependent;
const char *attr = constraint->node_attribute;
const float factor = constraint->score / (float) INFINITY;
const uint32_t flags = pcmk__coloc_select_active
|pcmk__coloc_select_nonnegative;
- dependent->cmds->add_colocated_node_scores(dependent, rsc->id,
- &rsc->allowed_nodes,
- attr, factor, flags);
+ pcmk__add_colocated_node_scores(dependent, rsc->id,
+ &rsc->allowed_nodes, attr, factor,
+ flags);
}
}
pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
nodes = g_hash_table_get_values(rsc->allowed_nodes);
nodes = pcmk__sort_nodes(nodes, NULL);
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance);
distribute_children(rsc, rsc->children, nodes, clone_data->clone_max,
clone_data->clone_node_max, rsc->cluster);
g_list_free(nodes);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__set_instance_roles(rsc);
}
pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating);
pe_rsc_trace(rsc, "Done allocating %s", rsc->id);
return NULL;
}
static void
clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting,
gboolean * active)
{
GList *gIter = NULL;
if (rsc->children) {
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
clone_update_pseudo_status(child, stopping, starting, active);
}
return;
}
CRM_ASSERT(active != NULL);
CRM_ASSERT(starting != NULL);
CRM_ASSERT(stopping != NULL);
if (rsc->running_on) {
*active = TRUE;
}
gIter = rsc->actions;
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (*starting && *stopping) {
return;
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid);
continue;
} else if (!pcmk_any_flags_set(action->flags,
pe_action_pseudo|pe_action_runnable)) {
pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid);
continue;
} else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) {
pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid);
*stopping = TRUE;
} else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) {
if (!pcmk_is_set(action->flags, pe_action_runnable)) {
pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d",
action->uuid,
pcmk_is_set(action->flags, pe_action_runnable),
pcmk_is_set(action->flags, pe_action_pseudo));
} else {
pe_rsc_trace(rsc, "Starting due to: %s", action->uuid);
pe_rsc_trace(rsc, "%s run=%d, pseudo=%d",
action->uuid,
pcmk_is_set(action->flags, pe_action_runnable),
pcmk_is_set(action->flags, pe_action_pseudo));
*starting = TRUE;
}
}
}
}
static pe_action_t *
find_rsc_action(pe_resource_t *rsc, const char *task)
{
pe_action_t *match = NULL;
GList *actions = pe__resource_actions(rsc, NULL, task, FALSE);
for (GList *item = actions; item != NULL; item = item->next) {
pe_action_t *op = (pe_action_t *) item->data;
if (!pcmk_is_set(op->flags, pe_action_optional)) {
if (match != NULL) {
// More than one match, don't return any
match = NULL;
break;
}
match = op;
}
}
g_list_free(actions);
return match;
}
static void
child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set)
{
pe_action_t *stop = NULL;
pe_action_t *start = NULL;
pe_action_t *last_stop = NULL;
pe_action_t *last_start = NULL;
GList *gIter = NULL;
if (!pe__clone_is_ordered(rsc)) {
return;
}
/* we have to maintain a consistent sorted child list when building order constraints */
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
stop = find_rsc_action(child, RSC_STOP);
if (stop) {
if (last_stop) {
/* child/child relative stop */
order_actions(stop, last_stop, pe_order_optional);
}
last_stop = stop;
}
start = find_rsc_action(child, RSC_START);
if (start) {
if (last_start) {
/* child/child relative start */
order_actions(last_start, start, pe_order_optional);
}
last_start = start;
}
}
}
void
clone_create_actions(pe_resource_t *rsc)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_rsc_debug(rsc, "Creating actions for clone %s", rsc->id);
clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify,
&clone_data->stop_notify);
child_ordering_constraints(rsc, rsc->cluster);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__create_promotable_actions(rsc);
}
}
void
clone_create_pseudo_actions(pe_resource_t *rsc, GList *children,
notify_data_t **start_notify,
notify_data_t **stop_notify)
{
gboolean child_active = FALSE;
gboolean child_starting = FALSE;
gboolean child_stopping = FALSE;
gboolean allow_dependent_migrations = TRUE;
pe_action_t *stop = NULL;
pe_action_t *stopped = NULL;
pe_action_t *start = NULL;
pe_action_t *started = NULL;
pe_rsc_trace(rsc, "Creating actions for %s", rsc->id);
for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
gboolean starting = FALSE;
gboolean stopping = FALSE;
child_rsc->cmds->create_actions(child_rsc);
clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active);
if (stopping && starting) {
allow_dependent_migrations = FALSE;
}
child_stopping |= stopping;
child_starting |= starting;
}
/* start */
start = pe__new_rsc_pseudo_action(rsc, RSC_START, !child_starting, true);
started = pe__new_rsc_pseudo_action(rsc, RSC_STARTED, !child_starting,
false);
started->priority = INFINITY;
if (child_active || child_starting) {
pe__set_action_flags(started, pe_action_runnable);
}
if (start_notify != NULL && *start_notify == NULL) {
*start_notify = pe__clone_notif_pseudo_ops(rsc, RSC_START, start,
started);
}
/* stop */
stop = pe__new_rsc_pseudo_action(rsc, RSC_STOP, !child_stopping, true);
stopped = pe__new_rsc_pseudo_action(rsc, RSC_STOPPED, !child_stopping,
true);
stopped->priority = INFINITY;
if (allow_dependent_migrations) {
pe__set_action_flags(stop, pe_action_migrate_runnable);
}
if (stop_notify != NULL && *stop_notify == NULL) {
*stop_notify = pe__clone_notif_pseudo_ops(rsc, RSC_STOP, stop, stopped);
if (start_notify && *start_notify && *stop_notify) {
order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional);
}
}
}
void
clone_internal_constraints(pe_resource_t *rsc)
{
pe_resource_t *last_rsc = NULL;
GList *gIter;
bool ordered = pe__clone_is_ordered(rsc);
pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id);
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
pe_order_optional);
pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED,
pe_order_runnable_left);
pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED,
pe_order_runnable_left);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP,
pe_order_optional);
pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE,
pe_order_runnable_left);
}
if (ordered) {
/* we have to maintain a consistent sorted child list when building order constraints */
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
}
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->internal_constraints(child_rsc);
pcmk__order_starts(rsc, child_rsc,
pe_order_runnable_left|pe_order_implies_first_printed);
pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED,
pe_order_implies_then_printed);
if (ordered && (last_rsc != NULL)) {
pcmk__order_starts(last_rsc, child_rsc, pe_order_optional);
}
pcmk__order_stops(rsc, child_rsc, pe_order_implies_first_printed);
pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED,
pe_order_implies_then_printed);
if (ordered && (last_rsc != NULL)) {
pcmk__order_stops(child_rsc, last_rsc, pe_order_optional);
}
last_rsc = child_rsc;
}
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__order_promotable_instances(rsc);
}
}
gboolean
is_child_compatible(const pe_resource_t *child_rsc, const pe_node_t *local_node,
enum rsc_role_e filter, gboolean current)
{
pe_node_t *node = NULL;
enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current);
CRM_CHECK(child_rsc && local_node, return FALSE);
if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
/* We only want instances that haven't failed */
node = child_rsc->fns->location(child_rsc, NULL, current);
}
if (filter != RSC_ROLE_UNKNOWN && next_role != filter) {
crm_trace("Filtered %s", child_rsc->id);
return FALSE;
}
if (node && (node->details == local_node->details)) {
return TRUE;
} else if (node) {
crm_trace("%s - %s vs %s", child_rsc->id, pe__node_name(node),
pe__node_name(local_node));
} else {
crm_trace("%s - not allocated %d", child_rsc->id, current);
}
return FALSE;
}
pe_resource_t *
find_compatible_child(const pe_resource_t *local_child,
const pe_resource_t *rsc, enum rsc_role_e filter,
gboolean current)
{
pe_resource_t *pair = NULL;
GList *gIter = NULL;
GList *scratch = NULL;
pe_node_t *local_node = NULL;
local_node = local_child->fns->location(local_child, NULL, current);
if (local_node) {
return find_compatible_child_by_node(local_child, local_node, rsc, filter, current);
}
scratch = g_hash_table_get_values(local_child->allowed_nodes);
scratch = pcmk__sort_nodes(scratch, NULL);
gIter = scratch;
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
pair = find_compatible_child_by_node(local_child, node, rsc, filter, current);
if (pair) {
goto done;
}
}
pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id);
done:
g_list_free(scratch);
return pair;
}
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node weights (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__clone_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
GList *gIter = NULL;
gboolean do_interleave = FALSE;
const char *interleave_s = NULL;
/* This should never be called for the clone itself as a dependent. Instead,
* we add its colocation constraints to its instances and call the
* apply_coloc_score() for the instances as dependents.
*/
CRM_ASSERT(!for_dependent);
CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
return);
CRM_CHECK(dependent->variant == pe_native, return);
pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d",
colocation->id, dependent->id, primary->id, colocation->score);
if (pcmk_is_set(primary->flags, pe_rsc_promotable)) {
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
// We haven't placed the primary yet, so we can't apply colocation
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if (colocation->primary_role == RSC_ROLE_UNKNOWN) {
// This isn't a role-specfic colocation, so handle normally
pe_rsc_trace(primary, "Handling %s as a clone colocation",
colocation->id);
} else if (pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
// We're placing the dependent
pcmk__update_dependent_with_promotable(primary, dependent,
colocation);
return;
} else if (colocation->dependent_role == RSC_ROLE_PROMOTED) {
// We're choosing roles for the dependent
pcmk__update_promotable_dependent_priority(primary, dependent,
colocation);
return;
}
}
// Only the dependent needs to be marked for interleave
interleave_s = g_hash_table_lookup(colocation->dependent->meta,
XML_RSC_ATTR_INTERLEAVE);
if (crm_is_true(interleave_s)
&& (colocation->dependent->variant > pe_group)) {
/* @TODO Do we actually care about multiple primary copies sharing a
* dependent copy anymore?
*/
if (copies_per_node(colocation->dependent) != copies_per_node(colocation->primary)) {
pcmk__config_err("Cannot interleave %s and %s because they do not "
"support the same number of instances per node",
colocation->dependent->id,
colocation->primary->id);
} else {
do_interleave = TRUE;
}
}
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if (do_interleave) {
pe_resource_t *primary_instance = NULL;
primary_instance = find_compatible_child(dependent, primary,
RSC_ROLE_UNKNOWN, FALSE);
if (primary_instance != NULL) {
pe_rsc_debug(primary, "Pairing %s with %s",
dependent->id, primary_instance->id);
dependent->cmds->apply_coloc_score(dependent, primary_instance,
colocation, true);
} else if (colocation->score >= INFINITY) {
crm_notice("Cannot pair %s with instance of %s",
dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true);
} else {
pe_rsc_debug(primary, "Cannot pair %s with instance of %s",
dependent->id, primary->id);
}
return;
} else if (colocation->score >= INFINITY) {
GList *affected_nodes = NULL;
gIter = primary->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
pe_rsc_trace(primary, "Allowing %s: %s %d",
colocation->id, pe__node_name(chosen),
chosen->weight);
affected_nodes = g_list_prepend(affected_nodes, chosen);
}
}
node_list_exclude(dependent->allowed_nodes, affected_nodes, FALSE);
g_list_free(affected_nodes);
return;
}
gIter = primary->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->apply_coloc_score(dependent, child_rsc, colocation,
false);
}
}
enum action_tasks
clone_child_action(pe_action_t * action)
{
enum action_tasks result = no_action;
pe_resource_t *child = (pe_resource_t *) action->rsc->children->data;
if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) {
/* Find the action we're notifying about instead */
int stop = 0;
char *key = action->uuid;
int lpc = strlen(key);
for (; lpc > 0; lpc--) {
if (key[lpc] == '_' && stop == 0) {
stop = lpc;
} else if (key[lpc] == '_') {
char *task_mutable = NULL;
lpc++;
task_mutable = strdup(key + lpc);
task_mutable[stop - lpc] = 0;
crm_trace("Extracted action '%s' from '%s'", task_mutable, key);
result = get_complex_task(child, task_mutable, TRUE);
free(task_mutable);
break;
}
}
} else {
result = get_complex_task(child, action->task, TRUE);
}
return result;
}
#define pe__clear_action_summary_flags(flags, action, flag) do { \
flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Action summary", action->rsc->id, \
flags, flag, #flag); \
} while (0)
enum pe_action_flags
summary_action_flags(pe_action_t *action, GList *children,
const pe_node_t *node)
{
GList *gIter = NULL;
gboolean any_runnable = FALSE;
gboolean check_runnable = TRUE;
enum action_tasks task = clone_child_action(action);
enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo);
const char *task_s = task2text(task);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_action_t *child_action = NULL;
pe_resource_t *child = (pe_resource_t *) gIter->data;
child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node);
pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id,
pe__node_name(node), child_action?child_action->uuid:"NA");
if (child_action) {
enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node);
if (pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(child_flags, pe_action_optional)) {
pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid,
child_action->uuid);
pe__clear_action_summary_flags(flags, action, pe_action_optional);
pe__clear_action_flags(action, pe_action_optional);
}
if (pcmk_is_set(child_flags, pe_action_runnable)) {
any_runnable = TRUE;
}
}
}
if (check_runnable && any_runnable == FALSE) {
pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid);
pe__clear_action_summary_flags(flags, action, pe_action_runnable);
if (node == NULL) {
pe__clear_action_flags(action, pe_action_runnable);
}
}
return flags;
}
enum pe_action_flags
clone_action_flags(pe_action_t *action, const pe_node_t *node)
{
return summary_action_flags(action, action->rsc->children, node);
}
void
clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
GList *gIter = rsc->children;
pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id);
pcmk__apply_location(rsc, constraint);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->apply_location(child_rsc, constraint);
}
}
/*!
* \internal
* \brief Add a resource's actions to the transition graph
*
* \param[in] rsc Resource whose actions should be added
*/
void
clone_expand(pe_resource_t *rsc)
{
GList *gIter = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
g_list_foreach(rsc->actions, (GFunc) rsc->cmds->action_flags, NULL);
pe__create_notifications(rsc, clone_data->start_notify);
pe__create_notifications(rsc, clone_data->stop_notify);
pe__create_notifications(rsc, clone_data->promote_notify);
pe__create_notifications(rsc, clone_data->demote_notify);
/* Now that the notifcations have been created we can expand the children */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->add_actions_to_graph(child_rsc);
}
pcmk__add_rsc_actions_to_graph(rsc);
/* The notifications are in the graph now, we can destroy the notify_data */
pe__free_notification_data(clone_data->demote_notify);
clone_data->demote_notify = NULL;
pe__free_notification_data(clone_data->stop_notify);
clone_data->stop_notify = NULL;
pe__free_notification_data(clone_data->start_notify);
clone_data->start_notify = NULL;
pe__free_notification_data(clone_data->promote_notify);
clone_data->promote_notify = NULL;
}
// Check whether a resource or any of its children is known on node
static bool
rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node)
{
if (rsc->children) {
for (GList *child_iter = rsc->children; child_iter != NULL;
child_iter = child_iter->next) {
pe_resource_t *child = (pe_resource_t *) child_iter->data;
if (rsc_known_on(child, node)) {
return TRUE;
}
}
} else if (rsc->known_on) {
GHashTableIter iter;
pe_node_t *known_node = NULL;
g_hash_table_iter_init(&iter, rsc->known_on);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) {
if (node->details == known_node->details) {
return TRUE;
}
}
}
return FALSE;
}
// Look for an instance of clone that is known on node
static pe_resource_t *
find_instance_on(const pe_resource_t *clone, const pe_node_t *node)
{
for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
if (rsc_known_on(child, node)) {
return child;
}
}
return NULL;
}
// For anonymous clones, only a single instance needs to be probed
static bool
probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set)
{
// First, check if we probed an instance on this node last time
pe_resource_t *child = find_instance_on(rsc, node);
// Otherwise, check if we plan to start an instance on this node
if (child == NULL) {
for (GList *child_iter = rsc->children; child_iter && !child;
child_iter = child_iter->next) {
pe_node_t *local_node = NULL;
pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data;
if (child_rsc) { /* make clang analyzer happy */
local_node = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (local_node && (local_node->details == node->details)) {
child = child_rsc;
}
}
}
}
// Otherwise, use the first clone instance
if (child == NULL) {
child = rsc->children->data;
}
CRM_ASSERT(child);
return child->cmds->create_probe(child, node);
}
/*!
* \internal
*
* \brief Schedule any probes needed for a resource on a node
*
* \param[in] rsc Resource to create probe for
* \param[in] node Node to create probe on
*
* \return true if any probe was created, otherwise false
*/
bool
clone_create_probe(pe_resource_t *rsc, pe_node_t *node)
{
CRM_ASSERT(rsc);
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
if (rsc->children == NULL) {
pe_warn("Clone %s has no children", rsc->id);
return false;
}
if (rsc->exclusive_discover) {
pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) {
/* exclusive discover is enabled and this node is not marked
* as a node this resource should be discovered on
*
* remove the node from allowed_nodes so that the
* notification contains only nodes that we might ever run
* on
*/
g_hash_table_remove(rsc->allowed_nodes, node->details->id);
/* Bit of a shortcut - might as well take it */
return false;
}
}
if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
return pcmk__probe_resource_list(rsc->children, node);
} else {
return probe_anonymous_clone(rsc, node, rsc->cluster);
}
}
void
clone_append_meta(pe_resource_t * rsc, xmlNode * xml)
{
char *name = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
name = crm_meta_name(XML_RSC_ATTR_UNIQUE);
crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique));
free(name);
name = crm_meta_name(XML_RSC_ATTR_NOTIFY);
crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify));
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX);
crm_xml_add_int(xml, name, clone_data->clone_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX);
crm_xml_add_int(xml, name, clone_data->clone_node_max);
free(name);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
int promoted_max = pe__clone_promoted_max(rsc);
int promoted_node_max = pe__clone_promoted_node_max(rsc);
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX);
crm_xml_add_int(xml, name, promoted_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX);
crm_xml_add_int(xml, name, promoted_node_max);
free(name);
/* @COMPAT Maintain backward compatibility with resource agents that
* expect the old names (deprecated since 2.0.0).
*/
name = crm_meta_name(PCMK_XA_PROMOTED_MAX_LEGACY);
crm_xml_add_int(xml, name, promoted_max);
free(name);
name = crm_meta_name(PCMK_XA_PROMOTED_NODE_MAX_LEGACY);
crm_xml_add_int(xml, name, promoted_node_max);
free(name);
}
}
// Clone implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__clone_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization)
{
bool existing = false;
pe_resource_t *child = NULL;
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return;
}
// Look for any child already existing in the list
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
child = (pe_resource_t *) iter->data;
if (g_list_find(all_rscs, child)) {
existing = true; // Keep checking remaining children
} else {
// If this is a clone of a group, look for group's members
for (GList *member_iter = child->children; member_iter != NULL;
member_iter = member_iter->next) {
pe_resource_t *member = (pe_resource_t *) member_iter->data;
if (g_list_find(all_rscs, member) != NULL) {
// Add *child's* utilization, not group member's
child->cmds->add_utilization(child, orig_rsc, all_rscs,
utilization);
existing = true;
break;
}
}
}
}
if (!existing && (rsc->children != NULL)) {
// If nothing was found, still add first child's utilization
child = (pe_resource_t *) rsc->children->data;
child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization);
}
}
// Clone implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__clone_shutdown_lock(pe_resource_t *rsc)
{
return; // Clones currently don't support shutdown locks
}
diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c
index 1475a8afef..a30c8184b8 100644
--- a/lib/pacemaker/pcmk_sched_colocation.c
+++ b/lib/pacemaker/pcmk_sched_colocation.c
@@ -1,1387 +1,1465 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include "crm/common/util.h"
#include "crm/common/xml_internal.h"
#include "crm/msg_xml.h"
#include "libpacemaker_private.h"
#define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \
__rsc = pcmk__find_constraint_resource(data_set->resources, __name); \
if (__rsc == NULL) { \
pcmk__config_err("%s: No resource found for %s", __set, __name); \
return; \
} \
} while(0)
// Used to temporarily mark a node as unusable
#define INFINITY_HACK (INFINITY * -100)
static gint
cmp_dependent_priority(gconstpointer a, gconstpointer b)
{
const pcmk__colocation_t *rsc_constraint1 = (const pcmk__colocation_t *) a;
const pcmk__colocation_t *rsc_constraint2 = (const pcmk__colocation_t *) b;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
CRM_ASSERT(rsc_constraint1->dependent != NULL);
CRM_ASSERT(rsc_constraint1->primary != NULL);
if (rsc_constraint1->dependent->priority > rsc_constraint2->dependent->priority) {
return -1;
}
if (rsc_constraint1->dependent->priority < rsc_constraint2->dependent->priority) {
return 1;
}
/* Process clones before primitives and groups */
if (rsc_constraint1->dependent->variant > rsc_constraint2->dependent->variant) {
return -1;
}
if (rsc_constraint1->dependent->variant < rsc_constraint2->dependent->variant) {
return 1;
}
/* @COMPAT scheduler <2.0.0: Process promotable clones before nonpromotable
* clones (probably unnecessary, but avoids having to update regression
* tests)
*/
if (rsc_constraint1->dependent->variant == pe_clone) {
if (pcmk_is_set(rsc_constraint1->dependent->flags, pe_rsc_promotable)
&& !pcmk_is_set(rsc_constraint2->dependent->flags, pe_rsc_promotable)) {
return -1;
} else if (!pcmk_is_set(rsc_constraint1->dependent->flags, pe_rsc_promotable)
&& pcmk_is_set(rsc_constraint2->dependent->flags, pe_rsc_promotable)) {
return 1;
}
}
return strcmp(rsc_constraint1->dependent->id,
rsc_constraint2->dependent->id);
}
static gint
cmp_primary_priority(gconstpointer a, gconstpointer b)
{
const pcmk__colocation_t *rsc_constraint1 = (const pcmk__colocation_t *) a;
const pcmk__colocation_t *rsc_constraint2 = (const pcmk__colocation_t *) b;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
CRM_ASSERT(rsc_constraint1->dependent != NULL);
CRM_ASSERT(rsc_constraint1->primary != NULL);
if (rsc_constraint1->primary->priority > rsc_constraint2->primary->priority) {
return -1;
}
if (rsc_constraint1->primary->priority < rsc_constraint2->primary->priority) {
return 1;
}
/* Process clones before primitives and groups */
if (rsc_constraint1->primary->variant > rsc_constraint2->primary->variant) {
return -1;
} else if (rsc_constraint1->primary->variant < rsc_constraint2->primary->variant) {
return 1;
}
/* @COMPAT scheduler <2.0.0: Process promotable clones before nonpromotable
* clones (probably unnecessary, but avoids having to update regression
* tests)
*/
if (rsc_constraint1->primary->variant == pe_clone) {
if (pcmk_is_set(rsc_constraint1->primary->flags, pe_rsc_promotable)
&& !pcmk_is_set(rsc_constraint2->primary->flags, pe_rsc_promotable)) {
return -1;
} else if (!pcmk_is_set(rsc_constraint1->primary->flags, pe_rsc_promotable)
&& pcmk_is_set(rsc_constraint2->primary->flags, pe_rsc_promotable)) {
return 1;
}
}
return strcmp(rsc_constraint1->primary->id, rsc_constraint2->primary->id);
}
+/*!
+ * \internal
+ * \brief Add a "this with" colocation constraint to a resource
+ *
+ * \param[in,out] rsc Resource to add colocation to
+ * \param[in] colocation Colocation constraint to add to \p rsc
+ */
+void
+pcmk__add_this_with(pe_resource_t *rsc, pcmk__colocation_t *colocation)
+{
+ rsc->rsc_cons = g_list_insert_sorted(rsc->rsc_cons, colocation,
+ cmp_primary_priority);
+}
+
+/*!
+ * \internal
+ * \brief Add a "with this" colocation constraint to a resource
+ *
+ * \param[in,out] rsc Resource to add colocation to
+ * \param[in] colocation Colocation constraint to add to \p rsc
+ */
+void
+pcmk__add_with_this(pe_resource_t *rsc, pcmk__colocation_t *colocation)
+{
+ rsc->rsc_cons_lhs = g_list_insert_sorted(rsc->rsc_cons_lhs, colocation,
+ cmp_dependent_priority);
+}
+
/*!
* \internal
* \brief Add orderings necessary for an anti-colocation constraint
*/
static void
anti_colocation_order(pe_resource_t *first_rsc, int first_role,
pe_resource_t *then_rsc, int then_role,
pe_working_set_t *data_set)
{
const char *first_tasks[] = { NULL, NULL };
const char *then_tasks[] = { NULL, NULL };
/* Actions to make first_rsc lose first_role */
if (first_role == RSC_ROLE_PROMOTED) {
first_tasks[0] = CRMD_ACTION_DEMOTE;
} else {
first_tasks[0] = CRMD_ACTION_STOP;
if (first_role == RSC_ROLE_UNPROMOTED) {
first_tasks[1] = CRMD_ACTION_PROMOTE;
}
}
/* Actions to make then_rsc gain then_role */
if (then_role == RSC_ROLE_PROMOTED) {
then_tasks[0] = CRMD_ACTION_PROMOTE;
} else {
then_tasks[0] = CRMD_ACTION_START;
if (then_role == RSC_ROLE_UNPROMOTED) {
then_tasks[1] = CRMD_ACTION_DEMOTE;
}
}
for (int first_lpc = 0;
(first_lpc <= 1) && (first_tasks[first_lpc] != NULL); first_lpc++) {
for (int then_lpc = 0;
(then_lpc <= 1) && (then_tasks[then_lpc] != NULL); then_lpc++) {
pcmk__order_resource_actions(first_rsc, first_tasks[first_lpc],
then_rsc, then_tasks[then_lpc],
pe_order_anti_colocation);
}
}
}
/*!
* \internal
* \brief Add a new colocation constraint to a cluster working set
*
* \param[in] id XML ID for this constraint
* \param[in] node_attr Colocate by this attribute (or NULL for #uname)
* \param[in] score Constraint score
* \param[in] dependent Resource to be colocated
* \param[in] primary Resource to colocate \p dependent with
* \param[in] dependent_role Current role of \p dependent
* \param[in] primary_role Current role of \p primary
* \param[in] influence Whether colocation constraint has influence
* \param[in] data_set Cluster working set to add constraint to
*/
void
pcmk__new_colocation(const char *id, const char *node_attr, int score,
pe_resource_t *dependent, pe_resource_t *primary,
const char *dependent_role, const char *primary_role,
bool influence, pe_working_set_t *data_set)
{
pcmk__colocation_t *new_con = NULL;
if (score == 0) {
crm_trace("Ignoring colocation '%s' because score is 0", id);
return;
}
if ((dependent == NULL) || (primary == NULL)) {
pcmk__config_err("Ignoring colocation '%s' because resource "
"does not exist", id);
return;
}
new_con = calloc(1, sizeof(pcmk__colocation_t));
if (new_con == NULL) {
return;
}
if (pcmk__str_eq(dependent_role, RSC_ROLE_STARTED_S,
pcmk__str_null_matches|pcmk__str_casei)) {
dependent_role = RSC_ROLE_UNKNOWN_S;
}
if (pcmk__str_eq(primary_role, RSC_ROLE_STARTED_S,
pcmk__str_null_matches|pcmk__str_casei)) {
primary_role = RSC_ROLE_UNKNOWN_S;
}
new_con->id = id;
new_con->dependent = dependent;
new_con->primary = primary;
new_con->score = score;
new_con->dependent_role = text2role(dependent_role);
new_con->primary_role = text2role(primary_role);
new_con->node_attribute = node_attr;
new_con->influence = influence;
if (node_attr == NULL) {
node_attr = CRM_ATTR_UNAME;
}
pe_rsc_trace(dependent, "%s ==> %s (%s %d)",
dependent->id, primary->id, node_attr, score);
- dependent->rsc_cons = g_list_insert_sorted(dependent->rsc_cons, new_con,
- cmp_primary_priority);
-
- primary->rsc_cons_lhs = g_list_insert_sorted(primary->rsc_cons_lhs, new_con,
- cmp_dependent_priority);
+ pcmk__add_this_with(dependent, new_con);
+ pcmk__add_with_this(primary, new_con);
data_set->colocation_constraints = g_list_append(data_set->colocation_constraints,
new_con);
if (score <= -INFINITY) {
anti_colocation_order(dependent, new_con->dependent_role, primary,
new_con->primary_role, data_set);
anti_colocation_order(primary, new_con->primary_role, dependent,
new_con->dependent_role, data_set);
}
}
/*!
* \internal
* \brief Return the boolean influence corresponding to configuration
*
* \param[in] coloc_id Colocation XML ID (for error logging)
* \param[in] rsc Resource involved in constraint (for default)
* \param[in] influence_s String value of influence option
*
* \return true if string evaluates true, false if string evaluates false,
* or value of resource's critical option if string is NULL or invalid
*/
static bool
unpack_influence(const char *coloc_id, const pe_resource_t *rsc,
const char *influence_s)
{
if (influence_s != NULL) {
int influence_i = 0;
if (crm_str_to_boolean(influence_s, &influence_i) < 0) {
pcmk__config_err("Constraint '%s' has invalid value for "
XML_COLOC_ATTR_INFLUENCE " (using default)",
coloc_id);
} else {
return (influence_i != 0);
}
}
return pcmk_is_set(rsc->flags, pe_rsc_critical);
}
static void
unpack_colocation_set(xmlNode *set, int score, const char *coloc_id,
const char *influence_s, pe_working_set_t *data_set)
{
xmlNode *xml_rsc = NULL;
pe_resource_t *with = NULL;
pe_resource_t *resource = NULL;
const char *set_id = ID(set);
const char *role = crm_element_value(set, "role");
const char *ordering = crm_element_value(set, "ordering");
int local_score = score;
bool sequential = false;
const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE);
if (score_s) {
local_score = char2score(score_s);
}
if (local_score == 0) {
crm_trace("Ignoring colocation '%s' for set '%s' because score is 0",
coloc_id, set_id);
return;
}
if (ordering == NULL) {
ordering = "group";
}
if (pcmk__xe_get_bool_attr(set, "sequential", &sequential) == pcmk_rc_ok && !sequential) {
return;
} else if ((local_score > 0)
&& pcmk__str_eq(ordering, "group", pcmk__str_casei)) {
for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc));
if (with != NULL) {
pe_rsc_trace(resource, "Colocating %s with %s", resource->id, with->id);
pcmk__new_colocation(set_id, NULL, local_score, resource,
with, role, role,
unpack_influence(coloc_id, resource,
influence_s), data_set);
}
with = resource;
}
} else if (local_score > 0) {
pe_resource_t *last = NULL;
for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc));
if (last != NULL) {
pe_rsc_trace(resource, "Colocating %s with %s",
last->id, resource->id);
pcmk__new_colocation(set_id, NULL, local_score, last,
resource, role, role,
unpack_influence(coloc_id, last,
influence_s), data_set);
}
last = resource;
}
} else {
/* Anti-colocating with every prior resource is
* the only way to ensure the intuitive result
* (i.e. that no one in the set can run with anyone else in the set)
*/
for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
xmlNode *xml_rsc_with = NULL;
bool influence = true;
EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc));
influence = unpack_influence(coloc_id, resource, influence_s);
for (xml_rsc_with = first_named_child(set, XML_TAG_RESOURCE_REF);
xml_rsc_with != NULL;
xml_rsc_with = crm_next_same_xml(xml_rsc_with)) {
if (pcmk__str_eq(resource->id, ID(xml_rsc_with),
pcmk__str_casei)) {
break;
}
EXPAND_CONSTRAINT_IDREF(set_id, with, ID(xml_rsc_with));
pe_rsc_trace(resource, "Anti-Colocating %s with %s", resource->id,
with->id);
pcmk__new_colocation(set_id, NULL, local_score,
resource, with, role, role,
influence, data_set);
}
}
}
}
static void
colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score,
const char *influence_s, pe_working_set_t *data_set)
{
xmlNode *xml_rsc = NULL;
pe_resource_t *rsc_1 = NULL;
pe_resource_t *rsc_2 = NULL;
const char *role_1 = crm_element_value(set1, "role");
const char *role_2 = crm_element_value(set2, "role");
int rc = pcmk_rc_ok;
bool sequential = false;
if (score == 0) {
crm_trace("Ignoring colocation '%s' between sets because score is 0",
id);
return;
}
rc = pcmk__xe_get_bool_attr(set1, "sequential", &sequential);
if (rc != pcmk_rc_ok || sequential) {
// Get the first one
xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
if (xml_rsc != NULL) {
EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
}
}
rc = pcmk__xe_get_bool_attr(set2, "sequential", &sequential);
if (rc != pcmk_rc_ok || sequential) {
// Get the last one
const char *rid = NULL;
for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
rid = ID(xml_rsc);
}
EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid);
}
if ((rsc_1 != NULL) && (rsc_2 != NULL)) {
pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2,
unpack_influence(id, rsc_1, influence_s),
data_set);
} else if (rsc_1 != NULL) {
bool influence = unpack_influence(id, rsc_1, influence_s);
for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc));
pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1,
role_2, influence, data_set);
}
} else if (rsc_2 != NULL) {
for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1,
role_2,
unpack_influence(id, rsc_1, influence_s),
data_set);
}
} else {
for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
xmlNode *xml_rsc_2 = NULL;
bool influence = true;
EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
influence = unpack_influence(id, rsc_1, influence_s);
for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF);
xml_rsc_2 != NULL;
xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) {
EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2));
pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2,
role_1, role_2, influence,
data_set);
}
}
}
}
static void
unpack_simple_colocation(xmlNode *xml_obj, const char *id,
const char *influence_s, pe_working_set_t *data_set)
{
int score_i = 0;
const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
const char *dependent_id = crm_element_value(xml_obj,
XML_COLOC_ATTR_SOURCE);
const char *primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET);
const char *dependent_role = crm_element_value(xml_obj,
XML_COLOC_ATTR_SOURCE_ROLE);
const char *primary_role = crm_element_value(xml_obj,
XML_COLOC_ATTR_TARGET_ROLE);
const char *attr = crm_element_value(xml_obj, XML_COLOC_ATTR_NODE_ATTR);
// @COMPAT: Deprecated since 2.1.5
const char *dependent_instance = crm_element_value(xml_obj,
XML_COLOC_ATTR_SOURCE_INSTANCE);
// @COMPAT: Deprecated since 2.1.5
const char *primary_instance = crm_element_value(xml_obj,
XML_COLOC_ATTR_TARGET_INSTANCE);
pe_resource_t *dependent = pcmk__find_constraint_resource(data_set->resources,
dependent_id);
pe_resource_t *primary = pcmk__find_constraint_resource(data_set->resources,
primary_id);
if (dependent_instance != NULL) {
pe_warn_once(pe_wo_coloc_inst,
"Support for " XML_COLOC_ATTR_SOURCE_INSTANCE " is "
"deprecated and will be removed in a future release.");
}
if (primary_instance != NULL) {
pe_warn_once(pe_wo_coloc_inst,
"Support for " XML_COLOC_ATTR_TARGET_INSTANCE " is "
"deprecated and will be removed in a future release.");
}
if (dependent == NULL) {
pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
"does not exist", id, dependent_id);
return;
} else if (primary == NULL) {
pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
"does not exist", id, primary_id);
return;
} else if ((dependent_instance != NULL) && !pe_rsc_is_clone(dependent)) {
pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
"is not a clone but instance '%s' was requested",
id, dependent_id, dependent_instance);
return;
} else if ((primary_instance != NULL) && !pe_rsc_is_clone(primary)) {
pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
"is not a clone but instance '%s' was requested",
id, primary_id, primary_instance);
return;
}
if (dependent_instance != NULL) {
dependent = find_clone_instance(dependent, dependent_instance, data_set);
if (dependent == NULL) {
pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
"does not have an instance '%s'",
id, dependent_id, dependent_instance);
return;
}
}
if (primary_instance != NULL) {
primary = find_clone_instance(primary, primary_instance, data_set);
if (primary == NULL) {
pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
"does not have an instance '%s'",
"'%s'", id, primary_id, primary_instance);
return;
}
}
if (pcmk__xe_attr_is_true(xml_obj, XML_CONS_ATTR_SYMMETRICAL)) {
pcmk__config_warn("The colocation constraint '"
XML_CONS_ATTR_SYMMETRICAL
"' attribute has been removed");
}
if (score) {
score_i = char2score(score);
}
pcmk__new_colocation(id, attr, score_i, dependent, primary,
dependent_role, primary_role,
unpack_influence(id, dependent, influence_s), data_set);
}
// \return Standard Pacemaker return code
static int
unpack_colocation_tags(xmlNode *xml_obj, xmlNode **expanded_xml,
pe_working_set_t *data_set)
{
const char *id = NULL;
const char *dependent_id = NULL;
const char *primary_id = NULL;
const char *dependent_role = NULL;
const char *primary_role = NULL;
pe_resource_t *dependent = NULL;
pe_resource_t *primary = NULL;
pe_tag_t *dependent_tag = NULL;
pe_tag_t *primary_tag = NULL;
xmlNode *dependent_set = NULL;
xmlNode *primary_set = NULL;
bool any_sets = false;
*expanded_xml = NULL;
CRM_CHECK(xml_obj != NULL, return EINVAL);
id = ID(xml_obj);
if (id == NULL) {
pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
crm_element_name(xml_obj));
return pcmk_rc_unpack_error;
}
// Check whether there are any resource sets with template or tag references
*expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set);
if (*expanded_xml != NULL) {
crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation");
return pcmk_rc_ok;
}
dependent_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE);
primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET);
if ((dependent_id == NULL) || (primary_id == NULL)) {
return pcmk_rc_ok;
}
if (!pcmk__valid_resource_or_tag(data_set, dependent_id, &dependent,
&dependent_tag)) {
pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
"valid resource or tag", id, dependent_id);
return pcmk_rc_unpack_error;
}
if (!pcmk__valid_resource_or_tag(data_set, primary_id, &primary,
&primary_tag)) {
pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
"valid resource or tag", id, primary_id);
return pcmk_rc_unpack_error;
}
if ((dependent != NULL) && (primary != NULL)) {
/* Neither side references any template/tag. */
return pcmk_rc_ok;
}
if ((dependent_tag != NULL) && (primary_tag != NULL)) {
// A colocation constraint between two templates/tags makes no sense
pcmk__config_err("Ignoring constraint '%s' because two templates or "
"tags cannot be colocated", id);
return pcmk_rc_unpack_error;
}
dependent_role = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE);
primary_role = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE);
*expanded_xml = copy_xml(xml_obj);
// Convert template/tag reference in "rsc" into resource_set under constraint
if (!pcmk__tag_to_set(*expanded_xml, &dependent_set, XML_COLOC_ATTR_SOURCE,
true, data_set)) {
free_xml(*expanded_xml);
*expanded_xml = NULL;
return pcmk_rc_unpack_error;
}
if (dependent_set != NULL) {
if (dependent_role != NULL) {
// Move "rsc-role" into converted resource_set as "role"
crm_xml_add(dependent_set, "role", dependent_role);
xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_SOURCE_ROLE);
}
any_sets = true;
}
// Convert template/tag reference in "with-rsc" into resource_set under constraint
if (!pcmk__tag_to_set(*expanded_xml, &primary_set, XML_COLOC_ATTR_TARGET,
true, data_set)) {
free_xml(*expanded_xml);
*expanded_xml = NULL;
return pcmk_rc_unpack_error;
}
if (primary_set != NULL) {
if (primary_role != NULL) {
// Move "with-rsc-role" into converted resource_set as "role"
crm_xml_add(primary_set, "role", primary_role);
xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_TARGET_ROLE);
}
any_sets = true;
}
if (any_sets) {
crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation");
} else {
free_xml(*expanded_xml);
*expanded_xml = NULL;
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Parse a colocation constraint from XML into a cluster working set
*
* \param[in] xml_obj Colocation constraint XML to unpack
* \param[in] data_set Cluster working set to add constraint to
*/
void
pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set)
{
int score_i = 0;
xmlNode *set = NULL;
xmlNode *last = NULL;
xmlNode *orig_xml = NULL;
xmlNode *expanded_xml = NULL;
const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
const char *influence_s = crm_element_value(xml_obj,
XML_COLOC_ATTR_INFLUENCE);
if (score) {
score_i = char2score(score);
}
if (unpack_colocation_tags(xml_obj, &expanded_xml,
data_set) != pcmk_rc_ok) {
return;
}
if (expanded_xml) {
orig_xml = xml_obj;
xml_obj = expanded_xml;
}
for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL;
set = crm_next_same_xml(set)) {
set = expand_idref(set, data_set->input);
if (set == NULL) { // Configuration error, message already logged
if (expanded_xml != NULL) {
free_xml(expanded_xml);
}
return;
}
unpack_colocation_set(set, score_i, id, influence_s, data_set);
if (last != NULL) {
colocate_rsc_sets(id, last, set, score_i, influence_s, data_set);
}
last = set;
}
if (expanded_xml) {
free_xml(expanded_xml);
xml_obj = orig_xml;
}
if (last == NULL) {
unpack_simple_colocation(xml_obj, id, influence_s, data_set);
}
}
static void
mark_start_blocked(pe_resource_t *rsc, pe_resource_t *reason,
pe_working_set_t *data_set)
{
char *reason_text = crm_strdup_printf("colocation with %s", reason->id);
for (GList *gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (pcmk_is_set(action->flags, pe_action_runnable)
&& pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) {
pe__clear_action_flags(action, pe_action_runnable);
pe_action_set_reason(action, reason_text, false);
pcmk__block_colocated_starts(action, data_set);
pcmk__update_action_for_orderings(action, data_set);
}
}
free(reason_text);
}
/*!
* \internal
* \brief If a start action is unrunnable, block starts of colocated resources
*
* \param[in] action Action to check
* \param[in] data_set Cluster working set
*/
void
pcmk__block_colocated_starts(pe_action_t *action, pe_working_set_t *data_set)
{
GList *gIter = NULL;
pe_resource_t *rsc = NULL;
if (!pcmk_is_set(action->flags, pe_action_runnable)
&& pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) {
rsc = uber_parent(action->rsc);
if (rsc->parent) {
/* For bundles, uber_parent() returns the clone, not the bundle, so
* the existence of rsc->parent implies this is a bundle.
* In this case, we need the bundle resource, so that we can check
* if all containers are stopped/stopping.
*/
rsc = rsc->parent;
}
}
if ((rsc == NULL) || (rsc->rsc_cons_lhs == NULL)) {
return;
}
// Block colocated starts only if all children (if any) have unrunnable starts
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *)gIter->data;
pe_action_t *start = find_first_action(child->actions, NULL, RSC_START, NULL);
if ((start == NULL) || pcmk_is_set(start->flags, pe_action_runnable)) {
return;
}
}
for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *colocate_with = (pcmk__colocation_t *) gIter->data;
if (colocate_with->score == INFINITY) {
mark_start_blocked(colocate_with->dependent, action->rsc, data_set);
}
}
}
/*!
* \internal
* \brief Determine how a colocation constraint should affect a resource
*
* Colocation constraints have different effects at different points in the
* scheduler sequence. Initially, they affect a resource's location; once that
* is determined, then for promotable clones they can affect a resource
* instance's role; after both are determined, the constraints no longer matter.
* Given a specific colocation constraint, check what has been done so far to
* determine what should be affected at the current point in the scheduler.
*
* \param[in] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint
* \param[in] preview If true, pretend resources have already been allocated
*
* \return How colocation constraint should be applied at this point
*/
enum pcmk__coloc_affects
pcmk__colocation_affects(const pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation, bool preview)
{
if (!preview && pcmk_is_set(primary->flags, pe_rsc_provisional)) {
// Primary resource has not been allocated yet, so we can't do anything
return pcmk__coloc_affects_nothing;
}
if ((colocation->dependent_role >= RSC_ROLE_UNPROMOTED)
&& (dependent->parent != NULL)
&& pcmk_is_set(dependent->parent->flags, pe_rsc_promotable)
&& !pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
/* This is a colocation by role, and the dependent is a promotable clone
* that has already been allocated, so the colocation should now affect
* the role.
*/
return pcmk__coloc_affects_role;
}
if (!preview && !pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
/* The dependent resource has already been through allocation, so the
* constraint no longer has any effect. Log an error if a mandatory
* colocation constraint has been violated.
*/
const pe_node_t *primary_node = primary->allocated_to;
if (dependent->allocated_to == NULL) {
crm_trace("Skipping colocation '%s': %s will not run anywhere",
colocation->id, dependent->id);
} else if (colocation->score >= INFINITY) {
// Dependent resource must colocate with primary resource
if ((primary_node == NULL) ||
(primary_node->details != dependent->allocated_to->details)) {
crm_err("%s must be colocated with %s but is not (%s vs. %s)",
dependent->id, primary->id,
pe__node_name(dependent->allocated_to),
pe__node_name(primary_node));
}
} else if (colocation->score <= -CRM_SCORE_INFINITY) {
// Dependent resource must anti-colocate with primary resource
if ((primary_node != NULL) &&
(dependent->allocated_to->details == primary_node->details)) {
crm_err("%s and %s must be anti-colocated but are allocated "
"to the same node (%s)",
dependent->id, primary->id, pe__node_name(primary_node));
}
}
return pcmk__coloc_affects_nothing;
}
if ((colocation->score > 0)
&& (colocation->dependent_role != RSC_ROLE_UNKNOWN)
&& (colocation->dependent_role != dependent->next_role)) {
crm_trace("Skipping colocation '%s': dependent limited to %s role "
"but %s next role is %s",
colocation->id, role2text(colocation->dependent_role),
dependent->id, role2text(dependent->next_role));
return pcmk__coloc_affects_nothing;
}
if ((colocation->score > 0)
&& (colocation->primary_role != RSC_ROLE_UNKNOWN)
&& (colocation->primary_role != primary->next_role)) {
crm_trace("Skipping colocation '%s': primary limited to %s role "
"but %s next role is %s",
colocation->id, role2text(colocation->primary_role),
primary->id, role2text(primary->next_role));
return pcmk__coloc_affects_nothing;
}
if ((colocation->score < 0)
&& (colocation->dependent_role != RSC_ROLE_UNKNOWN)
&& (colocation->dependent_role == dependent->next_role)) {
crm_trace("Skipping anti-colocation '%s': dependent role %s matches",
colocation->id, role2text(colocation->dependent_role));
return pcmk__coloc_affects_nothing;
}
if ((colocation->score < 0)
&& (colocation->primary_role != RSC_ROLE_UNKNOWN)
&& (colocation->primary_role == primary->next_role)) {
crm_trace("Skipping anti-colocation '%s': primary role %s matches",
colocation->id, role2text(colocation->primary_role));
return pcmk__coloc_affects_nothing;
}
return pcmk__coloc_affects_location;
}
/*!
* \internal
* \brief Apply colocation to dependent for allocation purposes
*
* Update the allowed node weights of the dependent resource in a colocation,
* for the purposes of allocating it to a node
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint
*/
void
pcmk__apply_coloc_to_weights(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation)
{
const char *attribute = CRM_ATTR_ID;
const char *value = NULL;
GHashTable *work = NULL;
GHashTableIter iter;
pe_node_t *node = NULL;
if (colocation->node_attribute != NULL) {
attribute = colocation->node_attribute;
}
if (primary->allocated_to != NULL) {
value = pe_node_attribute_raw(primary->allocated_to, attribute);
} else if (colocation->score < 0) {
// Nothing to do (anti-colocation with something that is not running)
return;
}
work = pcmk__copy_node_table(dependent->allowed_nodes);
g_hash_table_iter_init(&iter, work);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (primary->allocated_to == NULL) {
pe_rsc_trace(dependent, "%s: %s@%s -= %d (%s inactive)",
colocation->id, dependent->id, pe__node_name(node),
colocation->score, primary->id);
node->weight = pcmk__add_scores(-colocation->score, node->weight);
} else if (pcmk__str_eq(pe_node_attribute_raw(node, attribute), value,
pcmk__str_casei)) {
if (colocation->score < CRM_SCORE_INFINITY) {
pe_rsc_trace(dependent, "%s: %s@%s += %d",
colocation->id, dependent->id,
pe__node_name(node), colocation->score);
node->weight = pcmk__add_scores(colocation->score,
node->weight);
}
} else if (colocation->score >= CRM_SCORE_INFINITY) {
pe_rsc_trace(dependent, "%s: %s@%s -= %d (%s mismatch)",
colocation->id, dependent->id, pe__node_name(node),
colocation->score, attribute);
node->weight = pcmk__add_scores(-colocation->score, node->weight);
}
}
if ((colocation->score <= -INFINITY) || (colocation->score >= INFINITY)
|| pcmk__any_node_available(work)) {
g_hash_table_destroy(dependent->allowed_nodes);
dependent->allowed_nodes = work;
work = NULL;
} else {
pe_rsc_info(dependent,
"%s: Rolling back scores from %s (no available nodes)",
dependent->id, primary->id);
}
if (work != NULL) {
g_hash_table_destroy(work);
}
}
/*!
* \internal
* \brief Apply colocation to dependent for role purposes
*
* Update the priority of the dependent resource in a colocation, for the
* purposes of selecting its role
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint
*/
void
pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation)
{
const char *dependent_value = NULL;
const char *primary_value = NULL;
const char *attribute = CRM_ATTR_ID;
int score_multiplier = 1;
if ((primary->allocated_to == NULL) || (dependent->allocated_to == NULL)) {
return;
}
if (colocation->node_attribute != NULL) {
attribute = colocation->node_attribute;
}
dependent_value = pe_node_attribute_raw(dependent->allocated_to, attribute);
primary_value = pe_node_attribute_raw(primary->allocated_to, attribute);
if (!pcmk__str_eq(dependent_value, primary_value, pcmk__str_casei)) {
if ((colocation->score == INFINITY)
&& (colocation->dependent_role == RSC_ROLE_PROMOTED)) {
dependent->priority = -INFINITY;
}
return;
}
if ((colocation->primary_role != RSC_ROLE_UNKNOWN)
&& (colocation->primary_role != primary->next_role)) {
return;
}
if (colocation->dependent_role == RSC_ROLE_UNPROMOTED) {
score_multiplier = -1;
}
dependent->priority = pcmk__add_scores(score_multiplier * colocation->score,
dependent->priority);
}
/*!
* \internal
* \brief Find score of highest-scored node that matches colocation attribute
*
* \param[in] rsc Resource whose allowed nodes should be searched
* \param[in] attr Colocation attribute name (must not be NULL)
* \param[in] value Colocation attribute value to require
*/
static int
best_node_score_matching_attr(const pe_resource_t *rsc, const char *attr,
const char *value)
{
GHashTableIter iter;
pe_node_t *node = NULL;
int best_score = -INFINITY;
const char *best_node = NULL;
// Find best allowed node with matching attribute
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if ((node->weight > best_score) && pcmk__node_available(node, false, false)
&& pcmk__str_eq(value, pe_node_attribute_raw(node, attr), pcmk__str_casei)) {
best_score = node->weight;
best_node = node->details->uname;
}
}
if (!pcmk__str_eq(attr, CRM_ATTR_UNAME, pcmk__str_casei)) {
if (best_node == NULL) {
crm_info("No allowed node for %s matches node attribute %s=%s",
rsc->id, attr, value);
} else {
crm_info("Allowed node %s for %s had best score (%d) "
"of those matching node attribute %s=%s",
best_node, rsc->id, best_score, attr, value);
}
}
return best_score;
}
/*!
* \internal
* \brief Add resource's colocation matches to current node allocation scores
*
* For each node in a given table, if any of a given resource's allowed nodes
* have a matching value for the colocation attribute, add the highest of those
* nodes' scores to the node's score.
*
* \param[in,out] nodes Hash table of nodes with allocation scores so far
* \param[in] rsc Resource whose allowed nodes should be compared
* \param[in] attr Colocation attribute that must match (NULL for default)
* \param[in] factor Factor by which to multiply scores being added
* \param[in] only_positive Whether to add only positive scores
*/
static void
add_node_scores_matching_attr(GHashTable *nodes, const pe_resource_t *rsc,
const char *attr, float factor,
bool only_positive)
{
GHashTableIter iter;
pe_node_t *node = NULL;
if (attr == NULL) {
attr = CRM_ATTR_UNAME;
}
// Iterate through each node
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
float weight_f = 0;
int weight = 0;
int score = 0;
int new_score = 0;
score = best_node_score_matching_attr(rsc, attr,
pe_node_attribute_raw(node, attr));
if ((factor < 0) && (score < 0)) {
/* Negative preference for a node with a negative score
* should not become a positive preference.
*
* @TODO Consider filtering only if weight is -INFINITY
*/
crm_trace("%s: Filtering %d + %f * %d (double negative disallowed)",
pe__node_name(node), node->weight, factor, score);
continue;
}
if (node->weight == INFINITY_HACK) {
crm_trace("%s: Filtering %d + %f * %d (node was marked unusable)",
pe__node_name(node), node->weight, factor, score);
continue;
}
weight_f = factor * score;
// Round the number; see http://c-faq.com/fp/round.html
weight = (int) ((weight_f < 0)? (weight_f - 0.5) : (weight_f + 0.5));
/* Small factors can obliterate the small scores that are often actually
* used in configurations. If the score and factor are nonzero, ensure
* that the result is nonzero as well.
*/
if ((weight == 0) && (score != 0)) {
if (factor > 0.0) {
weight = 1;
} else if (factor < 0.0) {
weight = -1;
}
}
new_score = pcmk__add_scores(weight, node->weight);
if (only_positive && (new_score < 0) && (node->weight > 0)) {
crm_trace("%s: Filtering %d + %f * %d = %d "
"(negative disallowed, marking node unusable)",
pe__node_name(node), node->weight, factor, score,
new_score);
node->weight = INFINITY_HACK;
continue;
}
if (only_positive && (new_score < 0) && (node->weight == 0)) {
crm_trace("%s: Filtering %d + %f * %d = %d (negative disallowed)",
pe__node_name(node), node->weight, factor, score,
new_score);
continue;
}
crm_trace("%s: %d + %f * %d = %d", pe__node_name(node),
node->weight, factor, score, new_score);
node->weight = new_score;
}
}
-static inline bool
-is_nonempty_group(pe_resource_t *rsc)
+/*!
+ * \internal
+ * \brief Initialize colocated node table for a group resource
+ *
+ * \param[in] rsc Group resource being colocated with another resource
+ * \param[in] log_id Resource ID to use in log messages
+ * \param[in] nodes Nodes to update
+ * \param[in] attr Colocation attribute (NULL to use default)
+ * \param[in] factor Incorporate scores multiplied by this factor
+ * \param[in] flags Bitmask of enum pcmk__coloc_select values
+ *
+ * \return Table of node scores initialized for colocation, or NULL if resource
+ * should be ignored for colocation purposes
+ * \note The caller is responsible for freeing a non-NULL return value using
+ * g_hash_table_destroy().
+ */
+static GHashTable *
+init_group_colocated_nodes(const pe_resource_t *rsc, const char *log_id,
+ GHashTable **nodes, const char *attr, float factor,
+ uint32_t flags)
+{
+ GHashTable *work = NULL;
+ pe_resource_t *member = NULL;
+
+ // Ignore empty groups (only possible with schema validation disabled)
+ if (rsc->children == NULL) {
+ return NULL;
+ }
+
+ if (*nodes == NULL) {
+ // Only cmp_resources() passes a NULL nodes table
+ member = pe__last_group_member(rsc);
+ } else {
+ /* The first member of the group will recursively incorporate any
+ * constraints involving other members (including the group internal
+ * colocation).
+ *
+ * @TODO The indirect colocations from the dependent group's other
+ * members will be incorporated at full strength rather than by
+ * factor, so the group's combined stickiness will be treated as
+ * (factor + (#members - 1)) * stickiness. It is questionable what
+ * the right approach should be.
+ */
+ member = rsc->children->data;
+ }
+
+ pe_rsc_trace(rsc, "%s: Merging scores from group %s using member %s "
+ "(at %.6f)", log_id, rsc->id, member->id, factor);
+ work = pcmk__copy_node_table(*nodes);
+ pcmk__add_colocated_node_scores(member, log_id, &work, attr, factor, flags);
+ return work;
+}
+
+/*!
+ * \internal
+ * \brief Initialize colocated node table for a non-group resource
+ *
+ * \param[in] rsc Non-group resource being colocated with another resource
+ * \param[in] log_id Resource ID to use in log messages
+ * \param[in] nodes Nodes to update
+ * \param[in] attr Colocation attribute (NULL to use default)
+ * \param[in] factor Incorporate scores multiplied by this factor
+ * \param[in] flags Bitmask of enum pcmk__coloc_select values
+ *
+ * \return Table of node scores initialized for colocation, or NULL if resource
+ * should be ignored for colocation purposes
+ * \note The caller is responsible for freeing a non-NULL return value using
+ * g_hash_table_destroy().
+ */
+static GHashTable *
+init_nongroup_colocated_nodes(const pe_resource_t *rsc, const char *log_id,
+ GHashTable **nodes, const char *attr,
+ float factor, uint32_t flags)
{
- return rsc && (rsc->variant == pe_group) && (rsc->children != NULL);
+ GHashTable *work = NULL;
+
+ if (*nodes == NULL) {
+ /* Only cmp_resources() passes a NULL nodes table, which indicates we
+ * should initialize it with the resource's allowed node scores.
+ */
+ work = pcmk__copy_node_table(rsc->allowed_nodes);
+
+ } else {
+ pe_rsc_trace(rsc, "%s: Merging scores from %s (at %.6f)",
+ log_id, rsc->id, factor);
+ work = pcmk__copy_node_table(*nodes);
+ add_node_scores_matching_attr(work, rsc, attr, factor,
+ pcmk_is_set(flags,
+ pcmk__coloc_select_nonnegative));
+ }
+ return work;
}
/*!
* \internal
* \brief Update nodes with scores of colocated resources' nodes
*
* Given a table of nodes and a resource, update the nodes' scores with the
* scores of the best nodes matching the attribute used for each of the
* resource's relevant colocations.
*
* \param[in,out] rsc Resource to check colocations for
* \param[in] log_id Resource ID to use in logs (if NULL, use \p rsc ID)
* \param[in,out] nodes Nodes to update
* \param[in] attr Colocation attribute (NULL to use default)
* \param[in] factor Incorporate scores multiplied by this factor
* \param[in] flags Bitmask of enum pcmk__coloc_select values
*
* \note The caller remains responsible for freeing \p *nodes.
*/
void
pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
GHashTable **nodes, const char *attr,
float factor, uint32_t flags)
{
GHashTable *work = NULL;
CRM_CHECK((rsc != NULL) && (nodes != NULL), return);
if (log_id == NULL) {
log_id = rsc->id;
}
// Avoid infinite recursion
if (pcmk_is_set(rsc->flags, pe_rsc_merging)) {
pe_rsc_info(rsc, "%s: Breaking dependency loop at %s",
log_id, rsc->id);
return;
}
pe__set_resource_flags(rsc, pe_rsc_merging);
- if (*nodes == NULL) {
- /* Only cmp_resources() passes a NULL nodes table, which indicates we
- * should initialize it with the resource's allowed node scores.
- */
- if (is_nonempty_group(rsc)) {
- GList *last = g_list_last(rsc->children);
- pe_resource_t *last_rsc = last->data;
-
- pe_rsc_trace(rsc, "%s: Merging scores from group %s "
- "using last member %s (at %.6f)",
- log_id, rsc->id, last_rsc->id, factor);
- last_rsc->cmds->add_colocated_node_scores(last_rsc, log_id,
- &work, attr, factor,
- flags);
- } else {
- work = pcmk__copy_node_table(rsc->allowed_nodes);
- }
-
- } else if (is_nonempty_group(rsc)) {
- pe_resource_t *member = rsc->children->data;
-
- /* The first member of the group will recursively incorporate any
- * constraints involving other members (including the group internal
- * colocation).
- *
- * @TODO The indirect colocations from the dependent group's other
- * members will be incorporated at full strength rather than by
- * factor, so the group's combined stickiness will be treated as
- * (factor + (#members - 1)) * stickiness. It is questionable what
- * the right approach should be.
- */
- pe_rsc_trace(rsc, "%s: Merging scores from first member of group %s "
- "(at %.6f)", log_id, rsc->id, factor);
- work = pcmk__copy_node_table(*nodes);
- member->cmds->add_colocated_node_scores(member, log_id, &work, attr,
- factor, flags);
-
+ if (rsc->variant == pe_group) {
+ work = init_group_colocated_nodes(rsc, log_id, nodes, attr, factor,
+ flags);
} else {
- pe_rsc_trace(rsc, "%s: Merging scores from %s (at %.6f)",
- log_id, rsc->id, factor);
- work = pcmk__copy_node_table(*nodes);
- add_node_scores_matching_attr(work, rsc, attr, factor,
- pcmk_is_set(flags,
- pcmk__coloc_select_nonnegative));
+ work = init_nongroup_colocated_nodes(rsc, log_id, nodes, attr, factor,
+ flags);
+ }
+ if (work == NULL) {
+ pe__clear_resource_flags(rsc, pe_rsc_merging);
+ return;
}
if (pcmk__any_node_available(work)) {
GList *gIter = NULL;
float multiplier = (factor < 0.0)? -1.0 : 1.0;
if (pcmk_is_set(flags, pcmk__coloc_select_this_with)) {
gIter = rsc->rsc_cons;
pe_rsc_trace(rsc,
"Checking additional %d optional '%s with' constraints",
g_list_length(gIter), rsc->id);
- } else if (is_nonempty_group(rsc)) {
- pe_resource_t *last_rsc = g_list_last(rsc->children)->data;
+ } else if (rsc->variant == pe_group) {
+ pe_resource_t *last_rsc = pe__last_group_member(rsc);
gIter = last_rsc->rsc_cons_lhs;
pe_rsc_trace(rsc, "Checking additional %d optional 'with group %s' "
"constraints using last member %s",
g_list_length(gIter), rsc->id, last_rsc->id);
} else {
gIter = rsc->rsc_cons_lhs;
pe_rsc_trace(rsc,
"Checking additional %d optional 'with %s' constraints",
g_list_length(gIter), rsc->id);
}
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *other = NULL;
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (pcmk_is_set(flags, pcmk__coloc_select_this_with)) {
other = constraint->primary;
} else if (!pcmk__colocation_has_influence(constraint, NULL)) {
continue;
} else {
other = constraint->dependent;
}
pe_rsc_trace(rsc, "Optionally merging score of '%s' constraint (%s with %s)",
constraint->id, constraint->dependent->id,
constraint->primary->id);
factor = multiplier * constraint->score / (float) INFINITY;
pcmk__add_colocated_node_scores(other, log_id, &work,
constraint->node_attribute, factor,
flags|pcmk__coloc_select_active);
pe__show_node_weights(true, NULL, log_id, work, rsc->cluster);
}
} else if (pcmk_is_set(flags, pcmk__coloc_select_active)) {
pe_rsc_info(rsc, "%s: Rolling back optional scores from %s",
log_id, rsc->id);
g_hash_table_destroy(work);
pe__clear_resource_flags(rsc, pe_rsc_merging);
return;
}
if (pcmk_is_set(flags, pcmk__coloc_select_nonnegative)) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, work);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (node->weight == INFINITY_HACK) {
node->weight = 1;
}
}
}
if (*nodes != NULL) {
g_hash_table_destroy(*nodes);
}
*nodes = work;
pe__clear_resource_flags(rsc, pe_rsc_merging);
}
diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c
index dc33c9cc98..6bbe2d1b88 100644
--- a/lib/pacemaker/pcmk_sched_group.c
+++ b/lib/pacemaker/pcmk_sched_group.c
@@ -1,804 +1,763 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Expand a group's colocations to its members
*
* \param[in,out] rsc Group resource
*/
static void
expand_group_colocations(pe_resource_t *rsc)
{
pe_resource_t *member = NULL;
bool any_unmanaged = false;
+ GList *item = NULL;
+
+ if (rsc->children == NULL) {
+ return;
+ }
// Treat "group with R" colocations as "first member with R"
member = (pe_resource_t *) rsc->children->data;
- member->rsc_cons = g_list_concat(member->rsc_cons, rsc->rsc_cons);
+ for (item = rsc->rsc_cons; item != NULL; item = item->next) {
+ pcmk__add_this_with(member, (pcmk__colocation_t *) (item->data));
+ }
/* The above works for the whole group because each group member is
* colocated with the previous one.
*
* However, there is a special case when a group has a mandatory colocation
* with a resource that can't start. In that case,
* pcmk__block_colocated_starts() will ensure that dependent resources in
* mandatory colocations (i.e. the first member for groups) can't start
* either. But if any group member is unmanaged and already started, the
* internal group colocations are no longer sufficient to make that apply to
* later members.
*
* To handle that case, add mandatory colocations to each member after the
* first.
*/
any_unmanaged = !pcmk_is_set(member->flags, pe_rsc_managed);
- for (GList *item = rsc->children->next; item != NULL; item = item->next) {
+ for (item = rsc->children->next; item != NULL; item = item->next) {
member = item->data;
if (any_unmanaged) {
for (GList *cons_iter = rsc->rsc_cons; cons_iter != NULL;
cons_iter = cons_iter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) cons_iter->data;
if (constraint->score == INFINITY) {
- member->rsc_cons = g_list_prepend(member->rsc_cons, constraint);
+ pcmk__add_this_with(member, constraint);
}
}
} else if (!pcmk_is_set(member->flags, pe_rsc_managed)) {
any_unmanaged = true;
}
}
+ g_list_free(rsc->rsc_cons);
rsc->rsc_cons = NULL;
// Treat "R with group" colocations as "R with last member"
member = pe__last_group_member(rsc);
- member->rsc_cons_lhs = g_list_concat(member->rsc_cons_lhs,
- rsc->rsc_cons_lhs);
+ for (item = rsc->rsc_cons_lhs; item != NULL; item = item->next) {
+ pcmk__add_with_this(member, (pcmk__colocation_t *) (item->data));
+ }
+ g_list_free(rsc->rsc_cons_lhs);
rsc->rsc_cons_lhs = NULL;
}
/*!
* \internal
* \brief Assign a group resource to a node
*
* \param[in,out] rsc Group resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*/
pe_node_t *
pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer)
{
pe_node_t *first_assigned_node = NULL;
pe_resource_t *first_member = NULL;
CRM_ASSERT(rsc != NULL);
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->allocated_to; // Assignment already done
}
if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Assignment dependency loop detected involving %s",
rsc->id);
return NULL;
}
if (rsc->children == NULL) {
// No members to assign
pe__clear_resource_flags(rsc, pe_rsc_provisional);
return NULL;
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
first_member = (pe_resource_t *) rsc->children->data;
rsc->role = first_member->role;
expand_group_colocations(rsc);
pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
pe_node_t *node = NULL;
pe_rsc_trace(rsc, "Assigning group %s member %s",
rsc->id, member->id);
node = member->cmds->assign(member, prefer);
if (first_assigned_node == NULL) {
first_assigned_node = node;
}
}
pe__set_next_role(rsc, first_member->next_role, "first group member");
pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional);
if (!pe__group_flag_is_set(rsc, pe__group_colocated)) {
return NULL;
}
return first_assigned_node;
}
/*!
* \internal
* \brief Create a pseudo-operation for a group as an ordering point
*
* \param[in,out] group Group resource to create action for
* \param[in] action Action name
*
* \return Newly created pseudo-operation
*/
static pe_action_t *
create_group_pseudo_op(pe_resource_t *group, const char *action)
{
pe_action_t *op = custom_action(group, pcmk__op_key(group->id, action, 0),
action, NULL, TRUE, TRUE, group->cluster);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
return op;
}
/*!
* \internal
* \brief Create all actions needed for a given group resource
*
* \param[in,out] rsc Group resource to create actions for
*/
void
pcmk__group_create_actions(pe_resource_t *rsc)
{
CRM_ASSERT(rsc != NULL);
pe_rsc_trace(rsc, "Creating actions for group %s", rsc->id);
// Create actions for individual group members
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
member->cmds->create_actions(member);
}
// Create pseudo-actions for group itself to serve as ordering points
create_group_pseudo_op(rsc, RSC_START);
create_group_pseudo_op(rsc, RSC_STARTED);
create_group_pseudo_op(rsc, RSC_STOP);
create_group_pseudo_op(rsc, RSC_STOPPED);
if (crm_is_true(g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTABLE))) {
create_group_pseudo_op(rsc, RSC_DEMOTE);
create_group_pseudo_op(rsc, RSC_DEMOTED);
create_group_pseudo_op(rsc, RSC_PROMOTE);
create_group_pseudo_op(rsc, RSC_PROMOTED);
}
}
// User data for member_internal_constraints()
struct member_data {
// These could be derived from member but this avoids some function calls
bool ordered;
bool colocated;
bool promotable;
pe_resource_t *last_active;
pe_resource_t *previous_member;
};
/*!
* \internal
* \brief Create implicit constraints needed for a group member
*
* \param[in,out] data Group member to create implicit constraints for
* \param[in,out] user_data Group member to create implicit constraints for
*/
static void
member_internal_constraints(gpointer data, gpointer user_data)
{
pe_resource_t *member = (pe_resource_t *) data;
struct member_data *member_data = (struct member_data *) user_data;
// For ordering demote vs demote or stop vs stop
uint32_t down_flags = pe_order_implies_first_printed;
// For ordering demote vs demoted or stop vs stopped
uint32_t post_down_flags = pe_order_implies_then_printed;
// Create the individual member's implicit constraints
member->cmds->internal_constraints(member);
if (member_data->previous_member == NULL) {
// This is first member
if (member_data->ordered) {
pe__set_order_flags(down_flags, pe_order_optional);
post_down_flags = pe_order_implies_then;
}
} else if (member_data->colocated) {
// Colocate this member with the previous one
pcmk__new_colocation("group:internal_colocation", NULL, INFINITY,
member, member_data->previous_member, NULL, NULL,
pcmk_is_set(member->flags, pe_rsc_critical),
member->cluster);
}
if (member_data->promotable) {
// Demote group -> demote member -> group is demoted
pcmk__order_resource_actions(member->parent, RSC_DEMOTE,
member, RSC_DEMOTE, down_flags);
pcmk__order_resource_actions(member, RSC_DEMOTE,
member->parent, RSC_DEMOTED,
post_down_flags);
// Promote group -> promote member -> group is promoted
pcmk__order_resource_actions(member, RSC_PROMOTE,
member->parent, RSC_PROMOTED,
pe_order_runnable_left
|pe_order_implies_then
|pe_order_implies_then_printed);
pcmk__order_resource_actions(member->parent, RSC_PROMOTE,
member, RSC_PROMOTE,
pe_order_implies_first_printed);
}
// Stop group -> stop member -> group is stopped
pcmk__order_stops(member->parent, member, down_flags);
pcmk__order_resource_actions(member, RSC_STOP, member->parent, RSC_STOPPED,
post_down_flags);
// Start group -> start member -> group is started
pcmk__order_starts(member->parent, member, pe_order_implies_first_printed);
pcmk__order_resource_actions(member, RSC_START, member->parent, RSC_STARTED,
pe_order_runnable_left
|pe_order_implies_then
|pe_order_implies_then_printed);
if (!member_data->ordered) {
pcmk__order_starts(member->parent, member,
pe_order_implies_then
|pe_order_runnable_left
|pe_order_implies_first_printed);
if (member_data->promotable) {
pcmk__order_resource_actions(member->parent, RSC_PROMOTE, member,
RSC_PROMOTE,
pe_order_implies_then
|pe_order_runnable_left
|pe_order_implies_first_printed);
}
} else if (member_data->previous_member == NULL) {
pcmk__order_starts(member->parent, member, pe_order_none);
if (member_data->promotable) {
pcmk__order_resource_actions(member->parent, RSC_PROMOTE, member,
RSC_PROMOTE, pe_order_none);
}
} else {
// Order this member relative to the previous one
pcmk__order_starts(member_data->previous_member, member,
pe_order_implies_then|pe_order_runnable_left);
pcmk__order_stops(member, member_data->previous_member,
pe_order_optional|pe_order_restart);
if (member_data->promotable) {
pcmk__order_resource_actions(member_data->previous_member,
RSC_PROMOTE, member, RSC_PROMOTE,
pe_order_implies_then
|pe_order_runnable_left);
pcmk__order_resource_actions(member, RSC_DEMOTE,
member_data->previous_member,
RSC_DEMOTE, pe_order_optional);
}
}
// Make sure partially active groups shut down in sequence
if (member->running_on != NULL) {
if (member_data->ordered && (member_data->previous_member != NULL)
&& (member_data->previous_member->running_on == NULL)
&& (member_data->last_active != NULL)
&& (member_data->last_active->running_on != NULL)) {
pcmk__order_stops(member, member_data->last_active, pe_order_optional);
}
member_data->last_active = member;
}
member_data->previous_member = member;
}
/*!
* \internal
* \brief Create implicit constraints needed for a group resource
*
* \param[in,out] rsc Group resource to create implicit constraints for
*/
void
pcmk__group_internal_constraints(pe_resource_t *rsc)
{
struct member_data member_data = { false, };
CRM_ASSERT(rsc != NULL);
/* Order group pseudo-actions relative to each other for restarting:
* stop group -> group is stopped -> start group -> group is started
*/
pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED,
pe_order_runnable_left);
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
pe_order_optional);
pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED,
pe_order_runnable_left);
member_data.ordered = pe__group_flag_is_set(rsc, pe__group_ordered);
member_data.colocated = pe__group_flag_is_set(rsc, pe__group_colocated);
member_data.promotable = pcmk_is_set(uber_parent(rsc)->flags, pe_rsc_promotable);
g_list_foreach(rsc->children, member_internal_constraints, &member_data);
}
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint for a group with some other resource, apply the
* score to the dependent's allowed node weights (if we are still placing
* resources) or priority (if we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent group resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
*/
static void
colocate_group_with(pe_resource_t *dependent, const pe_resource_t *primary,
const pcmk__colocation_t *colocation)
{
pe_resource_t *member = NULL;
+ if (dependent->children == NULL) {
+ return;
+ }
+
pe_rsc_trace(primary, "Processing %s (group %s with %s) for dependent",
colocation->id, dependent->id, primary->id);
if (pe__group_flag_is_set(dependent, pe__group_colocated)) {
// Colocate first member (internal colocations will handle the rest)
member = (pe_resource_t *) dependent->children->data;
member->cmds->apply_coloc_score(member, primary, colocation, true);
return;
}
if (colocation->score >= INFINITY) {
pcmk__config_err("%s: Cannot perform mandatory colocation between "
"non-colocated group and %s",
dependent->id, primary->id);
return;
}
// Colocate each member individually
for (GList *iter = dependent->children; iter != NULL; iter = iter->next) {
member = (pe_resource_t *) iter->data;
member->cmds->apply_coloc_score(member, primary, colocation, true);
}
}
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint for some other resource with a group, apply the
* score to the dependent's allowed node weights (if we are still placing
* resources) or priority (if we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary group resource in colocation
* \param[in] colocation Colocation constraint to apply
*/
static void
colocate_with_group(pe_resource_t *dependent, const pe_resource_t *primary,
const pcmk__colocation_t *colocation)
{
pe_resource_t *member = NULL;
pe_rsc_trace(primary,
"Processing colocation %s (%s with group %s) for primary",
colocation->id, dependent->id, primary->id);
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
return;
}
if (pe__group_flag_is_set(primary, pe__group_colocated)) {
if (colocation->score >= INFINITY) {
/* For mandatory colocations, the entire group must be assignable
* (and in the specified role if any), so apply the colocation based
* on the last member.
*/
member = pe__last_group_member(primary);
- } else {
+ } else if (primary->children != NULL) {
/* For optional colocations, whether the group is partially or fully
* up doesn't matter, so apply the colocation based on the first
* member.
*/
member = (pe_resource_t *) primary->children->data;
}
if (member == NULL) {
return; // Nothing to colocate with
}
member->cmds->apply_coloc_score(dependent, member, colocation, false);
return;
}
if (colocation->score >= INFINITY) {
pcmk__config_err("%s: Cannot perform mandatory colocation with"
" non-colocated group %s",
dependent->id, primary->id);
return;
}
// Colocate dependent with each member individually
for (GList *iter = primary->children; iter != NULL; iter = iter->next) {
member = (pe_resource_t *) iter->data;
member->cmds->apply_coloc_score(dependent, member, colocation, false);
}
}
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node weights (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__group_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
CRM_ASSERT((dependent != NULL) && (primary != NULL)
&& (colocation != NULL));
if (for_dependent) {
colocate_group_with(dependent, primary, colocation);
} else {
// Method should only be called for primitive dependents
CRM_ASSERT(dependent->variant == pe_native);
colocate_with_group(dependent, primary, colocation);
}
}
/*!
* \internal
* \brief Return action flags for a given group resource action
*
* \param[in,out] action Group action to get flags for
* \param[in] node If not NULL, limit effects to this node
*
* \return Flags appropriate to \p action on \p node
*/
enum pe_action_flags
pcmk__group_action_flags(pe_action_t *action, const pe_node_t *node)
{
// Default flags for a group action
enum pe_action_flags flags = pe_action_optional
|pe_action_runnable
|pe_action_pseudo;
CRM_ASSERT(action != NULL);
// Update flags considering each member's own flags for same action
for (GList *iter = action->rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
// Check whether member has the same action
enum action_tasks task = get_complex_task(member, action->task, TRUE);
const char *task_s = task2text(task);
pe_action_t *member_action = find_first_action(member->actions, NULL,
task_s, node);
if (member_action != NULL) {
enum pe_action_flags member_flags;
member_flags = member->cmds->action_flags(member_action, node);
// Group action is mandatory if any member action is
if (pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(member_flags, pe_action_optional)) {
pe_rsc_trace(action->rsc, "%s is mandatory because %s is",
action->uuid, member_action->uuid);
pe__clear_raw_action_flags(flags, "group action",
pe_action_optional);
pe__clear_action_flags(action, pe_action_optional);
}
// Group action is unrunnable if any member action is
if (!pcmk__str_eq(task_s, action->task, pcmk__str_none)
&& pcmk_is_set(flags, pe_action_runnable)
&& !pcmk_is_set(member_flags, pe_action_runnable)) {
pe_rsc_trace(action->rsc, "%s is unrunnable because %s is",
action->uuid, member_action->uuid);
pe__clear_raw_action_flags(flags, "group action",
pe_action_runnable);
pe__clear_action_flags(action, pe_action_runnable);
}
/* Group (pseudo-)actions other than stop or demote are unrunnable
* unless every member will do it.
*/
} else if ((task != stop_rsc) && (task != action_demote)) {
pe_rsc_trace(action->rsc,
"%s is not runnable because %s will not %s",
action->uuid, member->id, task_s);
pe__clear_raw_action_flags(flags, "group action",
pe_action_runnable);
}
}
return flags;
}
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions'
* flags (and runnable_before members if appropriate) as appropriate for the
* ordering. In some cases, the ordering could be disabled as well.
*
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t
pcmk__group_update_ordered_actions(pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set)
{
uint32_t changed = pcmk__updated_none;
CRM_ASSERT((first != NULL) && (then != NULL) && (data_set != NULL));
// Group method can be called only for group action as "then" action
CRM_ASSERT(then->rsc != NULL);
// Update the actions for the group itself
changed |= pcmk__update_ordered_actions(first, then, node, flags, filter,
type, data_set);
// Update the actions for each group member
for (GList *iter = then->rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
pe_action_t *member_action = find_first_action(member->actions, NULL,
then->task, node);
if (member_action != NULL) {
changed |= member->cmds->update_ordered_actions(first,
member_action, node,
flags, filter, type,
data_set);
}
}
return changed;
}
/*!
* \internal
* \brief Apply a location constraint to a group's allowed node scores
*
* \param[in,out] rsc Group resource to apply constraint to
* \param[in,out] location Location constraint to apply
*/
void
pcmk__group_apply_location(pe_resource_t *rsc, pe__location_t *location)
{
GList *node_list_orig = NULL;
GList *node_list_copy = NULL;
bool reset_scores = true;
CRM_ASSERT((rsc != NULL) && (location != NULL));
node_list_orig = location->node_list_rh;
node_list_copy = pcmk__copy_node_list(node_list_orig, true);
reset_scores = pe__group_flag_is_set(rsc, pe__group_colocated);
// Apply the constraint for the group itself (updates node scores)
pcmk__apply_location(rsc, location);
// Apply the constraint for each member
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
member->cmds->apply_location(member, location);
if (reset_scores) {
/* The first member of colocated groups needs to use the original
* node scores, but subsequent members should work on a copy, since
* the first member's scores already incorporate theirs.
*/
reset_scores = false;
location->node_list_rh = node_list_copy;
}
}
location->node_list_rh = node_list_orig;
g_list_free_full(node_list_copy, free);
}
-/*!
- * \internal
- * \brief Update nodes with scores of colocated resources' nodes
- *
- * Given a table of nodes and a group resource, update the nodes' scores with
- * the scores of the best nodes matching the attribute used for each of the
- * group's relevant colocations.
- *
- * \param[in,out] rsc Group resource to check colocations for
- * \param[in] log_id Resource ID to use in log messages
- * \param[in,out] nodes Nodes to update
- * \param[in] attr Colocation attribute (NULL to use default)
- * \param[in] factor Incorporate scores multiplied by this factor
- * \param[in] flags Bitmask of enum pcmk__coloc_select values
- *
- * \note The caller remains responsible for freeing \p *nodes.
- */
-void
-pcmk__group_add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
- GHashTable **nodes, const char *attr,
- float factor, uint32_t flags)
-{
- pe_resource_t *member = NULL;
-
- CRM_ASSERT((rsc != NULL) && (nodes != NULL));
-
- if (log_id == NULL) {
- log_id = rsc->id;
- }
-
- if (pcmk_is_set(rsc->flags, pe_rsc_merging)) {
- pe_rsc_info(rsc, "Breaking colocation dependency loop with %s at %s",
- rsc->id, log_id);
- return;
- }
- pe__set_resource_flags(rsc, pe_rsc_merging);
-
- // Add first member's colocation scores (which incorporate other members')
- member = (pe_resource_t *) rsc->children->data;
- member->cmds->add_colocated_node_scores(member, log_id, nodes, attr,
- factor, flags);
-
- // Add scores of dependent in each colocation with this group
- for (GList *iter = rsc->rsc_cons_lhs; iter != NULL; iter = iter->next) {
- pcmk__colocation_t *colocation = (pcmk__colocation_t *) iter->data;
-
- pcmk__add_colocated_node_scores(colocation->dependent, rsc->id, nodes,
- colocation->node_attribute,
- colocation->score / (float) INFINITY,
- flags);
- }
-
- pe__clear_resource_flags(rsc, pe_rsc_merging);
-}
-
// Group implementation of resource_alloc_functions_t:colocated_resources()
GList *
pcmk__group_colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs)
{
pe_resource_t *member = NULL;
CRM_ASSERT(rsc != NULL);
if (orig_rsc == NULL) {
orig_rsc = rsc;
}
if (pe__group_flag_is_set(rsc, pe__group_colocated)
|| pe_rsc_is_clone(rsc->parent)) {
/* This group has colocated members and/or is cloned -- either way,
* add every child's colocated resources to the list.
*/
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
member = (pe_resource_t *) iter->data;
colocated_rscs = member->cmds->colocated_resources(member, orig_rsc,
colocated_rscs);
}
} else if (rsc->children != NULL) {
/* This group's members are not colocated, and the group is not cloned,
* so just add the first child's colocations to the list.
*/
member = (pe_resource_t *) rsc->children->data;
colocated_rscs = member->cmds->colocated_resources(member, orig_rsc,
colocated_rscs);
}
// Now consider colocations where the group itself is specified
colocated_rscs = pcmk__colocated_resources(rsc, orig_rsc, colocated_rscs);
return colocated_rscs;
}
// Group implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__group_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization)
{
pe_resource_t *member = NULL;
CRM_ASSERT((rsc != NULL) && (orig_rsc != NULL) && (utilization != NULL));
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return;
}
pe_rsc_trace(orig_rsc, "%s: Adding group %s as colocated utilization",
orig_rsc->id, rsc->id);
if (pe__group_flag_is_set(rsc, pe__group_colocated)
|| pe_rsc_is_clone(rsc->parent)) {
// Every group member will be on same node, so sum all members
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
member = (pe_resource_t *) iter->data;
if (pcmk_is_set(member->flags, pe_rsc_provisional)
&& (g_list_find(all_rscs, member) == NULL)) {
member->cmds->add_utilization(member, orig_rsc, all_rscs,
utilization);
}
}
- } else {
+ } else if (rsc->children != NULL) {
// Just add first member's utilization
member = (pe_resource_t *) rsc->children->data;
if ((member != NULL)
&& pcmk_is_set(member->flags, pe_rsc_provisional)
&& (g_list_find(all_rscs, member) == NULL)) {
member->cmds->add_utilization(member, orig_rsc, all_rscs,
utilization);
}
}
}
// Group implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__group_shutdown_lock(pe_resource_t *rsc)
{
CRM_ASSERT(rsc != NULL);
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
member->cmds->shutdown_lock(member);
}
}
diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c
index f21a76aa17..0b822b0891 100644
--- a/lib/pacemaker/pcmk_sched_primitive.c
+++ b/lib/pacemaker/pcmk_sched_primitive.c
@@ -1,1522 +1,1521 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include "libpacemaker_private.h"
static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void promote_resource(pe_resource_t *rsc, pe_node_t *node,
bool optional);
static void assert_role_error(pe_resource_t *rsc, pe_node_t *node,
bool optional);
static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* This array lists the immediate next role when transitioning from one role
* to a target role. For example, when going from Stopped to Promoted, the
* next role is Unpromoted, because the resource must be started before it
* can be promoted. The current state then becomes Started, which is fed
* into this array again, giving a next role of Promoted.
*
* Current role Immediate next role Final target role
* ------------ ------------------- -----------------
*/
/* Unknown */ { RSC_ROLE_UNKNOWN, /* Unknown */
RSC_ROLE_STOPPED, /* Stopped */
RSC_ROLE_STOPPED, /* Started */
RSC_ROLE_STOPPED, /* Unpromoted */
RSC_ROLE_STOPPED, /* Promoted */
},
/* Stopped */ { RSC_ROLE_STOPPED, /* Unknown */
RSC_ROLE_STOPPED, /* Stopped */
RSC_ROLE_STARTED, /* Started */
RSC_ROLE_UNPROMOTED, /* Unpromoted */
RSC_ROLE_UNPROMOTED, /* Promoted */
},
/* Started */ { RSC_ROLE_STOPPED, /* Unknown */
RSC_ROLE_STOPPED, /* Stopped */
RSC_ROLE_STARTED, /* Started */
RSC_ROLE_UNPROMOTED, /* Unpromoted */
RSC_ROLE_PROMOTED, /* Promoted */
},
/* Unpromoted */ { RSC_ROLE_STOPPED, /* Unknown */
RSC_ROLE_STOPPED, /* Stopped */
RSC_ROLE_STOPPED, /* Started */
RSC_ROLE_UNPROMOTED, /* Unpromoted */
RSC_ROLE_PROMOTED, /* Promoted */
},
/* Promoted */ { RSC_ROLE_STOPPED, /* Unknown */
RSC_ROLE_UNPROMOTED, /* Stopped */
RSC_ROLE_UNPROMOTED, /* Started */
RSC_ROLE_UNPROMOTED, /* Unpromoted */
RSC_ROLE_PROMOTED, /* Promoted */
},
};
/*!
* \internal
* \brief Function to schedule actions needed for a role change
*
* \param[in,out] rsc Resource whose role is changing
* \param[in] node Node where resource will be in its next role
* \param[in] optional Whether scheduled actions should be optional
*/
typedef void (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *node,
bool optional);
static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* This array lists the function needed to transition directly from one role
* to another. NULL indicates that nothing is needed.
*
* Current role Transition function Next role
* ------------ ------------------- ----------
*/
/* Unknown */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
assert_role_error, /* Started */
assert_role_error, /* Unpromoted */
assert_role_error, /* Promoted */
},
/* Stopped */ { assert_role_error, /* Unknown */
NULL, /* Stopped */
start_resource, /* Started */
start_resource, /* Unpromoted */
assert_role_error, /* Promoted */
},
/* Started */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
NULL, /* Started */
NULL, /* Unpromoted */
promote_resource, /* Promoted */
},
/* Unpromoted */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
stop_resource, /* Started */
NULL, /* Unpromoted */
promote_resource, /* Promoted */
},
/* Promoted */ { assert_role_error, /* Unknown */
demote_resource, /* Stopped */
demote_resource, /* Started */
demote_resource, /* Unpromoted */
NULL, /* Promoted */
},
};
/*!
* \internal
* \brief Get a list of a resource's allowed nodes sorted by node weight
*
* \param[in] rsc Resource to check
*
* \return List of allowed nodes sorted by node weight
*/
static GList *
sorted_allowed_nodes(const pe_resource_t *rsc)
{
if (rsc->allowed_nodes != NULL) {
GList *nodes = g_hash_table_get_values(rsc->allowed_nodes);
if (nodes != NULL) {
return pcmk__sort_nodes(nodes, pe__current_node(rsc));
}
}
return NULL;
}
/*!
* \internal
* \brief Assign a resource to its best allowed node, if possible
*
* \param[in,out] rsc Resource to choose a node for
* \param[in] prefer If not NULL, prefer this node when all else equal
*
* \return true if \p rsc could be assigned to a node, otherwise false
*/
static bool
assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer)
{
GList *nodes = NULL;
pe_node_t *chosen = NULL;
pe_node_t *best = NULL;
bool result = false;
const pe_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);
if (prefer == NULL) {
prefer = most_free_node;
}
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
// We've already finished assignment of resources to nodes
return rsc->allocated_to != NULL;
}
// Sort allowed nodes by weight
nodes = sorted_allowed_nodes(rsc);
if (nodes != NULL) {
best = (pe_node_t *) nodes->data; // First node has best score
}
if ((prefer != NULL) && (nodes != NULL)) {
// Get the allowed node version of prefer
chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (chosen == NULL) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
pe__node_name(prefer), rsc->id);
/* Favor the preferred node as long as its weight is at least as good as
* the best allowed node's.
*
* An alternative would be to favor the preferred node even if the best
* node is better, when the best node's weight is less than INFINITY.
*/
} else if (chosen->weight < best->weight) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
pe__node_name(chosen), rsc->id);
chosen = NULL;
} else if (!pcmk__node_available(chosen, true, false)) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
pe__node_name(chosen), rsc->id);
chosen = NULL;
} else {
pe_rsc_trace(rsc,
"Chose preferred node %s for %s (ignoring %d candidates)",
pe__node_name(chosen), rsc->id, g_list_length(nodes));
}
}
if ((chosen == NULL) && (best != NULL)) {
/* Either there is no preferred node, or the preferred node is not
* suitable, but another node is allowed to run the resource.
*/
chosen = best;
if (!pe_rsc_is_unique_clone(rsc->parent)
&& (chosen->weight > 0) // Zero not acceptable
&& pcmk__node_available(chosen, false, false)) {
/* If the resource is already running on a node, prefer that node if
* it is just as good as the chosen node.
*
* We don't do this for unique clone instances, because
* distribute_children() has already assigned instances to their
* running nodes when appropriate, and if we get here, we don't want
* remaining unassigned instances to prefer a node that's already
* running another instance.
*/
pe_node_t *running = pe__current_node(rsc);
if (running == NULL) {
// Nothing to do
} else if (!pcmk__node_available(running, true, false)) {
pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources",
rsc->id, pe__node_name(running));
} else {
int nodes_with_best_score = 1;
for (GList *iter = nodes->next; iter; iter = iter->next) {
pe_node_t *allowed = (pe_node_t *) iter->data;
if (allowed->weight != chosen->weight) {
// The nodes are sorted by weight, so no more are equal
break;
}
if (pe__same_node(allowed, running)) {
// Scores are equal, so prefer the current node
chosen = allowed;
}
nodes_with_best_score++;
}
if (nodes_with_best_score > 1) {
do_crm_log(((chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO),
"Chose %s for %s from %d nodes with score %s",
pe__node_name(chosen), rsc->id,
nodes_with_best_score,
pcmk_readable_score(chosen->weight));
}
}
}
pe_rsc_trace(rsc, "Chose %s for %s from %d candidates",
pe__node_name(chosen), rsc->id, g_list_length(nodes));
}
result = pcmk__finalize_assignment(rsc, chosen, false);
g_list_free(nodes);
return result;
}
/*!
* \internal
* \brief Apply a "this with" colocation to a node's allowed node scores
*
* \param[in,out] data Colocation to apply
* \param[in,out] user_data Resource being assigned
*/
static void
apply_this_with(gpointer data, gpointer user_data)
{
pcmk__colocation_t *colocation = (pcmk__colocation_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
GHashTable *archive = NULL;
pe_resource_t *other = colocation->primary;
// In certain cases, we will need to revert the node scores
if ((colocation->dependent_role >= RSC_ROLE_PROMOTED)
|| ((colocation->score < 0) && (colocation->score > -INFINITY))) {
archive = pcmk__copy_node_table(rsc->allowed_nodes);
}
pe_rsc_trace(rsc,
"%s: Assigning colocation %s primary %s first"
"(score=%d role=%s)",
rsc->id, colocation->id, other->id,
colocation->score, role2text(colocation->dependent_role));
other->cmds->assign(other, NULL);
// Apply the colocation score to this resource's allowed node scores
rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
if ((archive != NULL)
&& !pcmk__any_node_available(rsc->allowed_nodes)) {
pe_rsc_info(rsc,
"%s: Reverting scores from colocation with %s "
"because no nodes allowed",
rsc->id, other->id);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = archive;
archive = NULL;
}
if (archive != NULL) {
g_hash_table_destroy(archive);
}
}
/*!
* \internal
* \brief Apply a "with this" colocation to a node's allowed node scores
*
* \param[in,out] data Colocation to apply
* \param[in,out] user_data Resource being assigned
*/
static void
apply_with_this(void *data, void *user_data)
{
pcmk__colocation_t *colocation = (pcmk__colocation_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
pe_resource_t *other = colocation->dependent;
const float factor = colocation->score / (float) INFINITY;
if (!pcmk__colocation_has_influence(colocation, NULL)) {
return;
}
pe_rsc_trace(rsc,
"%s: Incorporating attenuated %s assignment scores due "
"to colocation %s", rsc->id, other->id, colocation->id);
- other->cmds->add_colocated_node_scores(other, rsc->id,
- &rsc->allowed_nodes,
- colocation->node_attribute,
- factor, pcmk__coloc_select_active);
+ pcmk__add_colocated_node_scores(other, rsc->id, &rsc->allowed_nodes,
+ colocation->node_attribute, factor,
+ pcmk__coloc_select_active);
}
/*!
* \internal
* \brief Update a Pacemaker Remote node once its connection has been assigned
*
* \param[in] connection Connection resource that has been assigned
*/
static void
remote_connection_assigned(const pe_resource_t *connection)
{
pe_node_t *remote_node = pe_find_node(connection->cluster->nodes,
connection->id);
CRM_CHECK(remote_node != NULL, return);
if ((connection->allocated_to != NULL)
&& (connection->next_role != RSC_ROLE_STOPPED)) {
crm_trace("Pacemaker Remote node %s will be online",
remote_node->details->id);
remote_node->details->online = TRUE;
if (remote_node->details->unseen) {
// Avoid unnecessary fence, since we will attempt connection
remote_node->details->unclean = FALSE;
}
} else {
crm_trace("Pacemaker Remote node %s will be shut down "
"(%sassigned connection's next role is %s)",
remote_node->details->id,
((connection->allocated_to == NULL)? "un" : ""),
role2text(connection->next_role));
remote_node->details->shutdown = TRUE;
}
}
/*!
* \internal
* \brief Assign a primitive resource to a node
*
* \param[in,out] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*/
pe_node_t *
pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer)
{
CRM_ASSERT(rsc != NULL);
// Never assign a child without parent being assigned first
if ((rsc->parent != NULL)
&& !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "%s: Assigning parent %s first",
rsc->id, rsc->parent->id);
rsc->parent->cmds->assign(rsc->parent, prefer);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->allocated_to; // Assignment has already been done
}
// Ensure we detect assignment loops
if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
return NULL;
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
pe__show_node_weights(true, rsc, "Pre-assignment", rsc->allowed_nodes,
rsc->cluster);
g_list_foreach(rsc->rsc_cons, apply_this_with, rsc);
pe__show_node_weights(true, rsc, "Post-this-with", rsc->allowed_nodes,
rsc->cluster);
g_list_foreach(rsc->rsc_cons_lhs, apply_with_this, rsc);
if (rsc->next_role == RSC_ROLE_STOPPED) {
pe_rsc_trace(rsc,
"Banning %s from all nodes because it will be stopped",
rsc->id);
resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE,
rsc->cluster);
} else if ((rsc->next_role > rsc->role)
&& !pcmk_is_set(rsc->cluster->flags, pe_flag_have_quorum)
&& (rsc->cluster->no_quorum_policy == no_quorum_freeze)) {
crm_notice("Resource %s cannot be elevated from %s to %s due to "
"no-quorum-policy=freeze",
rsc->id, role2text(rsc->role), role2text(rsc->next_role));
pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze");
}
pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
// Unmanage resource if fencing is enabled but no device is configured
if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)
&& !pcmk_is_set(rsc->cluster->flags, pe_flag_have_stonith_resource)) {
pe__clear_resource_flags(rsc, pe_rsc_managed);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
// Unmanaged resources stay on their current node
const char *reason = NULL;
pe_node_t *assign_to = NULL;
pe__set_next_role(rsc, rsc->role, "unmanaged");
assign_to = pe__current_node(rsc);
if (assign_to == NULL) {
reason = "inactive";
} else if (rsc->role == RSC_ROLE_PROMOTED) {
reason = "promoted";
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
reason = "failed";
} else {
reason = "active";
}
pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
(assign_to? assign_to->details->uname : "no node"), reason);
pcmk__finalize_assignment(rsc, assign_to, true);
} else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) {
pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", rsc->id);
pcmk__finalize_assignment(rsc, NULL, true);
} else if (pcmk_is_set(rsc->flags, pe_rsc_provisional)
&& assign_best_node(rsc, prefer)) {
// Assignment successful
} else if (rsc->allocated_to == NULL) {
if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
} else if (rsc->running_on != NULL) {
pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
}
} else {
pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id,
pe__node_name(rsc->allocated_to));
}
pe__clear_resource_flags(rsc, pe_rsc_allocating);
if (rsc->is_remote_node) {
remote_connection_assigned(rsc);
}
return rsc->allocated_to;
}
/*!
* \internal
* \brief Schedule actions to bring resource down and back to current role
*
* \param[in,out] rsc Resource to restart
* \param[in] current Node that resource should be brought down on
* \param[in] need_stop Whether the resource must be stopped
* \param[in] need_promote Whether the resource must be promoted
*
* \return Role that resource would have after scheduled actions are taken
*/
static void
schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
bool need_stop, bool need_promote)
{
enum rsc_role_e role = rsc->role;
enum rsc_role_e next_role;
rsc_transition_fn fn = NULL;
pe__set_resource_flags(rsc, pe_rsc_restarting);
// Bring resource down to a stop on its current node
while (role != RSC_ROLE_STOPPED) {
next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
(need_stop? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, current, !need_stop);
role = next_role;
}
// Bring resource up to its next role on its next node
while ((rsc->role <= rsc->next_role) && (role != rsc->role)
&& !pcmk_is_set(rsc->flags, pe_rsc_block)) {
bool required = need_stop;
next_role = rsc_state_matrix[role][rsc->role];
if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
required = true;
}
pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
(required? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, rsc->allocated_to, !required);
role = next_role;
}
pe__clear_resource_flags(rsc, pe_rsc_restarting);
}
/*!
* \internal
* \brief If a resource's next role is not explicitly specified, set a default
*
* \param[in,out] rsc Resource to set next role for
*
* \return "explicit" if next role was explicitly set, otherwise "implicit"
*/
static const char *
set_default_next_role(pe_resource_t *rsc)
{
if (rsc->next_role != RSC_ROLE_UNKNOWN) {
return "explicit";
}
if (rsc->allocated_to == NULL) {
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "assignment");
} else {
pe__set_next_role(rsc, RSC_ROLE_STARTED, "assignment");
}
return "implicit";
}
/*!
* \internal
* \brief Create an action to represent an already pending start
*
* \param[in,out] rsc Resource to create start action for
*/
static void
create_pending_start(pe_resource_t *rsc)
{
pe_action_t *start = NULL;
pe_rsc_trace(rsc,
"Creating action for %s to represent already pending start",
rsc->id);
start = start_action(rsc, rsc->allocated_to, TRUE);
pe__set_action_flags(start, pe_action_print_always);
}
/*!
* \internal
* \brief Schedule actions needed to take a resource to its next role
*
* \param[in,out] rsc Resource to schedule actions for
*/
static void
schedule_role_transition_actions(pe_resource_t *rsc)
{
enum rsc_role_e role = rsc->role;
while (role != rsc->next_role) {
enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role];
rsc_transition_fn fn = NULL;
pe_rsc_trace(rsc,
"Creating action to take %s from %s to %s (ending at %s)",
rsc->id, role2text(role), role2text(next_role),
role2text(rsc->next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, rsc->allocated_to, false);
role = next_role;
}
}
/*!
* \internal
* \brief Create all actions needed for a given primitive resource
*
* \param[in,out] rsc Primitive resource to create actions for
*/
void
pcmk__primitive_create_actions(pe_resource_t *rsc)
{
bool need_stop = false;
bool need_promote = false;
bool is_moving = false;
bool allow_migrate = false;
bool multiply_active = false;
pe_node_t *current = NULL;
unsigned int num_all_active = 0;
unsigned int num_clean_active = 0;
const char *next_role_source = NULL;
CRM_ASSERT(rsc != NULL);
next_role_source = set_default_next_role(rsc);
pe_rsc_trace(rsc,
"Creating all actions for %s transition from %s to %s "
"(%s) on %s",
rsc->id, role2text(rsc->role), role2text(rsc->next_role),
next_role_source, pe__node_name(rsc->allocated_to));
current = pe__find_active_on(rsc, &num_all_active, &num_clean_active);
g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration,
rsc);
if ((current != NULL) && (rsc->allocated_to != NULL)
&& (current->details != rsc->allocated_to->details)
&& (rsc->next_role >= RSC_ROLE_STARTED)) {
pe_rsc_trace(rsc, "Moving %s from %s to %s",
rsc->id, pe__node_name(current),
pe__node_name(rsc->allocated_to));
is_moving = true;
allow_migrate = pcmk__rsc_can_migrate(rsc, current);
// This is needed even if migrating (though I'm not sure why ...)
need_stop = true;
}
// Check whether resource is partially migrated and/or multiply active
if ((rsc->partial_migration_source != NULL)
&& (rsc->partial_migration_target != NULL)
&& allow_migrate && (num_all_active == 2)
&& pe__same_node(current, rsc->partial_migration_source)
&& pe__same_node(rsc->allocated_to, rsc->partial_migration_target)) {
/* A partial migration is in progress, and the migration target remains
* the same as when the migration began.
*/
pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
} else if ((rsc->partial_migration_source != NULL)
|| (rsc->partial_migration_target != NULL)) {
// A partial migration is in progress but can't be continued
if (num_all_active > 2) {
// The resource is migrating *and* multiply active!
crm_notice("Forcing recovery of %s because it is migrating "
"from %s to %s and possibly active elsewhere",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
} else {
// The migration source or target isn't available
crm_notice("Forcing recovery of %s because it can no longer "
"migrate from %s to %s",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
}
need_stop = true;
rsc->partial_migration_source = rsc->partial_migration_target = NULL;
allow_migrate = false;
} else if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
multiply_active = (num_all_active > 1);
} else {
/* If a resource has "requires" set to nothing or quorum, don't consider
* it active on unclean nodes (similar to how all resources behave when
* stonith-enabled is false). We can start such resources elsewhere
* before fencing completes, and if we considered the resource active on
* the failed node, we would attempt recovery for being active on
* multiple nodes.
*/
multiply_active = (num_clean_active > 1);
}
if (multiply_active) {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Resource was (possibly) incorrectly multiply active
pe_proc_err("%s resource %s might be active on %u nodes (%s)",
pcmk__s(class, "Untyped"), rsc->id, num_all_active,
recovery2text(rsc->recovery_type));
crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ"
"#Resource_is_Too_Active for more information");
switch (rsc->recovery_type) {
case recovery_stop_start:
need_stop = true;
break;
case recovery_stop_unexpected:
need_stop = true; // stop_resource() will skip expected node
pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
break;
default:
break;
}
} else {
pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
}
if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
create_pending_start(rsc);
}
if (is_moving) {
// Remaining tests are only for resources staying where they are
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
if (pcmk_is_set(rsc->flags, pe_rsc_stop)) {
need_stop = true;
pe_rsc_trace(rsc, "Recovering %s", rsc->id);
} else {
pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
if (rsc->next_role == RSC_ROLE_PROMOTED) {
need_promote = true;
}
}
} else if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
need_stop = true;
} else if ((rsc->role > RSC_ROLE_STARTED) && (current != NULL)
&& (rsc->allocated_to != NULL)) {
pe_action_t *start = NULL;
pe_rsc_trace(rsc, "Creating start action for promoted resource %s",
rsc->id);
start = start_action(rsc, rsc->allocated_to, TRUE);
if (!pcmk_is_set(start->flags, pe_action_optional)) {
// Recovery of a promoted resource
pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
need_stop = true;
}
}
// Create any actions needed to bring resource down and back up to same role
schedule_restart_actions(rsc, current, need_stop, need_promote);
// Create any actions needed to take resource from this role to the next
schedule_role_transition_actions(rsc);
pcmk__create_recurring_actions(rsc);
if (allow_migrate) {
pcmk__create_migration_actions(rsc, current);
}
}
/*!
* \internal
* \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
*
* \param[in] rsc Resource to check
*/
static void
rsc_avoids_remote_nodes(const pe_resource_t *rsc)
{
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (node->details->remote_rsc != NULL) {
node->weight = -INFINITY;
}
}
}
/*!
* \internal
* \brief Return allowed nodes as (possibly sorted) list
*
* Convert a resource's hash table of allowed nodes to a list. If printing to
* stdout, sort the list, to keep action ID numbers consistent for regression
* test output (while avoiding the performance hit on a live cluster).
*
* \param[in] rsc Resource to check for allowed nodes
*
* \return List of resource's allowed nodes
* \note Callers should take care not to rely on the list being sorted.
*/
static GList *
allowed_nodes_as_list(const pe_resource_t *rsc)
{
GList *allowed_nodes = NULL;
if (rsc->allowed_nodes) {
allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
}
if (!pcmk__is_daemon) {
allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
}
return allowed_nodes;
}
/*!
* \internal
* \brief Create implicit constraints needed for a primitive resource
*
* \param[in,out] rsc Primitive resource to create implicit constraints for
*/
void
pcmk__primitive_internal_constraints(pe_resource_t *rsc)
{
pe_resource_t *top = NULL;
GList *allowed_nodes = NULL;
bool check_unfencing = false;
bool check_utilization = false;
CRM_ASSERT(rsc != NULL);
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc,
"Skipping implicit constraints for unmanaged resource %s",
rsc->id);
return;
}
top = uber_parent(rsc);
// Whether resource requires unfencing
check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device)
&& pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing)
&& pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing);
// Whether a non-default placement strategy is used
check_utilization = (g_hash_table_size(rsc->utilization) > 0)
&& !pcmk__str_eq(rsc->cluster->placement_strategy,
"default", pcmk__str_casei);
// Order stops before starts (i.e. restart)
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
pe_order_optional|pe_order_implies_then|pe_order_restart,
rsc->cluster);
// Promotable ordering: demote before stop, start before promote
if (pcmk_is_set(top->flags, pe_rsc_promotable)
|| (rsc->role > RSC_ROLE_UNPROMOTED)) {
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
pe_order_promoted_implies_first, rsc->cluster);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL,
pe_order_runnable_left, rsc->cluster);
}
// Don't clear resource history if probing on same node
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0),
NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0),
NULL, pe_order_same_node|pe_order_then_cancels_first,
rsc->cluster);
// Certain checks need allowed nodes
if (check_unfencing || check_utilization || (rsc->container != NULL)) {
allowed_nodes = allowed_nodes_as_list(rsc);
}
if (check_unfencing) {
g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
}
if (check_utilization) {
pcmk__create_utilization_constraints(rsc, allowed_nodes);
}
if (rsc->container != NULL) {
pe_resource_t *remote_rsc = NULL;
if (rsc->is_remote_node) {
// rsc is the implicit remote connection for a guest or bundle node
/* Guest resources are not allowed to run on Pacemaker Remote nodes,
* to avoid nesting remotes. However, bundles are allowed.
*/
if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
rsc_avoids_remote_nodes(rsc->container);
}
/* If someone cleans up a guest or bundle node's container, we will
* likely schedule a (re-)probe of the container and recovery of the
* connection. Order the connection stop after the container probe,
* so that if we detect the container running, we will trigger a new
* transition and avoid the unnecessary recovery.
*/
pcmk__order_resource_actions(rsc->container, RSC_STATUS, rsc,
RSC_STOP, pe_order_optional);
/* A user can specify that a resource must start on a Pacemaker Remote
* node by explicitly configuring it with the container=NODENAME
* meta-attribute. This is of questionable merit, since location
* constraints can accomplish the same thing. But we support it, so here
* we check whether a resource (that is not itself a remote connection)
* has container set to a remote node or guest node resource.
*/
} else if (rsc->container->is_remote_node) {
remote_rsc = rsc->container;
} else {
remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
rsc->container);
}
if (remote_rsc != NULL) {
/* Force the resource on the Pacemaker Remote node instead of
* colocating the resource with the container resource.
*/
for (GList *item = allowed_nodes; item; item = item->next) {
pe_node_t *node = item->data;
if (node->details->remote_rsc != remote_rsc) {
node->weight = -INFINITY;
}
}
} else {
/* This resource is either a filler for a container that does NOT
* represent a Pacemaker Remote node, or a Pacemaker Remote
* connection resource for a guest node or bundle.
*/
int score;
crm_trace("Order and colocate %s relative to its container %s",
rsc->id, rsc->container->id);
pcmk__new_ordering(rsc->container,
pcmk__op_key(rsc->container->id, RSC_START, 0),
NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0),
NULL,
pe_order_implies_then|pe_order_runnable_left,
rsc->cluster);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
rsc->container,
pcmk__op_key(rsc->container->id, RSC_STOP, 0),
NULL, pe_order_implies_first, rsc->cluster);
if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
score = 10000; /* Highly preferred but not essential */
} else {
score = INFINITY; /* Force them to run on the same host */
}
pcmk__new_colocation("resource-with-container", NULL, score, rsc,
rsc->container, NULL, NULL, true,
rsc->cluster);
}
}
if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) {
/* Remote connections and fencing devices are not allowed to run on
* Pacemaker Remote nodes
*/
rsc_avoids_remote_nodes(rsc);
}
g_list_free(allowed_nodes);
}
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node weights (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
enum pcmk__coloc_affects filter_results;
CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
return);
if (for_dependent) {
// Always process on behalf of primary resource
primary->cmds->apply_coloc_score(dependent, primary, colocation, false);
return;
}
filter_results = pcmk__colocation_affects(dependent, primary, colocation,
false);
pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
((colocation->score > 0)? "Colocating" : "Anti-colocating"),
dependent->id, primary->id, colocation->id, colocation->score,
filter_results);
switch (filter_results) {
case pcmk__coloc_affects_role:
pcmk__apply_coloc_to_priority(dependent, primary, colocation);
break;
case pcmk__coloc_affects_location:
pcmk__apply_coloc_to_weights(dependent, primary, colocation);
break;
default: // pcmk__coloc_affects_nothing
return;
}
}
/*!
* \internal
* \brief Return action flags for a given primitive resource action
*
* \param[in,out] action Action to get flags for
* \param[in] node If not NULL, limit effects to this node (ignored)
*
* \return Flags appropriate to \p action on \p node
*/
enum pe_action_flags
pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node)
{
CRM_ASSERT(action != NULL);
return action->flags;
}
/*!
* \internal
* \brief Check whether a node is a multiply active resource's expected node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p rsc is multiply active with multiple-active set to
* stop_unexpected, and \p node is the node where it will remain active
* \note This assumes that the resource's next role cannot be changed to stopped
* after this is called, which should be reasonable if status has already
* been unpacked and resources have been assigned to nodes.
*/
static bool
is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
{
return pcmk_all_flags_set(rsc->flags,
pe_rsc_stop_unexpected|pe_rsc_restarting)
&& (rsc->next_role > RSC_ROLE_STOPPED)
&& pe__same_node(rsc->allocated_to, node);
}
/*!
* \internal
* \brief Schedule actions needed to stop a resource wherever it is active
*
* \param[in,out] rsc Resource being stopped
* \param[in] node Node where resource is being stopped (ignored)
* \param[in] optional Whether actions should be optional
*/
static void
stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *current = (pe_node_t *) iter->data;
pe_action_t *stop = NULL;
if (is_expected_node(rsc, current)) {
/* We are scheduling restart actions for a multiply active resource
* with multiple-active=stop_unexpected, and this is where it should
* not be stopped.
*/
pe_rsc_trace(rsc,
"Skipping stop of multiply active resource %s "
"on expected node %s",
rsc->id, pe__node_name(current));
continue;
}
if (rsc->partial_migration_target != NULL) {
// Continue migration if node originally was and remains target
if (pe__same_node(current, rsc->partial_migration_target)
&& pe__same_node(current, rsc->allocated_to)) {
pe_rsc_trace(rsc,
"Skipping stop of %s on %s "
"because partial migration there will continue",
rsc->id, pe__node_name(current));
continue;
} else {
pe_rsc_trace(rsc,
"Forcing stop of %s on %s "
"because migration target changed",
rsc->id, pe__node_name(current));
optional = false;
}
}
pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
rsc->id, pe__node_name(current));
stop = stop_action(rsc, current, optional);
if (rsc->allocated_to == NULL) {
pe_action_set_reason(stop, "node availability", true);
} else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting
|pe_rsc_stop_unexpected)) {
/* We are stopping a multiply active resource on a node that is
* not its expected node, and we are still scheduling restart
* actions, so the stop is for being multiply active.
*/
pe_action_set_reason(stop, "being multiply active", true);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe__clear_action_flags(stop, pe_action_runnable);
}
if (pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop)) {
pcmk__schedule_cleanup(rsc, current, optional);
}
if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) {
pe_action_t *unfence = pe_fence_op(current, "on", true, NULL, false,
rsc->cluster);
order_actions(stop, unfence, pe_order_implies_first);
if (!pcmk__node_unfenced(current)) {
pe_proc_err("Stopping %s until %s can be unfenced",
rsc->id, pe__node_name(current));
}
}
}
}
/*!
* \internal
* \brief Schedule actions needed to start a resource on a node
*
* \param[in,out] rsc Resource being started
* \param[in] node Node where resource should be started
* \param[in] optional Whether actions should be optional
*/
static void
start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
pe_action_t *start = NULL;
CRM_ASSERT(node != NULL);
pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
(optional? "optional" : "required"), rsc->id,
pe__node_name(node), node->weight);
start = start_action(rsc, node, TRUE);
pcmk__order_vs_unfence(rsc, node, start, pe_order_implies_then);
if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) {
pe__clear_action_flags(start, pe_action_optional);
}
if (is_expected_node(rsc, node)) {
/* This could be a problem if the start becomes necessary for other
* reasons later.
*/
pe_rsc_trace(rsc,
"Start of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pe__node_name(node));
pe__set_action_flags(start, pe_action_pseudo);
}
}
/*!
* \internal
* \brief Schedule actions needed to promote a resource on a node
*
* \param[in,out] rsc Resource being promoted
* \param[in] node Node where resource should be promoted
* \param[in] optional Whether actions should be optional
*/
static void
promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
GList *iter = NULL;
GList *action_list = NULL;
bool runnable = true;
CRM_ASSERT(node != NULL);
// Any start must be runnable for promotion to be runnable
action_list = pe__resource_actions(rsc, node, RSC_START, true);
for (iter = action_list; iter != NULL; iter = iter->next) {
pe_action_t *start = (pe_action_t *) iter->data;
if (!pcmk_is_set(start->flags, pe_action_runnable)) {
runnable = false;
}
}
g_list_free(action_list);
if (runnable) {
pe_action_t *promote = promote_action(rsc, node, optional);
pe_rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
(optional? "optional" : "required"), rsc->id,
pe__node_name(node));
if (is_expected_node(rsc, node)) {
/* This could be a problem if the promote becomes necessary for
* other reasons later.
*/
pe_rsc_trace(rsc,
"Promotion of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pe__node_name(node));
pe__set_action_flags(promote, pe_action_pseudo);
}
} else {
pe_rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
rsc->id, pe__node_name(node));
action_list = pe__resource_actions(rsc, node, RSC_PROMOTE, true);
for (iter = action_list; iter != NULL; iter = iter->next) {
pe_action_t *promote = (pe_action_t *) iter->data;
pe__clear_action_flags(promote, pe_action_runnable);
}
g_list_free(action_list);
}
}
/*!
* \internal
* \brief Schedule actions needed to demote a resource wherever it is active
*
* \param[in,out] rsc Resource being demoted
* \param[in] node Node where resource should be demoted (ignored)
* \param[in] optional Whether actions should be optional
*/
static void
demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
/* Since this will only be called for a primitive (possibly as an instance
* of a collective resource), the resource is multiply active if it is
* running on more than one node, so we want to demote on all of them as
* part of recovery, regardless of which one is the desired node.
*/
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *current = (pe_node_t *) iter->data;
if (is_expected_node(rsc, current)) {
pe_rsc_trace(rsc,
"Skipping demote of multiply active resource %s "
"on expected node %s",
rsc->id, pe__node_name(current));
} else {
pe_rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
(optional? "optional" : "required"), rsc->id,
pe__node_name(current));
demote_action(rsc, current, optional);
}
}
}
static void
assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
CRM_ASSERT(false);
}
/*!
* \internal
* \brief Schedule cleanup of a resource
*
* \param[in,out] rsc Resource to clean up
* \param[in] node Node to clean up on
* \param[in] optional Whether clean-up should be optional
*/
void
pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional)
{
/* If the cleanup is required, its orderings are optional, because they're
* relevant only if both actions are required. Conversely, if the cleanup is
* optional, the orderings make the then action required if the first action
* becomes required.
*/
uint32_t flag = optional? pe_order_implies_then : pe_order_optional;
CRM_CHECK((rsc != NULL) && (node != NULL), return);
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
rsc->id, pe__node_name(node));
return;
}
if (node->details->unclean || !node->details->online) {
pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
rsc->id, pe__node_name(node));
return;
}
crm_notice("Scheduling clean-up of %s on %s", rsc->id, pe__node_name(node));
delete_action(rsc, node, optional);
// stop -> clean-up -> start
pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_DELETE, flag);
pcmk__order_resource_actions(rsc, RSC_DELETE, rsc, RSC_START, flag);
}
/*!
* \internal
* \brief Add primitive meta-attributes relevant to graph actions to XML
*
* \param[in] rsc Primitive resource whose meta-attributes should be added
* \param[in,out] xml Transition graph action attributes XML to add to
*/
void
pcmk__primitive_add_graph_meta(pe_resource_t *rsc, xmlNode *xml)
{
char *name = NULL;
char *value = NULL;
const pe_resource_t *parent = NULL;
CRM_ASSERT((rsc != NULL) && (xml != NULL));
/* Clone instance numbers get set internally as meta-attributes, and are
* needed in the transition graph (for example, to tell unique clone
* instances apart).
*/
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
if (value != NULL) {
name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
crm_xml_add(xml, name, value);
free(name);
}
// Not sure if this one is really needed ...
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
if (value != NULL) {
name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
crm_xml_add(xml, name, value);
free(name);
}
/* The container meta-attribute can be set on the primitive itself or one of
* its parents (for example, a group inside a container resource), so check
* them all, and keep the highest one found.
*/
for (parent = rsc; parent != NULL; parent = parent->parent) {
if (parent->container != NULL) {
crm_xml_add(xml, CRM_META "_" XML_RSC_ATTR_CONTAINER,
parent->container->id);
}
}
/* Bundle replica children will get their external-ip set internally as a
* meta-attribute. The graph action needs it, but under a different naming
* convention than other meta-attributes.
*/
value = g_hash_table_lookup(rsc->meta, "external-ip");
if (value != NULL) {
crm_xml_add(xml, "pcmk_external_ip", value);
}
}
// Primitive implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__primitive_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization)
{
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return;
}
pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization",
orig_rsc->id, rsc->id);
pcmk__release_node_capacity(utilization, rsc);
}
/*!
* \internal
* \brief Get epoch time of node's shutdown attribute (or now if none)
*
* \param[in] node Node to check
* \param[in] data_set Cluster working set
*
* \return Epoch time corresponding to shutdown attribute if set or now if not
*/
static time_t
shutdown_time(const pe_node_t *node)
{
const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
time_t result = 0;
if (shutdown != NULL) {
long long result_ll;
if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
result = (time_t) result_ll;
}
}
return (result == 0)? get_effective_time(node->details->data_set) : result;
}
/*!
* \internal
* \brief Ban a resource from a node if it's not locked to the node
*
* \param[in] data Node to check
* \param[in] user_data Resource to check
*/
static void
ban_if_not_locked(gpointer data, gpointer user_data)
{
pe_node_t *node = (pe_node_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
if (strcmp(node->details->uname, rsc->lock_node->details->uname) != 0) {
resource_location(rsc, node, -CRM_SCORE_INFINITY,
XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster);
}
}
// Primitive implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__primitive_shutdown_lock(pe_resource_t *rsc)
{
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Fence devices and remote connections can't be locked
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
|| pe__resource_is_remote_conn(rsc, rsc->cluster)) {
return;
}
if (rsc->lock_node != NULL) {
// The lock was obtained from resource history
if (rsc->running_on != NULL) {
/* The resource was started elsewhere even though it is now
* considered locked. This shouldn't be possible, but as a
* failsafe, we don't want to disturb the resource now.
*/
pe_rsc_info(rsc,
"Cancelling shutdown lock because %s is already active",
rsc->id);
pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster);
rsc->lock_node = NULL;
rsc->lock_time = 0;
}
// Only a resource active on exactly one node can be locked
} else if (pcmk__list_of_1(rsc->running_on)) {
pe_node_t *node = rsc->running_on->data;
if (node->details->shutdown) {
if (node->details->unclean) {
pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
rsc->id, pe__node_name(node));
} else {
rsc->lock_node = node;
rsc->lock_time = shutdown_time(node);
}
}
}
if (rsc->lock_node == NULL) {
// No lock needed
return;
}
if (rsc->cluster->shutdown_lock > 0) {
time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
rsc->id, pe__node_name(rsc->lock_node),
(long long) lock_expiration);
pe__update_recheck_time(++lock_expiration, rsc->cluster);
} else {
pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
rsc->id, pe__node_name(rsc->lock_node));
}
// If resource is locked to one node, ban it from all other nodes
g_list_foreach(rsc->cluster->nodes, ban_if_not_locked, rsc);
}
diff --git a/lib/pacemaker/pcmk_sched_promotable.c b/lib/pacemaker/pcmk_sched_promotable.c
index cefb7cd24a..5186a2be79 100644
--- a/lib/pacemaker/pcmk_sched_promotable.c
+++ b/lib/pacemaker/pcmk_sched_promotable.c
@@ -1,1268 +1,1264 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Add implicit promotion ordering for a promotable instance
*
* \param[in] clone Clone resource
* \param[in] child Instance of \p clone being ordered
* \param[in] last Previous instance ordered (NULL if \p child is first)
*/
static void
order_instance_promotion(pe_resource_t *clone, pe_resource_t *child,
pe_resource_t *last)
{
// "Promote clone" -> promote instance -> "clone promoted"
pcmk__order_resource_actions(clone, RSC_PROMOTE, child, RSC_PROMOTE,
pe_order_optional);
pcmk__order_resource_actions(child, RSC_PROMOTE, clone, RSC_PROMOTED,
pe_order_optional);
// If clone is ordered, order this instance relative to last
if ((last != NULL) && pe__clone_is_ordered(clone)) {
pcmk__order_resource_actions(last, RSC_PROMOTE, child, RSC_PROMOTE,
pe_order_optional);
}
}
/*!
* \internal
* \brief Add implicit demotion ordering for a promotable instance
*
* \param[in] clone Clone resource
* \param[in] child Instance of \p clone being ordered
* \param[in] last Previous instance ordered (NULL if \p child is first)
*/
static void
order_instance_demotion(pe_resource_t *clone, pe_resource_t *child,
pe_resource_t *last)
{
// "Demote clone" -> demote instance -> "clone demoted"
pcmk__order_resource_actions(clone, RSC_DEMOTE, child, RSC_DEMOTE,
pe_order_implies_first_printed);
pcmk__order_resource_actions(child, RSC_DEMOTE, clone, RSC_DEMOTED,
pe_order_implies_then_printed);
// If clone is ordered, order this instance relative to last
if ((last != NULL) && pe__clone_is_ordered(clone)) {
pcmk__order_resource_actions(child, RSC_DEMOTE, last, RSC_DEMOTE,
pe_order_optional);
}
}
/*!
* \internal
* \brief Check whether an instance will be promoted or demoted
*
* \param[in] rsc Instance to check
* \param[in] demoting If \p rsc will be demoted, this will be set to true
* \param[in] promoting If \p rsc will be promoted, this will be set to true
*/
static void
check_for_role_change(pe_resource_t *rsc, bool *demoting, bool *promoting)
{
GList *iter = NULL;
// If this is a cloned group, check group members recursively
if (rsc->children != NULL) {
for (iter = rsc->children; iter != NULL; iter = iter->next) {
check_for_role_change((pe_resource_t *) iter->data,
demoting, promoting);
}
return;
}
for (iter = rsc->actions; iter != NULL; iter = iter->next) {
pe_action_t *action = (pe_action_t *) iter->data;
if (*promoting && *demoting) {
return;
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
continue;
} else if (pcmk__str_eq(RSC_DEMOTE, action->task, pcmk__str_none)) {
*demoting = true;
} else if (pcmk__str_eq(RSC_PROMOTE, action->task, pcmk__str_none)) {
*promoting = true;
}
}
}
/*!
* \internal
* \brief Add promoted-role location constraint scores to an instance's priority
*
* Adjust a promotable clone instance's promotion priority by the scores of any
* location constraints in a list that are both limited to the promoted role and
* for the node where the instance will be placed.
*
* \param[in] child Promotable clone instance
* \param[in] location_constraints List of location constraints to apply
* \param[in] chosen Node where \p child will be placed
*/
static void
apply_promoted_locations(pe_resource_t *child, GList *location_constraints,
pe_node_t *chosen)
{
for (GList *iter = location_constraints; iter; iter = iter->next) {
pe__location_t *location = iter->data;
pe_node_t *weighted_node = NULL;
if (location->role_filter == RSC_ROLE_PROMOTED) {
weighted_node = pe_find_node_id(location->node_list_rh,
chosen->details->id);
}
if (weighted_node != NULL) {
int new_priority = pcmk__add_scores(child->priority,
weighted_node->weight);
pe_rsc_trace(child,
"Applying location %s to %s promotion priority on %s: "
"%d + %d = %d",
location->id, child->id, pe__node_name(weighted_node),
child->priority, weighted_node->weight, new_priority);
child->priority = new_priority;
}
}
}
/*!
* \internal
* \brief Get the node that an instance will be promoted on
*
* \param[in] rsc Promotable clone instance to check
*
* \return Node that \p rsc will be promoted on, or NULL if none
*/
static pe_node_t *
node_to_be_promoted_on(pe_resource_t *rsc)
{
pe_node_t *node = NULL;
pe_node_t *local_node = NULL;
pe_resource_t *parent = uber_parent(rsc);
// If this is a cloned group, bail if any group member can't be promoted
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
if (node_to_be_promoted_on(child) == NULL) {
pe_rsc_trace(rsc,
"%s can't be promoted because member %s can't",
rsc->id, child->id);
return NULL;
}
}
node = rsc->fns->location(rsc, NULL, FALSE);
if (node == NULL) {
pe_rsc_trace(rsc, "%s can't be promoted because it won't be active",
rsc->id);
return NULL;
} else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
if (rsc->fns->state(rsc, TRUE) == RSC_ROLE_PROMOTED) {
crm_notice("Unmanaged instance %s will be left promoted on %s",
rsc->id, pe__node_name(node));
} else {
pe_rsc_trace(rsc, "%s can't be promoted because it is unmanaged",
rsc->id);
return NULL;
}
} else if (rsc->priority < 0) {
pe_rsc_trace(rsc,
"%s can't be promoted because its promotion priority %d "
"is negative",
rsc->id, rsc->priority);
return NULL;
} else if (!pcmk__node_available(node, false, true)) {
pe_rsc_trace(rsc, "%s can't be promoted because %s can't run resources",
rsc->id, pe__node_name(node));
return NULL;
}
local_node = pe_hash_table_lookup(parent->allowed_nodes, node->details->id);
if (local_node == NULL) {
/* It should not be possible for the scheduler to have allocated the
* instance to a node where its parent is not allowed, but it's good to
* have a fail-safe.
*/
if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
crm_warn("%s can't be promoted because %s is not allowed on %s "
"(scheduler bug?)",
rsc->id, parent->id, pe__node_name(node));
} // else the instance is unmanaged and already promoted
return NULL;
} else if ((local_node->count >= pe__clone_promoted_node_max(parent))
&& pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc,
"%s can't be promoted because %s has "
"maximum promoted instances already",
rsc->id, pe__node_name(node));
return NULL;
}
return local_node;
}
/*!
* \internal
* \brief Compare two promotable clone instances by promotion priority
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a has higher promotion priority,
* a positive number if \p b has higher promotion priority,
* or 0 if promotion priorities are equal
*/
static gint
cmp_promotable_instance(gconstpointer a, gconstpointer b)
{
const pe_resource_t *rsc1 = (const pe_resource_t *) a;
const pe_resource_t *rsc2 = (const pe_resource_t *) b;
enum rsc_role_e role1 = RSC_ROLE_UNKNOWN;
enum rsc_role_e role2 = RSC_ROLE_UNKNOWN;
CRM_ASSERT((rsc1 != NULL) && (rsc2 != NULL));
// Check sort index set by pcmk__set_instance_roles()
if (rsc1->sort_index > rsc2->sort_index) {
pe_rsc_trace(rsc1,
"%s has higher promotion priority than %s "
"(sort index %d > %d)",
rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index);
return -1;
} else if (rsc1->sort_index < rsc2->sort_index) {
pe_rsc_trace(rsc1,
"%s has lower promotion priority than %s "
"(sort index %d < %d)",
rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index);
return 1;
}
// If those are the same, prefer instance whose current role is higher
role1 = rsc1->fns->state(rsc1, TRUE);
role2 = rsc2->fns->state(rsc2, TRUE);
if (role1 > role2) {
pe_rsc_trace(rsc1,
"%s has higher promotion priority than %s "
"(higher current role)",
rsc1->id, rsc2->id);
return -1;
} else if (role1 < role2) {
pe_rsc_trace(rsc1,
"%s has lower promotion priority than %s "
"(lower current role)",
rsc1->id, rsc2->id);
return 1;
}
// Finally, do normal clone instance sorting
return pcmk__cmp_instance(a, b);
}
/*!
* \internal
* \brief Add a promotable clone instance's sort index to its node's weight
*
* Add a promotable clone instance's sort index (which sums its promotion
* preferences and scores of relevant location constraints for the promoted
* role) to the node weight of the instance's allocated node.
*
* \param[in] data Promotable clone instance
* \param[in] user_data Clone parent of \p data
*/
static void
add_sort_index_to_node_weight(gpointer data, gpointer user_data)
{
pe_resource_t *child = (pe_resource_t *) data;
pe_resource_t *clone = (pe_resource_t *) user_data;
pe_node_t *node = NULL;
pe_node_t *chosen = NULL;
if (child->sort_index < 0) {
pe_rsc_trace(clone, "Not adding sort index of %s: negative", child->id);
return;
}
chosen = child->fns->location(child, NULL, FALSE);
if (chosen == NULL) {
pe_rsc_trace(clone, "Not adding sort index of %s: inactive", child->id);
return;
}
node = (pe_node_t *) pe_hash_table_lookup(clone->allowed_nodes,
chosen->details->id);
CRM_ASSERT(node != NULL);
pe_rsc_trace(clone, "Adding sort index %s of %s to weight for %s",
pcmk_readable_score(child->sort_index), child->id,
pe__node_name(node));
node->weight = pcmk__add_scores(child->sort_index, node->weight);
}
/*!
* \internal
* \brief Apply colocation to dependent's node weights if for promoted role
*
* \param[in] data Colocation constraint to apply
* \param[in] user_data Promotable clone that is constraint's dependent
*/
static void
apply_coloc_to_dependent(gpointer data, gpointer user_data)
{
pcmk__colocation_t *constraint = (pcmk__colocation_t *) data;
pe_resource_t *clone = (pe_resource_t *) user_data;
pe_resource_t *primary = constraint->primary;
uint32_t flags = pcmk__coloc_select_default;
float factor = constraint->score / (float) INFINITY;
if (constraint->dependent_role != RSC_ROLE_PROMOTED) {
return;
}
if (constraint->score < INFINITY) {
flags = pcmk__coloc_select_active;
}
pe_rsc_trace(clone, "Applying colocation %s (promoted %s with %s) @%s",
constraint->id, constraint->dependent->id,
constraint->primary->id,
pcmk_readable_score(constraint->score));
- primary->cmds->add_colocated_node_scores(primary, clone->id,
- &clone->allowed_nodes,
- constraint->node_attribute,
- factor, flags);
+ pcmk__add_colocated_node_scores(primary, clone->id, &clone->allowed_nodes,
+ constraint->node_attribute, factor, flags);
}
/*!
* \internal
* \brief Apply colocation to primary's node weights if for promoted role
*
* \param[in] data Colocation constraint to apply
* \param[in] user_data Promotable clone that is constraint's primary
*/
static void
apply_coloc_to_primary(gpointer data, gpointer user_data)
{
pcmk__colocation_t *constraint = (pcmk__colocation_t *) data;
pe_resource_t *clone = (pe_resource_t *) user_data;
pe_resource_t *dependent = constraint->dependent;
const float factor = constraint->score / (float) INFINITY;
const uint32_t flags = pcmk__coloc_select_active
|pcmk__coloc_select_nonnegative;
if ((constraint->primary_role != RSC_ROLE_PROMOTED)
|| !pcmk__colocation_has_influence(constraint, NULL)) {
return;
}
pe_rsc_trace(clone, "Applying colocation %s (%s with promoted %s) @%s",
constraint->id, constraint->dependent->id,
constraint->primary->id,
pcmk_readable_score(constraint->score));
- dependent->cmds->add_colocated_node_scores(dependent, clone->id,
- &clone->allowed_nodes,
- constraint->node_attribute,
- factor, flags);
+ pcmk__add_colocated_node_scores(dependent, clone->id, &clone->allowed_nodes,
+ constraint->node_attribute, factor, flags);
}
/*!
* \internal
* \brief Set clone instance's sort index to its node's weight
*
* \param[in] data Promotable clone instance
* \param[in] user_data Parent clone of \p data
*/
static void
set_sort_index_to_node_weight(gpointer data, gpointer user_data)
{
pe_resource_t *child = (pe_resource_t *) data;
pe_resource_t *clone = (pe_resource_t *) user_data;
pe_node_t *chosen = child->fns->location(child, NULL, FALSE);
if (!pcmk_is_set(child->flags, pe_rsc_managed)
&& (child->next_role == RSC_ROLE_PROMOTED)) {
child->sort_index = INFINITY;
pe_rsc_trace(clone,
"Final sort index for %s is INFINITY (unmanaged promoted)",
child->id);
} else if ((chosen == NULL) || (child->sort_index < 0)) {
pe_rsc_trace(clone,
"Final sort index for %s is %d (ignoring node weight)",
child->id, child->sort_index);
} else {
pe_node_t *node = NULL;
node = (pe_node_t *) pe_hash_table_lookup(clone->allowed_nodes,
chosen->details->id);
CRM_ASSERT(node != NULL);
child->sort_index = node->weight;
pe_rsc_trace(clone,
"Merging weights for %s: final sort index for %s is %d",
clone->id, child->id, child->sort_index);
}
}
/*!
* \internal
* \brief Sort a promotable clone's instances by descending promotion priority
*
* \param[in] clone Promotable clone to sort
*/
static void
sort_promotable_instances(pe_resource_t *clone)
{
if (pe__set_clone_flag(clone, pe__clone_promotion_constrained)
== pcmk_rc_already) {
return;
}
pe__set_resource_flags(clone, pe_rsc_merging);
for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
pe_rsc_trace(clone,
"Merging weights for %s: initial sort index for %s is %d",
clone->id, child->id, child->sort_index);
}
pe__show_node_weights(true, clone, "Before", clone->allowed_nodes,
clone->cluster);
g_list_foreach(clone->children, add_sort_index_to_node_weight, clone);
g_list_foreach(clone->rsc_cons, apply_coloc_to_dependent, clone);
g_list_foreach(clone->rsc_cons_lhs, apply_coloc_to_primary, clone);
// Ban resource from all nodes if it needs a ticket but doesn't have it
pcmk__require_promotion_tickets(clone);
pe__show_node_weights(true, clone, "After", clone->allowed_nodes,
clone->cluster);
// Reset sort indexes to final node weights
g_list_foreach(clone->children, set_sort_index_to_node_weight, clone);
// Finally, sort instances in descending order of promotion priority
clone->children = g_list_sort(clone->children, cmp_promotable_instance);
pe__clear_resource_flags(clone, pe_rsc_merging);
}
/*!
* \internal
* \brief Find the active instance (if any) of an anonymous clone on a node
*
* \param[in] clone Anonymous clone to check
* \param[in] id Instance ID (without instance number) to check
* \param[in] node Node to check
*
* \return
*/
static pe_resource_t *
find_active_anon_instance(pe_resource_t *clone, const char *id,
const pe_node_t *node)
{
for (GList *iter = clone->children; iter; iter = iter->next) {
pe_resource_t *child = iter->data;
pe_resource_t *active = NULL;
// Use ->find_rsc() in case this is a cloned group
active = clone->fns->find_rsc(child, id, node,
pe_find_clone|pe_find_current);
if (active != NULL) {
return active;
}
}
return NULL;
}
/*
* \brief Check whether an anonymous clone instance is known on a node
*
* \param[in] clone Anonymous clone to check
* \param[in] id Instance ID (without instance number) to check
* \param[in] node Node to check
*
* \return true if \p id instance of \p clone is known on \p node,
* otherwise false
*/
static bool
anonymous_known_on(const pe_resource_t *clone, const char *id,
const pe_node_t *node)
{
for (GList *iter = clone->children; iter; iter = iter->next) {
pe_resource_t *child = iter->data;
/* Use ->find_rsc() because this might be a cloned group, and knowing
* that other members of the group are known here implies nothing.
*/
child = clone->fns->find_rsc(child, id, NULL, pe_find_clone);
CRM_LOG_ASSERT(child != NULL);
if (child != NULL) {
if (g_hash_table_lookup(child->known_on, node->details->id)) {
return true;
}
}
}
return false;
}
/*!
* \internal
* \brief Check whether a node is allowed to run a resource
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p node is allowed to run \p rsc, otherwise false
*/
static bool
is_allowed(const pe_resource_t *rsc, const pe_node_t *node)
{
pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes,
node->details->id);
return (allowed != NULL) && (allowed->weight >= 0);
}
/*!
* \brief Check whether a clone instance's promotion score should be considered
*
* \param[in] rsc Promotable clone instance to check
* \param[in] node Node where score would be applied
*
* \return true if \p rsc's promotion score should be considered on \p node,
* otherwise false
*/
static bool
promotion_score_applies(pe_resource_t *rsc, const pe_node_t *node)
{
char *id = clone_strip(rsc->id);
pe_resource_t *parent = uber_parent(rsc);
pe_resource_t *active = NULL;
const char *reason = "allowed";
// Some checks apply only to anonymous clone instances
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
// If instance is active on the node, its score definitely applies
active = find_active_anon_instance(parent, id, node);
if (active == rsc) {
reason = "active";
goto check_allowed;
}
/* If *no* instance is active on this node, this instance's score will
* count if it has been probed on this node.
*/
if ((active == NULL) && anonymous_known_on(parent, id, node)) {
reason = "probed";
goto check_allowed;
}
}
/* If this clone's status is unknown on *all* nodes (e.g. cluster startup),
* take all instances' scores into account, to make sure we use any
* permanent promotion scores.
*/
if ((rsc->running_on == NULL) && (g_hash_table_size(rsc->known_on) == 0)) {
reason = "none probed";
goto check_allowed;
}
/* Otherwise, we've probed and/or started the resource *somewhere*, so
* consider promotion scores on nodes where we know the status.
*/
if ((pe_hash_table_lookup(rsc->known_on, node->details->id) != NULL)
|| (pe_find_node_id(rsc->running_on, node->details->id) != NULL)) {
reason = "known";
} else {
pe_rsc_trace(rsc,
"Ignoring %s promotion score (for %s) on %s: not probed",
rsc->id, id, pe__node_name(node));
free(id);
return false;
}
check_allowed:
if (is_allowed(rsc, node)) {
pe_rsc_trace(rsc, "Counting %s promotion score (for %s) on %s: %s",
rsc->id, id, pe__node_name(node), reason);
free(id);
return true;
}
pe_rsc_trace(rsc, "Ignoring %s promotion score (for %s) on %s: not allowed",
rsc->id, id, pe__node_name(node));
free(id);
return false;
}
/*!
* \internal
* \brief Get the value of a promotion score node attribute
*
* \param[in] rsc Promotable clone instance to get promotion score for
* \param[in] node Node to get promotion score for
* \param[in] name Resource name to use in promotion score attribute name
*
* \return Value of promotion score node attribute for \p rsc on \p node
*/
static const char *
promotion_attr_value(pe_resource_t *rsc, const pe_node_t *node,
const char *name)
{
char *attr_name = NULL;
const char *attr_value = NULL;
CRM_CHECK((rsc != NULL) && (node != NULL) && (name != NULL), return NULL);
attr_name = pcmk_promotion_score_name(name);
attr_value = pe_node_attribute_calculated(node, attr_name, rsc);
free(attr_name);
return attr_value;
}
/*!
* \internal
* \brief Get the promotion score for a clone instance on a node
*
* \param[in] rsc Promotable clone instance to get score for
* \param[in] node Node to get score for
* \param[out] is_default If non-NULL, will be set true if no score available
*
* \return Promotion score for \p rsc on \p node (or 0 if none)
*/
static int
promotion_score(pe_resource_t *rsc, const pe_node_t *node, bool *is_default)
{
char *name = NULL;
const char *attr_value = NULL;
if (is_default != NULL) {
*is_default = true;
}
CRM_CHECK((rsc != NULL) && (node != NULL), return 0);
/* If this is an instance of a cloned group, the promotion score is the sum
* of all members' promotion scores.
*/
if (rsc->children != NULL) {
int score = 0;
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
bool child_default = false;
int child_score = promotion_score(child, node, &child_default);
if (!child_default && (is_default != NULL)) {
*is_default = false;
}
score += child_score;
}
return score;
}
if (!promotion_score_applies(rsc, node)) {
return 0;
}
/* For the promotion score attribute name, use the name the resource is
* known as in resource history, since that's what crm_attribute --promotion
* would have used.
*/
name = (rsc->clone_name == NULL)? rsc->id : rsc->clone_name;
attr_value = promotion_attr_value(rsc, node, name);
if (attr_value != NULL) {
pe_rsc_trace(rsc, "Promotion score for %s on %s = %s",
name, pe__node_name(node), pcmk__s(attr_value, "(unset)"));
} else if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
/* If we don't have any resource history yet, we won't have clone_name.
* In that case, for anonymous clones, try the resource name without
* any instance number.
*/
name = clone_strip(rsc->id);
if (strcmp(rsc->id, name) != 0) {
attr_value = promotion_attr_value(rsc, node, name);
pe_rsc_trace(rsc, "Promotion score for %s on %s (for %s) = %s",
name, pe__node_name(node), rsc->id,
pcmk__s(attr_value, "(unset)"));
}
free(name);
}
if (attr_value == NULL) {
return 0;
}
if (is_default != NULL) {
*is_default = false;
}
return char2score(attr_value);
}
/*!
* \internal
* \brief Include promotion scores in instances' node weights and priorities
*
* \param[in] rsc Promotable clone resource to update
*/
void
pcmk__add_promotion_scores(pe_resource_t *rsc)
{
if (pe__set_clone_flag(rsc, pe__clone_promotion_added) == pcmk_rc_already) {
return;
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
GHashTableIter iter;
pe_node_t *node = NULL;
int score, new_score;
g_hash_table_iter_init(&iter, child_rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (!pcmk__node_available(node, false, false)) {
/* This node will never be promoted, so don't apply the
* promotion score, as that may lead to clone shuffling.
*/
continue;
}
score = promotion_score(child_rsc, node, NULL);
if (score > 0) {
new_score = pcmk__add_scores(node->weight, score);
if (new_score != node->weight) {
pe_rsc_trace(rsc,
"Adding promotion score to preference "
"for %s on %s (%d->%d)",
child_rsc->id, pe__node_name(node),
node->weight, new_score);
node->weight = new_score;
}
}
if (score > child_rsc->priority) {
pe_rsc_trace(rsc,
"Updating %s priority to promotion score (%d->%d)",
child_rsc->id, child_rsc->priority, score);
child_rsc->priority = score;
}
}
}
}
/*!
* \internal
* \brief If a resource's current role is started, change it to unpromoted
*
* \param[in] data Resource to update
* \param[in] user_data Ignored
*/
static void
set_current_role_unpromoted(void *data, void *user_data)
{
pe_resource_t *rsc = (pe_resource_t *) data;
if (rsc->role == RSC_ROLE_STARTED) {
// Promotable clones should use unpromoted role instead of started
rsc->role = RSC_ROLE_UNPROMOTED;
}
g_list_foreach(rsc->children, set_current_role_unpromoted, NULL);
}
/*!
* \internal
* \brief Set a resource's next role to unpromoted (or stopped if unassigned)
*
* \param[in] data Resource to update
* \param[in] user_data Ignored
*/
static void
set_next_role_unpromoted(void *data, void *user_data)
{
pe_resource_t *rsc = (pe_resource_t *) data;
GList *assigned = NULL;
rsc->fns->location(rsc, &assigned, FALSE);
if (assigned == NULL) {
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "stopped instance");
} else {
pe__set_next_role(rsc, RSC_ROLE_UNPROMOTED, "unpromoted instance");
g_list_free(assigned);
}
g_list_foreach(rsc->children, set_next_role_unpromoted, NULL);
}
/*!
* \internal
* \brief Set a resource's next role to promoted if not already set
*
* \param[in] data Resource to update
* \param[in] user_data Ignored
*/
static void
set_next_role_promoted(void *data, gpointer user_data)
{
pe_resource_t *rsc = (pe_resource_t *) data;
if (rsc->next_role == RSC_ROLE_UNKNOWN) {
pe__set_next_role(rsc, RSC_ROLE_PROMOTED, "promoted instance");
}
g_list_foreach(rsc->children, set_next_role_promoted, NULL);
}
/*!
* \internal
* \brief Show instance's promotion score on node where it will be active
*
* \param[in] instance Promotable clone instance to show
*/
static void
show_promotion_score(pe_resource_t *instance)
{
pe_node_t *chosen = instance->fns->location(instance, NULL, FALSE);
if (pcmk_is_set(instance->cluster->flags, pe_flag_show_scores)
&& !pcmk__is_daemon && (instance->cluster->priv != NULL)) {
pcmk__output_t *out = instance->cluster->priv;
out->message(out, "promotion-score", instance, chosen,
pcmk_readable_score(instance->sort_index));
} else {
pe_rsc_debug(uber_parent(instance),
"%s promotion score on %s: sort=%s priority=%s",
instance->id,
((chosen == NULL)? "none" : pe__node_name(chosen)),
pcmk_readable_score(instance->sort_index),
pcmk_readable_score(instance->priority));
}
}
/*!
* \internal
* \brief Set a clone instance's promotion priority
*
* \param[in] data Promotable clone instance to update
* \param[in] user_data Instance's parent clone
*/
static void
set_instance_priority(gpointer data, gpointer user_data)
{
pe_resource_t *instance = (pe_resource_t *) data;
pe_resource_t *clone = (pe_resource_t *) user_data;
pe_node_t *chosen = NULL;
enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
GList *list = NULL;
pe_rsc_trace(clone, "Assigning priority for %s: %s", instance->id,
role2text(instance->next_role));
if (instance->fns->state(instance, TRUE) == RSC_ROLE_STARTED) {
set_current_role_unpromoted(instance, NULL);
}
// Only an instance that will be active can be promoted
chosen = instance->fns->location(instance, &list, FALSE);
if (pcmk__list_of_multiple(list)) {
pcmk__config_err("Cannot promote non-colocated child %s",
instance->id);
}
g_list_free(list);
if (chosen == NULL) {
return;
}
next_role = instance->fns->state(instance, FALSE);
switch (next_role) {
case RSC_ROLE_STARTED:
case RSC_ROLE_UNKNOWN:
// Set instance priority to its promotion score (or -1 if none)
{
bool is_default = false;
instance->priority = promotion_score(instance, chosen,
&is_default);
if (is_default) {
/*
* Default to -1 if no value is set. This allows
* instances eligible for promotion to be specified
* based solely on rsc_location constraints, but
* prevents any instance from being promoted if neither
* a constraint nor a promotion score is present
*/
instance->priority = -1;
}
}
break;
case RSC_ROLE_UNPROMOTED:
case RSC_ROLE_STOPPED:
// Instance can't be promoted
instance->priority = -INFINITY;
break;
case RSC_ROLE_PROMOTED:
// Nothing needed (re-creating actions after scheduling fencing)
break;
default:
CRM_CHECK(FALSE, crm_err("Unknown resource role %d for %s",
next_role, instance->id));
}
// Add relevant location constraint scores for promoted role
apply_promoted_locations(instance, instance->rsc_location, chosen);
apply_promoted_locations(instance, clone->rsc_location, chosen);
// Apply relevant colocations with promoted role
for (GList *iter = instance->rsc_cons; iter != NULL; iter = iter->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) iter->data;
instance->cmds->apply_coloc_score(instance, cons->primary, cons, true);
}
instance->sort_index = instance->priority;
if (next_role == RSC_ROLE_PROMOTED) {
instance->sort_index = INFINITY;
}
pe_rsc_trace(clone, "Assigning %s priority = %d",
instance->id, instance->priority);
}
/*!
* \internal
* \brief Set a promotable clone instance's role
*
* \param[in] data Promotable clone instance to update
* \param[in] user_data Pointer to count of instances chosen for promotion
*/
static void
set_instance_role(gpointer data, gpointer user_data)
{
pe_resource_t *instance = (pe_resource_t *) data;
int *count = (int *) user_data;
pe_resource_t *clone = uber_parent(instance);
pe_node_t *chosen = NULL;
show_promotion_score(instance);
if (instance->sort_index < 0) {
pe_rsc_trace(clone, "Not supposed to promote instance %s",
instance->id);
} else if ((*count < pe__clone_promoted_max(instance))
|| !pcmk_is_set(clone->flags, pe_rsc_managed)) {
chosen = node_to_be_promoted_on(instance);
}
if (chosen == NULL) {
set_next_role_unpromoted(instance, NULL);
return;
}
if ((instance->role < RSC_ROLE_PROMOTED)
&& !pcmk_is_set(instance->cluster->flags, pe_flag_have_quorum)
&& (instance->cluster->no_quorum_policy == no_quorum_freeze)) {
crm_notice("Clone instance %s cannot be promoted without quorum",
instance->id);
set_next_role_unpromoted(instance, NULL);
return;
}
chosen->count++;
pe_rsc_info(clone, "Choosing %s (%s) on %s for promotion",
instance->id, role2text(instance->role),
pe__node_name(chosen));
set_next_role_promoted(instance, NULL);
(*count)++;
}
/*!
* \internal
* \brief Set roles for all instances of a promotable clone
*
* \param[in] clone Promotable clone resource to update
*/
void
pcmk__set_instance_roles(pe_resource_t *rsc)
{
int promoted = 0;
GHashTableIter iter;
pe_node_t *node = NULL;
// Repurpose count to track the number of promoted instances allocated
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
node->count = 0;
}
// Set instances' promotion priorities and sort by highest priority first
g_list_foreach(rsc->children, set_instance_priority, rsc);
sort_promotable_instances(rsc);
// Choose the first N eligible instances to be promoted
g_list_foreach(rsc->children, set_instance_role, &promoted);
pe_rsc_info(rsc, "%s: Promoted %d instances of a possible %d",
rsc->id, promoted, pe__clone_promoted_max(rsc));
}
/*!
*
* \internal
* \brief Create actions for promotable clone instances
*
* \param[in] clone Promotable clone to create actions for
* \param[out] any_promoting Will be set true if any instance is promoting
* \param[out] any_demoting Will be set true if any instance is demoting
*/
static void
create_promotable_instance_actions(pe_resource_t *clone,
bool *any_promoting, bool *any_demoting)
{
for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
instance->cmds->create_actions(instance);
check_for_role_change(instance, any_demoting, any_promoting);
}
}
/*!
* \internal
* \brief Reset each promotable instance's resource priority
*
* Reset the priority of each instance of a promotable clone to the clone's
* priority (after promotion actions are scheduled, when instance priorities
* were repurposed as promotion scores).
*
* \param[in] clone Promotable clone to reset
*/
static void
reset_instance_priorities(pe_resource_t *clone)
{
for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
instance->priority = clone->priority;
}
}
/*!
* \internal
* \brief Create actions specific to promotable clones
*
* \param[in] clone Promotable clone to create actions for
*/
void
pcmk__create_promotable_actions(pe_resource_t *clone)
{
bool any_promoting = false;
bool any_demoting = false;
// Create actions for each clone instance individually
create_promotable_instance_actions(clone, &any_promoting, &any_demoting);
// Create pseudo-actions for clone as a whole
pe__create_promotable_pseudo_ops(clone, any_promoting, any_demoting);
// Undo our temporary repurposing of resource priority for instances
reset_instance_priorities(clone);
}
/*!
* \internal
* \brief Create internal orderings for a promotable clone's instances
*
* \param[in] clone Promotable clone instance to order
*/
void
pcmk__order_promotable_instances(pe_resource_t *clone)
{
pe_resource_t *previous = NULL; // Needed for ordered clones
pcmk__promotable_restart_ordering(clone);
for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
// Demote before promote
pcmk__order_resource_actions(instance, RSC_DEMOTE,
instance, RSC_PROMOTE,
pe_order_optional);
order_instance_promotion(clone, instance, previous);
order_instance_demotion(clone, instance, previous);
previous = instance;
}
}
/*!
* \internal
* \brief Update dependent's allowed nodes for colocation with promotable
*
* \param[in,out] dependent Dependent resource to update
* \param[in] primary_node Node where an instance of the primary will be
* \param[in] colocation Colocation constraint to apply
*/
static void
update_dependent_allowed_nodes(pe_resource_t *dependent,
const pe_node_t *primary_node,
const pcmk__colocation_t *colocation)
{
GHashTableIter iter;
pe_node_t *node = NULL;
const char *primary_value = NULL;
const char *attr = NULL;
if (colocation->score >= INFINITY) {
return; // Colocation is mandatory, so allowed node scores don't matter
}
// Get value of primary's colocation node attribute
attr = colocation->node_attribute;
if (attr == NULL) {
attr = CRM_ATTR_UNAME;
}
primary_value = pe_node_attribute_raw(primary_node, attr);
pe_rsc_trace(colocation->primary,
"Applying %s (%s with %s on %s by %s @%d) to %s",
colocation->id, colocation->dependent->id,
colocation->primary->id, pe__node_name(primary_node), attr,
colocation->score, dependent->id);
g_hash_table_iter_init(&iter, dependent->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
const char *dependent_value = pe_node_attribute_raw(node, attr);
if (pcmk__str_eq(primary_value, dependent_value, pcmk__str_casei)) {
pe_rsc_trace(colocation->primary, "%s: %d + %d",
pe__node_name(node), node->weight, colocation->score);
node->weight = pcmk__add_scores(node->weight, colocation->score);
}
}
}
/*!
* \brief Update dependent for a colocation with a promotable clone
*
* \param[in] primary Primary resource in the colocation
* \param[in,out] dependent Dependent resource in the colocation
* \param[in] colocation Colocation constraint to apply
*/
void
pcmk__update_dependent_with_promotable(const pe_resource_t *primary,
pe_resource_t *dependent,
const pcmk__colocation_t *colocation)
{
GList *affected_nodes = NULL;
/* Build a list of all nodes where an instance of the primary will be, and
* (for optional colocations) update the dependent's allowed node scores for
* each one.
*/
for (GList *iter = primary->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
pe_node_t *node = instance->fns->location(instance, NULL, FALSE);
if (node == NULL) {
continue;
}
if (instance->fns->state(instance, FALSE) == colocation->primary_role) {
update_dependent_allowed_nodes(dependent, node, colocation);
affected_nodes = g_list_prepend(affected_nodes, node);
}
}
/* For mandatory colocations, add the primary's node weight to the
* dependent's node weight for each affected node, and ban the dependent
* from all other nodes.
*
* However, skip this for promoted-with-promoted colocations, otherwise
* inactive dependent instances can't start (in the unpromoted role).
*/
if ((colocation->score >= INFINITY)
&& ((colocation->dependent_role != RSC_ROLE_PROMOTED)
|| (colocation->primary_role != RSC_ROLE_PROMOTED))) {
pe_rsc_trace(colocation->primary,
"Applying %s (mandatory %s with %s) to %s",
colocation->id, colocation->dependent->id,
colocation->primary->id, dependent->id);
node_list_exclude(dependent->allowed_nodes, affected_nodes,
TRUE);
}
g_list_free(affected_nodes);
}
/*!
* \internal
* \brief Update dependent priority for colocation with promotable
*
* \param[in] primary Primary resource in the colocation
* \param[in,out] dependent Dependent resource in the colocation
* \param[in] colocation Colocation constraint to apply
*/
void
pcmk__update_promotable_dependent_priority(const pe_resource_t *primary,
pe_resource_t *dependent,
const pcmk__colocation_t *colocation)
{
pe_resource_t *primary_instance = NULL;
// Look for a primary instance where dependent will be
primary_instance = find_compatible_child(dependent, primary,
colocation->primary_role, FALSE);
if (primary_instance != NULL) {
// Add primary instance's priority to dependent's
int new_priority = pcmk__add_scores(dependent->priority,
colocation->score);
pe_rsc_trace(colocation->primary,
"Applying %s (%s with %s) to %s priority (%s + %s = %s)",
colocation->id, colocation->dependent->id,
colocation->primary->id, dependent->id,
pcmk_readable_score(dependent->priority),
pcmk_readable_score(colocation->score),
pcmk_readable_score(new_priority));
dependent->priority = new_priority;
} else if (colocation->score >= INFINITY) {
// Mandatory colocation, but primary won't be here
pe_rsc_trace(colocation->primary,
"Applying %s (%s with %s) to %s: can't be promoted",
colocation->id, colocation->dependent->id,
colocation->primary->id, dependent->id);
dependent->priority = -INFINITY;
}
}
diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c
index ba430b2d3d..d9d7a3eae0 100644
--- a/lib/pacemaker/pcmk_sched_resource.c
+++ b/lib/pacemaker/pcmk_sched_resource.c
@@ -1,1093 +1,1089 @@
/*
* Copyright 2014-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include "libpacemaker_private.h"
// Resource allocation methods that vary by resource variant
static resource_alloc_functions_t allocation_methods[] = {
{
pcmk__primitive_assign,
pcmk__primitive_create_actions,
pcmk__probe_rsc_on_node,
pcmk__primitive_internal_constraints,
pcmk__primitive_apply_coloc_score,
- pcmk__add_colocated_node_scores,
pcmk__colocated_resources,
pcmk__apply_location,
pcmk__primitive_action_flags,
pcmk__update_ordered_actions,
pcmk__output_resource_actions,
pcmk__add_rsc_actions_to_graph,
pcmk__primitive_add_graph_meta,
pcmk__primitive_add_utilization,
pcmk__primitive_shutdown_lock,
},
{
pcmk__group_assign,
pcmk__group_create_actions,
pcmk__probe_rsc_on_node,
pcmk__group_internal_constraints,
pcmk__group_apply_coloc_score,
- pcmk__group_add_colocated_node_scores,
pcmk__group_colocated_resources,
pcmk__group_apply_location,
pcmk__group_action_flags,
pcmk__group_update_ordered_actions,
pcmk__output_resource_actions,
pcmk__add_rsc_actions_to_graph,
pcmk__noop_add_graph_meta,
pcmk__group_add_utilization,
pcmk__group_shutdown_lock,
},
{
pcmk__clone_allocate,
clone_create_actions,
clone_create_probe,
clone_internal_constraints,
pcmk__clone_apply_coloc_score,
- pcmk__add_colocated_node_scores,
pcmk__colocated_resources,
clone_rsc_location,
clone_action_flags,
pcmk__multi_update_actions,
pcmk__output_resource_actions,
clone_expand,
clone_append_meta,
pcmk__clone_add_utilization,
pcmk__clone_shutdown_lock,
},
{
pcmk__bundle_allocate,
pcmk__bundle_create_actions,
pcmk__bundle_create_probe,
pcmk__bundle_internal_constraints,
pcmk__bundle_apply_coloc_score,
- pcmk__add_colocated_node_scores,
pcmk__colocated_resources,
pcmk__bundle_rsc_location,
pcmk__bundle_action_flags,
pcmk__multi_update_actions,
pcmk__output_bundle_actions,
pcmk__bundle_expand,
pcmk__noop_add_graph_meta,
pcmk__bundle_add_utilization,
pcmk__bundle_shutdown_lock,
}
};
/*!
* \internal
* \brief Check whether a resource's agent standard, provider, or type changed
*
* \param[in] rsc Resource to check
* \param[in] node Node needing unfencing/restart if agent changed
* \param[in] rsc_entry XML with previously known agent information
* \param[in] active_on_node Whether \p rsc is active on \p node
*
* \return true if agent for \p rsc changed, otherwise false
*/
bool
pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *rsc_entry, bool active_on_node)
{
bool changed = false;
const char *attr_list[] = {
XML_ATTR_TYPE,
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER
};
for (int i = 0; i < PCMK__NELEM(attr_list); i++) {
const char *value = crm_element_value(rsc->xml, attr_list[i]);
const char *old_value = crm_element_value(rsc_entry, attr_list[i]);
if (!pcmk__str_eq(value, old_value, pcmk__str_none)) {
changed = true;
trigger_unfencing(rsc, node, "Device definition changed", NULL,
rsc->cluster);
if (active_on_node) {
crm_notice("Forcing restart of %s on %s "
"because %s changed from '%s' to '%s'",
rsc->id, pe__node_name(node), attr_list[i],
pcmk__s(old_value, ""), pcmk__s(value, ""));
}
}
}
if (changed && active_on_node) {
// Make sure the resource is restarted
custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE,
rsc->cluster);
pe__set_resource_flags(rsc, pe_rsc_start_pending);
}
return changed;
}
/*!
* \internal
* \brief Add resource (and any matching children) to list if it matches ID
*
* \param[in] result List to add resource to
* \param[in] rsc Resource to check
* \param[in] id ID to match
*
* \return (Possibly new) head of list
*/
static GList *
add_rsc_if_matching(GList *result, pe_resource_t *rsc, const char *id)
{
if ((strcmp(rsc->id, id) == 0)
|| ((rsc->clone_name != NULL) && (strcmp(rsc->clone_name, id) == 0))) {
result = g_list_prepend(result, rsc);
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
result = add_rsc_if_matching(result, child, id);
}
return result;
}
/*!
* \internal
* \brief Find all resources matching a given ID by either ID or clone name
*
* \param[in] id Resource ID to check
* \param[in] data_set Cluster working set
*
* \return List of all resources that match \p id
* \note The caller is responsible for freeing the return value with
* g_list_free().
*/
GList *
pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set)
{
GList *result = NULL;
CRM_CHECK((id != NULL) && (data_set != NULL), return NULL);
for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
result = add_rsc_if_matching(result, (pe_resource_t *) iter->data, id);
}
return result;
}
/*!
* \internal
* \brief Set the variant-appropriate allocation methods for a resource
*
* \param[in] rsc Resource to set allocation methods for
* \param[in] ignored Only here so function can be used with g_list_foreach()
*/
static void
set_allocation_methods_for_rsc(pe_resource_t *rsc, void *ignored)
{
rsc->cmds = &allocation_methods[rsc->variant];
g_list_foreach(rsc->children, (GFunc) set_allocation_methods_for_rsc, NULL);
}
/*!
* \internal
* \brief Set the variant-appropriate allocation methods for all resources
*
* \param[in] data_set Cluster working set
*/
void
pcmk__set_allocation_methods(pe_working_set_t *data_set)
{
g_list_foreach(data_set->resources, (GFunc) set_allocation_methods_for_rsc,
NULL);
}
// Shared implementation of resource_alloc_functions_t:colocated_resources()
GList *
pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs)
{
GList *gIter = NULL;
if (orig_rsc == NULL) {
orig_rsc = rsc;
}
if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) {
return colocated_rscs;
}
pe_rsc_trace(orig_rsc, "%s is in colocation chain with %s",
rsc->id, orig_rsc->id);
colocated_rscs = g_list_append(colocated_rscs, rsc);
// Follow colocations where this resource is the dependent resource
for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_resource_t *primary = constraint->primary;
if (primary == orig_rsc) {
continue; // Break colocation loop
}
if ((constraint->score == INFINITY) &&
(pcmk__colocation_affects(rsc, primary, constraint,
true) == pcmk__coloc_affects_location)) {
colocated_rscs = primary->cmds->colocated_resources(primary,
orig_rsc,
colocated_rscs);
}
}
// Follow colocations where this resource is the primary resource
for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_resource_t *dependent = constraint->dependent;
if (dependent == orig_rsc) {
continue; // Break colocation loop
}
if (pe_rsc_is_clone(rsc) && !pe_rsc_is_clone(dependent)) {
continue; // We can't be sure whether dependent will be colocated
}
if ((constraint->score == INFINITY) &&
(pcmk__colocation_affects(dependent, rsc, constraint,
true) == pcmk__coloc_affects_location)) {
colocated_rscs = dependent->cmds->colocated_resources(dependent,
orig_rsc,
colocated_rscs);
}
}
return colocated_rscs;
}
// No-op function for variants that don't need to implement add_graph_meta()
void
pcmk__noop_add_graph_meta(pe_resource_t *rsc, xmlNode *xml)
{
}
void
pcmk__output_resource_actions(pe_resource_t *rsc)
{
pcmk__output_t *out = rsc->cluster->priv;
pe_node_t *next = NULL;
pe_node_t *current = NULL;
if (rsc->children != NULL) {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
child->cmds->output_actions(child);
}
return;
}
next = rsc->allocated_to;
if (rsc->running_on) {
current = pe__current_node(rsc);
if (rsc->role == RSC_ROLE_STOPPED) {
/* This can occur when resources are being recovered because
* the current role can change in pcmk__primitive_create_actions()
*/
rsc->role = RSC_ROLE_STARTED;
}
}
if ((current == NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
/* Don't log stopped orphans */
return;
}
out->message(out, "rsc-action", rsc, current, next);
}
/*!
* \internal
* \brief Assign a specified primitive resource to a node
*
* Assign a specified primitive resource to a specified node, if the node can
* run the resource (or unconditionally, if \p force is true). Mark the resource
* as no longer provisional. If the primitive can't be assigned (or \p chosen is
* NULL), unassign any previous assignment for it, set its next role to stopped,
* and update any existing actions scheduled for it. This is not done
* recursively for children, so it should be called only for primitives.
*
* \param[in] rsc Resource to assign
* \param[in] chosen Node to assign \p rsc to
* \param[in] force If true, assign to \p chosen even if unavailable
*
* \return true if \p rsc could be assigned, otherwise false
*
* \note Assigning a resource to the NULL node using this function is different
* from calling pcmk__unassign_resource(), in that it will also update any
* actions created for the resource.
*/
bool
pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen, bool force)
{
pcmk__output_t *out = rsc->cluster->priv;
CRM_ASSERT(rsc->variant == pe_native);
if (!force && (chosen != NULL)) {
if ((chosen->weight < 0)
// Allow the graph to assume that guest node connections will come up
|| (!pcmk__node_available(chosen, true, false)
&& !pe__is_guest_node(chosen))) {
crm_debug("All nodes for resource %s are unavailable, unclean or "
"shutting down (%s can%s run resources, with weight %d)",
rsc->id, pe__node_name(chosen),
(pcmk__node_available(chosen, true, false)? "" : "not"),
chosen->weight);
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability");
chosen = NULL;
}
}
pcmk__unassign_resource(rsc);
pe__clear_resource_flags(rsc, pe_rsc_provisional);
if (chosen == NULL) {
crm_debug("Could not allocate a node for %s", rsc->id);
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate");
for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
pe_action_t *op = (pe_action_t *) iter->data;
crm_debug("Updating %s for allocation failure", op->uuid);
if (pcmk__str_eq(op->task, RSC_STOP, pcmk__str_casei)) {
pe__clear_action_flags(op, pe_action_optional);
} else if (pcmk__str_eq(op->task, RSC_START, pcmk__str_casei)) {
pe__clear_action_flags(op, pe_action_runnable);
//pe__set_resource_flags(rsc, pe_rsc_block);
} else {
// Cancel recurring actions, unless for stopped state
const char *interval_ms_s = NULL;
const char *target_rc_s = NULL;
char *rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING);
interval_ms_s = g_hash_table_lookup(op->meta,
XML_LRM_ATTR_INTERVAL_MS);
target_rc_s = g_hash_table_lookup(op->meta,
XML_ATTR_TE_TARGET_RC);
if ((interval_ms_s != NULL)
&& !pcmk__str_eq(interval_ms_s, "0", pcmk__str_none)
&& !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) {
pe__clear_action_flags(op, pe_action_runnable);
}
free(rc_stopped);
}
}
return false;
}
crm_debug("Assigning %s to %s", rsc->id, pe__node_name(chosen));
rsc->allocated_to = pe__copy_node(chosen);
chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc,
rsc);
chosen->details->num_resources++;
chosen->count++;
pcmk__consume_node_capacity(chosen->details->utilization, rsc);
if (pcmk_is_set(rsc->cluster->flags, pe_flag_show_utilization)) {
out->message(out, "resource-util", rsc, chosen, __func__);
}
return true;
}
/*!
* \internal
* \brief Assign a specified resource (of any variant) to a node
*
* Assign a specified resource and its children (if any) to a specified node, if
* the node can run the resource (or unconditionally, if \p force is true). Mark
* the resources as no longer provisional. If the resources can't be assigned
* (or \p chosen is NULL), unassign any previous assignments, set next role to
* stopped, and update any existing actions scheduled for them.
*
* \param[in] rsc Resource to assign
* \param[in] chosen Node to assign \p rsc to
* \param[in] force If true, assign to \p chosen even if unavailable
*
* \return true if \p rsc could be assigned, otherwise false
*
* \note Assigning a resource to the NULL node using this function is different
* from calling pcmk__unassign_resource(), in that it will also update any
* actions created for the resource.
*/
bool
pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force)
{
bool changed = false;
if (rsc->children == NULL) {
if (rsc->allocated_to != NULL) {
changed = true;
}
pcmk__finalize_assignment(rsc, node, force);
} else {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
changed |= pcmk__assign_resource(child_rsc, node, force);
}
}
return changed;
}
/*!
* \internal
* \brief Remove any assignment of a specified resource to a node
*
* If a specified resource has been assigned to a node, remove that assignment
* and mark the resource as provisional again. This is not done recursively for
* children, so it should be called only for primitives.
*
* \param[in] rsc Resource to unassign
*/
void
pcmk__unassign_resource(pe_resource_t *rsc)
{
pe_node_t *old = rsc->allocated_to;
if (old == NULL) {
return;
}
crm_info("Unassigning %s from %s", rsc->id, pe__node_name(old));
pe__set_resource_flags(rsc, pe_rsc_provisional);
rsc->allocated_to = NULL;
/* We're going to free the pe_node_t, but its details member is shared and
* will remain, so update that appropriately first.
*/
old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc,
rsc);
old->details->num_resources--;
pcmk__release_node_capacity(old->details->utilization, rsc);
free(old);
}
/*!
* \internal
* \brief Check whether a resource has reached its migration threshold on a node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
* \param[out] failed If the threshold has been reached, this will be set to
* the resource that failed (possibly a parent of \p rsc)
*
* \return true if the migration threshold has been reached, false otherwise
*/
bool
pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
pe_resource_t **failed)
{
int fail_count, remaining_tries;
pe_resource_t *rsc_to_ban = rsc;
// Migration threshold of 0 means never force away
if (rsc->migration_threshold == 0) {
return false;
}
// If we're ignoring failures, also ignore the migration threshold
if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
return false;
}
// If there are no failures, there's no need to force away
fail_count = pe_get_failcount(node, rsc, NULL,
pe_fc_effective|pe_fc_fillers, NULL,
rsc->cluster);
if (fail_count <= 0) {
return false;
}
// If failed resource is anonymous clone instance, we'll force clone away
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
rsc_to_ban = uber_parent(rsc);
}
// How many more times recovery will be tried on this node
remaining_tries = rsc->migration_threshold - fail_count;
if (remaining_tries <= 0) {
crm_warn("%s cannot run on %s due to reaching migration threshold "
"(clean up resource to allow again)"
CRM_XS " failures=%d migration-threshold=%d",
rsc_to_ban->id, pe__node_name(node), fail_count,
rsc->migration_threshold);
if (failed != NULL) {
*failed = rsc_to_ban;
}
return true;
}
crm_info("%s can fail %d more time%s on "
"%s before reaching migration threshold (%d)",
rsc_to_ban->id, remaining_tries, pcmk__plural_s(remaining_tries),
pe__node_name(node), rsc->migration_threshold);
return false;
}
static void *
convert_const_pointer(const void *ptr)
{
/* Worst function ever */
return (void *)ptr;
}
/*!
* \internal
* \brief Get a node's weight
*
* \param[in] node Unweighted node to check (for node ID)
* \param[in] nodes List of weighted nodes to look for \p node in
*
* \return Node's weight, or -INFINITY if not found
*/
static int
get_node_weight(pe_node_t *node, GHashTable *nodes)
{
pe_node_t *weighted_node = NULL;
if ((node != NULL) && (nodes != NULL)) {
weighted_node = g_hash_table_lookup(nodes, node->details->id);
}
return (weighted_node == NULL)? -INFINITY : weighted_node->weight;
}
/*!
* \internal
* \brief Compare two resources according to which should be allocated first
*
* \param[in] a First resource to compare
* \param[in] b Second resource to compare
* \param[in] data Sorted list of all nodes in cluster
*
* \return -1 if \p a should be allocated before \b, 0 if they are equal,
* or +1 if \p a should be allocated after \b
*/
static gint
cmp_resources(gconstpointer a, gconstpointer b, gpointer data)
{
const pe_resource_t *resource1 = a;
const pe_resource_t *resource2 = b;
GList *nodes = (GList *) data;
int rc = 0;
int r1_weight = -INFINITY;
int r2_weight = -INFINITY;
pe_node_t *r1_node = NULL;
pe_node_t *r2_node = NULL;
GHashTable *r1_nodes = NULL;
GHashTable *r2_nodes = NULL;
const char *reason = NULL;
// Resources with highest priority should be allocated first
reason = "priority";
r1_weight = resource1->priority;
r2_weight = resource2->priority;
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
// We need nodes to make any other useful comparisons
reason = "no node list";
if (nodes == NULL) {
goto done;
}
// Calculate and log node weights
pcmk__add_colocated_node_scores(convert_const_pointer(resource1),
resource1->id, &r1_nodes, NULL, 1,
pcmk__coloc_select_this_with);
pcmk__add_colocated_node_scores(convert_const_pointer(resource2),
resource2->id, &r2_nodes, NULL, 1,
pcmk__coloc_select_this_with);
pe__show_node_weights(true, NULL, resource1->id, r1_nodes,
resource1->cluster);
pe__show_node_weights(true, NULL, resource2->id, r2_nodes,
resource2->cluster);
// The resource with highest score on its current node goes first
reason = "current location";
if (resource1->running_on != NULL) {
r1_node = pe__current_node(resource1);
}
if (resource2->running_on != NULL) {
r2_node = pe__current_node(resource2);
}
r1_weight = get_node_weight(r1_node, r1_nodes);
r2_weight = get_node_weight(r2_node, r2_nodes);
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
// Otherwise a higher weight on any node will do
reason = "score";
for (GList *iter = nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
r1_weight = get_node_weight(node, r1_nodes);
r2_weight = get_node_weight(node, r2_nodes);
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
}
done:
crm_trace("%s (%d)%s%s %c %s (%d)%s%s: %s",
resource1->id, r1_weight,
((r1_node == NULL)? "" : " on "),
((r1_node == NULL)? "" : r1_node->details->id),
((rc < 0)? '>' : ((rc > 0)? '<' : '=')),
resource2->id, r2_weight,
((r2_node == NULL)? "" : " on "),
((r2_node == NULL)? "" : r2_node->details->id),
reason);
if (r1_nodes != NULL) {
g_hash_table_destroy(r1_nodes);
}
if (r2_nodes != NULL) {
g_hash_table_destroy(r2_nodes);
}
return rc;
}
/*!
* \internal
* \brief Sort resources in the order they should be allocated to nodes
*
* \param[in] data_set Cluster working set
*/
void
pcmk__sort_resources(pe_working_set_t *data_set)
{
GList *nodes = g_list_copy(data_set->nodes);
nodes = pcmk__sort_nodes(nodes, NULL);
data_set->resources = g_list_sort_with_data(data_set->resources,
cmp_resources, nodes);
g_list_free(nodes);
}
/*!
* \internal
* \brief Create a hash table with a single node in it
*
* \param[in] node Node to copy into new table
*
* \return Newly created hash table containing a copy of \p node
* \note The caller is responsible for freeing the result with
* g_hash_table_destroy().
*/
static GHashTable *
new_node_table(pe_node_t *node)
{
GHashTable *table = pcmk__strkey_table(NULL, free);
node = pe__copy_node(node);
g_hash_table_insert(table, (gpointer) node->details->id, node);
return table;
}
/*!
* \internal
* \brief Apply a resource's parent's colocation scores to a node table
*
* \param[in] rsc Resource whose colocations should be applied
* \param[in,out] nodes Node table to apply colocations to
*/
static void
apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes)
{
GList *iter = NULL;
pcmk__colocation_t *colocation = NULL;
for (iter = rsc->parent->rsc_cons; iter != NULL; iter = iter->next) {
colocation = (pcmk__colocation_t *) iter->data;
pcmk__add_colocated_node_scores(colocation->primary, rsc->id, nodes,
colocation->node_attribute,
colocation->score / (float) INFINITY,
pcmk__coloc_select_default);
}
for (iter = rsc->parent->rsc_cons_lhs; iter != NULL; iter = iter->next) {
colocation = (pcmk__colocation_t *) iter->data;
if (!pcmk__colocation_has_influence(colocation, rsc)) {
continue;
}
pcmk__add_colocated_node_scores(colocation->dependent, rsc->id, nodes,
colocation->node_attribute,
colocation->score / (float) INFINITY,
pcmk__coloc_select_nonnegative);
}
}
/*!
* \internal
* \brief Compare clone or bundle instances based on colocation scores
*
* Determine the relative order in which two clone or bundle instances should be
* assigned to nodes, considering the scores of colocation constraints directly
* or indirectly involving them.
*
* \param[in] instance1 First instance to compare
* \param[in] instance2 Second instance to compare
*
* \return A negative number if \p instance1 should be assigned first,
* a positive number if \p instance2 should be assigned first,
* or 0 if assignment order doesn't matter
*/
static int
cmp_instance_by_colocation(const pe_resource_t *instance1,
const pe_resource_t *instance2)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
pe_node_t *current_node1 = pe__current_node(instance1);
pe_node_t *current_node2 = pe__current_node(instance2);
GHashTable *colocated_scores1 = NULL;
GHashTable *colocated_scores2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL)
&& (instance2 != NULL) && (instance2->parent != NULL)
&& (current_node1 != NULL) && (current_node2 != NULL));
// Create node tables initialized with each node
colocated_scores1 = new_node_table(current_node1);
colocated_scores2 = new_node_table(current_node2);
// Apply parental colocations
apply_parent_colocations(instance1, &colocated_scores1);
apply_parent_colocations(instance2, &colocated_scores2);
// Find original nodes again, with scores updated for colocations
node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
// Compare nodes by updated scores
if (node1->weight < node2->weight) {
crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
instance1->id, node1->weight, pe__node_name(node1),
instance2->id, node2->weight, pe__node_name(node2));
rc = 1;
} else if (node1->weight > node2->weight) {
crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
instance1->id, node1->weight, pe__node_name(node1),
instance2->id, node2->weight, pe__node_name(node2));
rc = -1;
}
g_hash_table_destroy(colocated_scores1);
g_hash_table_destroy(colocated_scores2);
return rc;
}
/*!
* \internal
* \brief Check whether a resource or any of its children are failed
*
* \param[in] rsc Resource to check
*
* \return true if \p rsc or any of its children are failed, otherwise false
*/
static bool
did_fail(const pe_resource_t * rsc)
{
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
return true;
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
if (did_fail((pe_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether a node is allowed to run a resource
*
* \param[in] rsc Resource to check
* \param[in,out] node Node to check (will be set NULL if not allowed)
*
* \return true if *node is either NULL or allowed for \p rsc, otherwise false
*/
static bool
node_is_allowed(const pe_resource_t *rsc, pe_node_t **node)
{
if (*node != NULL) {
pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes,
(*node)->details->id);
if ((allowed == NULL) || (allowed->weight < 0)) {
pe_rsc_trace(rsc, "%s: current location (%s) is unavailable",
rsc->id, pe__node_name(*node));
*node = NULL;
return false;
}
}
return true;
}
/*!
* \internal
* \brief Compare two clone or bundle instances' instance numbers
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a's instance number is lower,
* a positive number if \p b's instance number is lower,
* or 0 if their instance numbers are the same
*/
gint
pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
{
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
char *div1 = NULL;
char *div2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
// Clone numbers are after a colon, bundle numbers after a dash
div1 = strrchr(instance1->id, ':');
if (div1 == NULL) {
div1 = strrchr(instance1->id, '-');
}
div2 = strrchr(instance2->id, ':');
if (div2 == NULL) {
div2 = strrchr(instance2->id, '-');
}
CRM_ASSERT((div1 != NULL) && (div2 != NULL));
return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
}
/*!
* \internal
* \brief Compare clone or bundle instances according to assignment order
*
* Compare two clone or bundle instances according to the order they should be
* assigned to nodes, preferring (in order):
*
* - Active instance that is less multiply active
* - Instance that is not active on a disallowed node
* - Instance with higher configured priority
* - Active instance whose current node can run resources
* - Active instance whose parent is allowed on current node
* - Active instance whose current node has fewer other instances
* - Active instance
* - Failed instance
* - Instance whose colocations result in higher score on current node
* - Instance with lower ID in lexicographic order
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a should be assigned first,
* a positive number if \p b should be assigned first,
* or 0 if assignment order doesn't matter
*/
gint
pcmk__cmp_instance(gconstpointer a, gconstpointer b)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
unsigned int nnodes1 = 0;
unsigned int nnodes2 = 0;
bool can1 = true;
bool can2 = true;
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
node1 = pe__find_active_on(instance1, &nnodes1, NULL);
node2 = pe__find_active_on(instance2, &nnodes2, NULL);
/* If both instances are running and at least one is multiply
* active, prefer instance that's running on fewer nodes.
*/
if ((nnodes1 > 0) && (nnodes2 > 0)) {
if (nnodes1 < nnodes2) {
crm_trace("Assign %s (active on %d) before %s (active on %d): "
"less multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return -1;
} else if (nnodes1 > nnodes2) {
crm_trace("Assign %s (active on %d) after %s (active on %d): "
"more multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return 1;
}
}
/* An instance that is either inactive or active on an allowed node is
* preferred over an instance that is active on a no-longer-allowed node.
*/
can1 = node_is_allowed(instance1, &node1);
can2 = node_is_allowed(instance2, &node2);
if (can1 && !can2) {
crm_trace("Assign %s before %s: not active on a disallowed node",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: active on a disallowed node",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher configured priority
if (instance1->priority > instance2->priority) {
crm_trace("Assign %s before %s: priority (%d > %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return -1;
} else if (instance1->priority < instance2->priority) {
crm_trace("Assign %s after %s: priority (%d < %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return 1;
}
// Prefer active instance
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: inactive",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node can run resources
can1 = pcmk__node_available(node1, false, false);
can2 = pcmk__node_available(node2, false, false);
if (can1 && !can2) {
crm_trace("Assign %s before %s: current node can run resources",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: current node can't run resources",
instance1->id, instance2->id);
return 1;
}
// Prefer instance whose parent is allowed to run on instance's current node
node1 = pcmk__top_allowed_node(instance1, node1);
node2 = pcmk__top_allowed_node(instance2, node2);
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: "
"parent not allowed on either instance's current node",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: parent not allowed on current node",
instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: parent allowed on current node",
instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node is running fewer other instances
if (node1->count < node2->count) {
crm_trace("Assign %s before %s: fewer active instances on current node",
instance1->id, instance2->id);
return -1;
} else if (node1->count > node2->count) {
crm_trace("Assign %s after %s: more active instances on current node",
instance1->id, instance2->id);
return 1;
}
// Prefer failed instance
can1 = did_fail(instance1);
can2 = did_fail(instance2);
if (!can1 && can2) {
crm_trace("Assign %s before %s: failed", instance1->id, instance2->id);
return -1;
} else if (can1 && !can2) {
crm_trace("Assign %s after %s: not failed",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher cumulative colocation score on current node
rc = cmp_instance_by_colocation(instance1, instance2);
if (rc != 0) {
return rc;
}
// Prefer instance with lower instance number
rc = pcmk__cmp_instance_number(instance1, instance2);
if (rc < 0) {
crm_trace("Assign %s before %s: instance number",
instance1->id, instance2->id);
} else if (rc > 0) {
crm_trace("Assign %s after %s: instance number",
instance1->id, instance2->id);
} else {
crm_trace("No assignment preference for %s vs. %s",
instance1->id, instance2->id);
}
return rc;
}
diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c
index d8a4589af6..7ca1c45998 100644
--- a/lib/pengine/bundle.c
+++ b/lib/pengine/bundle.c
@@ -1,1837 +1,1829 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define PE__VARIANT_BUNDLE 1
#include "./variant.h"
static char *
next_ip(const char *last_ip)
{
unsigned int oct1 = 0;
unsigned int oct2 = 0;
unsigned int oct3 = 0;
unsigned int oct4 = 0;
int rc = sscanf(last_ip, "%u.%u.%u.%u", &oct1, &oct2, &oct3, &oct4);
if (rc != 4) {
/*@ TODO check for IPv6 */
return NULL;
} else if (oct3 > 253) {
return NULL;
} else if (oct4 > 253) {
++oct3;
oct4 = 1;
} else {
++oct4;
}
return crm_strdup_printf("%u.%u.%u.%u", oct1, oct2, oct3, oct4);
}
static void
allocate_ip(pe__bundle_variant_data_t *data, pe__bundle_replica_t *replica,
GString *buffer)
{
if(data->ip_range_start == NULL) {
return;
} else if(data->ip_last) {
replica->ipaddr = next_ip(data->ip_last);
} else {
replica->ipaddr = strdup(data->ip_range_start);
}
data->ip_last = replica->ipaddr;
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
if (data->add_host) {
g_string_append_printf(buffer, " --add-host=%s-%d:%s",
data->prefix, replica->offset,
replica->ipaddr);
} else {
g_string_append_printf(buffer, " --hosts-entry=%s=%s-%d",
replica->ipaddr, data->prefix,
replica->offset);
}
break;
case PE__CONTAINER_AGENT_RKT:
g_string_append_printf(buffer, " --hosts-entry=%s=%s-%d",
replica->ipaddr, data->prefix,
replica->offset);
break;
default: // PE__CONTAINER_AGENT_UNKNOWN
break;
}
}
static xmlNode *
create_resource(const char *name, const char *provider, const char *kind)
{
xmlNode *rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
crm_xml_add(rsc, XML_ATTR_ID, name);
crm_xml_add(rsc, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF);
crm_xml_add(rsc, XML_AGENT_ATTR_PROVIDER, provider);
crm_xml_add(rsc, XML_ATTR_TYPE, kind);
return rsc;
}
/*!
* \internal
* \brief Check whether cluster can manage resource inside container
*
* \param[in] data Container variant data
*
* \return TRUE if networking configuration is acceptable, FALSE otherwise
*
* \note The resource is manageable if an IP range or control port has been
* specified. If a control port is used without an IP range, replicas per
* host must be 1.
*/
static bool
valid_network(pe__bundle_variant_data_t *data)
{
if(data->ip_range_start) {
return TRUE;
}
if(data->control_port) {
if(data->nreplicas_per_host > 1) {
pe_err("Specifying the 'control-port' for %s requires 'replicas-per-host=1'", data->prefix);
data->nreplicas_per_host = 1;
// @TODO to be sure: pe__clear_resource_flags(rsc, pe_rsc_unique);
}
return TRUE;
}
return FALSE;
}
static int
create_ip_resource(pe_resource_t *parent, pe__bundle_variant_data_t *data,
pe__bundle_replica_t *replica)
{
if(data->ip_range_start) {
char *id = NULL;
xmlNode *xml_ip = NULL;
xmlNode *xml_obj = NULL;
id = crm_strdup_printf("%s-ip-%s", data->prefix, replica->ipaddr);
crm_xml_sanitize_id(id);
xml_ip = create_resource(id, "heartbeat", "IPaddr2");
free(id);
xml_obj = create_xml_node(xml_ip, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_obj, "%s-attributes-%d",
data->prefix, replica->offset);
crm_create_nvpair_xml(xml_obj, NULL, "ip", replica->ipaddr);
if(data->host_network) {
crm_create_nvpair_xml(xml_obj, NULL, "nic", data->host_network);
}
if(data->host_netmask) {
crm_create_nvpair_xml(xml_obj, NULL,
"cidr_netmask", data->host_netmask);
} else {
crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", "32");
}
xml_obj = create_xml_node(xml_ip, "operations");
crm_create_op_xml(xml_obj, ID(xml_ip), "monitor", "60s", NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
if (pe__unpack_resource(xml_ip, &replica->ip, parent,
parent->cluster) != pcmk_rc_ok) {
return pcmk_rc_unpack_error;
}
parent->children = g_list_append(parent->children, replica->ip);
}
return pcmk_rc_ok;
}
static const char*
container_agent_str(enum pe__container_agent t)
{
switch (t) {
case PE__CONTAINER_AGENT_DOCKER: return PE__CONTAINER_AGENT_DOCKER_S;
case PE__CONTAINER_AGENT_RKT: return PE__CONTAINER_AGENT_RKT_S;
case PE__CONTAINER_AGENT_PODMAN: return PE__CONTAINER_AGENT_PODMAN_S;
default: // PE__CONTAINER_AGENT_UNKNOWN
break;
}
return PE__CONTAINER_AGENT_UNKNOWN_S;
}
static int
create_container_resource(pe_resource_t *parent,
const pe__bundle_variant_data_t *data,
pe__bundle_replica_t *replica)
{
char *id = NULL;
xmlNode *xml_container = NULL;
xmlNode *xml_obj = NULL;
// Agent-specific
const char *hostname_opt = NULL;
const char *env_opt = NULL;
const char *agent_str = NULL;
int volid = 0; // rkt-only
GString *buffer = NULL;
GString *dbuffer = NULL;
// Where syntax differences are drop-in replacements, set them now
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
hostname_opt = "-h ";
env_opt = "-e ";
break;
case PE__CONTAINER_AGENT_RKT:
hostname_opt = "--hostname=";
env_opt = "--environment=";
break;
default: // PE__CONTAINER_AGENT_UNKNOWN
return pcmk_rc_unpack_error;
}
agent_str = container_agent_str(data->agent_type);
buffer = g_string_sized_new(4096);
id = crm_strdup_printf("%s-%s-%d", data->prefix, agent_str,
replica->offset);
crm_xml_sanitize_id(id);
xml_container = create_resource(id, "heartbeat", agent_str);
free(id);
xml_obj = create_xml_node(xml_container, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset);
crm_create_nvpair_xml(xml_obj, NULL, "image", data->image);
crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", XML_BOOLEAN_TRUE);
crm_create_nvpair_xml(xml_obj, NULL, "force_kill", XML_BOOLEAN_FALSE);
crm_create_nvpair_xml(xml_obj, NULL, "reuse", XML_BOOLEAN_FALSE);
if (data->agent_type == PE__CONTAINER_AGENT_DOCKER) {
g_string_append(buffer, " --restart=no");
}
/* Set a container hostname only if we have an IP to map it to. The user can
* set -h or --uts=host themselves if they want a nicer name for logs, but
* this makes applications happy who need their hostname to match the IP
* they bind to.
*/
if (data->ip_range_start != NULL) {
g_string_append_printf(buffer, " %s%s-%d", hostname_opt, data->prefix,
replica->offset);
}
pcmk__g_strcat(buffer, " ", env_opt, "PCMK_stderr=1", NULL);
if (data->container_network != NULL) {
pcmk__g_strcat(buffer, " --net=", data->container_network, NULL);
}
if (data->control_port != NULL) {
pcmk__g_strcat(buffer, " ", env_opt, "PCMK_remote_port=",
data->control_port, NULL);
} else {
g_string_append_printf(buffer, " %sPCMK_remote_port=%d", env_opt,
DEFAULT_REMOTE_PORT);
}
for (GList *iter = data->mounts; iter != NULL; iter = iter->next) {
pe__bundle_mount_t *mount = (pe__bundle_mount_t *) iter->data;
char *source = NULL;
if (pcmk_is_set(mount->flags, pe__bundle_mount_subdir)) {
source = crm_strdup_printf("%s/%s-%d", mount->source, data->prefix,
replica->offset);
pcmk__add_separated_word(&dbuffer, 1024, source, ",");
}
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
pcmk__g_strcat(buffer,
" -v ", pcmk__s(source, mount->source),
":", mount->target, NULL);
if (mount->options != NULL) {
pcmk__g_strcat(buffer, ":", mount->options, NULL);
}
break;
case PE__CONTAINER_AGENT_RKT:
g_string_append_printf(buffer,
" --volume vol%d,kind=host,"
"source=%s%s%s "
"--mount volume=vol%d,target=%s",
volid, pcmk__s(source, mount->source),
(mount->options != NULL)? "," : "",
pcmk__s(mount->options, ""),
volid, mount->target);
volid++;
break;
default:
break;
}
free(source);
}
for (GList *iter = data->ports; iter != NULL; iter = iter->next) {
pe__bundle_port_t *port = (pe__bundle_port_t *) iter->data;
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
if (replica->ipaddr != NULL) {
pcmk__g_strcat(buffer,
" -p ", replica->ipaddr, ":", port->source,
":", port->target, NULL);
} else if (!pcmk__str_eq(data->container_network, "host",
pcmk__str_none)) {
// No need to do port mapping if net == host
pcmk__g_strcat(buffer,
" -p ", port->source, ":", port->target,
NULL);
}
break;
case PE__CONTAINER_AGENT_RKT:
if (replica->ipaddr != NULL) {
pcmk__g_strcat(buffer,
" --port=", port->target,
":", replica->ipaddr, ":", port->source,
NULL);
} else {
pcmk__g_strcat(buffer,
" --port=", port->target, ":", port->source,
NULL);
}
break;
default:
break;
}
}
/* @COMPAT: We should use pcmk__add_word() here, but we can't yet, because
* it would cause restarts during rolling upgrades.
*
* In a previous version of the container resource creation logic, if
* data->launcher_options is not NULL, we append
* (" %s", data->launcher_options) even if data->launcher_options is an
* empty string. Likewise for data->container_host_options. Using
*
* pcmk__add_word(buffer, 0, data->launcher_options)
*
* removes that extra trailing space, causing a resource definition change.
*/
if (data->launcher_options != NULL) {
pcmk__g_strcat(buffer, " ", data->launcher_options, NULL);
}
if (data->container_host_options != NULL) {
pcmk__g_strcat(buffer, " ", data->container_host_options, NULL);
}
crm_create_nvpair_xml(xml_obj, NULL, "run_opts",
(const char *) buffer->str);
g_string_free(buffer, TRUE);
crm_create_nvpair_xml(xml_obj, NULL, "mount_points",
(dbuffer != NULL)? (const char *) dbuffer->str : "");
if (dbuffer != NULL) {
g_string_free(dbuffer, TRUE);
}
if (replica->child != NULL) {
if (data->container_command != NULL) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
data->container_command);
} else {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
SBIN_DIR "/pacemaker-remoted");
}
/* TODO: Allow users to specify their own?
*
* We just want to know if the container is alive; we'll monitor the
* child independently.
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
#if 0
/* @TODO Consider supporting the use case where we can start and stop
* resources, but not proxy local commands (such as setting node
* attributes), by running the local executor in stand-alone mode.
* However, this would probably be better done via ACLs as with other
* Pacemaker Remote nodes.
*/
} else if ((child != NULL) && data->untrusted) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
CRM_DAEMON_DIR "/pacemaker-execd");
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd",
CRM_DAEMON_DIR "/pacemaker/cts-exec-helper -c poke");
#endif
} else {
if (data->container_command != NULL) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
data->container_command);
}
/* TODO: Allow users to specify their own?
*
* We don't know what's in the container, so we just want to know if it
* is alive.
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
}
xml_obj = create_xml_node(xml_container, "operations");
crm_create_op_xml(xml_obj, ID(xml_container), "monitor", "60s", NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
if (pe__unpack_resource(xml_container, &replica->container, parent,
parent->cluster) != pcmk_rc_ok) {
return pcmk_rc_unpack_error;
}
parent->children = g_list_append(parent->children, replica->container);
return pcmk_rc_ok;
}
/*!
* \brief Ban a node from a resource's (and its children's) allowed nodes list
*
* \param[in,out] rsc Resource to modify
* \param[in] uname Name of node to ban
*/
static void
disallow_node(pe_resource_t *rsc, const char *uname)
{
gpointer match = g_hash_table_lookup(rsc->allowed_nodes, uname);
if (match) {
((pe_node_t *) match)->weight = -INFINITY;
((pe_node_t *) match)->rsc_discover_mode = pe_discover_never;
}
if (rsc->children) {
g_list_foreach(rsc->children, (GFunc) disallow_node, (gpointer) uname);
}
}
static int
create_remote_resource(pe_resource_t *parent, pe__bundle_variant_data_t *data,
pe__bundle_replica_t *replica)
{
if (replica->child && valid_network(data)) {
GHashTableIter gIter;
pe_node_t *node = NULL;
xmlNode *xml_remote = NULL;
char *id = crm_strdup_printf("%s-%d", data->prefix, replica->offset);
char *port_s = NULL;
const char *uname = NULL;
const char *connect_name = NULL;
if (pe_find_resource(parent->cluster->resources, id) != NULL) {
free(id);
// The biggest hammer we have
id = crm_strdup_printf("pcmk-internal-%s-remote-%d",
replica->child->id, replica->offset);
//@TODO return error instead of asserting?
CRM_ASSERT(pe_find_resource(parent->cluster->resources,
id) == NULL);
}
/* REMOTE_CONTAINER_HACK: Using "#uname" as the server name when the
* connection does not have its own IP is a magic string that we use to
* support nested remotes (i.e. a bundle running on a remote node).
*/
connect_name = (replica->ipaddr? replica->ipaddr : "#uname");
if (data->control_port == NULL) {
port_s = pcmk__itoa(DEFAULT_REMOTE_PORT);
}
/* This sets replica->container as replica->remote's container, which is
* similar to what happens with guest nodes. This is how the scheduler
* knows that the bundle node is fenced by recovering the container, and
* that remote should be ordered relative to the container.
*/
xml_remote = pe_create_remote_xml(NULL, id, replica->container->id,
NULL, NULL, NULL,
connect_name, (data->control_port?
data->control_port : port_s));
free(port_s);
/* Abandon our created ID, and pull the copy from the XML, because we
* need something that will get freed during data set cleanup to use as
* the node ID and uname.
*/
free(id);
id = NULL;
uname = ID(xml_remote);
/* Ensure a node has been created for the guest (it may have already
* been, if it has a permanent node attribute), and ensure its weight is
* -INFINITY so no other resources can run on it.
*/
node = pe_find_node(parent->cluster->nodes, uname);
if (node == NULL) {
node = pe_create_node(uname, uname, "remote", "-INFINITY",
parent->cluster);
} else {
node->weight = -INFINITY;
}
node->rsc_discover_mode = pe_discover_never;
/* unpack_remote_nodes() ensures that each remote node and guest node
* has a pe_node_t entry. Ideally, it would do the same for bundle nodes.
* Unfortunately, a bundle has to be mostly unpacked before it's obvious
* what nodes will be needed, so we do it just above.
*
* Worse, that means that the node may have been utilized while
* unpacking other resources, without our weight correction. The most
* likely place for this to happen is when pe__unpack_resource() calls
* resource_location() to set a default score in symmetric clusters.
* This adds a node *copy* to each resource's allowed nodes, and these
* copies will have the wrong weight.
*
* As a hacky workaround, fix those copies here.
*
* @TODO Possible alternative: ensure bundles are unpacked before other
* resources, so the weight is correct before any copies are made.
*/
g_list_foreach(parent->cluster->resources, (GFunc) disallow_node,
(gpointer) uname);
replica->node = pe__copy_node(node);
replica->node->weight = 500;
replica->node->rsc_discover_mode = pe_discover_exclusive;
/* Ensure the node shows up as allowed and with the correct discovery set */
if (replica->child->allowed_nodes != NULL) {
g_hash_table_destroy(replica->child->allowed_nodes);
}
replica->child->allowed_nodes = pcmk__strkey_table(NULL, free);
g_hash_table_insert(replica->child->allowed_nodes,
(gpointer) replica->node->details->id,
pe__copy_node(replica->node));
{
pe_node_t *copy = pe__copy_node(replica->node);
copy->weight = -INFINITY;
g_hash_table_insert(replica->child->parent->allowed_nodes,
(gpointer) replica->node->details->id, copy);
}
if (pe__unpack_resource(xml_remote, &replica->remote, parent,
parent->cluster) != pcmk_rc_ok) {
return pcmk_rc_unpack_error;
}
g_hash_table_iter_init(&gIter, replica->remote->allowed_nodes);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&node)) {
if (pe__is_guest_or_remote_node(node)) {
/* Remote resources can only run on 'normal' cluster node */
node->weight = -INFINITY;
}
}
replica->node->details->remote_rsc = replica->remote;
// Ensure pe__is_guest_node() functions correctly immediately
replica->remote->container = replica->container;
/* A bundle's #kind is closer to "container" (guest node) than the
* "remote" set by pe_create_node().
*/
g_hash_table_insert(replica->node->details->attrs,
strdup(CRM_ATTR_KIND), strdup("container"));
/* One effect of this is that setup_container() will add
* replica->remote to replica->container's fillers, which will make
* pe__resource_contains_guest_node() true for replica->container.
*
* replica->child does NOT get added to replica->container's fillers.
* The only noticeable effect if it did would be for its fail count to
* be taken into account when checking replica->container's migration
* threshold.
*/
parent->children = g_list_append(parent->children, replica->remote);
}
return pcmk_rc_ok;
}
static int
create_replica_resources(pe_resource_t *parent, pe__bundle_variant_data_t *data,
pe__bundle_replica_t *replica)
{
int rc = pcmk_rc_ok;
rc = create_container_resource(parent, data, replica);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = create_ip_resource(parent, data, replica);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = create_remote_resource(parent, data, replica);
if (rc != pcmk_rc_ok) {
return rc;
}
if ((replica->child != NULL) && (replica->ipaddr != NULL)) {
add_hash_param(replica->child->meta, "external-ip", replica->ipaddr);
}
if (replica->remote != NULL) {
/*
* Allow the remote connection resource to be allocated to a
* different node than the one on which the container is active.
*
* This makes it possible to have Pacemaker Remote nodes running
* containers with pacemaker-remoted inside in order to start
* services inside those containers.
*/
pe__set_resource_flags(replica->remote, pe_rsc_allow_remote_remotes);
}
return rc;
}
static void
mount_add(pe__bundle_variant_data_t *bundle_data, const char *source,
const char *target, const char *options, uint32_t flags)
{
pe__bundle_mount_t *mount = calloc(1, sizeof(pe__bundle_mount_t));
CRM_ASSERT(mount != NULL);
mount->source = strdup(source);
mount->target = strdup(target);
pcmk__str_update(&mount->options, options);
mount->flags = flags;
bundle_data->mounts = g_list_append(bundle_data->mounts, mount);
}
static void
mount_free(pe__bundle_mount_t *mount)
{
free(mount->source);
free(mount->target);
free(mount->options);
free(mount);
}
static void
port_free(pe__bundle_port_t *port)
{
free(port->source);
free(port->target);
free(port);
}
static pe__bundle_replica_t *
replica_for_remote(pe_resource_t *remote)
{
pe_resource_t *top = remote;
pe__bundle_variant_data_t *bundle_data = NULL;
if (top == NULL) {
return NULL;
}
while (top->parent != NULL) {
top = top->parent;
}
get_bundle_variant_data(bundle_data, top);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (replica->remote == remote) {
return replica;
}
}
CRM_LOG_ASSERT(FALSE);
return NULL;
}
bool
pe__bundle_needs_remote_name(pe_resource_t *rsc, pe_working_set_t *data_set)
{
const char *value;
GHashTable *params = NULL;
if (rsc == NULL) {
return false;
}
// Use NULL node since pcmk__bundle_expand() uses that to set value
params = pe_rsc_params(rsc, NULL, data_set);
value = g_hash_table_lookup(params, XML_RSC_ATTR_REMOTE_RA_ADDR);
return pcmk__str_eq(value, "#uname", pcmk__str_casei)
&& xml_contains_remote_node(rsc->xml);
}
const char *
pe__add_bundle_remote_name(pe_resource_t *rsc, pe_working_set_t *data_set,
xmlNode *xml, const char *field)
{
// REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside
pe_node_t *node = NULL;
pe__bundle_replica_t *replica = NULL;
if (!pe__bundle_needs_remote_name(rsc, data_set)) {
return NULL;
}
replica = replica_for_remote(rsc);
if (replica == NULL) {
return NULL;
}
node = replica->container->allocated_to;
if (node == NULL) {
/* If it won't be running anywhere after the
* transition, go with where it's running now.
*/
node = pe__current_node(replica->container);
}
if(node == NULL) {
crm_trace("Cannot determine address for bundle connection %s", rsc->id);
return NULL;
}
crm_trace("Setting address for bundle connection %s to bundle host %s",
rsc->id, pe__node_name(node));
if(xml != NULL && field != NULL) {
crm_xml_add(xml, field, node->details->uname);
}
return node->details->uname;
}
#define pe__set_bundle_mount_flags(mount_xml, flags, flags_to_set) do { \
flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Bundle mount", ID(mount_xml), flags, \
(flags_to_set), #flags_to_set); \
} while (0)
gboolean
pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set)
{
const char *value = NULL;
xmlNode *xml_obj = NULL;
xmlNode *xml_resource = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
bool need_log_mount = TRUE;
CRM_ASSERT(rsc != NULL);
pe_rsc_trace(rsc, "Processing resource %s...", rsc->id);
bundle_data = calloc(1, sizeof(pe__bundle_variant_data_t));
rsc->variant_opaque = bundle_data;
bundle_data->prefix = strdup(rsc->id);
xml_obj = first_named_child(rsc->xml, PE__CONTAINER_AGENT_DOCKER_S);
if (xml_obj != NULL) {
bundle_data->agent_type = PE__CONTAINER_AGENT_DOCKER;
} else {
xml_obj = first_named_child(rsc->xml, PE__CONTAINER_AGENT_RKT_S);
if (xml_obj != NULL) {
bundle_data->agent_type = PE__CONTAINER_AGENT_RKT;
} else {
xml_obj = first_named_child(rsc->xml, PE__CONTAINER_AGENT_PODMAN_S);
if (xml_obj != NULL) {
bundle_data->agent_type = PE__CONTAINER_AGENT_PODMAN;
} else {
return FALSE;
}
}
}
// Use 0 for default, minimum, and invalid promoted-max
value = crm_element_value(xml_obj, XML_RSC_ATTR_PROMOTED_MAX);
if (value == NULL) {
// @COMPAT deprecated since 2.0.0
value = crm_element_value(xml_obj, "masters");
}
pcmk__scan_min_int(value, &bundle_data->promoted_max, 0);
// Default replicas to promoted-max if it was specified and 1 otherwise
value = crm_element_value(xml_obj, "replicas");
if ((value == NULL) && (bundle_data->promoted_max > 0)) {
bundle_data->nreplicas = bundle_data->promoted_max;
} else {
pcmk__scan_min_int(value, &bundle_data->nreplicas, 1);
}
/*
* Communication between containers on the same host via the
* floating IPs only works if the container is started with:
* --userland-proxy=false --ip-masq=false
*/
value = crm_element_value(xml_obj, "replicas-per-host");
pcmk__scan_min_int(value, &bundle_data->nreplicas_per_host, 1);
if (bundle_data->nreplicas_per_host == 1) {
pe__clear_resource_flags(rsc, pe_rsc_unique);
}
bundle_data->container_command = crm_element_value_copy(xml_obj, "run-command");
bundle_data->launcher_options = crm_element_value_copy(xml_obj, "options");
bundle_data->image = crm_element_value_copy(xml_obj, "image");
bundle_data->container_network = crm_element_value_copy(xml_obj, "network");
xml_obj = first_named_child(rsc->xml, "network");
if(xml_obj) {
bundle_data->ip_range_start = crm_element_value_copy(xml_obj, "ip-range-start");
bundle_data->host_netmask = crm_element_value_copy(xml_obj, "host-netmask");
bundle_data->host_network = crm_element_value_copy(xml_obj, "host-interface");
bundle_data->control_port = crm_element_value_copy(xml_obj, "control-port");
value = crm_element_value(xml_obj, "add-host");
if (crm_str_to_boolean(value, &bundle_data->add_host) != 1) {
bundle_data->add_host = TRUE;
}
for (xmlNode *xml_child = pcmk__xe_first_child(xml_obj); xml_child != NULL;
xml_child = pcmk__xe_next(xml_child)) {
pe__bundle_port_t *port = calloc(1, sizeof(pe__bundle_port_t));
port->source = crm_element_value_copy(xml_child, "port");
if(port->source == NULL) {
port->source = crm_element_value_copy(xml_child, "range");
} else {
port->target = crm_element_value_copy(xml_child, "internal-port");
}
if(port->source != NULL && strlen(port->source) > 0) {
if(port->target == NULL) {
port->target = strdup(port->source);
}
bundle_data->ports = g_list_append(bundle_data->ports, port);
} else {
pe_err("Invalid port directive %s", ID(xml_child));
port_free(port);
}
}
}
xml_obj = first_named_child(rsc->xml, "storage");
for (xmlNode *xml_child = pcmk__xe_first_child(xml_obj); xml_child != NULL;
xml_child = pcmk__xe_next(xml_child)) {
const char *source = crm_element_value(xml_child, "source-dir");
const char *target = crm_element_value(xml_child, "target-dir");
const char *options = crm_element_value(xml_child, "options");
int flags = pe__bundle_mount_none;
if (source == NULL) {
source = crm_element_value(xml_child, "source-dir-root");
pe__set_bundle_mount_flags(xml_child, flags,
pe__bundle_mount_subdir);
}
if (source && target) {
mount_add(bundle_data, source, target, options, flags);
if (strcmp(target, "/var/log") == 0) {
need_log_mount = FALSE;
}
} else {
pe_err("Invalid mount directive %s", ID(xml_child));
}
}
xml_obj = first_named_child(rsc->xml, "primitive");
if (xml_obj && valid_network(bundle_data)) {
char *value = NULL;
xmlNode *xml_set = NULL;
xml_resource = create_xml_node(NULL, XML_CIB_TAG_INCARNATION);
/* @COMPAT We no longer use the tag, but we need to keep it as
* part of the resource name, so that bundles don't restart in a rolling
* upgrade. (It also avoids needing to change regression tests.)
*/
crm_xml_set_id(xml_resource, "%s-%s", bundle_data->prefix,
(bundle_data->promoted_max? "master"
: (const char *)xml_resource->name));
xml_set = create_xml_node(xml_resource, XML_TAG_META_SETS);
crm_xml_set_id(xml_set, "%s-%s-meta", bundle_data->prefix, xml_resource->name);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_ORDERED, XML_BOOLEAN_TRUE);
value = pcmk__itoa(bundle_data->nreplicas);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_INCARNATION_MAX, value);
free(value);
value = pcmk__itoa(bundle_data->nreplicas_per_host);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_INCARNATION_NODEMAX, value);
free(value);
crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_UNIQUE,
pcmk__btoa(bundle_data->nreplicas_per_host > 1));
if (bundle_data->promoted_max) {
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_PROMOTABLE, XML_BOOLEAN_TRUE);
value = pcmk__itoa(bundle_data->promoted_max);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_PROMOTED_MAX, value);
free(value);
}
//crm_xml_add(xml_obj, XML_ATTR_ID, bundle_data->prefix);
add_node_copy(xml_resource, xml_obj);
} else if(xml_obj) {
pe_err("Cannot control %s inside %s without either ip-range-start or control-port",
rsc->id, ID(xml_obj));
return FALSE;
}
if(xml_resource) {
int lpc = 0;
GList *childIter = NULL;
- pe_resource_t *new_rsc = NULL;
pe__bundle_port_t *port = NULL;
-
GString *buffer = NULL;
- if (pe__unpack_resource(xml_resource, &new_rsc, rsc,
+ if (pe__unpack_resource(xml_resource, &(bundle_data->child), rsc,
data_set) != pcmk_rc_ok) {
- pe_err("Failed unpacking resource %s", ID(rsc->xml));
- if (new_rsc != NULL && new_rsc->fns != NULL) {
- new_rsc->fns->free(new_rsc);
- }
return FALSE;
}
- bundle_data->child = new_rsc;
-
/* Currently, we always map the default authentication key location
* into the same location inside the container.
*
* Ideally, we would respect the host's PCMK_authkey_location, but:
* - it may be different on different nodes;
* - the actual connection will do extra checking to make sure the key
* file exists and is readable, that we can't do here on the DC
* - tools such as crm_resource and crm_simulate may not have the same
* environment variables as the cluster, causing operation digests to
* differ
*
* Always using the default location inside the container is fine,
* because we control the pacemaker_remote environment, and it avoids
* having to pass another environment variable to the container.
*
* @TODO A better solution may be to have only pacemaker_remote use the
* environment variable, and have the cluster nodes use a new
* cluster option for key location. This would introduce the limitation
* of the location being the same on all cluster nodes, but that's
* reasonable.
*/
mount_add(bundle_data, DEFAULT_REMOTE_KEY_LOCATION,
DEFAULT_REMOTE_KEY_LOCATION, NULL, pe__bundle_mount_none);
if (need_log_mount) {
mount_add(bundle_data, CRM_BUNDLE_DIR, "/var/log", NULL,
pe__bundle_mount_subdir);
}
port = calloc(1, sizeof(pe__bundle_port_t));
if(bundle_data->control_port) {
port->source = strdup(bundle_data->control_port);
} else {
/* If we wanted to respect PCMK_remote_port, we could use
* crm_default_remote_port() here and elsewhere in this file instead
* of DEFAULT_REMOTE_PORT.
*
* However, it gains nothing, since we control both the container
* environment and the connection resource parameters, and the user
* can use a different port if desired by setting control-port.
*/
port->source = pcmk__itoa(DEFAULT_REMOTE_PORT);
}
port->target = strdup(port->source);
bundle_data->ports = g_list_append(bundle_data->ports, port);
buffer = g_string_sized_new(1024);
for (childIter = bundle_data->child->children; childIter != NULL;
childIter = childIter->next) {
pe__bundle_replica_t *replica = calloc(1, sizeof(pe__bundle_replica_t));
replica->child = childIter->data;
replica->child->exclusive_discover = TRUE;
replica->offset = lpc++;
// Ensure the child's notify gets set based on the underlying primitive's value
if (pcmk_is_set(replica->child->flags, pe_rsc_notify)) {
pe__set_resource_flags(bundle_data->child, pe_rsc_notify);
}
allocate_ip(bundle_data, replica, buffer);
bundle_data->replicas = g_list_append(bundle_data->replicas,
replica);
bundle_data->attribute_target = g_hash_table_lookup(replica->child->meta,
XML_RSC_ATTR_TARGET);
}
bundle_data->container_host_options = g_string_free(buffer, FALSE);
if (bundle_data->attribute_target) {
g_hash_table_replace(rsc->meta, strdup(XML_RSC_ATTR_TARGET),
strdup(bundle_data->attribute_target));
g_hash_table_replace(bundle_data->child->meta,
strdup(XML_RSC_ATTR_TARGET),
strdup(bundle_data->attribute_target));
}
} else {
// Just a naked container, no pacemaker-remote
GString *buffer = g_string_sized_new(1024);
for (int lpc = 0; lpc < bundle_data->nreplicas; lpc++) {
pe__bundle_replica_t *replica = calloc(1, sizeof(pe__bundle_replica_t));
replica->offset = lpc;
allocate_ip(bundle_data, replica, buffer);
bundle_data->replicas = g_list_append(bundle_data->replicas,
replica);
}
bundle_data->container_host_options = g_string_free(buffer, FALSE);
}
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (create_replica_resources(rsc, bundle_data, replica) != pcmk_rc_ok) {
pe_err("Failed unpacking resource %s", rsc->id);
rsc->fns->free(rsc);
return FALSE;
}
/* Utilization needs special handling for bundles. It makes no sense for
* the inner primitive to have utilization, because it is tied
* one-to-one to the guest node created by the container resource -- and
* there's no way to set capacities for that guest node anyway.
*
* What the user really wants is to configure utilization for the
* container. However, the schema only allows utilization for
* primitives, and the container resource is implicit anyway, so the
* user can *only* configure utilization for the inner primitive. If
* they do, move the primitive's utilization values to the container.
*
* @TODO This means that bundles without an inner primitive can't have
* utilization. An alternative might be to allow utilization values in
* the top-level bundle XML in the schema, and copy those to each
* container.
*/
if (replica->child != NULL) {
GHashTable *empty = replica->container->utilization;
replica->container->utilization = replica->child->utilization;
replica->child->utilization = empty;
}
}
if (bundle_data->child) {
rsc->children = g_list_append(rsc->children, bundle_data->child);
}
return TRUE;
}
static int
replica_resource_active(pe_resource_t *rsc, gboolean all)
{
if (rsc) {
gboolean child_active = rsc->fns->active(rsc, all);
if (child_active && !all) {
return TRUE;
} else if (!child_active && all) {
return FALSE;
}
}
return -1;
}
gboolean
pe__bundle_active(pe_resource_t *rsc, gboolean all)
{
pe__bundle_variant_data_t *bundle_data = NULL;
GList *iter = NULL;
get_bundle_variant_data(bundle_data, rsc);
for (iter = bundle_data->replicas; iter != NULL; iter = iter->next) {
pe__bundle_replica_t *replica = iter->data;
int rsc_active;
rsc_active = replica_resource_active(replica->ip, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = replica_resource_active(replica->child, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = replica_resource_active(replica->container, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = replica_resource_active(replica->remote, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
}
/* If "all" is TRUE, we've already checked that no resources were inactive,
* so return TRUE; if "all" is FALSE, we didn't find any active resources,
* so return FALSE.
*/
return all;
}
/*!
* \internal
* \brief Find the bundle replica corresponding to a given node
*
* \param[in] bundle Top-level bundle resource
* \param[in] node Node to search for
*
* \return Bundle replica if found, NULL otherwise
*/
pe_resource_t *
pe__find_bundle_replica(const pe_resource_t *bundle, const pe_node_t *node)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_ASSERT(bundle && node);
get_bundle_variant_data(bundle_data, bundle);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica && replica->node);
if (replica->node->details == node->details) {
return replica->child;
}
}
return NULL;
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
static void
print_rsc_in_list(pe_resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
if (rsc != NULL) {
if (options & pe_print_html) {
status_print("");
}
rsc->fns->print(rsc, pre_text, options, print_data);
if (options & pe_print_html) {
status_print("\n");
}
}
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
static void
bundle_print_xml(pe_resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
pe__bundle_variant_data_t *bundle_data = NULL;
char *child_text = NULL;
CRM_CHECK(rsc != NULL, return);
if (pre_text == NULL) {
pre_text = "";
}
child_text = crm_strdup_printf("%s ", pre_text);
get_bundle_variant_data(bundle_data, rsc);
status_print("%sid);
status_print("type=\"%s\" ", container_agent_str(bundle_data->agent_type));
status_print("image=\"%s\" ", bundle_data->image);
status_print("unique=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_unique));
status_print("managed=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_managed));
status_print("failed=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_failed));
status_print(">\n");
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
status_print("%s \n", pre_text, replica->offset);
print_rsc_in_list(replica->ip, child_text, options, print_data);
print_rsc_in_list(replica->child, child_text, options, print_data);
print_rsc_in_list(replica->container, child_text, options, print_data);
print_rsc_in_list(replica->remote, child_text, options, print_data);
status_print("%s \n", pre_text);
}
status_print("%s\n", pre_text);
free(child_text);
}
PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__bundle_xml(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
pe__bundle_variant_data_t *bundle_data = NULL;
int rc = pcmk_rc_no_output;
gboolean printed_header = FALSE;
gboolean print_everything = TRUE;
CRM_ASSERT(rsc != NULL);
get_bundle_variant_data(bundle_data, rsc);
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
char *id = NULL;
gboolean print_ip, print_child, print_ctnr, print_remote;
CRM_ASSERT(replica);
if (pcmk__rsc_filtered_by_node(replica->container, only_node)) {
continue;
}
print_ip = replica->ip != NULL &&
!replica->ip->fns->is_filtered(replica->ip, only_rsc, print_everything);
print_child = replica->child != NULL &&
!replica->child->fns->is_filtered(replica->child, only_rsc, print_everything);
print_ctnr = !replica->container->fns->is_filtered(replica->container, only_rsc, print_everything);
print_remote = replica->remote != NULL &&
!replica->remote->fns->is_filtered(replica->remote, only_rsc, print_everything);
if (!print_everything && !print_ip && !print_child && !print_ctnr && !print_remote) {
continue;
}
if (!printed_header) {
printed_header = TRUE;
rc = pe__name_and_nvpairs_xml(out, true, "bundle", 6,
"id", rsc->id,
"type", container_agent_str(bundle_data->agent_type),
"image", bundle_data->image,
"unique", pe__rsc_bool_str(rsc, pe_rsc_unique),
"managed", pe__rsc_bool_str(rsc, pe_rsc_managed),
"failed", pe__rsc_bool_str(rsc, pe_rsc_failed));
CRM_ASSERT(rc == pcmk_rc_ok);
}
id = pcmk__itoa(replica->offset);
rc = pe__name_and_nvpairs_xml(out, true, "replica", 1, "id", id);
free(id);
CRM_ASSERT(rc == pcmk_rc_ok);
if (print_ip) {
out->message(out, crm_map_element_name(replica->ip->xml), show_opts,
replica->ip, only_node, only_rsc);
}
if (print_child) {
out->message(out, crm_map_element_name(replica->child->xml), show_opts,
replica->child, only_node, only_rsc);
}
if (print_ctnr) {
out->message(out, crm_map_element_name(replica->container->xml), show_opts,
replica->container, only_node, only_rsc);
}
if (print_remote) {
out->message(out, crm_map_element_name(replica->remote->xml), show_opts,
replica->remote, only_node, only_rsc);
}
pcmk__output_xml_pop_parent(out); // replica
}
if (printed_header) {
pcmk__output_xml_pop_parent(out); // bundle
}
return rc;
}
static void
pe__bundle_replica_output_html(pcmk__output_t *out, pe__bundle_replica_t *replica,
pe_node_t *node, uint32_t show_opts)
{
pe_resource_t *rsc = replica->child;
int offset = 0;
char buffer[LINE_MAX];
if(rsc == NULL) {
rsc = replica->container;
}
if (replica->remote) {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s",
rsc_printable_id(replica->remote));
} else {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s",
rsc_printable_id(replica->container));
}
if (replica->ipaddr) {
offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)",
replica->ipaddr);
}
pe__common_output_html(out, rsc, buffer, node, show_opts);
}
PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__bundle_html(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
pe__bundle_variant_data_t *bundle_data = NULL;
int rc = pcmk_rc_no_output;
gboolean print_everything = TRUE;
CRM_ASSERT(rsc != NULL);
get_bundle_variant_data(bundle_data, rsc);
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
gboolean print_ip, print_child, print_ctnr, print_remote;
CRM_ASSERT(replica);
if (pcmk__rsc_filtered_by_node(replica->container, only_node)) {
continue;
}
print_ip = replica->ip != NULL &&
!replica->ip->fns->is_filtered(replica->ip, only_rsc, print_everything);
print_child = replica->child != NULL &&
!replica->child->fns->is_filtered(replica->child, only_rsc, print_everything);
print_ctnr = !replica->container->fns->is_filtered(replica->container, only_rsc, print_everything);
print_remote = replica->remote != NULL &&
!replica->remote->fns->is_filtered(replica->remote, only_rsc, print_everything);
if (pcmk_is_set(show_opts, pcmk_show_implicit_rscs) ||
(print_everything == FALSE && (print_ip || print_child || print_ctnr || print_remote))) {
/* The text output messages used below require pe_print_implicit to
* be set to do anything.
*/
uint32_t new_show_opts = show_opts | pcmk_show_implicit_rscs;
PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s",
(bundle_data->nreplicas > 1)? " set" : "",
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "",
pcmk_is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)");
if (pcmk__list_of_multiple(bundle_data->replicas)) {
out->begin_list(out, NULL, NULL, "Replica[%d]", replica->offset);
}
if (print_ip) {
out->message(out, crm_map_element_name(replica->ip->xml),
new_show_opts, replica->ip, only_node, only_rsc);
}
if (print_child) {
out->message(out, crm_map_element_name(replica->child->xml),
new_show_opts, replica->child, only_node, only_rsc);
}
if (print_ctnr) {
out->message(out, crm_map_element_name(replica->container->xml),
new_show_opts, replica->container, only_node, only_rsc);
}
if (print_remote) {
out->message(out, crm_map_element_name(replica->remote->xml),
new_show_opts, replica->remote, only_node, only_rsc);
}
if (pcmk__list_of_multiple(bundle_data->replicas)) {
out->end_list(out);
}
} else if (print_everything == FALSE && !(print_ip || print_child || print_ctnr || print_remote)) {
continue;
} else {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s",
(bundle_data->nreplicas > 1)? " set" : "",
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "",
pcmk_is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)");
pe__bundle_replica_output_html(out, replica, pe__current_node(replica->container),
show_opts);
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
static void
pe__bundle_replica_output_text(pcmk__output_t *out, pe__bundle_replica_t *replica,
pe_node_t *node, uint32_t show_opts)
{
pe_resource_t *rsc = replica->child;
int offset = 0;
char buffer[LINE_MAX];
if(rsc == NULL) {
rsc = replica->container;
}
if (replica->remote) {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s",
rsc_printable_id(replica->remote));
} else {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s",
rsc_printable_id(replica->container));
}
if (replica->ipaddr) {
offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)",
replica->ipaddr);
}
pe__common_output_text(out, rsc, buffer, node, show_opts);
}
PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__bundle_text(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
pe__bundle_variant_data_t *bundle_data = NULL;
int rc = pcmk_rc_no_output;
gboolean print_everything = TRUE;
get_bundle_variant_data(bundle_data, rsc);
CRM_ASSERT(rsc != NULL);
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
gboolean print_ip, print_child, print_ctnr, print_remote;
CRM_ASSERT(replica);
if (pcmk__rsc_filtered_by_node(replica->container, only_node)) {
continue;
}
print_ip = replica->ip != NULL &&
!replica->ip->fns->is_filtered(replica->ip, only_rsc, print_everything);
print_child = replica->child != NULL &&
!replica->child->fns->is_filtered(replica->child, only_rsc, print_everything);
print_ctnr = !replica->container->fns->is_filtered(replica->container, only_rsc, print_everything);
print_remote = replica->remote != NULL &&
!replica->remote->fns->is_filtered(replica->remote, only_rsc, print_everything);
if (pcmk_is_set(show_opts, pcmk_show_implicit_rscs) ||
(print_everything == FALSE && (print_ip || print_child || print_ctnr || print_remote))) {
/* The text output messages used below require pe_print_implicit to
* be set to do anything.
*/
uint32_t new_show_opts = show_opts | pcmk_show_implicit_rscs;
PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s",
(bundle_data->nreplicas > 1)? " set" : "",
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "",
pcmk_is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)");
if (pcmk__list_of_multiple(bundle_data->replicas)) {
out->list_item(out, NULL, "Replica[%d]", replica->offset);
}
out->begin_list(out, NULL, NULL, NULL);
if (print_ip) {
out->message(out, crm_map_element_name(replica->ip->xml),
new_show_opts, replica->ip, only_node, only_rsc);
}
if (print_child) {
out->message(out, crm_map_element_name(replica->child->xml),
new_show_opts, replica->child, only_node, only_rsc);
}
if (print_ctnr) {
out->message(out, crm_map_element_name(replica->container->xml),
new_show_opts, replica->container, only_node, only_rsc);
}
if (print_remote) {
out->message(out, crm_map_element_name(replica->remote->xml),
new_show_opts, replica->remote, only_node, only_rsc);
}
out->end_list(out);
} else if (print_everything == FALSE && !(print_ip || print_child || print_ctnr || print_remote)) {
continue;
} else {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s",
(bundle_data->nreplicas > 1)? " set" : "",
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "",
pcmk_is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)");
pe__bundle_replica_output_text(out, replica, pe__current_node(replica->container),
show_opts);
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
static void
print_bundle_replica(pe__bundle_replica_t *replica, const char *pre_text,
long options, void *print_data)
{
pe_node_t *node = NULL;
pe_resource_t *rsc = replica->child;
int offset = 0;
char buffer[LINE_MAX];
if(rsc == NULL) {
rsc = replica->container;
}
if (replica->remote) {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s",
rsc_printable_id(replica->remote));
} else {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s",
rsc_printable_id(replica->container));
}
if (replica->ipaddr) {
offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)",
replica->ipaddr);
}
node = pe__current_node(replica->container);
common_print(rsc, pre_text, buffer, node, options, print_data);
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
void
pe__print_bundle(pe_resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
pe__bundle_variant_data_t *bundle_data = NULL;
char *child_text = NULL;
CRM_CHECK(rsc != NULL, return);
if (options & pe_print_xml) {
bundle_print_xml(rsc, pre_text, options, print_data);
return;
}
get_bundle_variant_data(bundle_data, rsc);
if (pre_text == NULL) {
pre_text = " ";
}
status_print("%sContainer bundle%s: %s [%s]%s%s\n",
pre_text, ((bundle_data->nreplicas > 1)? " set" : ""),
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "",
pcmk_is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)");
if (options & pe_print_html) {
status_print("
\n\n");
}
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (options & pe_print_html) {
status_print("- ");
}
if (pcmk_is_set(options, pe_print_implicit)) {
child_text = crm_strdup_printf(" %s", pre_text);
if (pcmk__list_of_multiple(bundle_data->replicas)) {
status_print(" %sReplica[%d]\n", pre_text, replica->offset);
}
if (options & pe_print_html) {
status_print("
\n\n");
}
print_rsc_in_list(replica->ip, child_text, options, print_data);
print_rsc_in_list(replica->container, child_text, options, print_data);
print_rsc_in_list(replica->remote, child_text, options, print_data);
print_rsc_in_list(replica->child, child_text, options, print_data);
if (options & pe_print_html) {
status_print("
\n");
}
} else {
child_text = crm_strdup_printf("%s ", pre_text);
print_bundle_replica(replica, child_text, options, print_data);
}
free(child_text);
if (options & pe_print_html) {
status_print(" \n");
}
}
if (options & pe_print_html) {
status_print("
\n");
}
}
static void
free_bundle_replica(pe__bundle_replica_t *replica)
{
if (replica == NULL) {
return;
}
if (replica->node) {
free(replica->node);
replica->node = NULL;
}
if (replica->ip) {
free_xml(replica->ip->xml);
replica->ip->xml = NULL;
replica->ip->fns->free(replica->ip);
replica->ip = NULL;
}
if (replica->container) {
free_xml(replica->container->xml);
replica->container->xml = NULL;
replica->container->fns->free(replica->container);
replica->container = NULL;
}
if (replica->remote) {
free_xml(replica->remote->xml);
replica->remote->xml = NULL;
replica->remote->fns->free(replica->remote);
replica->remote = NULL;
}
free(replica->ipaddr);
free(replica);
}
void
pe__free_bundle(pe_resource_t *rsc)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
pe_rsc_trace(rsc, "Freeing %s", rsc->id);
free(bundle_data->prefix);
free(bundle_data->image);
free(bundle_data->control_port);
free(bundle_data->host_network);
free(bundle_data->host_netmask);
free(bundle_data->ip_range_start);
free(bundle_data->container_network);
free(bundle_data->launcher_options);
free(bundle_data->container_command);
g_free(bundle_data->container_host_options);
g_list_free_full(bundle_data->replicas,
(GDestroyNotify) free_bundle_replica);
g_list_free_full(bundle_data->mounts, (GDestroyNotify)mount_free);
g_list_free_full(bundle_data->ports, (GDestroyNotify)port_free);
g_list_free(rsc->children);
if(bundle_data->child) {
free_xml(bundle_data->child->xml);
bundle_data->child->xml = NULL;
bundle_data->child->fns->free(bundle_data->child);
}
common_free(rsc);
}
enum rsc_role_e
pe__bundle_resource_state(const pe_resource_t *rsc, gboolean current)
{
enum rsc_role_e container_role = RSC_ROLE_UNKNOWN;
return container_role;
}
/*!
* \brief Get the number of configured replicas in a bundle
*
* \param[in] rsc Bundle resource
*
* \return Number of configured replicas, or 0 on error
*/
int
pe_bundle_replicas(const pe_resource_t *rsc)
{
if ((rsc == NULL) || (rsc->variant != pe_container)) {
return 0;
} else {
pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, rsc);
return bundle_data->nreplicas;
}
}
void
pe__count_bundle(pe_resource_t *rsc)
{
pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, rsc);
for (GList *item = bundle_data->replicas; item != NULL; item = item->next) {
pe__bundle_replica_t *replica = item->data;
if (replica->ip) {
replica->ip->fns->count(replica->ip);
}
if (replica->child) {
replica->child->fns->count(replica->child);
}
if (replica->container) {
replica->container->fns->count(replica->container);
}
if (replica->remote) {
replica->remote->fns->count(replica->remote);
}
}
}
gboolean
pe__bundle_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent)
{
gboolean passes = FALSE;
pe__bundle_variant_data_t *bundle_data = NULL;
if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches)) {
passes = TRUE;
} else {
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (replica->ip != NULL && !replica->ip->fns->is_filtered(replica->ip, only_rsc, FALSE)) {
passes = TRUE;
break;
} else if (replica->child != NULL && !replica->child->fns->is_filtered(replica->child, only_rsc, FALSE)) {
passes = TRUE;
break;
} else if (!replica->container->fns->is_filtered(replica->container, only_rsc, FALSE)) {
passes = TRUE;
break;
} else if (replica->remote != NULL && !replica->remote->fns->is_filtered(replica->remote, only_rsc, FALSE)) {
passes = TRUE;
break;
}
}
}
return !passes;
}
diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c
index 2d5b85d440..70716b6f8e 100644
--- a/lib/pengine/clone.c
+++ b/lib/pengine/clone.c
@@ -1,1319 +1,1317 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define VARIANT_CLONE 1
#include "./variant.h"
#ifdef PCMK__COMPAT_2_0
#define PROMOTED_INSTANCES RSC_ROLE_PROMOTED_LEGACY_S "s"
#define UNPROMOTED_INSTANCES RSC_ROLE_UNPROMOTED_LEGACY_S "s"
#else
#define PROMOTED_INSTANCES RSC_ROLE_PROMOTED_S
#define UNPROMOTED_INSTANCES RSC_ROLE_UNPROMOTED_S
#endif
/*!
* \internal
* \brief Return the maximum number of clone instances allowed to be promoted
*
* \param[in] clone Promotable clone or clone instance to check
*
* \return Maximum promoted instances for \p clone
*/
int
pe__clone_promoted_max(pe_resource_t *clone)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, uber_parent(clone));
return clone_data->promoted_max;
}
/*!
* \internal
* \brief Return the maximum number of clone instances allowed to be promoted
*
* \param[in] clone Promotable clone or clone instance to check
*
* \return Maximum promoted instances for \p clone
*/
int
pe__clone_promoted_node_max(pe_resource_t *clone)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, uber_parent(clone));
return clone_data->promoted_node_max;
}
static GList *
sorted_hash_table_values(GHashTable *table)
{
GList *retval = NULL;
GHashTableIter iter;
gpointer key, value;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
if (!g_list_find_custom(retval, value, (GCompareFunc) strcmp)) {
retval = g_list_prepend(retval, (char *) value);
}
}
retval = g_list_sort(retval, (GCompareFunc) strcmp);
return retval;
}
static GList *
nodes_with_status(GHashTable *table, const char *status)
{
GList *retval = NULL;
GHashTableIter iter;
gpointer key, value;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
if (!strcmp((char *) value, status)) {
retval = g_list_prepend(retval, key);
}
}
retval = g_list_sort(retval, (GCompareFunc) pcmk__numeric_strcasecmp);
return retval;
}
static GString *
node_list_to_str(const GList *list)
{
GString *retval = NULL;
for (const GList *iter = list; iter != NULL; iter = iter->next) {
pcmk__add_word(&retval, 1024, (const char *) iter->data);
}
return retval;
}
static void
clone_header(pcmk__output_t *out, int *rc, pe_resource_t *rsc, clone_variant_data_t *clone_data)
{
GString *attrs = NULL;
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__add_separated_word(&attrs, 64, "promotable", ", ");
}
if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
pcmk__add_separated_word(&attrs, 64, "unique", ", ");
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pcmk__add_separated_word(&attrs, 64, "unmanaged", ", ");
}
if (pe__resource_is_disabled(rsc)) {
pcmk__add_separated_word(&attrs, 64, "disabled", ", ");
}
if (attrs != NULL) {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, *rc, "Clone Set: %s [%s] (%s)",
rsc->id, ID(clone_data->xml_obj_child),
(const char *) attrs->str);
g_string_free(attrs, TRUE);
} else {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, *rc, "Clone Set: %s [%s]",
rsc->id, ID(clone_data->xml_obj_child))
}
}
void
pe__force_anon(const char *standard, pe_resource_t *rsc, const char *rid,
pe_working_set_t *data_set)
{
if (pe_rsc_is_clone(rsc)) {
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_warn("Ignoring " XML_RSC_ATTR_UNIQUE " for %s because %s resources "
"such as %s can be used only as anonymous clones",
rsc->id, standard, rid);
clone_data->clone_node_max = 1;
clone_data->clone_max = QB_MIN(clone_data->clone_max,
g_list_length(data_set->nodes));
}
}
pe_resource_t *
find_clone_instance(pe_resource_t * rsc, const char *sub_id, pe_working_set_t * data_set)
{
char *child_id = NULL;
pe_resource_t *child = NULL;
const char *child_base = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
child_base = ID(clone_data->xml_obj_child);
child_id = crm_strdup_printf("%s:%s", child_base, sub_id);
child = pe_find_resource(rsc->children, child_id);
free(child_id);
return child;
}
pe_resource_t *
pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set)
{
gboolean as_orphan = FALSE;
char *inc_num = NULL;
char *inc_max = NULL;
pe_resource_t *child_rsc = NULL;
xmlNode *child_copy = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
CRM_CHECK(clone_data->xml_obj_child != NULL, return FALSE);
if (clone_data->total_clones >= clone_data->clone_max) {
// If we've already used all available instances, this is an orphan
as_orphan = TRUE;
}
// Allocate instance numbers in numerical order (starting at 0)
inc_num = pcmk__itoa(clone_data->total_clones);
inc_max = pcmk__itoa(clone_data->clone_max);
child_copy = copy_xml(clone_data->xml_obj_child);
crm_xml_add(child_copy, XML_RSC_ATTR_INCARNATION, inc_num);
if (pe__unpack_resource(child_copy, &child_rsc, rsc,
data_set) != pcmk_rc_ok) {
- pe_err("Failed unpacking resource %s", crm_element_value(child_copy, XML_ATTR_ID));
- child_rsc = NULL;
goto bail;
}
/* child_rsc->globally_unique = rsc->globally_unique; */
CRM_ASSERT(child_rsc);
clone_data->total_clones += 1;
pe_rsc_trace(child_rsc, "Setting clone attributes for: %s", child_rsc->id);
rsc->children = g_list_append(rsc->children, child_rsc);
if (as_orphan) {
pe__set_resource_flags_recursive(child_rsc, pe_rsc_orphan);
}
add_hash_param(child_rsc->meta, XML_RSC_ATTR_INCARNATION_MAX, inc_max);
pe_rsc_trace(rsc, "Added %s instance %s", rsc->id, child_rsc->id);
bail:
free(inc_num);
free(inc_max);
return child_rsc;
}
gboolean
clone_unpack(pe_resource_t * rsc, pe_working_set_t * data_set)
{
int lpc = 0;
xmlNode *a_child = NULL;
xmlNode *xml_obj = rsc->xml;
clone_variant_data_t *clone_data = NULL;
const char *max_clones = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_MAX);
const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX);
pe_rsc_trace(rsc, "Processing resource %s...", rsc->id);
clone_data = calloc(1, sizeof(clone_variant_data_t));
rsc->variant_opaque = clone_data;
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
const char *promoted_max = NULL;
const char *promoted_node_max = NULL;
promoted_max = g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_PROMOTED_MAX);
if (promoted_max == NULL) {
// @COMPAT deprecated since 2.0.0
promoted_max = g_hash_table_lookup(rsc->meta,
PCMK_XA_PROMOTED_MAX_LEGACY);
}
promoted_node_max = g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_PROMOTED_NODEMAX);
if (promoted_node_max == NULL) {
// @COMPAT deprecated since 2.0.0
promoted_node_max =
g_hash_table_lookup(rsc->meta,
PCMK_XA_PROMOTED_NODE_MAX_LEGACY);
}
// Use 1 as default but 0 for minimum and invalid
if (promoted_max == NULL) {
clone_data->promoted_max = 1;
} else {
pcmk__scan_min_int(promoted_max, &(clone_data->promoted_max), 0);
}
// Use 1 as default but 0 for minimum and invalid
if (promoted_node_max == NULL) {
clone_data->promoted_node_max = 1;
} else {
pcmk__scan_min_int(promoted_node_max,
&(clone_data->promoted_node_max), 0);
}
}
// Implied by calloc()
/* clone_data->xml_obj_child = NULL; */
// Use 1 as default but 0 for minimum and invalid
if (max_clones_node == NULL) {
clone_data->clone_node_max = 1;
} else {
pcmk__scan_min_int(max_clones_node, &(clone_data->clone_node_max), 0);
}
/* Use number of nodes (but always at least 1, which is handy for crm_verify
* for a CIB without nodes) as default, but 0 for minimum and invalid
*/
if (max_clones == NULL) {
clone_data->clone_max = QB_MAX(1, g_list_length(data_set->nodes));
} else {
pcmk__scan_min_int(max_clones, &(clone_data->clone_max), 0);
}
if (crm_is_true(g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_ORDERED))) {
clone_data->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,
"Clone", rsc->id,
clone_data->flags,
pe__clone_ordered,
"pe__clone_ordered");
}
if ((rsc->flags & pe_rsc_unique) == 0 && clone_data->clone_node_max > 1) {
pcmk__config_err("Ignoring " XML_RSC_ATTR_PROMOTED_MAX " for %s "
"because anonymous clones support only one instance "
"per node", rsc->id);
clone_data->clone_node_max = 1;
}
pe_rsc_trace(rsc, "Options for %s", rsc->id);
pe_rsc_trace(rsc, "\tClone max: %d", clone_data->clone_max);
pe_rsc_trace(rsc, "\tClone node max: %d", clone_data->clone_node_max);
pe_rsc_trace(rsc, "\tClone is unique: %s",
pe__rsc_bool_str(rsc, pe_rsc_unique));
pe_rsc_trace(rsc, "\tClone is promotable: %s",
pe__rsc_bool_str(rsc, pe_rsc_promotable));
// Clones may contain a single group or primitive
for (a_child = pcmk__xe_first_child(xml_obj); a_child != NULL;
a_child = pcmk__xe_next(a_child)) {
if (pcmk__str_any_of((const char *)a_child->name, XML_CIB_TAG_RESOURCE, XML_CIB_TAG_GROUP, NULL)) {
clone_data->xml_obj_child = a_child;
break;
}
}
if (clone_data->xml_obj_child == NULL) {
pcmk__config_err("%s has nothing to clone", rsc->id);
return FALSE;
}
/*
* Make clones ever so slightly sticky by default
*
* This helps ensure clone instances are not shuffled around the cluster
* for no benefit in situations when pre-allocation is not appropriate
*/
if (g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_STICKINESS) == NULL) {
add_hash_param(rsc->meta, XML_RSC_ATTR_STICKINESS, "1");
}
/* This ensures that the globally-unique value always exists for children to
* inherit when being unpacked, as well as in resource agents' environment.
*/
add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE,
pe__rsc_bool_str(rsc, pe_rsc_unique));
if (clone_data->clone_max <= 0) {
/* Create one child instance so that unpack_find_resource() will hook up
* any orphans up to the parent correctly.
*/
if (pe__create_clone_child(rsc, data_set) == NULL) {
return FALSE;
}
} else {
// Create a child instance for each available instance number
for (lpc = 0; lpc < clone_data->clone_max; lpc++) {
if (pe__create_clone_child(rsc, data_set) == NULL) {
return FALSE;
}
}
}
pe_rsc_trace(rsc, "Added %d children to resource %s...", clone_data->clone_max, rsc->id);
return TRUE;
}
gboolean
clone_active(pe_resource_t * rsc, gboolean all)
{
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
gboolean child_active = child_rsc->fns->active(child_rsc, all);
if (all == FALSE && child_active) {
return TRUE;
} else if (all && child_active == FALSE) {
return FALSE;
}
}
if (all) {
return TRUE;
} else {
return FALSE;
}
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
static void
short_print(const char *list, const char *prefix, const char *type,
const char *suffix, long options, void *print_data)
{
if(suffix == NULL) {
suffix = "";
}
if (!pcmk__str_empty(list)) {
if (options & pe_print_html) {
status_print("");
}
status_print("%s%s: [ %s ]%s", prefix, type, list, suffix);
if (options & pe_print_html) {
status_print("\n");
} else if (options & pe_print_suppres_nl) {
/* nothing */
} else if ((options & pe_print_printf) || (options & pe_print_ncurses)) {
status_print("\n");
}
}
}
static const char *
configured_role_str(pe_resource_t * rsc)
{
const char *target_role = g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_TARGET_ROLE);
if ((target_role == NULL) && rsc->children && rsc->children->data) {
target_role = g_hash_table_lookup(((pe_resource_t*)rsc->children->data)->meta,
XML_RSC_ATTR_TARGET_ROLE);
}
return target_role;
}
static enum rsc_role_e
configured_role(pe_resource_t * rsc)
{
const char *target_role = configured_role_str(rsc);
if (target_role) {
return text2role(target_role);
}
return RSC_ROLE_UNKNOWN;
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
static void
clone_print_xml(pe_resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
char *child_text = crm_strdup_printf("%s ", pre_text);
const char *target_role = configured_role_str(rsc);
GList *gIter = rsc->children;
status_print("%sid);
status_print("multi_state=\"%s\" ",
pe__rsc_bool_str(rsc, pe_rsc_promotable));
status_print("unique=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_unique));
status_print("managed=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_managed));
status_print("failed=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_failed));
status_print("failure_ignored=\"%s\" ",
pe__rsc_bool_str(rsc, pe_rsc_failure_ignored));
if (target_role) {
status_print("target_role=\"%s\" ", target_role);
}
status_print(">\n");
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->fns->print(child_rsc, child_text, options, print_data);
}
status_print("%s\n", pre_text);
free(child_text);
}
bool
is_set_recursive(const pe_resource_t *rsc, long long flag, bool any)
{
GList *gIter;
bool all = !any;
if (pcmk_is_set(rsc->flags, flag)) {
if(any) {
return TRUE;
}
} else if(all) {
return FALSE;
}
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
if(is_set_recursive(gIter->data, flag, any)) {
if(any) {
return TRUE;
}
} else if(all) {
return FALSE;
}
}
if(all) {
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
void
clone_print(pe_resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
GString *list_text = NULL;
char *child_text = NULL;
GString *stopped_list = NULL;
GList *promoted_list = NULL;
GList *started_list = NULL;
GList *gIter = rsc->children;
clone_variant_data_t *clone_data = NULL;
int active_instances = 0;
if (pre_text == NULL) {
pre_text = " ";
}
if (options & pe_print_xml) {
clone_print_xml(rsc, pre_text, options, print_data);
return;
}
get_clone_variant_data(clone_data, rsc);
child_text = crm_strdup_printf("%s ", pre_text);
status_print("%sClone Set: %s [%s]%s%s%s",
pre_text ? pre_text : "", rsc->id, ID(clone_data->xml_obj_child),
pcmk_is_set(rsc->flags, pe_rsc_promotable)? " (promotable)" : "",
pcmk_is_set(rsc->flags, pe_rsc_unique)? " (unique)" : "",
pcmk_is_set(rsc->flags, pe_rsc_managed)? "" : " (unmanaged)");
if (options & pe_print_html) {
status_print("\n\n");
} else if ((options & pe_print_log) == 0) {
status_print("\n");
}
for (; gIter != NULL; gIter = gIter->next) {
gboolean print_full = FALSE;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
gboolean partially_active = child_rsc->fns->active(child_rsc, FALSE);
if (options & pe_print_clone_details) {
print_full = TRUE;
}
if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
// Print individual instance when unique (except stopped orphans)
if (partially_active || !pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
print_full = TRUE;
}
// Everything else in this block is for anonymous clones
} else if (pcmk_is_set(options, pe_print_pending)
&& (child_rsc->pending_task != NULL)
&& strcmp(child_rsc->pending_task, "probe")) {
// Print individual instance when non-probe action is pending
print_full = TRUE;
} else if (partially_active == FALSE) {
// List stopped instances when requested (except orphans)
if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan)
&& !pcmk_is_set(options, pe_print_clone_active)) {
pcmk__add_word(&stopped_list, 1024, child_rsc->id);
}
} else if (is_set_recursive(child_rsc, pe_rsc_orphan, TRUE)
|| is_set_recursive(child_rsc, pe_rsc_managed, FALSE) == FALSE
|| is_set_recursive(child_rsc, pe_rsc_failed, TRUE)) {
// Print individual instance when active orphaned/unmanaged/failed
print_full = TRUE;
} else if (child_rsc->fns->active(child_rsc, TRUE)) {
// Instance of fully active anonymous clone
pe_node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE);
if (location) {
// Instance is active on a single node
enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE);
if (location->details->online == FALSE && location->details->unclean) {
print_full = TRUE;
} else if (a_role > RSC_ROLE_UNPROMOTED) {
promoted_list = g_list_append(promoted_list, location);
} else {
started_list = g_list_append(started_list, location);
}
} else {
/* uncolocated group - bleh */
print_full = TRUE;
}
} else {
// Instance of partially active anonymous clone
print_full = TRUE;
}
if (print_full) {
if (options & pe_print_html) {
status_print("- \n");
}
child_rsc->fns->print(child_rsc, child_text, options, print_data);
if (options & pe_print_html) {
status_print("
\n");
}
}
}
/* Promoted */
promoted_list = g_list_sort(promoted_list, pe__cmp_node_name);
for (gIter = promoted_list; gIter; gIter = gIter->next) {
pe_node_t *host = gIter->data;
pcmk__add_word(&list_text, 1024, host->details->uname);
active_instances++;
}
if (list_text != NULL) {
short_print((const char *) list_text->str, child_text,
PROMOTED_INSTANCES, NULL, options, print_data);
g_string_truncate(list_text, 0);
}
g_list_free(promoted_list);
/* Started/Unpromoted */
started_list = g_list_sort(started_list, pe__cmp_node_name);
for (gIter = started_list; gIter; gIter = gIter->next) {
pe_node_t *host = gIter->data;
pcmk__add_word(&list_text, 1024, host->details->uname);
active_instances++;
}
if (list_text != NULL) {
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
enum rsc_role_e role = configured_role(rsc);
if (role == RSC_ROLE_UNPROMOTED) {
short_print((const char *) list_text->str, child_text,
UNPROMOTED_INSTANCES " (target-role)", NULL,
options, print_data);
} else {
short_print((const char *) list_text->str, child_text,
UNPROMOTED_INSTANCES, NULL, options, print_data);
}
} else {
short_print((const char *) list_text->str, child_text, "Started",
NULL, options, print_data);
}
}
g_list_free(started_list);
if (!pcmk_is_set(options, pe_print_clone_active)) {
const char *state = "Stopped";
enum rsc_role_e role = configured_role(rsc);
if (role == RSC_ROLE_STOPPED) {
state = "Stopped (disabled)";
}
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)
&& (clone_data->clone_max > active_instances)) {
GList *nIter;
GList *list = g_hash_table_get_values(rsc->allowed_nodes);
/* Custom stopped list for non-unique clones */
if (stopped_list != NULL) {
g_string_truncate(stopped_list, 0);
}
if (list == NULL) {
/* Clusters with symmetrical=false haven't calculated allowed_nodes yet
* If we've not probed for them yet, the Stopped list will be empty
*/
list = g_hash_table_get_values(rsc->known_on);
}
list = g_list_sort(list, pe__cmp_node_name);
for (nIter = list; nIter != NULL; nIter = nIter->next) {
pe_node_t *node = (pe_node_t *)nIter->data;
if (pe_find_node(rsc->running_on, node->details->uname) == NULL) {
pcmk__add_word(&stopped_list, 1024, node->details->uname);
}
}
g_list_free(list);
}
if (stopped_list != NULL) {
short_print((const char *) stopped_list->str, child_text, state,
NULL, options, print_data);
}
}
if (options & pe_print_html) {
status_print("
\n");
}
if (list_text != NULL) {
g_string_free(list_text, TRUE);
}
if (stopped_list != NULL) {
g_string_free(stopped_list, TRUE);
}
free(child_text);
}
PCMK__OUTPUT_ARGS("clone", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__clone_xml(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
GList *gIter = rsc->children;
GList *all = NULL;
int rc = pcmk_rc_no_output;
gboolean printed_header = FALSE;
gboolean print_everything = TRUE;
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) ||
(strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches));
all = g_list_prepend(all, (gpointer) "*");
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) {
continue;
}
if (child_rsc->fns->is_filtered(child_rsc, only_rsc, print_everything)) {
continue;
}
if (!printed_header) {
printed_header = TRUE;
rc = pe__name_and_nvpairs_xml(out, true, "clone", 8,
"id", rsc->id,
"multi_state", pe__rsc_bool_str(rsc, pe_rsc_promotable),
"unique", pe__rsc_bool_str(rsc, pe_rsc_unique),
"managed", pe__rsc_bool_str(rsc, pe_rsc_managed),
"disabled", pcmk__btoa(pe__resource_is_disabled(rsc)),
"failed", pe__rsc_bool_str(rsc, pe_rsc_failed),
"failure_ignored", pe__rsc_bool_str(rsc, pe_rsc_failure_ignored),
"target_role", configured_role_str(rsc));
CRM_ASSERT(rc == pcmk_rc_ok);
}
out->message(out, crm_map_element_name(child_rsc->xml), show_opts,
child_rsc, only_node, all);
}
if (printed_header) {
pcmk__output_xml_pop_parent(out);
}
g_list_free(all);
return rc;
}
PCMK__OUTPUT_ARGS("clone", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__clone_default(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
GHashTable *stopped = NULL;
GString *list_text = NULL;
GList *promoted_list = NULL;
GList *started_list = NULL;
GList *gIter = rsc->children;
clone_variant_data_t *clone_data = NULL;
int active_instances = 0;
int rc = pcmk_rc_no_output;
gboolean print_everything = TRUE;
get_clone_variant_data(clone_data, rsc);
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) ||
(strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches));
for (; gIter != NULL; gIter = gIter->next) {
gboolean print_full = FALSE;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
gboolean partially_active = child_rsc->fns->active(child_rsc, FALSE);
if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) {
continue;
}
if (child_rsc->fns->is_filtered(child_rsc, only_rsc, print_everything)) {
continue;
}
if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
print_full = TRUE;
}
if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
// Print individual instance when unique (except stopped orphans)
if (partially_active || !pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
print_full = TRUE;
}
// Everything else in this block is for anonymous clones
} else if (pcmk_is_set(show_opts, pcmk_show_pending)
&& (child_rsc->pending_task != NULL)
&& strcmp(child_rsc->pending_task, "probe")) {
// Print individual instance when non-probe action is pending
print_full = TRUE;
} else if (partially_active == FALSE) {
// List stopped instances when requested (except orphans)
if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan)
&& !pcmk_is_set(show_opts, pcmk_show_clone_detail)
&& pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
if (stopped == NULL) {
stopped = pcmk__strkey_table(free, free);
}
g_hash_table_insert(stopped, strdup(child_rsc->id), strdup("Stopped"));
}
} else if (is_set_recursive(child_rsc, pe_rsc_orphan, TRUE)
|| is_set_recursive(child_rsc, pe_rsc_managed, FALSE) == FALSE
|| is_set_recursive(child_rsc, pe_rsc_failed, TRUE)) {
// Print individual instance when active orphaned/unmanaged/failed
print_full = TRUE;
} else if (child_rsc->fns->active(child_rsc, TRUE)) {
// Instance of fully active anonymous clone
pe_node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE);
if (location) {
// Instance is active on a single node
enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE);
if (location->details->online == FALSE && location->details->unclean) {
print_full = TRUE;
} else if (a_role > RSC_ROLE_UNPROMOTED) {
promoted_list = g_list_append(promoted_list, location);
} else {
started_list = g_list_append(started_list, location);
}
} else {
/* uncolocated group - bleh */
print_full = TRUE;
}
} else {
// Instance of partially active anonymous clone
print_full = TRUE;
}
if (print_full) {
GList *all = NULL;
clone_header(out, &rc, rsc, clone_data);
/* Print every resource that's a child of this clone. */
all = g_list_prepend(all, (gpointer) "*");
out->message(out, crm_map_element_name(child_rsc->xml), show_opts,
child_rsc, only_node, all);
g_list_free(all);
}
}
if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return pcmk_rc_ok;
}
/* Promoted */
promoted_list = g_list_sort(promoted_list, pe__cmp_node_name);
for (gIter = promoted_list; gIter; gIter = gIter->next) {
pe_node_t *host = gIter->data;
if (!pcmk__str_in_list(host->details->uname, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
continue;
}
pcmk__add_word(&list_text, 1024, host->details->uname);
active_instances++;
}
g_list_free(promoted_list);
if ((list_text != NULL) && (list_text->len > 0)) {
clone_header(out, &rc, rsc, clone_data);
out->list_item(out, NULL, PROMOTED_INSTANCES ": [ %s ]",
(const char *) list_text->str);
g_string_truncate(list_text, 0);
}
/* Started/Unpromoted */
started_list = g_list_sort(started_list, pe__cmp_node_name);
for (gIter = started_list; gIter; gIter = gIter->next) {
pe_node_t *host = gIter->data;
if (!pcmk__str_in_list(host->details->uname, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
continue;
}
pcmk__add_word(&list_text, 1024, host->details->uname);
active_instances++;
}
g_list_free(started_list);
if ((list_text != NULL) && (list_text->len > 0)) {
clone_header(out, &rc, rsc, clone_data);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
enum rsc_role_e role = configured_role(rsc);
if (role == RSC_ROLE_UNPROMOTED) {
out->list_item(out, NULL,
UNPROMOTED_INSTANCES " (target-role): [ %s ]",
(const char *) list_text->str);
} else {
out->list_item(out, NULL, UNPROMOTED_INSTANCES ": [ %s ]",
(const char *) list_text->str);
}
} else {
out->list_item(out, NULL, "Started: [ %s ]",
(const char *) list_text->str);
}
}
if (list_text != NULL) {
g_string_free(list_text, TRUE);
}
if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)
&& (clone_data->clone_max > active_instances)) {
GList *nIter;
GList *list = g_hash_table_get_values(rsc->allowed_nodes);
/* Custom stopped table for non-unique clones */
if (stopped != NULL) {
g_hash_table_destroy(stopped);
stopped = NULL;
}
if (list == NULL) {
/* Clusters with symmetrical=false haven't calculated allowed_nodes yet
* If we've not probed for them yet, the Stopped list will be empty
*/
list = g_hash_table_get_values(rsc->known_on);
}
list = g_list_sort(list, pe__cmp_node_name);
for (nIter = list; nIter != NULL; nIter = nIter->next) {
pe_node_t *node = (pe_node_t *)nIter->data;
if (pe_find_node(rsc->running_on, node->details->uname) == NULL &&
pcmk__str_in_list(node->details->uname, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
xmlNode *probe_op = pe__failed_probe_for_rsc(rsc, node->details->uname);
const char *state = "Stopped";
if (configured_role(rsc) == RSC_ROLE_STOPPED) {
state = "Stopped (disabled)";
}
if (stopped == NULL) {
stopped = pcmk__strkey_table(free, free);
}
if (probe_op != NULL) {
int rc;
pcmk__scan_min_int(crm_element_value(probe_op, XML_LRM_ATTR_RC), &rc, 0);
g_hash_table_insert(stopped, strdup(node->details->uname),
crm_strdup_printf("Stopped (%s)", services_ocf_exitcode_str(rc)));
} else {
g_hash_table_insert(stopped, strdup(node->details->uname),
strdup(state));
}
}
}
g_list_free(list);
}
if (stopped != NULL) {
GList *list = sorted_hash_table_values(stopped);
clone_header(out, &rc, rsc, clone_data);
for (GList *status_iter = list; status_iter != NULL; status_iter = status_iter->next) {
const char *status = status_iter->data;
GList *nodes = nodes_with_status(stopped, status);
GString *nodes_str = node_list_to_str(nodes);
if (nodes_str != NULL) {
if (nodes_str->len > 0) {
out->list_item(out, NULL, "%s: [ %s ]", status,
(const char *) nodes_str->str);
}
g_string_free(nodes_str, TRUE);
}
g_list_free(nodes);
}
g_list_free(list);
g_hash_table_destroy(stopped);
/* If there are no instances of this clone (perhaps because there are no
* nodes configured), simply output the clone header by itself. This can
* come up in PCS testing.
*/
} else if (active_instances == 0) {
clone_header(out, &rc, rsc, clone_data);
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
void
clone_free(pe_resource_t * rsc)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_rsc_trace(rsc, "Freeing %s", rsc->id);
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
CRM_ASSERT(child_rsc);
pe_rsc_trace(child_rsc, "Freeing child %s", child_rsc->id);
free_xml(child_rsc->xml);
child_rsc->xml = NULL;
/* There could be a saved unexpanded xml */
free_xml(child_rsc->orig_xml);
child_rsc->orig_xml = NULL;
child_rsc->fns->free(child_rsc);
}
g_list_free(rsc->children);
if (clone_data) {
CRM_ASSERT(clone_data->demote_notify == NULL);
CRM_ASSERT(clone_data->stop_notify == NULL);
CRM_ASSERT(clone_data->start_notify == NULL);
CRM_ASSERT(clone_data->promote_notify == NULL);
}
common_free(rsc);
}
enum rsc_role_e
clone_resource_state(const pe_resource_t * rsc, gboolean current)
{
enum rsc_role_e clone_role = RSC_ROLE_UNKNOWN;
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, current);
if (a_role > clone_role) {
clone_role = a_role;
}
}
pe_rsc_trace(rsc, "%s role: %s", rsc->id, role2text(clone_role));
return clone_role;
}
/*!
* \internal
* \brief Check whether a clone has an instance for every node
*
* \param[in] rsc Clone to check
* \param[in] data_set Cluster state
*/
bool
pe__is_universal_clone(pe_resource_t *rsc,
pe_working_set_t *data_set)
{
if (pe_rsc_is_clone(rsc)) {
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (clone_data->clone_max == g_list_length(data_set->nodes)) {
return TRUE;
}
}
return FALSE;
}
gboolean
pe__clone_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent)
{
gboolean passes = FALSE;
clone_variant_data_t *clone_data = NULL;
if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches)) {
passes = TRUE;
} else {
get_clone_variant_data(clone_data, rsc);
passes = pcmk__str_in_list(ID(clone_data->xml_obj_child), only_rsc, pcmk__str_star_matches);
if (!passes) {
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (!child_rsc->fns->is_filtered(child_rsc, only_rsc, FALSE)) {
passes = TRUE;
break;
}
}
}
}
return !passes;
}
const char *
pe__clone_child_id(pe_resource_t *rsc)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
return ID(clone_data->xml_obj_child);
}
/*!
* \internal
* \brief Check whether a clone is ordered
*
* \param[in] clone Clone resource to check
*
* \return true if clone is ordered, otherwise false
*/
bool
pe__clone_is_ordered(pe_resource_t *clone)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, clone);
return pcmk_is_set(clone_data->flags, pe__clone_ordered);
}
/*!
* \internal
* \brief Set a clone flag
*
* \param[in] clone Clone resource to set flag for
* \param[in] flag Clone flag to set
*
* \return Standard Pacemaker return code (either pcmk_rc_ok if flag was not
* already set or pcmk_rc_already if it was)
*/
int
pe__set_clone_flag(pe_resource_t *clone, enum pe__clone_flags flag)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, clone);
if (pcmk_is_set(clone_data->flags, flag)) {
return pcmk_rc_already;
}
clone_data->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,
"Clone", clone->id,
clone_data->flags, flag, "flag");
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Create pseudo-actions needed for promotable clones
*
* \param[in] clone Promotable clone to create actions for
* \param[in] any_promoting Whether any instances will be promoted
* \param[in] any_demoting Whether any instance will be demoted
*/
void
pe__create_promotable_pseudo_ops(pe_resource_t *clone, bool any_promoting,
bool any_demoting)
{
pe_action_t *action = NULL;
pe_action_t *action_complete = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, clone);
// Create a "promote" action for the clone itself
action = pe__new_rsc_pseudo_action(clone, RSC_PROMOTE, !any_promoting,
true);
// Create a "promoted" action for when all promotions are done
action_complete = pe__new_rsc_pseudo_action(clone, RSC_PROMOTED,
!any_promoting, true);
action_complete->priority = INFINITY;
// Create notification pseudo-actions for promotion
if (clone_data->promote_notify == NULL) {
clone_data->promote_notify = pe__clone_notif_pseudo_ops(clone,
RSC_PROMOTE,
action,
action_complete);
}
// Create a "demote" action for the clone itself
action = pe__new_rsc_pseudo_action(clone, RSC_DEMOTE, !any_demoting, true);
// Create a "demoted" action for when all demotions are done
action_complete = pe__new_rsc_pseudo_action(clone, RSC_DEMOTED,
!any_demoting, true);
action_complete->priority = INFINITY;
// Create notification pseudo-actions for demotion
if (clone_data->demote_notify == NULL) {
clone_data->demote_notify = pe__clone_notif_pseudo_ops(clone,
RSC_DEMOTE,
action,
action_complete);
if (clone_data->promote_notify != NULL) {
order_actions(clone_data->stop_notify->post_done,
clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->start_notify->post_done,
clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done,
clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done,
clone_data->start_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done,
clone_data->stop_notify->pre,
pe_order_optional);
}
}
}
diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c
index c11f949c95..8c140ed820 100644
--- a/lib/pengine/complex.c
+++ b/lib/pengine/complex.c
@@ -1,1110 +1,1128 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include "pe_status_private.h"
void populate_hash(xmlNode * nvpair_list, GHashTable * hash, const char **attrs, int attrs_length);
resource_object_functions_t resource_class_functions[] = {
{
native_unpack,
native_find_rsc,
native_parameter,
native_print,
native_active,
native_resource_state,
native_location,
native_free,
pe__count_common,
pe__native_is_filtered,
},
{
group_unpack,
native_find_rsc,
native_parameter,
group_print,
group_active,
group_resource_state,
native_location,
group_free,
pe__count_common,
pe__group_is_filtered,
},
{
clone_unpack,
native_find_rsc,
native_parameter,
clone_print,
clone_active,
clone_resource_state,
native_location,
clone_free,
pe__count_common,
pe__clone_is_filtered,
},
{
pe__unpack_bundle,
native_find_rsc,
native_parameter,
pe__print_bundle,
pe__bundle_active,
pe__bundle_resource_state,
native_location,
pe__free_bundle,
pe__count_bundle,
pe__bundle_is_filtered,
}
};
static enum pe_obj_types
get_resource_type(const char *name)
{
if (pcmk__str_eq(name, XML_CIB_TAG_RESOURCE, pcmk__str_casei)) {
return pe_native;
} else if (pcmk__str_eq(name, XML_CIB_TAG_GROUP, pcmk__str_casei)) {
return pe_group;
} else if (pcmk__str_eq(name, XML_CIB_TAG_INCARNATION, pcmk__str_casei)) {
return pe_clone;
} else if (pcmk__str_eq(name, PCMK_XE_PROMOTABLE_LEGACY, pcmk__str_casei)) {
// @COMPAT deprecated since 2.0.0
return pe_clone;
} else if (pcmk__str_eq(name, XML_CIB_TAG_CONTAINER, pcmk__str_casei)) {
return pe_container;
}
return pe_unknown;
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
add_hash_param(user_data, key, value);
}
static void
expand_parents_fixed_nvpairs(pe_resource_t * rsc, pe_rule_eval_data_t * rule_data, GHashTable * meta_hash, pe_working_set_t * data_set)
{
GHashTable *parent_orig_meta = pcmk__strkey_table(free, free);
pe_resource_t *p = rsc->parent;
if (p == NULL) {
return ;
}
/* Search all parent resources, get the fixed value of "meta_attributes" set only in the original xml, and stack it in the hash table. */
/* The fixed value of the lower parent resource takes precedence and is not overwritten. */
while(p != NULL) {
/* A hash table for comparison is generated, including the id-ref. */
pe__unpack_dataset_nvpairs(p->xml, XML_TAG_META_SETS,
rule_data, parent_orig_meta, NULL, FALSE, data_set);
p = p->parent;
}
/* If there is a fixed value of "meta_attributes" of the parent resource, it will be processed. */
if (parent_orig_meta != NULL) {
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
g_hash_table_iter_init(&iter, parent_orig_meta);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
/* Parameters set in the original xml of the parent resource will also try to overwrite the child resource. */
/* Attributes that already exist in the child lease are not updated. */
dup_attr(key, value, meta_hash);
}
}
if (parent_orig_meta != NULL) {
g_hash_table_destroy(parent_orig_meta);
}
return ;
}
void
get_meta_attributes(GHashTable * meta_hash, pe_resource_t * rsc,
pe_node_t * node, pe_working_set_t * data_set)
{
pe_rsc_eval_data_t rsc_rule_data = {
.standard = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS),
.provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER),
.agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE)
};
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = &rsc_rule_data,
.op_data = NULL
};
if (node) {
rule_data.node_hash = node->details->attrs;
}
for (xmlAttrPtr a = pcmk__xe_first_attr(rsc->xml); a != NULL; a = a->next) {
const char *prop_name = (const char *) a->name;
const char *prop_value = crm_element_value(rsc->xml, prop_name);
add_hash_param(meta_hash, prop_name, prop_value);
}
pe__unpack_dataset_nvpairs(rsc->xml, XML_TAG_META_SETS, &rule_data,
meta_hash, NULL, FALSE, data_set);
/* Set the "meta_attributes" explicitly set in the parent resource to the hash table of the child resource. */
/* If it is already explicitly set as a child, it will not be overwritten. */
if (rsc->parent != NULL) {
expand_parents_fixed_nvpairs(rsc, &rule_data, meta_hash, data_set);
}
/* check the defaults */
pe__unpack_dataset_nvpairs(data_set->rsc_defaults, XML_TAG_META_SETS,
&rule_data, meta_hash, NULL, FALSE, data_set);
/* If there is "meta_attributes" that the parent resource has not explicitly set, set a value that is not set from rsc_default either. */
/* The values already set up to this point will not be overwritten. */
if (rsc->parent) {
g_hash_table_foreach(rsc->parent->meta, dup_attr, meta_hash);
}
}
void
get_rsc_attributes(GHashTable *meta_hash, const pe_resource_t *rsc,
const pe_node_t *node, pe_working_set_t *data_set)
{
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
if (node) {
rule_data.node_hash = node->details->attrs;
}
pe__unpack_dataset_nvpairs(rsc->xml, XML_TAG_ATTR_SETS, &rule_data,
meta_hash, NULL, FALSE, data_set);
/* set anything else based on the parent */
if (rsc->parent != NULL) {
get_rsc_attributes(meta_hash, rsc->parent, node, data_set);
} else {
/* and finally check the defaults */
pe__unpack_dataset_nvpairs(data_set->rsc_defaults, XML_TAG_ATTR_SETS,
&rule_data, meta_hash, NULL, FALSE, data_set);
}
}
static char *
template_op_key(xmlNode * op)
{
const char *name = crm_element_value(op, "name");
const char *role = crm_element_value(op, "role");
char *key = NULL;
if ((role == NULL)
|| pcmk__strcase_any_of(role, RSC_ROLE_STARTED_S, RSC_ROLE_UNPROMOTED_S,
RSC_ROLE_UNPROMOTED_LEGACY_S, NULL)) {
role = RSC_ROLE_UNKNOWN_S;
}
key = crm_strdup_printf("%s-%s", name, role);
return key;
}
static gboolean
unpack_template(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * data_set)
{
xmlNode *cib_resources = NULL;
xmlNode *template = NULL;
xmlNode *new_xml = NULL;
xmlNode *child_xml = NULL;
xmlNode *rsc_ops = NULL;
xmlNode *template_ops = NULL;
const char *template_ref = NULL;
const char *clone = NULL;
const char *id = NULL;
if (xml_obj == NULL) {
pe_err("No resource object for template unpacking");
return FALSE;
}
template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE);
if (template_ref == NULL) {
return TRUE;
}
id = ID(xml_obj);
if (id == NULL) {
pe_err("'%s' object must have a id", crm_element_name(xml_obj));
return FALSE;
}
if (pcmk__str_eq(template_ref, id, pcmk__str_none)) {
pe_err("The resource object '%s' should not reference itself", id);
return FALSE;
}
cib_resources = get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE);
if (cib_resources == NULL) {
pe_err("No resources configured");
return FALSE;
}
template = pcmk__xe_match(cib_resources, XML_CIB_TAG_RSC_TEMPLATE,
XML_ATTR_ID, template_ref);
if (template == NULL) {
pe_err("No template named '%s'", template_ref);
return FALSE;
}
new_xml = copy_xml(template);
xmlNodeSetName(new_xml, xml_obj->name);
crm_xml_replace(new_xml, XML_ATTR_ID, id);
clone = crm_element_value(xml_obj, XML_RSC_ATTR_INCARNATION);
if(clone) {
crm_xml_add(new_xml, XML_RSC_ATTR_INCARNATION, clone);
}
template_ops = find_xml_node(new_xml, "operations", FALSE);
for (child_xml = pcmk__xe_first_child(xml_obj); child_xml != NULL;
child_xml = pcmk__xe_next(child_xml)) {
xmlNode *new_child = NULL;
new_child = add_node_copy(new_xml, child_xml);
if (pcmk__str_eq((const char *)new_child->name, "operations", pcmk__str_none)) {
rsc_ops = new_child;
}
}
if (template_ops && rsc_ops) {
xmlNode *op = NULL;
GHashTable *rsc_ops_hash = pcmk__strkey_table(free, NULL);
for (op = pcmk__xe_first_child(rsc_ops); op != NULL;
op = pcmk__xe_next(op)) {
char *key = template_op_key(op);
g_hash_table_insert(rsc_ops_hash, key, op);
}
for (op = pcmk__xe_first_child(template_ops); op != NULL;
op = pcmk__xe_next(op)) {
char *key = template_op_key(op);
if (g_hash_table_lookup(rsc_ops_hash, key) == NULL) {
add_node_copy(rsc_ops, op);
}
free(key);
}
if (rsc_ops_hash) {
g_hash_table_destroy(rsc_ops_hash);
}
free_xml(template_ops);
}
/*free_xml(*expanded_xml); */
*expanded_xml = new_xml;
/* Disable multi-level templates for now */
/*if(unpack_template(new_xml, expanded_xml, data_set) == FALSE) {
free_xml(*expanded_xml);
*expanded_xml = NULL;
return FALSE;
} */
return TRUE;
}
static gboolean
add_template_rsc(xmlNode * xml_obj, pe_working_set_t * data_set)
{
const char *template_ref = NULL;
const char *id = NULL;
if (xml_obj == NULL) {
pe_err("No resource object for processing resource list of template");
return FALSE;
}
template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE);
if (template_ref == NULL) {
return TRUE;
}
id = ID(xml_obj);
if (id == NULL) {
pe_err("'%s' object must have a id", crm_element_name(xml_obj));
return FALSE;
}
if (pcmk__str_eq(template_ref, id, pcmk__str_none)) {
pe_err("The resource object '%s' should not reference itself", id);
return FALSE;
}
if (add_tag_ref(data_set->template_rsc_sets, template_ref, id) == FALSE) {
return FALSE;
}
return TRUE;
}
static bool
detect_promotable(pe_resource_t *rsc)
{
const char *promotable = g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_PROMOTABLE);
if (crm_is_true(promotable)) {
return TRUE;
}
// @COMPAT deprecated since 2.0.0
if (pcmk__str_eq(crm_element_name(rsc->xml), PCMK_XE_PROMOTABLE_LEGACY,
pcmk__str_casei)) {
/* @TODO in some future version, pe_warn_once() here,
* then drop support in even later version
*/
g_hash_table_insert(rsc->meta, strdup(XML_RSC_ATTR_PROMOTABLE),
strdup(XML_BOOLEAN_TRUE));
return TRUE;
}
return FALSE;
}
static void
free_params_table(gpointer data)
{
g_hash_table_destroy((GHashTable *) data);
}
/*!
* \brief Get a table of resource parameters
*
* \param[in,out] rsc Resource to query
* \param[in] node Node for evaluating rules (NULL for defaults)
* \param[in] data_set Cluster working set
*
* \return Hash table containing resource parameter names and values
* (or NULL if \p rsc or \p data_set is NULL)
* \note The returned table will be destroyed when the resource is freed, so
* callers should not destroy it.
*/
GHashTable *
pe_rsc_params(pe_resource_t *rsc, const pe_node_t *node,
pe_working_set_t *data_set)
{
GHashTable *params_on_node = NULL;
/* A NULL node is used to request the resource's default parameters
* (not evaluated for node), but we always want something non-NULL
* as a hash table key.
*/
const char *node_name = "";
// Sanity check
if ((rsc == NULL) || (data_set == NULL)) {
return NULL;
}
if ((node != NULL) && (node->details->uname != NULL)) {
node_name = node->details->uname;
}
// Find the parameter table for given node
if (rsc->parameter_cache == NULL) {
rsc->parameter_cache = pcmk__strikey_table(free, free_params_table);
} else {
params_on_node = g_hash_table_lookup(rsc->parameter_cache, node_name);
}
// If none exists yet, create one with parameters evaluated for node
if (params_on_node == NULL) {
params_on_node = pcmk__strkey_table(free, free);
get_rsc_attributes(params_on_node, rsc, node, data_set);
g_hash_table_insert(rsc->parameter_cache, strdup(node_name),
params_on_node);
}
return params_on_node;
}
/*!
* \internal
* \brief Unpack a resource's "requires" meta-attribute
*
* \param[in] rsc Resource being unpacked
* \param[in] value Value of "requires" meta-attribute
* \param[in] is_default Whether \p value was selected by default
*/
static void
unpack_requires(pe_resource_t *rsc, const char *value, bool is_default)
{
if (pcmk__str_eq(value, PCMK__VALUE_NOTHING, pcmk__str_casei)) {
} else if (pcmk__str_eq(value, PCMK__VALUE_QUORUM, pcmk__str_casei)) {
pe__set_resource_flags(rsc, pe_rsc_needs_quorum);
} else if (pcmk__str_eq(value, PCMK__VALUE_FENCING, pcmk__str_casei)) {
pe__set_resource_flags(rsc, pe_rsc_needs_fencing);
if (!pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) {
pcmk__config_warn("%s requires fencing but fencing is disabled",
rsc->id);
}
} else if (pcmk__str_eq(value, PCMK__VALUE_UNFENCING, pcmk__str_casei)) {
if (pcmk_is_set(rsc->flags, pe_rsc_fence_device)) {
pcmk__config_warn("Resetting \"" XML_RSC_ATTR_REQUIRES "\" for %s "
"to \"" PCMK__VALUE_QUORUM "\" because fencing "
"devices cannot require unfencing", rsc->id);
unpack_requires(rsc, PCMK__VALUE_QUORUM, true);
return;
} else if (!pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) {
pcmk__config_warn("Resetting \"" XML_RSC_ATTR_REQUIRES "\" for %s "
"to \"" PCMK__VALUE_QUORUM "\" because fencing "
"is disabled", rsc->id);
unpack_requires(rsc, PCMK__VALUE_QUORUM, true);
return;
} else {
pe__set_resource_flags(rsc,
pe_rsc_needs_fencing|pe_rsc_needs_unfencing);
}
} else {
const char *orig_value = value;
if (pcmk_is_set(rsc->flags, pe_rsc_fence_device)) {
value = PCMK__VALUE_QUORUM;
} else if ((rsc->variant == pe_native)
&& xml_contains_remote_node(rsc->xml)) {
value = PCMK__VALUE_QUORUM;
} else if (pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing)) {
value = PCMK__VALUE_UNFENCING;
} else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) {
value = PCMK__VALUE_FENCING;
} else if (rsc->cluster->no_quorum_policy == no_quorum_ignore) {
value = PCMK__VALUE_NOTHING;
} else {
value = PCMK__VALUE_QUORUM;
}
if (orig_value != NULL) {
pcmk__config_err("Resetting '" XML_RSC_ATTR_REQUIRES "' for %s "
"to '%s' because '%s' is not valid",
rsc->id, value, orig_value);
}
unpack_requires(rsc, value, true);
return;
}
pe_rsc_trace(rsc, "\tRequired to start: %s%s", value,
(is_default? " (default)" : ""));
}
/*!
* \internal
* \brief Unpack configuration XML for a given resource
*
* Unpack the XML object containing a resource's configuration into a new
* \c pe_resource_t object.
*
* \param[in] xml_obj XML node containing the resource's configuration
* \param[out] rsc Where to store the unpacked resource information
* \param[in] parent Resource's parent, if any
* \param[in,out] data_set Cluster working set
*
* \return Standard Pacemaker return code
+ * \note If pcmk_rc_ok is returned, \p *rsc is guaranteed to be non-NULL, and
+ * the caller is responsible for freeing it using its variant-specific
+ * free() method. Otherwise, \p *rsc is guaranteed to be NULL.
*/
int
pe__unpack_resource(xmlNode *xml_obj, pe_resource_t **rsc,
pe_resource_t *parent, pe_working_set_t *data_set)
{
xmlNode *expanded_xml = NULL;
xmlNode *ops = NULL;
const char *value = NULL;
- const char *rclass = NULL; /* Look for this after any templates have been expanded */
- const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
+ const char *id = NULL;
bool guest_node = false;
bool remote_node = false;
- bool has_versioned_params = false;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
- .now = data_set->now,
+ .now = NULL,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
- crm_log_xml_trace(xml_obj, "Processing resource input...");
-
CRM_CHECK(rsc != NULL, return EINVAL);
+ CRM_CHECK((xml_obj != NULL) && (data_set != NULL),
+ *rsc = NULL;
+ return EINVAL);
+ rule_data.now = data_set->now;
+
+ crm_log_xml_trace(xml_obj, "[raw XML]");
+
+ id = crm_element_value(xml_obj, XML_ATTR_ID);
if (id == NULL) {
- pe_err("Must specify id tag in ");
+ pe_err("Ignoring <%s> configuration without " XML_ATTR_ID,
+ crm_element_name(xml_obj));
return pcmk_rc_unpack_error;
}
if (unpack_template(xml_obj, &expanded_xml, data_set) == FALSE) {
return pcmk_rc_unpack_error;
}
*rsc = calloc(1, sizeof(pe_resource_t));
+ if (*rsc == NULL) {
+ crm_crit("Unable to allocate memory for resource '%s'", id);
+ return ENOMEM;
+ }
(*rsc)->cluster = data_set;
if (expanded_xml) {
- crm_log_xml_trace(expanded_xml, "Expanded resource...");
+ crm_log_xml_trace(expanded_xml, "[expanded XML]");
(*rsc)->xml = expanded_xml;
(*rsc)->orig_xml = xml_obj;
} else {
(*rsc)->xml = xml_obj;
(*rsc)->orig_xml = NULL;
}
/* Do not use xml_obj from here on, use (*rsc)->xml in case templates are involved */
- rclass = crm_element_value((*rsc)->xml, XML_AGENT_ATTR_CLASS);
+
(*rsc)->parent = parent;
ops = find_xml_node((*rsc)->xml, "operations", FALSE);
(*rsc)->ops_xml = expand_idref(ops, data_set->input);
(*rsc)->variant = get_resource_type(crm_element_name((*rsc)->xml));
if ((*rsc)->variant == pe_unknown) {
- pe_err("Unknown resource type: %s", crm_element_name((*rsc)->xml));
- free(*rsc);
+ pe_err("Ignoring resource '%s' of unknown type '%s'",
+ id, crm_element_name((*rsc)->xml));
+ common_free(*rsc);
+ *rsc = NULL;
return pcmk_rc_unpack_error;
}
(*rsc)->meta = pcmk__strkey_table(free, free);
(*rsc)->allowed_nodes = pcmk__strkey_table(NULL, free);
(*rsc)->known_on = pcmk__strkey_table(NULL, free);
value = crm_element_value((*rsc)->xml, XML_RSC_ATTR_INCARNATION);
if (value) {
(*rsc)->id = crm_strdup_printf("%s:%s", id, value);
add_hash_param((*rsc)->meta, XML_RSC_ATTR_INCARNATION, value);
} else {
(*rsc)->id = strdup(id);
}
(*rsc)->fns = &resource_class_functions[(*rsc)->variant];
- pe_rsc_trace((*rsc), "Unpacking resource...");
get_meta_attributes((*rsc)->meta, *rsc, NULL, data_set);
(*rsc)->parameters = pe_rsc_params(*rsc, NULL, data_set); // \deprecated
(*rsc)->flags = 0;
pe__set_resource_flags(*rsc, pe_rsc_runnable|pe_rsc_provisional);
if (!pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) {
pe__set_resource_flags(*rsc, pe_rsc_managed);
}
(*rsc)->rsc_cons = NULL;
(*rsc)->rsc_tickets = NULL;
(*rsc)->actions = NULL;
(*rsc)->role = RSC_ROLE_STOPPED;
(*rsc)->next_role = RSC_ROLE_UNKNOWN;
(*rsc)->recovery_type = recovery_stop_start;
(*rsc)->stickiness = 0;
(*rsc)->migration_threshold = INFINITY;
(*rsc)->failure_timeout = 0;
value = g_hash_table_lookup((*rsc)->meta, XML_CIB_ATTR_PRIORITY);
(*rsc)->priority = char2score(value);
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_CRITICAL);
if ((value == NULL) || crm_is_true(value)) {
pe__set_resource_flags(*rsc, pe_rsc_critical);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_NOTIFY);
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pe_rsc_notify);
}
if (xml_contains_remote_node((*rsc)->xml)) {
(*rsc)->is_remote_node = TRUE;
if (g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_CONTAINER)) {
guest_node = true;
} else {
remote_node = true;
}
}
value = g_hash_table_lookup((*rsc)->meta, XML_OP_ATTR_ALLOW_MIGRATE);
- if (crm_is_true(value) && has_versioned_params) {
- pe_rsc_trace((*rsc), "Migration is disabled for resources with versioned parameters");
- } else if (crm_is_true(value)) {
+ if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pe_rsc_allow_migrate);
- } else if ((value == NULL) && remote_node && !has_versioned_params) {
+ } else if ((value == NULL) && remote_node) {
/* By default, we want remote nodes to be able
* to float around the cluster without having to stop all the
* resources within the remote-node before moving. Allowing
* migration support enables this feature. If this ever causes
* problems, migration support can be explicitly turned off with
* allow-migrate=false.
- * We don't support migration for versioned resources, though. */
+ */
pe__set_resource_flags(*rsc, pe_rsc_allow_migrate);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MANAGED);
if (value != NULL && !pcmk__str_eq("default", value, pcmk__str_casei)) {
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pe_rsc_managed);
} else {
pe__clear_resource_flags(*rsc, pe_rsc_managed);
}
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MAINTENANCE);
if (crm_is_true(value)) {
pe__clear_resource_flags(*rsc, pe_rsc_managed);
pe__set_resource_flags(*rsc, pe_rsc_maintenance);
}
if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) {
pe__clear_resource_flags(*rsc, pe_rsc_managed);
pe__set_resource_flags(*rsc, pe_rsc_maintenance);
}
if (pe_rsc_is_clone(uber_parent(*rsc))) {
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE);
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pe_rsc_unique);
}
if (detect_promotable(*rsc)) {
pe__set_resource_flags(*rsc, pe_rsc_promotable);
}
} else {
pe__set_resource_flags(*rsc, pe_rsc_unique);
}
- pe_rsc_trace((*rsc), "Options for %s", (*rsc)->id);
-
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_RESTART);
if (pcmk__str_eq(value, "restart", pcmk__str_casei)) {
(*rsc)->restart_type = pe_restart_restart;
- pe_rsc_trace((*rsc), "\tDependency restart handling: restart");
+ pe_rsc_trace((*rsc), "%s dependency restart handling: restart",
+ (*rsc)->id);
pe_warn_once(pe_wo_restart_type,
"Support for restart-type is deprecated and will be removed in a future release");
} else {
(*rsc)->restart_type = pe_restart_ignore;
- pe_rsc_trace((*rsc), "\tDependency restart handling: ignore");
+ pe_rsc_trace((*rsc), "%s dependency restart handling: ignore",
+ (*rsc)->id);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MULTIPLE);
if (pcmk__str_eq(value, "stop_only", pcmk__str_casei)) {
(*rsc)->recovery_type = recovery_stop_only;
- pe_rsc_trace((*rsc), "\tMultiple running resource recovery: stop only");
+ pe_rsc_trace((*rsc), "%s multiple running resource recovery: stop only",
+ (*rsc)->id);
} else if (pcmk__str_eq(value, "block", pcmk__str_casei)) {
(*rsc)->recovery_type = recovery_block;
- pe_rsc_trace((*rsc), "\tMultiple running resource recovery: block");
+ pe_rsc_trace((*rsc), "%s multiple running resource recovery: block",
+ (*rsc)->id);
} else if (pcmk__str_eq(value, "stop_unexpected", pcmk__str_casei)) {
(*rsc)->recovery_type = recovery_stop_unexpected;
- pe_rsc_trace((*rsc), "\tMultiple running resource recovery: "
- "stop unexpected instances");
+ pe_rsc_trace((*rsc), "%s multiple running resource recovery: "
+ "stop unexpected instances",
+ (*rsc)->id);
} else { // "stop_start"
if (!pcmk__str_eq(value, "stop_start",
pcmk__str_casei|pcmk__str_null_matches)) {
pe_warn("%s is not a valid value for " XML_RSC_ATTR_MULTIPLE
", using default of \"stop_start\"", value);
}
(*rsc)->recovery_type = recovery_stop_start;
- pe_rsc_trace((*rsc), "\tMultiple running resource recovery: stop/start");
+ pe_rsc_trace((*rsc), "%s multiple running resource recovery: "
+ "stop/start", (*rsc)->id);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_STICKINESS);
if (value != NULL && !pcmk__str_eq("default", value, pcmk__str_casei)) {
(*rsc)->stickiness = char2score(value);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_STICKINESS);
if (value != NULL && !pcmk__str_eq("default", value, pcmk__str_casei)) {
(*rsc)->migration_threshold = char2score(value);
if ((*rsc)->migration_threshold < 0) {
/* @TODO We use 1 here to preserve previous behavior, but this
* should probably use the default (INFINITY) or 0 (to disable)
* instead.
*/
pe_warn_once(pe_wo_neg_threshold,
XML_RSC_ATTR_FAIL_STICKINESS
" must be non-negative, using 1 instead");
(*rsc)->migration_threshold = 1;
}
}
- if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ if (pcmk__str_eq(crm_element_value((*rsc)->xml, XML_AGENT_ATTR_CLASS),
+ PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
pe__set_working_set_flags(data_set, pe_flag_have_stonith_resource);
pe__set_resource_flags(*rsc, pe_rsc_fence_device);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_REQUIRES);
unpack_requires(*rsc, value, false);
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_TIMEOUT);
if (value != NULL) {
// Stored as seconds
(*rsc)->failure_timeout = (int) (crm_parse_interval_spec(value) / 1000);
}
if (remote_node) {
GHashTable *params = pe_rsc_params(*rsc, NULL, data_set);
/* Grabbing the value now means that any rules based on node attributes
* will evaluate to false, so such rules should not be used with
* reconnect_interval.
*
* @TODO Evaluate per node before using
*/
value = g_hash_table_lookup(params, XML_REMOTE_ATTR_RECONNECT_INTERVAL);
if (value) {
/* reconnect delay works by setting failure_timeout and preventing the
* connection from starting until the failure is cleared. */
(*rsc)->remote_reconnect_ms = crm_parse_interval_spec(value);
/* we want to override any default failure_timeout in use when remote
* reconnect_interval is in use. */
(*rsc)->failure_timeout = (*rsc)->remote_reconnect_ms / 1000;
}
}
get_target_role(*rsc, &((*rsc)->next_role));
- pe_rsc_trace((*rsc), "\tDesired next state: %s",
+ pe_rsc_trace((*rsc), "%s desired next state: %s", (*rsc)->id,
(*rsc)->next_role != RSC_ROLE_UNKNOWN ? role2text((*rsc)->next_role) : "default");
if ((*rsc)->fns->unpack(*rsc, data_set) == FALSE) {
+ (*rsc)->fns->free(*rsc);
+ *rsc = NULL;
return pcmk_rc_unpack_error;
}
if (pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)) {
// This tag must stay exactly the same because it is tested elsewhere
resource_location(*rsc, NULL, 0, "symmetric_default", data_set);
} else if (guest_node) {
/* remote resources tied to a container resource must always be allowed
* to opt-in to the cluster. Whether the connection resource is actually
* allowed to be placed on a node is dependent on the container resource */
resource_location(*rsc, NULL, 0, "remote_connection_default", data_set);
}
- pe_rsc_trace((*rsc), "\tAction notification: %s",
+ pe_rsc_trace((*rsc), "%s action notification: %s", (*rsc)->id,
pcmk_is_set((*rsc)->flags, pe_rsc_notify)? "required" : "not required");
(*rsc)->utilization = pcmk__strkey_table(free, free);
pe__unpack_dataset_nvpairs((*rsc)->xml, XML_TAG_UTILIZATION, &rule_data,
(*rsc)->utilization, NULL, FALSE, data_set);
-/* data_set->resources = g_list_append(data_set->resources, (*rsc)); */
-
if (expanded_xml) {
if (add_template_rsc(xml_obj, data_set) == FALSE) {
+ (*rsc)->fns->free(*rsc);
+ *rsc = NULL;
return pcmk_rc_unpack_error;
}
}
return pcmk_rc_ok;
}
void
common_update_score(pe_resource_t * rsc, const char *id, int score)
{
pe_node_t *node = NULL;
node = pe_hash_table_lookup(rsc->allowed_nodes, id);
if (node != NULL) {
pe_rsc_trace(rsc, "Updating score for %s on %s: %d + %d", rsc->id, id, node->weight, score);
node->weight = pcmk__add_scores(node->weight, score);
}
if (rsc->children) {
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
common_update_score(child_rsc, id, score);
}
}
}
gboolean
is_parent(pe_resource_t *child, pe_resource_t *rsc)
{
pe_resource_t *parent = child;
if (parent == NULL || rsc == NULL) {
return FALSE;
}
while (parent->parent != NULL) {
if (parent->parent == rsc) {
return TRUE;
}
parent = parent->parent;
}
return FALSE;
}
pe_resource_t *
uber_parent(pe_resource_t * rsc)
{
pe_resource_t *parent = rsc;
if (parent == NULL) {
return NULL;
}
while (parent->parent != NULL && parent->parent->variant != pe_container) {
parent = parent->parent;
}
return parent;
}
void
common_free(pe_resource_t * rsc)
{
if (rsc == NULL) {
return;
}
pe_rsc_trace(rsc, "Freeing %s %d", rsc->id, rsc->variant);
g_list_free(rsc->rsc_cons);
g_list_free(rsc->rsc_cons_lhs);
g_list_free(rsc->rsc_tickets);
g_list_free(rsc->dangling_migrations);
if (rsc->parameter_cache != NULL) {
g_hash_table_destroy(rsc->parameter_cache);
}
if (rsc->meta != NULL) {
g_hash_table_destroy(rsc->meta);
}
if (rsc->utilization != NULL) {
g_hash_table_destroy(rsc->utilization);
}
if ((rsc->parent == NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
free_xml(rsc->xml);
rsc->xml = NULL;
free_xml(rsc->orig_xml);
rsc->orig_xml = NULL;
/* if rsc->orig_xml, then rsc->xml is an expanded xml from a template */
} else if (rsc->orig_xml) {
free_xml(rsc->xml);
rsc->xml = NULL;
}
if (rsc->running_on) {
g_list_free(rsc->running_on);
rsc->running_on = NULL;
}
if (rsc->known_on) {
g_hash_table_destroy(rsc->known_on);
rsc->known_on = NULL;
}
if (rsc->actions) {
g_list_free(rsc->actions);
rsc->actions = NULL;
}
if (rsc->allowed_nodes) {
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = NULL;
}
g_list_free(rsc->fillers);
g_list_free(rsc->rsc_location);
pe_rsc_trace(rsc, "Resource freed");
free(rsc->id);
free(rsc->clone_name);
free(rsc->allocated_to);
free(rsc->variant_opaque);
free(rsc->pending_task);
free(rsc);
}
/*!
* \brief
* \internal Find a node (and optionally count all) where resource is active
*
* \param[in] rsc Resource to check
* \param[out] count_all If not NULL, will be set to count of active nodes
* \param[out] count_clean If not NULL, will be set to count of clean nodes
*
* \return An active node (or NULL if resource is not active anywhere)
*
* \note The order of preference is: an active node that is the resource's
* partial migration source; if the resource's "requires" is "quorum" or
* "nothing", the first active node in the list that is clean and online;
* the first active node in the list.
*/
pe_node_t *
pe__find_active_on(const pe_resource_t *rsc, unsigned int *count_all,
unsigned int *count_clean)
{
pe_node_t *active = NULL;
pe_node_t *node = NULL;
bool keep_looking = FALSE;
bool is_happy = FALSE;
if (count_all) {
*count_all = 0;
}
if (count_clean) {
*count_clean = 0;
}
if (rsc == NULL) {
return NULL;
}
for (GList *node_iter = rsc->running_on; node_iter != NULL;
node_iter = node_iter->next) {
node = node_iter->data;
keep_looking = FALSE;
is_happy = node->details->online && !node->details->unclean;
if (count_all) {
++*count_all;
}
if (count_clean && is_happy) {
++*count_clean;
}
if (count_all || count_clean) {
// If we're counting, we need to go through entire list
keep_looking = TRUE;
}
if (rsc->partial_migration_source != NULL) {
if (node->details == rsc->partial_migration_source->details) {
// This is the migration source
active = node;
} else {
keep_looking = TRUE;
}
} else if (!pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
if (is_happy && (!active || !active->details->online
|| active->details->unclean)) {
// This is the first clean node
active = node;
} else {
keep_looking = TRUE;
}
}
if (active == NULL) {
// This is first node in list
active = node;
}
if (keep_looking == FALSE) {
// Don't waste time iterating if we don't have to
break;
}
}
return active;
}
/*!
* \brief
* \internal Find and count active nodes according to "requires"
*
* \param[in] rsc Resource to check
* \param[out] count If not NULL, will be set to count of active nodes
*
* \return An active node (or NULL if resource is not active anywhere)
*
* \note This is a convenience wrapper for pe__find_active_on() where the count
* of all active nodes or only clean active nodes is desired according to
* the "requires" meta-attribute.
*/
pe_node_t *
pe__find_active_requires(const pe_resource_t *rsc, unsigned int *count)
{
if (rsc && !pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
return pe__find_active_on(rsc, NULL, count);
}
return pe__find_active_on(rsc, count, NULL);
}
void
pe__count_common(pe_resource_t *rsc)
{
if (rsc->children != NULL) {
for (GList *item = rsc->children; item != NULL; item = item->next) {
((pe_resource_t *) item->data)->fns->count(item->data);
}
} else if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)
|| (rsc->role > RSC_ROLE_STOPPED)) {
rsc->cluster->ninstances++;
if (pe__resource_is_disabled(rsc)) {
rsc->cluster->disabled_resources++;
}
if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
rsc->cluster->blocked_resources++;
}
}
}
/*!
* \internal
* \brief Update a resource's next role
*
* \param[in,out] rsc Resource to be updated
* \param[in] role Resource's new next role
* \param[in] why Human-friendly reason why role is changing (for logs)
*/
void
pe__set_next_role(pe_resource_t *rsc, enum rsc_role_e role, const char *why)
{
CRM_ASSERT((rsc != NULL) && (why != NULL));
if (rsc->next_role != role) {
pe_rsc_trace(rsc, "Resetting next role for %s from %s to %s (%s)",
rsc->id, role2text(rsc->next_role), role2text(role), why);
rsc->next_role = role;
}
}
diff --git a/lib/pengine/group.c b/lib/pengine/group.c
index 56d25ea830..0389ef5242 100644
--- a/lib/pengine/group.c
+++ b/lib/pengine/group.c
@@ -1,491 +1,495 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
typedef struct group_variant_data_s {
pe_resource_t *last_child; // Last group member
uint32_t flags; // Group of enum pe__group_flags
} group_variant_data_t;
/*!
* \internal
* \brief Get a group's last member
*
* \param[in] group Group resource to check
*
* \return Last member of \p group if any, otherwise NULL
*/
pe_resource_t *
pe__last_group_member(const pe_resource_t *group)
{
if (group != NULL) {
CRM_CHECK((group->variant == pe_group)
&& (group->variant_opaque != NULL), return NULL);
return ((group_variant_data_t *) group->variant_opaque)->last_child;
}
return NULL;
}
/*!
* \internal
* \brief Check whether a group flag is set
*
* \param[in] group Group resource to check
* \param[in] flags Flag or flags to check
*
* \return true if all \p flags are set for \p group, otherwise false
*/
bool
pe__group_flag_is_set(const pe_resource_t *group, uint32_t flags)
{
group_variant_data_t *group_data = NULL;
CRM_CHECK((group != NULL) && (group->variant == pe_group)
&& (group->variant_opaque != NULL), return false);
group_data = (group_variant_data_t *) group->variant_opaque;
return pcmk_all_flags_set(group_data->flags, flags);
}
/*!
* \internal
* \brief Set a (deprecated) group flag
*
* \param[in,out] group Group resource to check
* \param[in] option Name of boolean configuration option
* \param[in] flag Flag to set if \p option is true (which is default)
* \param[in] wo_bit "Warn once" flag to use for deprecation warning
*/
static void
set_group_flag(pe_resource_t *group, const char *option, uint32_t flag,
uint32_t wo_bit)
{
const char *value_s = NULL;
int value = 0;
value_s = g_hash_table_lookup(group->meta, option);
// We don't actually need the null check but it speeds up the common case
if ((value_s == NULL) || (crm_str_to_boolean(value_s, &value) < 0)
|| (value != 0)) {
((group_variant_data_t *) group->variant_opaque)->flags |= flag;
} else {
pe_warn_once(wo_bit,
"Support for the '%s' group meta-attribute is deprecated "
"and will be removed in a future release "
"(use a resource set instead)", option);
}
}
static int
inactive_resources(pe_resource_t *rsc)
{
int retval = 0;
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (!child_rsc->fns->active(child_rsc, TRUE)) {
retval++;
}
}
return retval;
}
static void
group_header(pcmk__output_t *out, int *rc, pe_resource_t *rsc, int n_inactive, bool show_inactive)
{
GString *attrs = NULL;
if (n_inactive > 0 && !show_inactive) {
attrs = g_string_sized_new(64);
g_string_append_printf(attrs, "%d member%s inactive", n_inactive,
pcmk__plural_s(n_inactive));
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pcmk__add_separated_word(&attrs, 64, "unmanaged", ", ");
}
if (pe__resource_is_disabled(rsc)) {
pcmk__add_separated_word(&attrs, 64, "disabled", ", ");
}
if (attrs != NULL) {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, *rc, "Resource Group: %s (%s)",
rsc->id, (const char *) attrs->str);
g_string_free(attrs, TRUE);
} else {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, *rc, "Resource Group: %s", rsc->id);
}
}
static bool
skip_child_rsc(pe_resource_t *rsc, pe_resource_t *child, gboolean parent_passes,
GList *only_rsc, uint32_t show_opts)
{
bool star_list = pcmk__list_of_1(only_rsc) &&
pcmk__str_eq("*", g_list_first(only_rsc)->data, pcmk__str_none);
bool child_filtered = child->fns->is_filtered(child, only_rsc, FALSE);
bool child_active = child->fns->active(child, FALSE);
bool show_inactive = pcmk_is_set(show_opts, pcmk_show_inactive_rscs);
/* If the resource is in only_rsc by name (so, ignoring "*") then allow
* it regardless of if it's active or not.
*/
if (!star_list && !child_filtered) {
return false;
} else if (!child_filtered && (child_active || show_inactive)) {
return false;
} else if (parent_passes && (child_active || show_inactive)) {
return false;
}
return true;
}
gboolean
group_unpack(pe_resource_t * rsc, pe_working_set_t * data_set)
{
xmlNode *xml_obj = rsc->xml;
xmlNode *xml_native_rsc = NULL;
group_variant_data_t *group_data = NULL;
const char *clone_id = NULL;
pe_rsc_trace(rsc, "Processing resource %s...", rsc->id);
group_data = calloc(1, sizeof(group_variant_data_t));
group_data->last_child = NULL;
rsc->variant_opaque = group_data;
// @COMPAT These are deprecated since 2.1.5
set_group_flag(rsc, XML_RSC_ATTR_ORDERED, pe__group_ordered,
pe_wo_group_order);
set_group_flag(rsc, "collocated", pe__group_colocated, pe_wo_group_coloc);
clone_id = crm_element_value(rsc->xml, XML_RSC_ATTR_INCARNATION);
for (xml_native_rsc = pcmk__xe_first_child(xml_obj); xml_native_rsc != NULL;
xml_native_rsc = pcmk__xe_next(xml_native_rsc)) {
if (pcmk__str_eq((const char *)xml_native_rsc->name,
XML_CIB_TAG_RESOURCE, pcmk__str_none)) {
pe_resource_t *new_rsc = NULL;
crm_xml_add(xml_native_rsc, XML_RSC_ATTR_INCARNATION, clone_id);
if (pe__unpack_resource(xml_native_rsc, &new_rsc, rsc,
data_set) != pcmk_rc_ok) {
- pe_err("Failed unpacking resource %s", crm_element_value(xml_obj, XML_ATTR_ID));
- if (new_rsc != NULL && new_rsc->fns != NULL) {
- new_rsc->fns->free(new_rsc);
- }
continue;
}
rsc->children = g_list_append(rsc->children, new_rsc);
group_data->last_child = new_rsc;
pe_rsc_trace(rsc, "Added %s member %s", rsc->id, new_rsc->id);
}
}
if (rsc->children == NULL) {
- // Allow empty groups, children can be added later
- pcmk__config_warn("Group %s does not have any children", rsc->id);
+ /* The schema does not allow empty groups, but if validation is
+ * disabled, we allow them (members can be added later).
+ *
+ * @COMPAT At a major release bump, we should consider this a failure so
+ * that group methods can assume children is not NULL, and there
+ * are no strange effects from phantom groups due to their
+ * presence or meta-attributes.
+ */
+ pcmk__config_warn("Group %s will be ignored because it does not have "
+ "any members", rsc->id);
}
return TRUE;
}
gboolean
group_active(pe_resource_t * rsc, gboolean all)
{
gboolean c_all = TRUE;
gboolean c_any = FALSE;
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (child_rsc->fns->active(child_rsc, all)) {
c_any = TRUE;
} else {
c_all = FALSE;
}
}
if (c_any == FALSE) {
return FALSE;
} else if (all && c_all == FALSE) {
return FALSE;
}
return TRUE;
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
static void
group_print_xml(pe_resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
GList *gIter = rsc->children;
char *child_text = crm_strdup_printf("%s ", pre_text);
status_print("%sid);
status_print("number_resources=\"%d\" ", g_list_length(rsc->children));
status_print(">\n");
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->fns->print(child_rsc, child_text, options, print_data);
}
status_print("%s\n", pre_text);
free(child_text);
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
void
group_print(pe_resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
char *child_text = NULL;
GList *gIter = rsc->children;
if (pre_text == NULL) {
pre_text = " ";
}
if (options & pe_print_xml) {
group_print_xml(rsc, pre_text, options, print_data);
return;
}
child_text = crm_strdup_printf("%s ", pre_text);
status_print("%sResource Group: %s", pre_text ? pre_text : "", rsc->id);
if (options & pe_print_html) {
status_print("\n\n");
} else if ((options & pe_print_log) == 0) {
status_print("\n");
}
if (options & pe_print_brief) {
print_rscs_brief(rsc->children, child_text, options, print_data, TRUE);
} else {
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (options & pe_print_html) {
status_print("- \n");
}
child_rsc->fns->print(child_rsc, child_text, options, print_data);
if (options & pe_print_html) {
status_print("
\n");
}
}
}
if (options & pe_print_html) {
status_print("
\n");
}
free(child_text);
}
PCMK__OUTPUT_ARGS("group", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__group_xml(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
GList *gIter = rsc->children;
char *count = pcmk__itoa(g_list_length(gIter));
int rc = pcmk_rc_no_output;
gboolean parent_passes = pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) ||
(strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches));
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
free(count);
return rc;
}
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (skip_child_rsc(rsc, child_rsc, parent_passes, only_rsc, show_opts)) {
continue;
}
if (rc == pcmk_rc_no_output) {
rc = pe__name_and_nvpairs_xml(out, true, "group", 4
, "id", rsc->id
, "number_resources", count
, "managed", pe__rsc_bool_str(rsc, pe_rsc_managed)
, "disabled", pcmk__btoa(pe__resource_is_disabled(rsc)));
free(count);
CRM_ASSERT(rc == pcmk_rc_ok);
}
out->message(out, crm_map_element_name(child_rsc->xml), show_opts, child_rsc,
only_node, only_rsc);
}
if (rc == pcmk_rc_ok) {
pcmk__output_xml_pop_parent(out);
}
return rc;
}
PCMK__OUTPUT_ARGS("group", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__group_default(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
int rc = pcmk_rc_no_output;
gboolean parent_passes = pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) ||
(strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches));
gboolean active = rsc->fns->active(rsc, TRUE);
gboolean partially_active = rsc->fns->active(rsc, FALSE);
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
if (pcmk_is_set(show_opts, pcmk_show_brief)) {
GList *rscs = pe__filter_rsc_list(rsc->children, only_rsc);
if (rscs != NULL) {
group_header(out, &rc, rsc, !active && partially_active ? inactive_resources(rsc) : 0,
pcmk_is_set(show_opts, pcmk_show_inactive_rscs));
pe__rscs_brief_output(out, rscs, show_opts | pcmk_show_inactive_rscs);
rc = pcmk_rc_ok;
g_list_free(rscs);
}
} else {
for (GList *gIter = rsc->children; gIter; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (skip_child_rsc(rsc, child_rsc, parent_passes, only_rsc, show_opts)) {
continue;
}
group_header(out, &rc, rsc, !active && partially_active ? inactive_resources(rsc) : 0,
pcmk_is_set(show_opts, pcmk_show_inactive_rscs));
out->message(out, crm_map_element_name(child_rsc->xml), show_opts,
child_rsc, only_node, only_rsc);
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
void
group_free(pe_resource_t * rsc)
{
CRM_CHECK(rsc != NULL, return);
pe_rsc_trace(rsc, "Freeing %s", rsc->id);
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
CRM_ASSERT(child_rsc);
pe_rsc_trace(child_rsc, "Freeing child %s", child_rsc->id);
child_rsc->fns->free(child_rsc);
}
pe_rsc_trace(rsc, "Freeing child list");
g_list_free(rsc->children);
common_free(rsc);
}
enum rsc_role_e
group_resource_state(const pe_resource_t * rsc, gboolean current)
{
enum rsc_role_e group_role = RSC_ROLE_UNKNOWN;
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
enum rsc_role_e role = child_rsc->fns->state(child_rsc, current);
if (role > group_role) {
group_role = role;
}
}
pe_rsc_trace(rsc, "%s role: %s", rsc->id, role2text(group_role));
return group_role;
}
gboolean
pe__group_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent)
{
gboolean passes = FALSE;
if (check_parent && pcmk__str_in_list(rsc_printable_id(uber_parent(rsc)), only_rsc, pcmk__str_star_matches)) {
passes = TRUE;
} else if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches)) {
passes = TRUE;
} else if (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches)) {
passes = TRUE;
} else {
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (!child_rsc->fns->is_filtered(child_rsc, only_rsc, FALSE)) {
passes = TRUE;
break;
}
}
}
return !passes;
}
diff --git a/lib/pengine/pe_actions.c b/lib/pengine/pe_actions.c
index 4a106e3188..a003260bbf 100644
--- a/lib/pengine/pe_actions.c
+++ b/lib/pengine/pe_actions.c
@@ -1,1683 +1,1683 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include "pe_status_private.h"
static void unpack_operation(pe_action_t *action, xmlNode *xml_obj,
pe_resource_t *container,
pe_working_set_t *data_set, guint interval_ms);
static void
add_singleton(pe_working_set_t *data_set, pe_action_t *action)
{
if (data_set->singletons == NULL) {
data_set->singletons = pcmk__strkey_table(NULL, NULL);
}
g_hash_table_insert(data_set->singletons, action->uuid, action);
}
static pe_action_t *
lookup_singleton(pe_working_set_t *data_set, const char *action_uuid)
{
if (data_set->singletons == NULL) {
return NULL;
}
return g_hash_table_lookup(data_set->singletons, action_uuid);
}
/*!
* \internal
* \brief Find an existing action that matches arguments
*
* \param[in] key Action key to match
* \param[in] rsc Resource to match (if any)
* \param[in] node Node to match (if any)
* \param[in] data_set Cluster working set
*
* \return Existing action that matches arguments (or NULL if none)
*/
static pe_action_t *
find_existing_action(const char *key, pe_resource_t *rsc, const pe_node_t *node,
pe_working_set_t *data_set)
{
GList *matches = NULL;
pe_action_t *action = NULL;
/* When rsc is NULL, it would be quicker to check data_set->singletons,
* but checking all data_set->actions takes the node into account.
*/
matches = find_actions(((rsc == NULL)? data_set->actions : rsc->actions),
key, node);
if (matches == NULL) {
return NULL;
}
CRM_LOG_ASSERT(!pcmk__list_of_multiple(matches));
action = matches->data;
g_list_free(matches);
return action;
}
static xmlNode *
find_rsc_op_entry_helper(const pe_resource_t *rsc, const char *key,
gboolean include_disabled)
{
guint interval_ms = 0;
gboolean do_retry = TRUE;
char *local_key = NULL;
const char *name = NULL;
const char *interval_spec = NULL;
char *match_key = NULL;
xmlNode *op = NULL;
xmlNode *operation = NULL;
retry:
for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL;
operation = pcmk__xe_next(operation)) {
if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
bool enabled = false;
name = crm_element_value(operation, "name");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!include_disabled && pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok &&
!enabled) {
continue;
}
interval_ms = crm_parse_interval_spec(interval_spec);
match_key = pcmk__op_key(rsc->id, name, interval_ms);
if (pcmk__str_eq(key, match_key, pcmk__str_casei)) {
op = operation;
}
free(match_key);
if (rsc->clone_name) {
match_key = pcmk__op_key(rsc->clone_name, name, interval_ms);
if (pcmk__str_eq(key, match_key, pcmk__str_casei)) {
op = operation;
}
free(match_key);
}
if (op != NULL) {
free(local_key);
return op;
}
}
}
free(local_key);
if (do_retry == FALSE) {
return NULL;
}
do_retry = FALSE;
if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) {
local_key = pcmk__op_key(rsc->id, "migrate", 0);
key = local_key;
goto retry;
} else if (strstr(key, "_notify_")) {
local_key = pcmk__op_key(rsc->id, "notify", 0);
key = local_key;
goto retry;
}
return NULL;
}
xmlNode *
find_rsc_op_entry(const pe_resource_t *rsc, const char *key)
{
return find_rsc_op_entry_helper(rsc, key, FALSE);
}
/*!
* \internal
* \brief Create a new action object
*
* \param[in] key Action key
* \param[in] task Action name
* \param[in] rsc Resource that action is for (if any)
* \param[in] node Node that action is on (if any)
* \param[in] optional Whether action should be considered optional
* \param[in] for_graph Whether action should be recorded in transition graph
* \param[in] data_set Cluster working set
*
* \return Newly allocated action
* \note This function takes ownership of \p key. It is the caller's
* responsibility to free the return value with pe_free_action().
*/
static pe_action_t *
new_action(char *key, const char *task, pe_resource_t *rsc,
const pe_node_t *node, bool optional, bool for_graph,
pe_working_set_t *data_set)
{
pe_action_t *action = calloc(1, sizeof(pe_action_t));
CRM_ASSERT(action != NULL);
action->rsc = rsc;
action->task = strdup(task); CRM_ASSERT(action->task != NULL);
action->uuid = key;
action->extra = pcmk__strkey_table(free, free);
action->meta = pcmk__strkey_table(free, free);
if (node) {
action->node = pe__copy_node(node);
}
if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
// Resource history deletion for a node can be done on the DC
pe__set_action_flags(action, pe_action_dc);
}
pe__set_action_flags(action, pe_action_runnable);
if (optional) {
pe__set_action_flags(action, pe_action_optional);
} else {
pe__clear_action_flags(action, pe_action_optional);
}
if (rsc != NULL) {
guint interval_ms = 0;
action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE);
parse_op_key(key, NULL, NULL, &interval_ms);
unpack_operation(action, action->op_entry, rsc->container, data_set,
interval_ms);
}
if (for_graph) {
pe_rsc_trace(rsc, "Created %s action %d (%s): %s for %s on %s",
(optional? "optional" : "required"),
data_set->action_id, key, task,
((rsc == NULL)? "no resource" : rsc->id),
pe__node_name(node));
action->id = data_set->action_id++;
data_set->actions = g_list_prepend(data_set->actions, action);
if (rsc == NULL) {
add_singleton(data_set, action);
} else {
rsc->actions = g_list_prepend(rsc->actions, action);
}
}
return action;
}
/*!
* \internal
* \brief Evaluate node attribute values for an action
*
* \param[in] action Action to unpack attributes for
* \param[in] data_set Cluster working set
*/
static void
unpack_action_node_attributes(pe_action_t *action, pe_working_set_t *data_set)
{
if (!pcmk_is_set(action->flags, pe_action_have_node_attrs)
&& (action->op_entry != NULL)) {
pe_rule_eval_data_t rule_data = {
.node_hash = action->node->details->attrs,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
pe__set_action_flags(action, pe_action_have_node_attrs);
pe__unpack_dataset_nvpairs(action->op_entry, XML_TAG_ATTR_SETS,
&rule_data, action->extra, NULL,
FALSE, data_set);
}
}
/*!
* \internal
* \brief Update an action's optional flag
*
* \param[in] action Action to update
* \param[in] optional Requested optional status
*/
static void
update_action_optional(pe_action_t *action, gboolean optional)
{
// Force a non-recurring action to be optional if its resource is unmanaged
if ((action->rsc != NULL) && (action->node != NULL)
&& !pcmk_is_set(action->flags, pe_action_pseudo)
&& !pcmk_is_set(action->rsc->flags, pe_rsc_managed)
&& (g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS) == NULL)) {
pe_rsc_debug(action->rsc, "%s on %s is optional (%s is unmanaged)",
action->uuid, pe__node_name(action->node),
action->rsc->id);
pe__set_action_flags(action, pe_action_optional);
// We shouldn't clear runnable here because ... something
// Otherwise require the action if requested
} else if (!optional) {
pe__clear_action_flags(action, pe_action_optional);
}
}
static enum pe_quorum_policy
effective_quorum_policy(pe_resource_t *rsc, pe_working_set_t *data_set)
{
enum pe_quorum_policy policy = data_set->no_quorum_policy;
if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)) {
policy = no_quorum_ignore;
} else if (data_set->no_quorum_policy == no_quorum_demote) {
switch (rsc->role) {
case RSC_ROLE_PROMOTED:
case RSC_ROLE_UNPROMOTED:
if (rsc->next_role > RSC_ROLE_UNPROMOTED) {
pe__set_next_role(rsc, RSC_ROLE_UNPROMOTED,
"no-quorum-policy=demote");
}
policy = no_quorum_ignore;
break;
default:
policy = no_quorum_stop;
break;
}
}
return policy;
}
/*!
* \internal
* \brief Update a resource action's runnable flag
*
* \param[in] action Action to update
* \param[in] for_graph Whether action should be recorded in transition graph
* \param[in] data_set Cluster working set
*
* \note This may also schedule fencing if a stop is unrunnable.
*/
static void
update_resource_action_runnable(pe_action_t *action, bool for_graph,
pe_working_set_t *data_set)
{
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
return;
}
if (action->node == NULL) {
pe_rsc_trace(action->rsc, "%s is unrunnable (unallocated)",
action->uuid);
pe__clear_action_flags(action, pe_action_runnable);
} else if (!pcmk_is_set(action->flags, pe_action_dc)
&& !(action->node->details->online)
&& (!pe__is_guest_node(action->node)
|| action->node->details->remote_requires_reset)) {
pe__clear_action_flags(action, pe_action_runnable);
do_crm_log((for_graph? LOG_WARNING: LOG_TRACE),
"%s on %s is unrunnable (node is offline)",
action->uuid, pe__node_name(action->node));
if (pcmk_is_set(action->rsc->flags, pe_rsc_managed)
&& for_graph
&& pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)
&& !(action->node->details->unclean)) {
pe_fence_node(data_set, action->node, "stop is unrunnable", false);
}
} else if (!pcmk_is_set(action->flags, pe_action_dc)
&& action->node->details->pending) {
pe__clear_action_flags(action, pe_action_runnable);
do_crm_log((for_graph? LOG_WARNING: LOG_TRACE),
"Action %s on %s is unrunnable (node is pending)",
action->uuid, pe__node_name(action->node));
} else if (action->needs == rsc_req_nothing) {
pe_action_set_reason(action, NULL, TRUE);
if (pe__is_guest_node(action->node)
&& !pe_can_fence(data_set, action->node)) {
/* An action that requires nothing usually does not require any
* fencing in order to be runnable. However, there is an exception:
* such an action cannot be completed if it is on a guest node whose
* host is unclean and cannot be fenced.
*/
pe_rsc_debug(action->rsc, "%s on %s is unrunnable "
"(node's host cannot be fenced)",
action->uuid, pe__node_name(action->node));
pe__clear_action_flags(action, pe_action_runnable);
} else {
pe_rsc_trace(action->rsc,
"%s on %s does not require fencing or quorum",
action->uuid, pe__node_name(action->node));
pe__set_action_flags(action, pe_action_runnable);
}
} else {
switch (effective_quorum_policy(action->rsc, data_set)) {
case no_quorum_stop:
pe_rsc_debug(action->rsc, "%s on %s is unrunnable (no quorum)",
action->uuid, pe__node_name(action->node));
pe__clear_action_flags(action, pe_action_runnable);
pe_action_set_reason(action, "no quorum", true);
break;
case no_quorum_freeze:
if (!action->rsc->fns->active(action->rsc, TRUE)
|| (action->rsc->next_role > action->rsc->role)) {
pe_rsc_debug(action->rsc,
"%s on %s is unrunnable (no quorum)",
action->uuid, pe__node_name(action->node));
pe__clear_action_flags(action, pe_action_runnable);
pe_action_set_reason(action, "quorum freeze", true);
}
break;
default:
//pe_action_set_reason(action, NULL, TRUE);
pe__set_action_flags(action, pe_action_runnable);
break;
}
}
}
/*!
* \internal
* \brief Update a resource object's flags for a new action on it
*
* \param[in] rsc Resource that action is for (if any)
* \param[in] action New action
*/
static void
update_resource_flags_for_action(pe_resource_t *rsc, pe_action_t *action)
{
/* @COMPAT pe_rsc_starting and pe_rsc_stopping are not actually used
* within Pacemaker, and should be deprecated and eventually removed
*/
if (pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) {
pe__set_resource_flags(rsc, pe_rsc_stopping);
} else if (pcmk__str_eq(action->task, CRMD_ACTION_START, pcmk__str_casei)) {
if (pcmk_is_set(action->flags, pe_action_runnable)) {
pe__set_resource_flags(rsc, pe_rsc_starting);
} else {
pe__clear_resource_flags(rsc, pe_rsc_starting);
}
}
}
static bool
valid_stop_on_fail(const char *value)
{
return !pcmk__strcase_any_of(value, "standby", "demote", "stop", NULL);
}
static const char *
unpack_operation_on_fail(pe_action_t * action)
{
const char *name = NULL;
const char *role = NULL;
const char *on_fail = NULL;
const char *interval_spec = NULL;
const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL);
if (pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)
&& !valid_stop_on_fail(value)) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s stop "
"action to default value because '%s' is not "
"allowed for stop", action->rsc->id, value);
return NULL;
} else if (pcmk__str_eq(action->task, CRMD_ACTION_DEMOTE, pcmk__str_casei) && !value) {
// demote on_fail defaults to monitor value for promoted role if present
xmlNode *operation = NULL;
CRM_CHECK(action->rsc != NULL, return NULL);
for (operation = pcmk__xe_first_child(action->rsc->ops_xml);
(operation != NULL) && (value == NULL);
operation = pcmk__xe_next(operation)) {
bool enabled = false;
if (!pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
continue;
}
name = crm_element_value(operation, "name");
role = crm_element_value(operation, "role");
on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL);
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!on_fail) {
continue;
} else if (pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok && !enabled) {
continue;
} else if (!pcmk__str_eq(name, "monitor", pcmk__str_casei)
|| !pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
RSC_ROLE_PROMOTED_LEGACY_S,
NULL)) {
continue;
} else if (crm_parse_interval_spec(interval_spec) == 0) {
continue;
} else if (pcmk__str_eq(on_fail, "demote", pcmk__str_casei)) {
continue;
}
value = on_fail;
}
} else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
value = "ignore";
} else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
name = crm_element_value(action->op_entry, "name");
role = crm_element_value(action->op_entry, "role");
interval_spec = crm_element_value(action->op_entry,
XML_LRM_ATTR_INTERVAL);
if (!pcmk__str_eq(name, CRMD_ACTION_PROMOTE, pcmk__str_casei)
&& (!pcmk__str_eq(name, CRMD_ACTION_STATUS, pcmk__str_casei)
|| !pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
RSC_ROLE_PROMOTED_LEGACY_S, NULL)
|| (crm_parse_interval_spec(interval_spec) == 0))) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s %s "
"action to default value because 'demote' is not "
"allowed for it", action->rsc->id, name);
return NULL;
}
}
return value;
}
static int
unpack_timeout(const char *value)
{
int timeout_ms = crm_get_msec(value);
if (timeout_ms < 0) {
timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
return timeout_ms;
}
// true if value contains valid, non-NULL interval origin for recurring op
static bool
unpack_interval_origin(const char *value, xmlNode *xml_obj, guint interval_ms,
crm_time_t *now, long long *start_delay)
{
long long result = 0;
guint interval_sec = interval_ms / 1000;
crm_time_t *origin = NULL;
// Ignore unspecified values and non-recurring operations
if ((value == NULL) || (interval_ms == 0) || (now == NULL)) {
return false;
}
// Parse interval origin from text
origin = crm_time_new(value);
if (origin == NULL) {
pcmk__config_err("Ignoring '" XML_OP_ATTR_ORIGIN "' for operation "
"'%s' because '%s' is not valid",
(ID(xml_obj)? ID(xml_obj) : "(missing ID)"), value);
return false;
}
// Get seconds since origin (negative if origin is in the future)
result = crm_time_get_seconds(now) - crm_time_get_seconds(origin);
crm_time_free(origin);
// Calculate seconds from closest interval to now
result = result % interval_sec;
// Calculate seconds remaining until next interval
result = ((result <= 0)? 0 : interval_sec) - result;
crm_info("Calculated a start delay of %llds for operation '%s'",
result,
(ID(xml_obj)? ID(xml_obj) : "(unspecified)"));
if (start_delay != NULL) {
*start_delay = result * 1000; // milliseconds
}
return true;
}
static int
unpack_start_delay(const char *value, GHashTable *meta)
{
int start_delay = 0;
if (value != NULL) {
start_delay = crm_get_msec(value);
if (start_delay < 0) {
start_delay = 0;
}
if (meta) {
g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY),
pcmk__itoa(start_delay));
}
}
return start_delay;
}
static xmlNode *
find_min_interval_mon(pe_resource_t * rsc, gboolean include_disabled)
{
guint interval_ms = 0;
guint min_interval_ms = G_MAXUINT;
const char *name = NULL;
const char *interval_spec = NULL;
xmlNode *op = NULL;
xmlNode *operation = NULL;
for (operation = pcmk__xe_first_child(rsc->ops_xml);
operation != NULL;
operation = pcmk__xe_next(operation)) {
if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
bool enabled = false;
name = crm_element_value(operation, "name");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!include_disabled && pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok &&
!enabled) {
continue;
}
if (!pcmk__str_eq(name, RSC_STATUS, pcmk__str_casei)) {
continue;
}
interval_ms = crm_parse_interval_spec(interval_spec);
if (interval_ms && (interval_ms < min_interval_ms)) {
min_interval_ms = interval_ms;
op = operation;
}
}
}
return op;
}
/*!
* \brief Unpack operation XML into an action structure
*
* Unpack an operation's meta-attributes (normalizing the interval, timeout,
* and start delay values as integer milliseconds), requirements, and
* failure policy.
*
* \param[in,out] action Action to unpack into
* \param[in] xml_obj Operation XML (or NULL if all defaults)
* \param[in] container Resource that contains affected resource, if any
* \param[in] data_set Cluster state
* \param[in] interval_ms How frequently to perform the operation
*/
static void
unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container,
pe_working_set_t * data_set, guint interval_ms)
{
int timeout_ms = 0;
const char *value = NULL;
bool is_probe = false;
pe_rsc_eval_data_t rsc_rule_data = {
.standard = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_CLASS),
.provider = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_PROVIDER),
.agent = crm_element_value(action->rsc->xml, XML_EXPR_ATTR_TYPE)
};
pe_op_eval_data_t op_rule_data = {
.op_name = action->task,
.interval = interval_ms
};
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = &rsc_rule_data,
.op_data = &op_rule_data
};
CRM_CHECK(action && action->rsc, return);
is_probe = pcmk_is_probe(action->task, interval_ms);
// Cluster-wide
pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data,
action->meta, NULL, FALSE, data_set);
// Determine probe default timeout differently
if (is_probe) {
xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE);
if (min_interval_mon) {
value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT);
if (value) {
crm_trace("\t%s: Setting default timeout to minimum-interval "
"monitor's timeout '%s'", action->uuid, value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT),
strdup(value));
}
}
}
if (xml_obj) {
xmlAttrPtr xIter = NULL;
// take precedence over defaults
pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, &rule_data,
action->meta, NULL, TRUE, data_set);
/* Anything set as an XML property has highest precedence.
* This ensures we use the name and interval from the tag.
*/
for (xIter = xml_obj->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = crm_element_value(xml_obj, prop_name);
g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value));
}
}
g_hash_table_remove(action->meta, "id");
// Normalize interval to milliseconds
if (interval_ms > 0) {
g_hash_table_replace(action->meta, strdup(XML_LRM_ATTR_INTERVAL),
crm_strdup_printf("%u", interval_ms));
} else {
g_hash_table_remove(action->meta, XML_LRM_ATTR_INTERVAL);
}
/*
* Timeout order of precedence:
* 1. pcmk_monitor_timeout (if rsc has pcmk_ra_cap_fence_params
* and task is start or a probe; pcmk_monitor_timeout works
* by default for a recurring monitor)
* 2. explicit op timeout on the primitive
* 3. default op timeout
* a. if probe, then min-interval monitor's timeout
* b. else, in XML_CIB_TAG_OPCONFIG
* 4. CRM_DEFAULT_OP_TIMEOUT_S
*
* #1 overrides general rule of XML property having highest
* precedence.
*/
if (pcmk_is_set(pcmk_get_ra_caps(rsc_rule_data.standard),
pcmk_ra_cap_fence_params)
&& (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)
|| is_probe)) {
GHashTable *params = pe_rsc_params(action->rsc, action->node, data_set);
value = g_hash_table_lookup(params, "pcmk_monitor_timeout");
if (value) {
crm_trace("\t%s: Setting timeout to pcmk_monitor_timeout '%s', "
"overriding default", action->uuid, value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT),
strdup(value));
}
}
// Normalize timeout to positive milliseconds
value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
timeout_ms = unpack_timeout(value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT),
pcmk__itoa(timeout_ms));
if (!pcmk__strcase_any_of(action->task, RSC_START, RSC_PROMOTE, NULL)) {
action->needs = rsc_req_nothing;
value = "nothing (not start or promote)";
} else if (pcmk_is_set(action->rsc->flags, pe_rsc_needs_fencing)) {
action->needs = rsc_req_stonith;
value = "fencing";
} else if (pcmk_is_set(action->rsc->flags, pe_rsc_needs_quorum)) {
action->needs = rsc_req_quorum;
value = "quorum";
} else {
action->needs = rsc_req_nothing;
value = "nothing";
}
pe_rsc_trace(action->rsc, "%s requires %s", action->uuid, value);
value = unpack_operation_on_fail(action);
if (value == NULL) {
} else if (pcmk__str_eq(value, "block", pcmk__str_casei)) {
action->on_fail = action_fail_block;
g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block"));
value = "block"; // The above could destroy the original string
} else if (pcmk__str_eq(value, "fence", pcmk__str_casei)) {
action->on_fail = action_fail_fence;
value = "node fencing";
if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for "
"operation '%s' to 'stop' because 'fence' is not "
"valid when fencing is disabled", action->uuid);
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop resource";
}
} else if (pcmk__str_eq(value, "standby", pcmk__str_casei)) {
action->on_fail = action_fail_standby;
value = "node standby";
} else if (pcmk__strcase_any_of(value, "ignore", PCMK__VALUE_NOTHING,
NULL)) {
action->on_fail = action_fail_ignore;
value = "ignore";
} else if (pcmk__str_eq(value, "migrate", pcmk__str_casei)) {
action->on_fail = action_fail_migrate;
value = "force migration";
} else if (pcmk__str_eq(value, "stop", pcmk__str_casei)) {
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop resource";
} else if (pcmk__str_eq(value, "restart", pcmk__str_casei)) {
action->on_fail = action_fail_recover;
value = "restart (and possibly migrate)";
} else if (pcmk__str_eq(value, "restart-container", pcmk__str_casei)) {
if (container) {
action->on_fail = action_fail_restart_container;
value = "restart container (and possibly migrate)";
} else {
value = NULL;
}
} else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
action->on_fail = action_fail_demote;
value = "demote instance";
} else {
pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value);
value = NULL;
}
/* defaults */
if (value == NULL && container) {
action->on_fail = action_fail_restart_container;
value = "restart container (and possibly migrate) (default)";
/* For remote nodes, ensure that any failure that results in dropping an
* active connection to the node results in fencing of the node.
*
* There are only two action failures that don't result in fencing.
* 1. probes - probe failures are expected.
* 2. start - a start failure indicates that an active connection does not already
* exist. The user can set op on-fail=fence if they really want to fence start
* failures. */
} else if (((value == NULL) || !pcmk_is_set(action->rsc->flags, pe_rsc_managed))
&& pe__resource_is_remote_conn(action->rsc, data_set)
&& !(pcmk__str_eq(action->task, CRMD_ACTION_STATUS, pcmk__str_casei)
&& (interval_ms == 0))
&& !pcmk__str_eq(action->task, CRMD_ACTION_START, pcmk__str_casei)) {
if (!pcmk_is_set(action->rsc->flags, pe_rsc_managed)) {
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop unmanaged remote node (enforcing default)";
} else {
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
value = "fence remote node (default)";
} else {
value = "recover remote node connection (default)";
}
if (action->rsc->remote_reconnect_ms) {
action->fail_role = RSC_ROLE_STOPPED;
}
action->on_fail = action_fail_reset_remote;
}
} else if (value == NULL && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) {
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
action->on_fail = action_fail_fence;
value = "resource fence (default)";
} else {
action->on_fail = action_fail_block;
value = "resource block (default)";
}
} else if (value == NULL) {
action->on_fail = action_fail_recover;
value = "restart (and possibly migrate) (default)";
}
pe_rsc_trace(action->rsc, "%s failure handling: %s",
action->uuid, value);
value = NULL;
if (xml_obj != NULL) {
value = g_hash_table_lookup(action->meta, "role_after_failure");
if (value) {
pe_warn_once(pe_wo_role_after,
"Support for role_after_failure is deprecated and will be removed in a future release");
}
}
if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) {
action->fail_role = text2role(value);
}
/* defaults */
if (action->fail_role == RSC_ROLE_UNKNOWN) {
if (pcmk__str_eq(action->task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) {
action->fail_role = RSC_ROLE_UNPROMOTED;
} else {
action->fail_role = RSC_ROLE_STARTED;
}
}
pe_rsc_trace(action->rsc, "%s failure results in: %s",
action->uuid, role2text(action->fail_role));
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY);
if (value) {
unpack_start_delay(value, action->meta);
} else {
long long start_delay = 0;
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN);
if (unpack_interval_origin(value, xml_obj, interval_ms, data_set->now,
&start_delay)) {
g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY),
crm_strdup_printf("%lld", start_delay));
}
}
}
/*!
* \brief Create or update an action object
*
* \param[in] rsc Resource that action is for (if any)
* \param[in] key Action key (must be non-NULL)
* \param[in] task Action name (must be non-NULL)
* \param[in] on_node Node that action is on (if any)
* \param[in] optional Whether action should be considered optional
* \param[in] save_action Whether action should be recorded in transition graph
* \param[in] data_set Cluster working set
*
* \return Action object corresponding to arguments
* \note This function takes ownership of (and might free) \p key. If
* \p save_action is true, \p data_set will own the returned action,
* otherwise it is the caller's responsibility to free the return value
* with pe_free_action().
*/
pe_action_t *
custom_action(pe_resource_t *rsc, char *key, const char *task,
const pe_node_t *on_node, gboolean optional, gboolean save_action,
pe_working_set_t *data_set)
{
pe_action_t *action = NULL;
CRM_ASSERT((key != NULL) && (task != NULL) && (data_set != NULL));
if (save_action) {
action = find_existing_action(key, rsc, on_node, data_set);
}
if (action == NULL) {
action = new_action(key, task, rsc, on_node, optional, save_action,
data_set);
} else {
free(key);
}
update_action_optional(action, optional);
if (rsc != NULL) {
if (action->node != NULL) {
unpack_action_node_attributes(action, data_set);
}
update_resource_action_runnable(action, save_action, data_set);
if (save_action) {
update_resource_flags_for_action(rsc, action);
}
}
return action;
}
pe_action_t *
get_pseudo_op(const char *name, pe_working_set_t * data_set)
{
pe_action_t *op = lookup_singleton(data_set, name);
if (op == NULL) {
op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
}
return op;
}
static GList *
find_unfencing_devices(GList *candidates, GList *matches)
{
for (GList *gIter = candidates; gIter != NULL; gIter = gIter->next) {
pe_resource_t *candidate = gIter->data;
if (candidate->children != NULL) {
matches = find_unfencing_devices(candidate->children, matches);
} else if (!pcmk_is_set(candidate->flags, pe_rsc_fence_device)) {
continue;
} else if (pcmk_is_set(candidate->flags, pe_rsc_needs_unfencing)) {
matches = g_list_prepend(matches, candidate);
} else if (pcmk__str_eq(g_hash_table_lookup(candidate->meta,
PCMK_STONITH_PROVIDES),
PCMK__VALUE_UNFENCING,
pcmk__str_casei)) {
matches = g_list_prepend(matches, candidate);
}
}
return matches;
}
static int
node_priority_fencing_delay(pe_node_t * node, pe_working_set_t * data_set)
{
int member_count = 0;
int online_count = 0;
int top_priority = 0;
int lowest_priority = 0;
GList *gIter = NULL;
// `priority-fencing-delay` is disabled
if (data_set->priority_fencing_delay <= 0) {
return 0;
}
/* No need to request a delay if the fencing target is not a normal cluster
* member, for example if it's a remote node or a guest node. */
if (node->details->type != node_member) {
return 0;
}
// No need to request a delay if the fencing target is in our partition
if (node->details->online) {
return 0;
}
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
pe_node_t *n = gIter->data;
if (n->details->type != node_member) {
continue;
}
member_count ++;
if (n->details->online) {
online_count++;
}
if (member_count == 1
|| n->details->priority > top_priority) {
top_priority = n->details->priority;
}
if (member_count == 1
|| n->details->priority < lowest_priority) {
lowest_priority = n->details->priority;
}
}
// No need to delay if we have more than half of the cluster members
if (online_count > member_count / 2) {
return 0;
}
/* All the nodes have equal priority.
* Any configured corresponding `pcmk_delay_base/max` will be applied. */
if (lowest_priority == top_priority) {
return 0;
}
if (node->details->priority < top_priority) {
return 0;
}
return data_set->priority_fencing_delay;
}
pe_action_t *
pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason,
bool priority_delay, pe_working_set_t * data_set)
{
char *op_key = NULL;
pe_action_t *stonith_op = NULL;
if(op == NULL) {
op = data_set->stonith_action;
}
op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op);
stonith_op = lookup_singleton(data_set, op_key);
if(stonith_op == NULL) {
stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id);
add_hash_param(stonith_op->meta, "stonith_action", op);
if (pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) {
/* Extra work to detect device changes
*/
GString *digests_all = g_string_sized_new(1024);
GString *digests_secure = g_string_sized_new(1024);
GList *matches = find_unfencing_devices(data_set->resources, NULL);
char *key = NULL;
char *value = NULL;
for (GList *gIter = matches; gIter != NULL; gIter = gIter->next) {
pe_resource_t *match = gIter->data;
const char *agent = g_hash_table_lookup(match->meta,
XML_ATTR_TYPE);
op_digest_cache_t *data = NULL;
data = pe__compare_fencing_digest(match, agent, node, data_set);
if(data->rc == RSC_DIGEST_ALL) {
optional = FALSE;
crm_notice("Unfencing Pacemaker Remote node %s "
"because the definition of %s changed",
pe__node_name(node), match->id);
if (!pcmk__is_daemon && data_set->priv != NULL) {
pcmk__output_t *out = data_set->priv;
out->info(out,
"notice: Unfencing Pacemaker Remote node %s "
"because the definition of %s changed",
pe__node_name(node), match->id);
}
}
pcmk__g_strcat(digests_all,
match->id, ":", agent, ":",
data->digest_all_calc, ",", NULL);
pcmk__g_strcat(digests_secure,
match->id, ":", agent, ":",
data->digest_secure_calc, ",", NULL);
}
key = strdup(XML_OP_ATTR_DIGESTS_ALL);
value = strdup((const char *) digests_all->str);
CRM_ASSERT((key != NULL) && (value != NULL));
g_hash_table_insert(stonith_op->meta, key, value);
g_string_free(digests_all, TRUE);
key = strdup(XML_OP_ATTR_DIGESTS_SECURE);
value = strdup((const char *) digests_secure->str);
CRM_ASSERT((key != NULL) && (value != NULL));
g_hash_table_insert(stonith_op->meta, key, value);
g_string_free(digests_secure, TRUE);
}
} else {
free(op_key);
}
if (data_set->priority_fencing_delay > 0
/* It's a suitable case where `priority-fencing-delay` applies.
* At least add `priority-fencing-delay` field as an indicator. */
&& (priority_delay
/* The priority delay needs to be recalculated if this function has
* been called by schedule_fencing_and_shutdowns() after node
* priority has already been calculated by native_add_running().
*/
|| g_hash_table_lookup(stonith_op->meta,
XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY) != NULL)) {
/* Add `priority-fencing-delay` to the fencing op even if it's 0 for
* the targeting node. So that it takes precedence over any possible
* `pcmk_delay_base/max`.
*/
char *delay_s = pcmk__itoa(node_priority_fencing_delay(node, data_set));
g_hash_table_insert(stonith_op->meta,
strdup(XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY),
delay_s);
}
if(optional == FALSE && pe_can_fence(data_set, node)) {
pe__clear_action_flags(stonith_op, pe_action_optional);
pe_action_set_reason(stonith_op, reason, false);
} else if(reason && stonith_op->reason == NULL) {
stonith_op->reason = strdup(reason);
}
return stonith_op;
}
void
pe_free_action(pe_action_t * action)
{
if (action == NULL) {
return;
}
g_list_free_full(action->actions_before, free); /* pe_action_wrapper_t* */
g_list_free_full(action->actions_after, free); /* pe_action_wrapper_t* */
if (action->extra) {
g_hash_table_destroy(action->extra);
}
if (action->meta) {
g_hash_table_destroy(action->meta);
}
free(action->cancel_task);
free(action->reason);
free(action->task);
free(action->uuid);
free(action->node);
free(action);
}
int
pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set)
{
xmlNode *child = NULL;
GHashTable *action_meta = NULL;
const char *timeout_spec = NULL;
int timeout_ms = 0;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP);
child != NULL; child = crm_next_same_xml(child)) {
if (pcmk__str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME),
pcmk__str_casei)) {
timeout_spec = crm_element_value(child, XML_ATTR_TIMEOUT);
break;
}
}
if (timeout_spec == NULL && data_set->op_defaults) {
action_meta = pcmk__strkey_table(free, free);
pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS,
&rule_data, action_meta, NULL, FALSE, data_set);
timeout_spec = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT);
}
- // @TODO check meta-attributes (including versioned meta-attributes)
+ // @TODO check meta-attributes
// @TODO maybe use min-interval monitor timeout as default for monitors
timeout_ms = crm_get_msec(timeout_spec);
if (timeout_ms < 0) {
timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
if (action_meta != NULL) {
g_hash_table_destroy(action_meta);
}
return timeout_ms;
}
enum action_tasks
get_complex_task(pe_resource_t * rsc, const char *name, gboolean allow_non_atomic)
{
enum action_tasks task = text2task(name);
if (rsc == NULL) {
return task;
} else if (allow_non_atomic == FALSE || rsc->variant == pe_native) {
switch (task) {
case stopped_rsc:
case started_rsc:
case action_demoted:
case action_promoted:
crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id);
return task - 1;
default:
break;
}
}
return task;
}
/*!
* \internal
* \brief Find first matching action in a list
*
* \param[in] input List of actions to search
* \param[in] uuid If not NULL, action must have this UUID
* \param[in] task If not NULL, action must have this action name
* \param[in] on_node If not NULL, action must be on this node
*
* \return First action in list that matches criteria, or NULL if none
*/
pe_action_t *
find_first_action(const GList *input, const char *uuid, const char *task,
const pe_node_t *on_node)
{
CRM_CHECK(uuid || task, return NULL);
for (const GList *gIter = input; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (uuid != NULL && !pcmk__str_eq(uuid, action->uuid, pcmk__str_casei)) {
continue;
} else if (task != NULL && !pcmk__str_eq(task, action->task, pcmk__str_casei)) {
continue;
} else if (on_node == NULL) {
return action;
} else if (action->node == NULL) {
continue;
} else if (on_node->details == action->node->details) {
return action;
}
}
return NULL;
}
GList *
find_actions(GList *input, const char *key, const pe_node_t *on_node)
{
GList *gIter = input;
GList *result = NULL;
CRM_CHECK(key != NULL, return NULL);
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) {
continue;
} else if (on_node == NULL) {
crm_trace("Action %s matches (ignoring node)", key);
result = g_list_prepend(result, action);
} else if (action->node == NULL) {
crm_trace("Action %s matches (unallocated, assigning to %s)",
key, pe__node_name(on_node));
action->node = pe__copy_node(on_node);
result = g_list_prepend(result, action);
} else if (on_node->details == action->node->details) {
crm_trace("Action %s on %s matches", key, pe__node_name(on_node));
result = g_list_prepend(result, action);
}
}
return result;
}
GList *
find_actions_exact(GList *input, const char *key, const pe_node_t *on_node)
{
GList *result = NULL;
CRM_CHECK(key != NULL, return NULL);
if (on_node == NULL) {
return NULL;
}
for (GList *gIter = input; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if ((action->node != NULL)
&& pcmk__str_eq(key, action->uuid, pcmk__str_casei)
&& pcmk__str_eq(on_node->details->id, action->node->details->id,
pcmk__str_casei)) {
crm_trace("Action %s on %s matches", key, pe__node_name(on_node));
result = g_list_prepend(result, action);
}
}
return result;
}
/*!
* \brief Find all actions of given type for a resource
*
* \param[in] rsc Resource to search
* \param[in] node Find only actions scheduled on this node
* \param[in] task Action name to search for
* \param[in] require_node If TRUE, NULL node or action node will not match
*
* \return List of actions found (or NULL if none)
* \note If node is not NULL and require_node is FALSE, matching actions
* without a node will be assigned to node.
*/
GList *
pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node,
const char *task, bool require_node)
{
GList *result = NULL;
char *key = pcmk__op_key(rsc->id, task, 0);
if (require_node) {
result = find_actions_exact(rsc->actions, key, node);
} else {
result = find_actions(rsc->actions, key, node);
}
free(key);
return result;
}
/*!
* \internal
* \brief Create an action reason string based on the action itself
*
* \param[in] action Action to create reason string for
* \param[in] flag Action flag that was cleared
*
* \return Newly allocated string suitable for use as action reason
* \note It is the caller's responsibility to free() the result.
*/
char *
pe__action2reason(pe_action_t *action, enum pe_action_flags flag)
{
const char *change = NULL;
switch (flag) {
case pe_action_runnable:
case pe_action_migrate_runnable:
change = "unrunnable";
break;
case pe_action_optional:
change = "required";
break;
default:
// Bug: caller passed unsupported flag
CRM_CHECK(change != NULL, change = "");
break;
}
return crm_strdup_printf("%s%s%s %s", change,
(action->rsc == NULL)? "" : " ",
(action->rsc == NULL)? "" : action->rsc->id,
action->task);
}
void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite)
{
if (action->reason != NULL && overwrite) {
pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'",
action->uuid, action->reason, pcmk__s(reason, "(none)"));
} else if (action->reason == NULL) {
pe_rsc_trace(action->rsc, "Set %s reason to '%s'",
action->uuid, pcmk__s(reason, "(none)"));
} else {
// crm_assert(action->reason != NULL && !overwrite);
return;
}
pcmk__str_update(&action->reason, reason);
}
/*!
* \internal
* \brief Create an action to clear a resource's history from CIB
*
* \param[in] rsc Resource to clear
* \param[in] node Node to clear history on
*
* \return New action to clear resource history
*/
pe_action_t *
pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set)
{
char *key = NULL;
CRM_ASSERT(rsc && node);
key = pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0);
return custom_action(rsc, key, CRM_OP_LRM_DELETE, node, FALSE, TRUE,
data_set);
}
#define sort_return(an_int, why) do { \
free(a_uuid); \
free(b_uuid); \
crm_trace("%s (%d) %c %s (%d) : %s", \
a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \
b_xml_id, b_call_id, why); \
return an_int; \
} while(0)
int
pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b,
bool same_node_default)
{
int a_call_id = -1;
int b_call_id = -1;
char *a_uuid = NULL;
char *b_uuid = NULL;
const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID);
const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID);
const char *a_node = crm_element_value(xml_a, XML_LRM_ATTR_TARGET);
const char *b_node = crm_element_value(xml_b, XML_LRM_ATTR_TARGET);
bool same_node = true;
/* @COMPAT The on_node attribute was added to last_failure as of 1.1.13 (via
* 8b3ca1c) and the other entries as of 1.1.12 (via 0b07b5c).
*
* In case that any of the lrm_rsc_op entries doesn't have on_node
* attribute, we need to explicitly tell whether the two operations are on
* the same node.
*/
if (a_node == NULL || b_node == NULL) {
same_node = same_node_default;
} else {
same_node = pcmk__str_eq(a_node, b_node, pcmk__str_casei);
}
if (same_node && pcmk__str_eq(a_xml_id, b_xml_id, pcmk__str_none)) {
/* We have duplicate lrm_rsc_op entries in the status
* section which is unlikely to be a good thing
* - we can handle it easily enough, but we need to get
* to the bottom of why it's happening.
*/
pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id);
sort_return(0, "duplicate");
}
crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id);
crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id);
if (a_call_id == -1 && b_call_id == -1) {
/* both are pending ops so it doesn't matter since
* stops are never pending
*/
sort_return(0, "pending");
} else if (same_node && a_call_id >= 0 && a_call_id < b_call_id) {
sort_return(-1, "call id");
} else if (same_node && b_call_id >= 0 && a_call_id > b_call_id) {
sort_return(1, "call id");
} else if (a_call_id >= 0 && b_call_id >= 0
&& (!same_node || a_call_id == b_call_id)) {
/*
* The op and last_failed_op are the same
* Order on last-rc-change
*/
time_t last_a = -1;
time_t last_b = -1;
crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);
crm_trace("rc-change: %lld vs %lld",
(long long) last_a, (long long) last_b);
if (last_a >= 0 && last_a < last_b) {
sort_return(-1, "rc-change");
} else if (last_b >= 0 && last_a > last_b) {
sort_return(1, "rc-change");
}
sort_return(0, "rc-change");
} else {
/* One of the inputs is a pending operation
* Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other
*/
int a_id = -1;
int b_id = -1;
const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC);
const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC);
CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic"));
if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic a");
}
if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic b");
}
/* try to determine the relative age of the operation...
* some pending operations (e.g. a start) may have been superseded
* by a subsequent stop
*
* [a|b]_id == -1 means it's a shutdown operation and _always_ comes last
*/
if (!pcmk__str_eq(a_uuid, b_uuid, pcmk__str_casei) || a_id == b_id) {
/*
* some of the logic in here may be redundant...
*
* if the UUID from the TE doesn't match then one better
* be a pending operation.
* pending operations don't survive between elections and joins
* because we query the LRM directly
*/
if (b_call_id == -1) {
sort_return(-1, "transition + call");
} else if (a_call_id == -1) {
sort_return(1, "transition + call");
}
} else if ((a_id >= 0 && a_id < b_id) || b_id == -1) {
sort_return(-1, "transition");
} else if ((b_id >= 0 && a_id > b_id) || a_id == -1) {
sort_return(1, "transition");
}
}
/* we should never end up here */
CRM_CHECK(FALSE, sort_return(0, "default"));
}
gint
sort_op_by_callid(gconstpointer a, gconstpointer b)
{
const xmlNode *xml_a = a;
const xmlNode *xml_b = b;
return pe__is_newer_op(xml_a, xml_b, true);
}
/*!
* \internal
* \brief Create a new pseudo-action for a resource
*
* \param[in] rsc Resource to create action for
* \param[in] task Action name
* \param[in] optional Whether action should be considered optional
* \param[in] runnable Whethe action should be considered runnable
*
* \return New action object corresponding to arguments
*/
pe_action_t *
pe__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task, bool optional,
bool runnable)
{
pe_action_t *action = NULL;
CRM_ASSERT((rsc != NULL) && (task != NULL));
action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0), task, NULL,
optional, TRUE, rsc->cluster);
pe__set_action_flags(action, pe_action_pseudo);
if (runnable) {
pe__set_action_flags(action, pe_action_runnable);
}
return action;
}
/*!
* \internal
* \brief Add the expected result to an action
*
* \param[in] action Action to add expected result to
* \param[in] expected_result Expected result to add
*
* \note This is more efficient than calling add_hash_param().
*/
void
pe__add_action_expected_result(pe_action_t *action, int expected_result)
{
char *name = NULL;
CRM_ASSERT((action != NULL) && (action->meta != NULL));
name = strdup(XML_ATTR_TE_TARGET_RC);
CRM_ASSERT (name != NULL);
g_hash_table_insert(action->meta, name, pcmk__itoa(expected_result));
}
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index a45cda2831..5fcba3baf7 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,4382 +1,4377 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
CRM_TRACE_INIT_DATA(pe_status);
/* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
* use pe__set_working_set_flags()/pe__clear_working_set_flags() so that the
* flag is stringified more readably in log messages.
*/
#define set_config_flag(data_set, option, flag) do { \
const char *scf_value = pe_pref((data_set)->config_hash, (option)); \
if (scf_value != NULL) { \
if (crm_is_true(scf_value)) { \
(data_set)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Working set", \
crm_system_name, (data_set)->flags, \
(flag), #flag); \
} else { \
(data_set)->flags = pcmk__clear_flags_as(__func__, __LINE__,\
LOG_TRACE, "Working set", \
crm_system_name, (data_set)->flags, \
(flag), #flag); \
} \
} \
} while(0)
static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
xmlNode **last_failure,
enum action_fail_response *failed,
pe_working_set_t *data_set);
static void determine_remote_online_status(pe_working_set_t *data_set,
pe_node_t *this_node);
static void add_node_attrs(xmlNode *attrs, pe_node_t *node, bool overwrite,
pe_working_set_t *data_set);
static void determine_online_status(xmlNode *node_state, pe_node_t *this_node,
pe_working_set_t *data_set);
static void unpack_node_lrm(pe_node_t *node, xmlNode *xml,
pe_working_set_t *data_set);
// Bitmask for warnings we only want to print once
uint32_t pe_wo = 0;
static gboolean
is_dangling_guest_node(pe_node_t *node)
{
/* we are looking for a remote-node that was supposed to be mapped to a
* container resource, but all traces of that container have disappeared
* from both the config and the status section. */
if (pe__is_guest_or_remote_node(node) &&
node->details->remote_rsc &&
node->details->remote_rsc->container == NULL &&
pcmk_is_set(node->details->remote_rsc->flags,
pe_rsc_orphan_container_filler)) {
return TRUE;
}
return FALSE;
}
/*!
* \brief Schedule a fence action for a node
*
* \param[in,out] data_set Current working set of cluster
* \param[in,out] node Node to fence
* \param[in] reason Text description of why fencing is needed
* \param[in] priority_delay Whether to consider `priority-fencing-delay`
*/
void
pe_fence_node(pe_working_set_t * data_set, pe_node_t * node,
const char *reason, bool priority_delay)
{
CRM_CHECK(node, return);
/* A guest node is fenced by marking its container as failed */
if (pe__is_guest_node(node)) {
pe_resource_t *rsc = node->details->remote_rsc->container;
if (!pcmk_is_set(rsc->flags, pe_rsc_failed)) {
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
crm_notice("Not fencing guest node %s "
"(otherwise would because %s): "
"its guest resource %s is unmanaged",
pe__node_name(node), reason, rsc->id);
} else {
crm_warn("Guest node %s will be fenced "
"(by recovering its guest resource %s): %s",
pe__node_name(node), rsc->id, reason);
/* We don't mark the node as unclean because that would prevent the
* node from running resources. We want to allow it to run resources
* in this transition if the recovery succeeds.
*/
node->details->remote_requires_reset = TRUE;
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
}
}
} else if (is_dangling_guest_node(node)) {
crm_info("Cleaning up dangling connection for guest node %s: "
"fencing was already done because %s, "
"and guest resource no longer exists",
pe__node_name(node), reason);
pe__set_resource_flags(node->details->remote_rsc,
pe_rsc_failed|pe_rsc_stop);
} else if (pe__is_remote_node(node)) {
pe_resource_t *rsc = node->details->remote_rsc;
if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
crm_notice("Not fencing remote node %s "
"(otherwise would because %s): connection is unmanaged",
pe__node_name(node), reason);
} else if(node->details->remote_requires_reset == FALSE) {
node->details->remote_requires_reset = TRUE;
crm_warn("Remote node %s %s: %s",
pe__node_name(node),
pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
reason);
}
node->details->unclean = TRUE;
// No need to apply `priority-fencing-delay` for remote nodes
pe_fence_op(node, NULL, TRUE, reason, FALSE, data_set);
} else if (node->details->unclean) {
crm_trace("Cluster node %s %s because %s",
pe__node_name(node),
pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
reason);
} else {
crm_warn("Cluster node %s %s: %s",
pe__node_name(node),
pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
reason);
node->details->unclean = TRUE;
pe_fence_op(node, NULL, TRUE, reason, priority_delay, data_set);
}
}
// @TODO xpaths can't handle templates, rules, or id-refs
// nvpair with provides or requires set to unfencing
#define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
"[(@" XML_NVPAIR_ATTR_NAME "='" PCMK_STONITH_PROVIDES "'" \
"or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
"and @" XML_NVPAIR_ATTR_VALUE "='" PCMK__VALUE_UNFENCING "']"
// unfencing in rsc_defaults or any resource
#define XPATH_ENABLE_UNFENCING \
"/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
"//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
"|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
"/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
static void
set_if_xpath(uint64_t flag, const char *xpath, pe_working_set_t *data_set)
{
xmlXPathObjectPtr result = NULL;
if (!pcmk_is_set(data_set->flags, flag)) {
result = xpath_search(data_set->input, xpath);
if (result && (numXpathResults(result) > 0)) {
pe__set_working_set_flags(data_set, flag);
}
freeXpathObject(result);
}
}
gboolean
unpack_config(xmlNode * config, pe_working_set_t * data_set)
{
const char *value = NULL;
GHashTable *config_hash = pcmk__strkey_table(free, free);
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
data_set->config_hash = config_hash;
pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash,
CIB_OPTIONS_FIRST, FALSE, data_set);
verify_pe_options(data_set->config_hash);
set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
if (!pcmk_is_set(data_set->flags, pe_flag_startup_probes)) {
crm_info("Startup probes: disabled (dangerous)");
}
value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
if (value && crm_is_true(value)) {
crm_info("Watchdog-based self-fencing will be performed via SBD if "
"fencing is required and stonith-watchdog-timeout is nonzero");
pe__set_working_set_flags(data_set, pe_flag_have_stonith_resource);
}
/* Set certain flags via xpath here, so they can be used before the relevant
* configuration sections are unpacked.
*/
set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);
value = pe_pref(data_set->config_hash, "stonith-timeout");
data_set->stonith_timeout = (int) crm_parse_interval_spec(value);
crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
crm_debug("STONITH of failed nodes is %s",
pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)? "enabled" : "disabled");
data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
if (!strcmp(data_set->stonith_action, "poweroff")) {
pe_warn_once(pe_wo_poweroff,
"Support for stonith-action of 'poweroff' is deprecated "
"and will be removed in a future release (use 'off' instead)");
data_set->stonith_action = "off";
}
crm_trace("STONITH will %s nodes", data_set->stonith_action);
set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
crm_debug("Concurrent fencing is %s",
pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)? "enabled" : "disabled");
value = pe_pref(data_set->config_hash,
XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
if (value) {
data_set->priority_fencing_delay = crm_parse_interval_spec(value) / 1000;
crm_trace("Priority fencing delay is %ds", data_set->priority_fencing_delay);
}
set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
crm_debug("Stop all active resources: %s",
pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_stop_everything)));
set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
if (pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
}
value = pe_pref(data_set->config_hash, "no-quorum-policy");
if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) {
data_set->no_quorum_policy = no_quorum_ignore;
} else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) {
data_set->no_quorum_policy = no_quorum_freeze;
} else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
data_set->no_quorum_policy = no_quorum_demote;
} else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) {
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
int do_panic = 0;
crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC,
&do_panic);
if (do_panic || pcmk_is_set(data_set->flags, pe_flag_have_quorum)) {
data_set->no_quorum_policy = no_quorum_suicide;
} else {
crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
pcmk__config_err("Resetting no-quorum-policy to 'stop' because "
"fencing is disabled");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
data_set->no_quorum_policy = no_quorum_stop;
}
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
crm_debug("On loss of quorum: Freeze resources");
break;
case no_quorum_stop:
crm_debug("On loss of quorum: Stop ALL resources");
break;
case no_quorum_demote:
crm_debug("On loss of quorum: "
"Demote promotable resources and stop other resources");
break;
case no_quorum_suicide:
crm_notice("On loss of quorum: Fence all remaining nodes");
break;
case no_quorum_ignore:
crm_notice("On loss of quorum: Ignore");
break;
}
set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
crm_trace("Orphan resources are %s",
pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)? "stopped" : "ignored");
set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
crm_trace("Orphan resource actions are %s",
pcmk_is_set(data_set->flags, pe_flag_stop_action_orphans)? "stopped" : "ignored");
value = pe_pref(data_set->config_hash, "remove-after-stop");
if (value != NULL) {
if (crm_is_true(value)) {
pe__set_working_set_flags(data_set, pe_flag_remove_after_stop);
#ifndef PCMK__COMPAT_2_0
pe_warn_once(pe_wo_remove_after,
"Support for the remove-after-stop cluster property is"
" deprecated and will be removed in a future release");
#endif
} else {
pe__clear_working_set_flags(data_set, pe_flag_remove_after_stop);
}
}
set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
crm_trace("Maintenance mode: %s",
pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)));
set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
crm_trace("Start failures are %s",
pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)? "always fatal" : "handled by failcount");
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
}
if (pcmk_is_set(data_set->flags, pe_flag_startup_fencing)) {
crm_trace("Unseen nodes will be fenced");
} else {
pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
}
pe__unpack_node_health_scores(data_set);
data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
crm_trace("Placement strategy: %s", data_set->placement_strategy);
set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock);
crm_trace("Resources will%s be locked to cleanly shut down nodes",
(pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not"));
if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) {
value = pe_pref(data_set->config_hash,
XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
data_set->shutdown_lock = crm_parse_interval_spec(value) / 1000;
crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock);
}
return TRUE;
}
pe_node_t *
pe_create_node(const char *id, const char *uname, const char *type,
const char *score, pe_working_set_t * data_set)
{
pe_node_t *new_node = NULL;
if (pe_find_node(data_set->nodes, uname) != NULL) {
pcmk__config_warn("More than one node entry has name '%s'", uname);
}
new_node = calloc(1, sizeof(pe_node_t));
if (new_node == NULL) {
return NULL;
}
new_node->weight = char2score(score);
new_node->fixed = FALSE;
new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
if (new_node->details == NULL) {
free(new_node);
return NULL;
}
crm_trace("Creating node for entry %s/%s", uname, id);
new_node->details->id = id;
new_node->details->uname = uname;
new_node->details->online = FALSE;
new_node->details->shutdown = FALSE;
new_node->details->rsc_discovery_enabled = TRUE;
new_node->details->running_rsc = NULL;
new_node->details->data_set = data_set;
if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) {
new_node->details->type = node_member;
} else if (pcmk__str_eq(type, "remote", pcmk__str_casei)) {
new_node->details->type = node_remote;
pe__set_working_set_flags(data_set, pe_flag_have_remote_nodes);
} else {
/* @COMPAT 'ping' is the default for backward compatibility, but it
* should be changed to 'member' at a compatibility break
*/
if (!pcmk__str_eq(type, "ping", pcmk__str_casei)) {
pcmk__config_warn("Node %s has unrecognized type '%s', "
"assuming 'ping'", pcmk__s(uname, "without name"),
type);
}
pe_warn_once(pe_wo_ping_node,
"Support for nodes of type 'ping' (such as %s) is "
"deprecated and will be removed in a future release",
pcmk__s(uname, "unnamed node"));
new_node->details->type = node_ping;
}
new_node->details->attrs = pcmk__strkey_table(free, free);
if (pe__is_guest_or_remote_node(new_node)) {
g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("remote"));
} else {
g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("cluster"));
}
new_node->details->utilization = pcmk__strkey_table(free, free);
new_node->details->digest_cache = pcmk__strkey_table(free,
pe__free_digests);
data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node,
pe__cmp_node_name);
return new_node;
}
static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
{
xmlNode *attr_set = NULL;
xmlNode *attr = NULL;
const char *container_id = ID(xml_obj);
const char *remote_name = NULL;
const char *remote_server = NULL;
const char *remote_port = NULL;
const char *connect_timeout = "60s";
const char *remote_allow_migrate=NULL;
const char *is_managed = NULL;
for (attr_set = pcmk__xe_first_child(xml_obj); attr_set != NULL;
attr_set = pcmk__xe_next(attr_set)) {
if (!pcmk__str_eq((const char *)attr_set->name, XML_TAG_META_SETS,
pcmk__str_casei)) {
continue;
}
for (attr = pcmk__xe_first_child(attr_set); attr != NULL;
attr = pcmk__xe_next(attr)) {
const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
if (pcmk__str_eq(name, XML_RSC_ATTR_REMOTE_NODE, pcmk__str_casei)) {
remote_name = value;
} else if (pcmk__str_eq(name, "remote-addr", pcmk__str_casei)) {
remote_server = value;
} else if (pcmk__str_eq(name, "remote-port", pcmk__str_casei)) {
remote_port = value;
} else if (pcmk__str_eq(name, "remote-connect-timeout", pcmk__str_casei)) {
connect_timeout = value;
} else if (pcmk__str_eq(name, "remote-allow-migrate", pcmk__str_casei)) {
remote_allow_migrate=value;
} else if (pcmk__str_eq(name, XML_RSC_ATTR_MANAGED, pcmk__str_casei)) {
is_managed = value;
}
}
}
if (remote_name == NULL) {
return NULL;
}
if (pe_find_resource(data->resources, remote_name) != NULL) {
return NULL;
}
pe_create_remote_xml(parent, remote_name, container_id,
remote_allow_migrate, is_managed,
connect_timeout, remote_server, remote_port);
return remote_name;
}
static void
handle_startup_fencing(pe_working_set_t *data_set, pe_node_t *new_node)
{
if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
/* Ignore fencing for remote nodes that don't have a connection resource
* associated with them. This happens when remote node entries get left
* in the nodes section after the connection resource is removed.
*/
return;
}
if (pcmk_is_set(data_set->flags, pe_flag_startup_fencing)) {
// All nodes are unclean until we've seen their status entry
new_node->details->unclean = TRUE;
} else {
// Blind faith ...
new_node->details->unclean = FALSE;
}
/* We need to be able to determine if a node's status section
* exists or not separate from whether the node is unclean. */
new_node->details->unseen = TRUE;
}
gboolean
unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
pe_node_t *new_node = NULL;
const char *id = NULL;
const char *uname = NULL;
const char *type = NULL;
const char *score = NULL;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
for (xml_obj = pcmk__xe_first_child(xml_nodes); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, pcmk__str_none)) {
new_node = NULL;
id = crm_element_value(xml_obj, XML_ATTR_ID);
uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
type = crm_element_value(xml_obj, XML_ATTR_TYPE);
score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
crm_trace("Processing node %s/%s", uname, id);
if (id == NULL) {
pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE
"> entry in configuration without id");
continue;
}
new_node = pe_create_node(id, uname, type, score, data_set);
if (new_node == NULL) {
return FALSE;
}
handle_startup_fencing(data_set, new_node);
add_node_attrs(xml_obj, new_node, FALSE, data_set);
pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data,
new_node->details->utilization, NULL,
FALSE, data_set);
crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
}
}
if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
crm_info("Creating a fake local node");
pe_create_node(data_set->localhost, data_set->localhost, NULL, 0,
data_set);
}
return TRUE;
}
static void
setup_container(pe_resource_t * rsc, pe_working_set_t * data_set)
{
const char *container_id = NULL;
if (rsc->children) {
g_list_foreach(rsc->children, (GFunc) setup_container, data_set);
return;
}
container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
pe_resource_t *container = pe_find_resource(data_set->resources, container_id);
if (container) {
rsc->container = container;
pe__set_resource_flags(container, pe_rsc_is_container);
container->fillers = g_list_append(container->fillers, rsc);
pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
} else {
pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
}
}
}
gboolean
unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
/* Create remote nodes and guest nodes from the resource configuration
* before unpacking resources.
*/
for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
const char *new_node_id = NULL;
/* Check for remote nodes, which are defined by ocf:pacemaker:remote
* primitives.
*/
if (xml_contains_remote_node(xml_obj)) {
new_node_id = ID(xml_obj);
/* The "pe_find_node" check is here to make sure we don't iterate over
* an expanded node that has already been added to the node list. */
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found remote node %s defined by resource %s",
new_node_id, ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
continue;
}
/* Check for guest nodes, which are defined by special meta-attributes
* of a primitive of any type (for example, VirtualDomain or Xen).
*/
if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, pcmk__str_none)) {
/* This will add an ocf:pacemaker:remote primitive to the
* configuration for the guest node's connection, to be unpacked
* later.
*/
new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found guest node %s in resource %s",
new_node_id, ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
continue;
}
/* Check for guest nodes inside a group. Clones are currently not
* supported as guest nodes.
*/
if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, pcmk__str_none)) {
xmlNode *xml_obj2 = NULL;
for (xml_obj2 = pcmk__xe_first_child(xml_obj); xml_obj2 != NULL;
xml_obj2 = pcmk__xe_next(xml_obj2)) {
new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found guest node %s in resource %s inside group %s",
new_node_id, ID(xml_obj2), ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
}
}
}
return TRUE;
}
/* Call this after all the nodes and resources have been
* unpacked, but before the status section is read.
*
* A remote node's online status is reflected by the state
* of the remote node's connection resource. We need to link
* the remote node to this connection resource so we can have
* easy access to the connection resource during the scheduler calculations.
*/
static void
link_rsc2remotenode(pe_working_set_t *data_set, pe_resource_t *new_rsc)
{
pe_node_t *remote_node = NULL;
if (new_rsc->is_remote_node == FALSE) {
return;
}
if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) {
/* remote_nodes and remote_resources are not linked in quick location calculations */
return;
}
remote_node = pe_find_node(data_set->nodes, new_rsc->id);
CRM_CHECK(remote_node != NULL, return);
pe_rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
new_rsc->id, pe__node_name(remote_node));
remote_node->details->remote_rsc = new_rsc;
if (new_rsc->container == NULL) {
/* Handle start-up fencing for remote nodes (as opposed to guest nodes)
* the same as is done for cluster nodes.
*/
handle_startup_fencing(data_set, remote_node);
} else {
/* pe_create_node() marks the new node as "remote" or "cluster"; now
* that we know the node is a guest node, update it correctly.
*/
g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("container"));
}
}
static void
destroy_tag(gpointer data)
{
pe_tag_t *tag = data;
if (tag) {
free(tag->id);
g_list_free_full(tag->refs, free);
free(tag);
}
}
/*!
* \internal
* \brief Parse configuration XML for resource information
*
* \param[in] xml_resources Top of resource configuration XML
* \param[in,out] data_set Where to put resource information
*
* \return TRUE
*
* \note unpack_remote_nodes() MUST be called before this, so that the nodes can
* be used when pe__unpack_resource() calls resource_location()
*/
gboolean
unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
GList *gIter = NULL;
data_set->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
pe_resource_t *new_rsc = NULL;
const char *id = ID(xml_obj);
if (pcmk__str_empty(id)) {
pcmk__config_err("Ignoring <%s> resource without ID",
crm_element_name(xml_obj));
continue;
}
if (pcmk__str_eq((const char *) xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE,
pcmk__str_none)) {
if (g_hash_table_lookup_extended(data_set->template_rsc_sets, id,
NULL, NULL) == FALSE) {
/* Record the template's ID for the knowledge of its existence anyway. */
g_hash_table_insert(data_set->template_rsc_sets, strdup(id), NULL);
}
continue;
}
crm_trace("Unpacking <%s id='%s'>", crm_element_name(xml_obj), id);
- if ((pe__unpack_resource(xml_obj, &new_rsc, NULL,
- data_set) == pcmk_rc_ok)
- && (new_rsc != NULL)) {
-
+ if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
+ data_set) == pcmk_rc_ok) {
data_set->resources = g_list_append(data_set->resources, new_rsc);
pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
} else {
pcmk__config_err("Ignoring <%s> resource '%s' "
"because configuration is invalid",
crm_element_name(xml_obj), id);
- if (new_rsc != NULL && new_rsc->fns != NULL) {
- new_rsc->fns->free(new_rsc);
- }
}
}
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
pe_resource_t *rsc = (pe_resource_t *) gIter->data;
setup_container(rsc, data_set);
link_rsc2remotenode(data_set, rsc);
}
data_set->resources = g_list_sort(data_set->resources,
pe__cmp_rsc_priority);
if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) {
/* Ignore */
} else if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)
&& !pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) {
pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
pcmk__config_err("Either configure some or disable STONITH with the stonith-enabled option");
pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
}
return TRUE;
}
gboolean
unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
{
xmlNode *xml_tag = NULL;
data_set->tags = pcmk__strkey_table(free, destroy_tag);
for (xml_tag = pcmk__xe_first_child(xml_tags); xml_tag != NULL;
xml_tag = pcmk__xe_next(xml_tag)) {
xmlNode *xml_obj_ref = NULL;
const char *tag_id = ID(xml_tag);
if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) {
continue;
}
if (tag_id == NULL) {
pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID,
crm_element_name(xml_tag));
continue;
}
for (xml_obj_ref = pcmk__xe_first_child(xml_tag); xml_obj_ref != NULL;
xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
const char *obj_ref = ID(xml_obj_ref);
if (!pcmk__str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, pcmk__str_none)) {
continue;
}
if (obj_ref == NULL) {
pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID,
crm_element_name(xml_obj_ref), tag_id);
continue;
}
if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
return FALSE;
}
}
}
return TRUE;
}
/* The ticket state section:
* "/cib/status/tickets/ticket_state" */
static gboolean
unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
{
const char *ticket_id = NULL;
const char *granted = NULL;
const char *last_granted = NULL;
const char *standby = NULL;
xmlAttrPtr xIter = NULL;
pe_ticket_t *ticket = NULL;
ticket_id = ID(xml_ticket);
if (pcmk__str_empty(ticket_id)) {
return FALSE;
}
crm_trace("Processing ticket state for %s", ticket_id);
ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
if (ticket == NULL) {
ticket = ticket_new(ticket_id, data_set);
if (ticket == NULL) {
return FALSE;
}
}
for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = crm_element_value(xml_ticket, prop_name);
if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) {
continue;
}
g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
}
granted = g_hash_table_lookup(ticket->state, "granted");
if (granted && crm_is_true(granted)) {
ticket->granted = TRUE;
crm_info("We have ticket '%s'", ticket->id);
} else {
ticket->granted = FALSE;
crm_info("We do not have ticket '%s'", ticket->id);
}
last_granted = g_hash_table_lookup(ticket->state, "last-granted");
if (last_granted) {
long long last_granted_ll;
pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
ticket->last_granted = (time_t) last_granted_ll;
}
standby = g_hash_table_lookup(ticket->state, "standby");
if (standby && crm_is_true(standby)) {
ticket->standby = TRUE;
if (ticket->granted) {
crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
}
} else {
ticket->standby = FALSE;
}
crm_trace("Done with ticket state for %s", ticket_id);
return TRUE;
}
static gboolean
unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
for (xml_obj = pcmk__xe_first_child(xml_tickets); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) {
continue;
}
unpack_ticket_state(xml_obj, data_set);
}
return TRUE;
}
static void
unpack_handle_remote_attrs(pe_node_t *this_node, xmlNode *state, pe_working_set_t * data_set)
{
const char *resource_discovery_enabled = NULL;
xmlNode *attrs = NULL;
pe_resource_t *rsc = NULL;
if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
return;
}
if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
return;
}
crm_trace("Processing Pacemaker Remote node %s", pe__node_name(this_node));
pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_MAINTENANCE),
&(this_node->details->remote_maintenance), 0);
rsc = this_node->details->remote_rsc;
if (this_node->details->remote_requires_reset == FALSE) {
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
}
attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, this_node, TRUE, data_set);
if (pe__shutdown_requested(this_node)) {
crm_info("%s is shutting down", pe__node_name(this_node));
this_node->details->shutdown = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
crm_info("%s is in standby mode", pe__node_name(this_node));
this_node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_managed))) {
crm_info("%s is in maintenance mode", pe__node_name(this_node));
this_node->details->maintenance = TRUE;
}
resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
if (pe__is_remote_node(this_node)
&& !pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY
" attribute on Pacemaker Remote node %s"
" because fencing is disabled",
pe__node_name(this_node));
} else {
/* This is either a remote node with fencing enabled, or a guest
* node. We don't care whether fencing is enabled when fencing guest
* nodes, because they are "fenced" by recovering their containing
* resource.
*/
crm_info("%s has resource discovery disabled",
pe__node_name(this_node));
this_node->details->rsc_discovery_enabled = FALSE;
}
}
}
/*!
* \internal
* \brief Unpack a cluster node's transient attributes
*
* \param[in] state CIB node state XML
* \param[in] node Cluster node whose attributes are being unpacked
* \param[in] data_set Cluster working set
*/
static void
unpack_transient_attributes(xmlNode *state, pe_node_t *node,
pe_working_set_t *data_set)
{
const char *discovery = NULL;
xmlNode *attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, node, TRUE, data_set);
if (crm_is_true(pe_node_attribute_raw(node, "standby"))) {
crm_info("%s is in standby mode", pe__node_name(node));
node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(node, "maintenance"))) {
crm_info("%s is in maintenance mode", pe__node_name(node));
node->details->maintenance = TRUE;
}
discovery = pe_node_attribute_raw(node, XML_NODE_ATTR_RSC_DISCOVERY);
if ((discovery != NULL) && !crm_is_true(discovery)) {
crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY
" attribute for %s because disabling resource discovery "
"is not allowed for cluster nodes", pe__node_name(node));
}
}
/*!
* \internal
* \brief Unpack a node state entry (first pass)
*
* Unpack one node state entry from status. This unpacks information from the
* node_state element itself and node attributes inside it, but not the
* resource history inside it. Multiple passes through the status are needed to
* fully unpack everything.
*
* \param[in] state CIB node state XML
* \param[in] data_set Cluster working set
*/
static void
unpack_node_state(xmlNode *state, pe_working_set_t *data_set)
{
const char *id = NULL;
const char *uname = NULL;
pe_node_t *this_node = NULL;
id = crm_element_value(state, XML_ATTR_ID);
if (id == NULL) {
crm_warn("Ignoring malformed " XML_CIB_TAG_STATE " entry without "
XML_ATTR_ID);
return;
}
uname = crm_element_value(state, XML_ATTR_UNAME);
if (uname == NULL) {
crm_warn("Ignoring malformed " XML_CIB_TAG_STATE " entry without "
XML_ATTR_UNAME);
return;
}
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (this_node == NULL) {
pcmk__config_warn("Ignoring recorded node state for '%s' because "
"it is no longer in the configuration", uname);
return;
}
if (pe__is_guest_or_remote_node(this_node)) {
/* We can't determine the online status of Pacemaker Remote nodes until
* after all resource history has been unpacked. In this first pass, we
* do need to mark whether the node has been fenced, as this plays a
* role during unpacking cluster node resource state.
*/
pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_FENCED),
&(this_node->details->remote_was_fenced), 0);
return;
}
unpack_transient_attributes(state, this_node, data_set);
/* Provisionally mark this cluster node as clean. We have at least seen it
* in the current cluster's lifetime.
*/
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
crm_trace("Determining online status of cluster node %s (id %s)",
pe__node_name(this_node), id);
determine_online_status(state, this_node, data_set);
if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum)
&& this_node->details->online
&& (data_set->no_quorum_policy == no_quorum_suicide)) {
/* Everything else should flow from this automatically
* (at least until the scheduler becomes able to migrate off
* healthy resources)
*/
pe_fence_node(data_set, this_node, "cluster does not have quorum",
FALSE);
}
}
/*!
* \internal
* \brief Unpack nodes' resource history as much as possible
*
* Unpack as many nodes' resource history as possible in one pass through the
* status. We need to process Pacemaker Remote nodes' connections/containers
* before unpacking their history; the connection/container history will be
* in another node's history, so it might take multiple passes to unpack
* everything.
*
* \param[in] status CIB XML status section
* \param[in] fence If true, treat any not-yet-unpacked nodes as unseen
* \param[in] data_set Cluster working set
*
* \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
* or EAGAIN if more unpacking remains to be done)
*/
static int
unpack_node_history(xmlNode *status, bool fence, pe_working_set_t *data_set)
{
int rc = pcmk_rc_ok;
// Loop through all node_state entries in CIB status
for (xmlNode *state = first_named_child(status, XML_CIB_TAG_STATE);
state != NULL; state = crm_next_same_xml(state)) {
const char *id = ID(state);
const char *uname = crm_element_value(state, XML_ATTR_UNAME);
pe_node_t *this_node = NULL;
if ((id == NULL) || (uname == NULL)) {
// Warning already logged in first pass through status section
crm_trace("Not unpacking resource history from malformed "
XML_CIB_TAG_STATE " without id and/or uname");
continue;
}
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (this_node == NULL) {
// Warning already logged in first pass through status section
crm_trace("Not unpacking resource history for node %s because "
"no longer in configuration", id);
continue;
}
if (this_node->details->unpacked) {
crm_trace("Not unpacking resource history for node %s because "
"already unpacked", id);
continue;
}
if (fence) {
// We're processing all remaining nodes
} else if (pe__is_guest_node(this_node)) {
/* We can unpack a guest node's history only after we've unpacked
* other resource history to the point that we know that the node's
* connection and containing resource are both up.
*/
pe_resource_t *rsc = this_node->details->remote_rsc;
if ((rsc == NULL) || (rsc->role != RSC_ROLE_STARTED)
|| (rsc->container->role != RSC_ROLE_STARTED)) {
crm_trace("Not unpacking resource history for guest node %s "
"because container and connection are not known to "
"be up", id);
continue;
}
} else if (pe__is_remote_node(this_node)) {
/* We can unpack a remote node's history only after we've unpacked
* other resource history to the point that we know that the node's
* connection is up, with the exception of when shutdown locks are
* in use.
*/
pe_resource_t *rsc = this_node->details->remote_rsc;
if ((rsc == NULL)
|| (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)
&& (rsc->role != RSC_ROLE_STARTED))) {
crm_trace("Not unpacking resource history for remote node %s "
"because connection is not known to be up", id);
continue;
}
/* If fencing and shutdown locks are disabled and we're not processing
* unseen nodes, then we don't want to unpack offline nodes until online
* nodes have been unpacked. This allows us to number active clone
* instances first.
*/
} else if (!pcmk_any_flags_set(data_set->flags, pe_flag_stonith_enabled
|pe_flag_shutdown_lock)
&& !this_node->details->online) {
crm_trace("Not unpacking resource history for offline "
"cluster node %s", id);
continue;
}
if (pe__is_guest_or_remote_node(this_node)) {
determine_remote_online_status(data_set, this_node);
unpack_handle_remote_attrs(this_node, state, data_set);
}
crm_trace("Unpacking resource history for %snode %s",
(fence? "unseen " : ""), id);
this_node->details->unpacked = TRUE;
unpack_node_lrm(this_node, state, data_set);
rc = EAGAIN; // Other node histories might depend on this one
}
return rc;
}
/* remove nodes that are down, stopping */
/* create positive rsc_to_node constraints between resources and the nodes they are running on */
/* anything else? */
gboolean
unpack_status(xmlNode * status, pe_working_set_t * data_set)
{
xmlNode *state = NULL;
crm_trace("Beginning unpack");
if (data_set->tickets == NULL) {
data_set->tickets = pcmk__strkey_table(free, destroy_ticket);
}
for (state = pcmk__xe_first_child(status); state != NULL;
state = pcmk__xe_next(state)) {
if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) {
unpack_tickets_state((xmlNode *) state, data_set);
} else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
unpack_node_state(state, data_set);
}
}
while (unpack_node_history(status, FALSE, data_set) == EAGAIN) {
crm_trace("Another pass through node resource histories is needed");
}
// Now catch any nodes we didn't see
unpack_node_history(status,
pcmk_is_set(data_set->flags, pe_flag_stonith_enabled),
data_set);
/* Now that we know where resources are, we can schedule stops of containers
* with failed bundle connections
*/
if (data_set->stop_needed != NULL) {
for (GList *item = data_set->stop_needed; item; item = item->next) {
pe_resource_t *container = item->data;
pe_node_t *node = pe__current_node(container);
if (node) {
stop_action(container, node, FALSE);
}
}
g_list_free(data_set->stop_needed);
data_set->stop_needed = NULL;
}
/* Now that we know status of all Pacemaker Remote connections and nodes,
* we can stop connections for node shutdowns, and check the online status
* of remote/guest nodes that didn't have any node history to unpack.
*/
for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
pe_node_t *this_node = gIter->data;
if (!pe__is_guest_or_remote_node(this_node)) {
continue;
}
if (this_node->details->shutdown
&& (this_node->details->remote_rsc != NULL)) {
pe__set_next_role(this_node->details->remote_rsc, RSC_ROLE_STOPPED,
"remote shutdown");
}
if (!this_node->details->unpacked) {
determine_remote_online_status(data_set, this_node);
}
}
return TRUE;
}
static gboolean
determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
pe_node_t * this_node)
{
gboolean online = FALSE;
const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
if (!crm_is_true(in_cluster)) {
crm_trace("Node is down: in_cluster=%s",
pcmk__s(in_cluster, ""));
} else if (pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei)) {
if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
online = TRUE;
} else {
crm_debug("Node is not ready to run resources: %s", join);
}
} else if (this_node->details->expected_up == FALSE) {
crm_trace("Controller is down: "
"in_cluster=%s is_peer=%s join=%s expected=%s",
pcmk__s(in_cluster, ""), pcmk__s(is_peer, ""),
pcmk__s(join, ""), pcmk__s(exp_state, ""));
} else {
/* mark it unclean */
pe_fence_node(data_set, this_node, "peer is unexpectedly down", FALSE);
crm_info("in_cluster=%s is_peer=%s join=%s expected=%s",
pcmk__s(in_cluster, ""), pcmk__s(is_peer, ""),
pcmk__s(join, ""), pcmk__s(exp_state, ""));
}
return online;
}
static gboolean
determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
pe_node_t * this_node)
{
gboolean online = FALSE;
gboolean do_terminate = FALSE;
bool crmd_online = FALSE;
const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
const char *terminate = pe_node_attribute_raw(this_node, "terminate");
/*
- XML_NODE_IN_CLUSTER ::= true|false
- XML_NODE_IS_PEER ::= online|offline
- XML_NODE_JOIN_STATE ::= member|down|pending|banned
- XML_NODE_EXPECTED ::= member|down
*/
if (crm_is_true(terminate)) {
do_terminate = TRUE;
} else if (terminate != NULL && strlen(terminate) > 0) {
/* could be a time() value */
char t = terminate[0];
if (t != '0' && isdigit(t)) {
do_terminate = TRUE;
}
}
crm_trace("%s: in_cluster=%s is_peer=%s join=%s expected=%s term=%d",
pe__node_name(this_node), pcmk__s(in_cluster, ""),
pcmk__s(is_peer, ""), pcmk__s(join, ""),
pcmk__s(exp_state, ""), do_terminate);
online = crm_is_true(in_cluster);
crmd_online = pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei);
if (exp_state == NULL) {
exp_state = CRMD_JOINSTATE_DOWN;
}
if (this_node->details->shutdown) {
crm_debug("%s is shutting down", pe__node_name(this_node));
/* Slightly different criteria since we can't shut down a dead peer */
online = crmd_online;
} else if (in_cluster == NULL) {
pe_fence_node(data_set, this_node, "peer has not been seen by the cluster", FALSE);
} else if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_casei)) {
pe_fence_node(data_set, this_node,
"peer failed Pacemaker membership criteria", FALSE);
} else if (do_terminate == FALSE && pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, pcmk__str_casei)) {
if (crm_is_true(in_cluster) || crmd_online) {
crm_info("- %s is not ready to run resources",
pe__node_name(this_node));
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
crm_trace("%s is down or still coming up",
pe__node_name(this_node));
}
} else if (do_terminate && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_casei)
&& crm_is_true(in_cluster) == FALSE && !crmd_online) {
crm_info("%s was just shot", pe__node_name(this_node));
online = FALSE;
} else if (crm_is_true(in_cluster) == FALSE) {
// Consider `priority-fencing-delay` for lost nodes
pe_fence_node(data_set, this_node, "peer is no longer part of the cluster", TRUE);
} else if (!crmd_online) {
pe_fence_node(data_set, this_node, "peer process is no longer available", FALSE);
/* Everything is running at this point, now check join state */
} else if (do_terminate) {
pe_fence_node(data_set, this_node, "termination was requested", FALSE);
} else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
crm_info("%s is active", pe__node_name(this_node));
} else if (pcmk__strcase_any_of(join, CRMD_JOINSTATE_PENDING, CRMD_JOINSTATE_DOWN, NULL)) {
crm_info("%s is not ready to run resources", pe__node_name(this_node));
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
pe_fence_node(data_set, this_node, "peer was in an unknown state", FALSE);
crm_warn("%s: in-cluster=%s is-peer=%s join=%s expected=%s term=%d shutdown=%d",
pe__node_name(this_node), pcmk__s(in_cluster, ""),
pcmk__s(is_peer, ""), pcmk__s(join, ""),
pcmk__s(exp_state, ""), do_terminate,
this_node->details->shutdown);
}
return online;
}
static void
determine_remote_online_status(pe_working_set_t * data_set, pe_node_t * this_node)
{
pe_resource_t *rsc = this_node->details->remote_rsc;
pe_resource_t *container = NULL;
pe_node_t *host = NULL;
/* If there is a node state entry for a (former) Pacemaker Remote node
* but no resource creating that node, the node's connection resource will
* be NULL. Consider it an offline remote node in that case.
*/
if (rsc == NULL) {
this_node->details->online = FALSE;
goto remote_online_done;
}
container = rsc->container;
if (container && pcmk__list_of_1(rsc->running_on)) {
host = rsc->running_on->data;
}
/* If the resource is currently started, mark it online. */
if (rsc->role == RSC_ROLE_STARTED) {
crm_trace("%s node %s presumed ONLINE because connection resource is started",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = TRUE;
}
/* consider this node shutting down if transitioning start->stop */
if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
crm_trace("%s node %s shutting down because connection resource is stopping",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->shutdown = TRUE;
}
/* Now check all the failure conditions. */
if(container && pcmk_is_set(container->flags, pe_rsc_failed)) {
crm_trace("Guest node %s UNCLEAN because guest resource failed",
this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = TRUE;
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
crm_trace("%s node %s OFFLINE because connection resource failed",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = FALSE;
} else if (rsc->role == RSC_ROLE_STOPPED
|| (container && container->role == RSC_ROLE_STOPPED)) {
crm_trace("%s node %s OFFLINE because its resource is stopped",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = FALSE;
} else if (host && (host->details->online == FALSE)
&& host->details->unclean) {
crm_trace("Guest node %s UNCLEAN because host is unclean",
this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = TRUE;
}
remote_online_done:
crm_trace("Remote node %s online=%s",
this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
}
static void
determine_online_status(xmlNode * node_state, pe_node_t * this_node, pe_working_set_t * data_set)
{
gboolean online = FALSE;
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
CRM_CHECK(this_node != NULL, return);
this_node->details->shutdown = FALSE;
this_node->details->expected_up = FALSE;
if (pe__shutdown_requested(this_node)) {
this_node->details->shutdown = TRUE;
} else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
this_node->details->expected_up = TRUE;
}
if (this_node->details->type == node_ping) {
this_node->details->unclean = FALSE;
online = FALSE; /* As far as resource management is concerned,
* the node is safely offline.
* Anyone caught abusing this logic will be shot
*/
} else if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
online = determine_online_status_no_fencing(data_set, node_state, this_node);
} else {
online = determine_online_status_fencing(data_set, node_state, this_node);
}
if (online) {
this_node->details->online = TRUE;
} else {
/* remove node from contention */
this_node->fixed = TRUE;
this_node->weight = -INFINITY;
}
if (online && this_node->details->shutdown) {
/* don't run resources here */
this_node->fixed = TRUE;
this_node->weight = -INFINITY;
}
if (this_node->details->type == node_ping) {
crm_info("%s is not a Pacemaker node", pe__node_name(this_node));
} else if (this_node->details->unclean) {
pe_proc_warn("%s is unclean", pe__node_name(this_node));
} else if (this_node->details->online) {
crm_info("%s is %s", pe__node_name(this_node),
this_node->details->shutdown ? "shutting down" :
this_node->details->pending ? "pending" :
this_node->details->standby ? "standby" :
this_node->details->maintenance ? "maintenance" : "online");
} else {
crm_trace("%s is offline", pe__node_name(this_node));
}
}
/*!
* \internal
* \brief Find the end of a resource's name, excluding any clone suffix
*
* \param[in] id Resource ID to check
*
* \return Pointer to last character of resource's base name
*/
const char *
pe_base_name_end(const char *id)
{
if (!pcmk__str_empty(id)) {
const char *end = id + strlen(id) - 1;
for (const char *s = end; s > id; --s) {
switch (*s) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
case ':':
return (s == end)? s : (s - 1);
default:
return end;
}
}
return end;
}
return NULL;
}
/*!
* \internal
* \brief Get a resource name excluding any clone suffix
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_strip(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
char *basename = NULL;
CRM_ASSERT(end);
basename = strndup(last_rsc_id, end - last_rsc_id + 1);
CRM_ASSERT(basename);
return basename;
}
/*!
* \internal
* \brief Get the name of the first instance of a cloned resource
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name plus :0
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_zero(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
size_t base_name_len = end - last_rsc_id + 1;
char *zero = NULL;
CRM_ASSERT(end);
zero = calloc(base_name_len + 3, sizeof(char));
CRM_ASSERT(zero);
memcpy(zero, last_rsc_id, base_name_len);
zero[base_name_len] = ':';
zero[base_name_len + 1] = '0';
return zero;
}
static pe_resource_t *
create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
pe_resource_t *rsc = NULL;
xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
copy_in_properties(xml_rsc, rsc_entry);
crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
crm_log_xml_debug(xml_rsc, "Orphan resource");
if (pe__unpack_resource(xml_rsc, &rsc, NULL, data_set) != pcmk_rc_ok) {
return NULL;
}
if (xml_contains_remote_node(xml_rsc)) {
pe_node_t *node;
crm_debug("Detected orphaned remote node %s", rsc_id);
node = pe_find_node(data_set->nodes, rsc_id);
if (node == NULL) {
node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
}
link_rsc2remotenode(data_set, rsc);
if (node) {
crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
node->details->shutdown = TRUE;
}
}
if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
/* This orphaned rsc needs to be mapped to a container. */
crm_trace("Detected orphaned container filler %s", rsc_id);
pe__set_resource_flags(rsc, pe_rsc_orphan_container_filler);
}
pe__set_resource_flags(rsc, pe_rsc_orphan);
data_set->resources = g_list_append(data_set->resources, rsc);
return rsc;
}
/*!
* \internal
* \brief Create orphan instance for anonymous clone resource history
*/
static pe_resource_t *
create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
pe_node_t *node, pe_working_set_t *data_set)
{
pe_resource_t *top = pe__create_clone_child(parent, data_set);
// find_rsc() because we might be a cloned group
pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
top->id, parent->id, rsc_id, pe__node_name(node));
return orphan;
}
/*!
* \internal
* \brief Check a node for an instance of an anonymous clone
*
* Return a child instance of the specified anonymous clone, in order of
* preference: (1) the instance running on the specified node, if any;
* (2) an inactive instance (i.e. within the total of clone-max instances);
* (3) a newly created orphan (i.e. clone-max instances are already active).
*
* \param[in] data_set Cluster information
* \param[in] node Node on which to check for instance
* \param[in] parent Clone to check
* \param[in] rsc_id Name of cloned resource in history (without instance)
*/
static pe_resource_t *
find_anonymous_clone(pe_working_set_t * data_set, pe_node_t * node, pe_resource_t * parent,
const char *rsc_id)
{
GList *rIter = NULL;
pe_resource_t *rsc = NULL;
pe_resource_t *inactive_instance = NULL;
gboolean skip_inactive = FALSE;
CRM_ASSERT(parent != NULL);
CRM_ASSERT(pe_rsc_is_clone(parent));
CRM_ASSERT(!pcmk_is_set(parent->flags, pe_rsc_unique));
// Check for active (or partially active, for cloned groups) instance
pe_rsc_trace(parent, "Looking for %s on %s in %s",
rsc_id, pe__node_name(node), parent->id);
for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
GList *locations = NULL;
pe_resource_t *child = rIter->data;
/* Check whether this instance is already known to be active or pending
* anywhere, at this stage of unpacking. Because this function is called
* for a resource before the resource's individual operation history
* entries are unpacked, locations will generally not contain the
* desired node.
*
* However, there are three exceptions:
* (1) when child is a cloned group and we have already unpacked the
* history of another member of the group on the same node;
* (2) when we've already unpacked the history of another numbered
* instance on the same node (which can happen if globally-unique
* was flipped from true to false); and
* (3) when we re-run calculations on the same data set as part of a
* simulation.
*/
child->fns->location(child, &locations, 2);
if (locations) {
/* We should never associate the same numbered anonymous clone
* instance with multiple nodes, and clone instances can't migrate,
* so there must be only one location, regardless of history.
*/
CRM_LOG_ASSERT(locations->next == NULL);
if (((pe_node_t *)locations->data)->details == node->details) {
/* This child instance is active on the requested node, so check
* for a corresponding configured resource. We use find_rsc()
* instead of child because child may be a cloned group, and we
* need the particular member corresponding to rsc_id.
*
* If the history entry is orphaned, rsc will be NULL.
*/
rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
if (rsc) {
/* If there are multiple instance history entries for an
* anonymous clone in a single node's history (which can
* happen if globally-unique is switched from true to
* false), we want to consider the instances beyond the
* first as orphans, even if there are inactive instance
* numbers available.
*/
if (rsc->running_on) {
crm_notice("Active (now-)anonymous clone %s has "
"multiple (orphan) instance histories on %s",
parent->id, pe__node_name(node));
skip_inactive = TRUE;
rsc = NULL;
} else {
pe_rsc_trace(parent, "Resource %s, active", rsc->id);
}
}
}
g_list_free(locations);
} else {
pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
if (!skip_inactive && !inactive_instance
&& !pcmk_is_set(child->flags, pe_rsc_block)) {
// Remember one inactive instance in case we don't find active
inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
pe_find_clone);
/* ... but don't use it if it was already associated with a
* pending action on another node
*/
if (inactive_instance && inactive_instance->pending_node
&& (inactive_instance->pending_node->details != node->details)) {
inactive_instance = NULL;
}
}
}
}
if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
rsc = inactive_instance;
}
/* If the resource has "requires" set to "quorum" or "nothing", and we don't
* have a clone instance for every node, we don't want to consume a valid
* instance number for unclean nodes. Such instances may appear to be active
* according to the history, but should be considered inactive, so we can
* start an instance elsewhere. Treat such instances as orphans.
*
* An exception is instances running on guest nodes -- since guest node
* "fencing" is actually just a resource stop, requires shouldn't apply.
*
* @TODO Ideally, we'd use an inactive instance number if it is not needed
* for any clean instances. However, we don't know that at this point.
*/
if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)
&& (!node->details->online || node->details->unclean)
&& !pe__is_guest_node(node)
&& !pe__is_universal_clone(parent, data_set)) {
rsc = NULL;
}
if (rsc == NULL) {
rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
}
return rsc;
}
static pe_resource_t *
unpack_find_resource(pe_working_set_t * data_set, pe_node_t * node, const char *rsc_id,
xmlNode * rsc_entry)
{
pe_resource_t *rsc = NULL;
pe_resource_t *parent = NULL;
crm_trace("looking for %s", rsc_id);
rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL) {
/* If we didn't find the resource by its name in the operation history,
* check it again as a clone instance. Even when clone-max=0, we create
* a single :0 orphan to match against here.
*/
char *clone0_id = clone_zero(rsc_id);
pe_resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id);
if (clone0 && !pcmk_is_set(clone0->flags, pe_rsc_unique)) {
rsc = clone0;
parent = uber_parent(clone0);
crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
} else {
crm_trace("%s is not known as %s either (orphan)",
rsc_id, clone0_id);
}
free(clone0_id);
} else if (rsc->variant > pe_native) {
crm_trace("Resource history for %s is orphaned because it is no longer primitive",
rsc_id);
return NULL;
} else {
parent = uber_parent(rsc);
}
if (pe_rsc_is_anon_clone(parent)) {
if (pe_rsc_is_bundled(parent)) {
rsc = pe__find_bundle_replica(parent->parent, node);
} else {
char *base = clone_strip(rsc_id);
rsc = find_anonymous_clone(data_set, node, parent, base);
free(base);
CRM_ASSERT(rsc != NULL);
}
}
if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei)
&& !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) {
pcmk__str_update(&rsc->clone_name, rsc_id);
pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
rsc_id, pe__node_name(node), rsc->id,
(pcmk_is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
}
return rsc;
}
static pe_resource_t *
process_orphan_resource(xmlNode * rsc_entry, pe_node_t * node, pe_working_set_t * data_set)
{
pe_resource_t *rsc = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
crm_debug("Detected orphan resource %s on %s", rsc_id, pe__node_name(node));
rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
if (rsc == NULL) {
return NULL;
}
if (!pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) {
pe__clear_resource_flags(rsc, pe_rsc_managed);
} else {
CRM_CHECK(rsc != NULL, return NULL);
pe_rsc_trace(rsc, "Added orphan %s", rsc->id);
resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", data_set);
}
return rsc;
}
static void
process_rsc_state(pe_resource_t * rsc, pe_node_t * node,
enum action_fail_response on_fail,
xmlNode * migrate_op, pe_working_set_t * data_set)
{
pe_node_t *tmpnode = NULL;
char *reason = NULL;
enum action_fail_response save_on_fail = action_fail_ignore;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
rsc->id, role2text(rsc->role), pe__node_name(node),
fail2text(on_fail));
/* process current state */
if (rsc->role != RSC_ROLE_UNKNOWN) {
pe_resource_t *iter = rsc;
while (iter) {
if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
pe_node_t *n = pe__copy_node(node);
pe_rsc_trace(rsc, "%s%s%s known on %s",
rsc->id,
((rsc->clone_name == NULL)? "" : " also known as "),
((rsc->clone_name == NULL)? "" : rsc->clone_name),
pe__node_name(n));
g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
}
if (pcmk_is_set(iter->flags, pe_rsc_unique)) {
break;
}
iter = iter->parent;
}
}
/* If a managed resource is believed to be running, but node is down ... */
if (rsc->role > RSC_ROLE_STOPPED
&& node->details->online == FALSE
&& node->details->maintenance == FALSE
&& pcmk_is_set(rsc->flags, pe_rsc_managed)) {
gboolean should_fence = FALSE;
/* If this is a guest node, fence it (regardless of whether fencing is
* enabled, because guest node fencing is done by recovery of the
* container resource rather than by the fencer). Mark the resource
* we're processing as failed. When the guest comes back up, its
* operation history in the CIB will be cleared, freeing the affected
* resource to run again once we are sure we know its state.
*/
if (pe__is_guest_node(node)) {
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
should_fence = TRUE;
} else if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
if (pe__is_remote_node(node) && node->details->remote_rsc
&& !pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
/* Setting unseen means that fencing of the remote node will
* occur only if the connection resource is not going to start
* somewhere. This allows connection resources on a failed
* cluster node to move to another node without requiring the
* remote nodes to be fenced as well.
*/
node->details->unseen = TRUE;
reason = crm_strdup_printf("%s is active there (fencing will be"
" revoked if remote connection can "
"be re-established elsewhere)",
rsc->id);
}
should_fence = TRUE;
}
if (should_fence) {
if (reason == NULL) {
reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
}
pe_fence_node(data_set, node, reason, FALSE);
}
free(reason);
}
/* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
save_on_fail = on_fail;
if (node->details->unclean) {
/* No extra processing needed
* Also allows resources to be started again after a node is shot
*/
on_fail = action_fail_ignore;
}
switch (on_fail) {
case action_fail_ignore:
/* nothing to do */
break;
case action_fail_demote:
pe__set_resource_flags(rsc, pe_rsc_failed);
demote_action(rsc, node, FALSE);
break;
case action_fail_fence:
/* treat it as if it is still running
* but also mark the node as unclean
*/
reason = crm_strdup_printf("%s failed there", rsc->id);
pe_fence_node(data_set, node, reason, FALSE);
free(reason);
break;
case action_fail_standby:
node->details->standby = TRUE;
node->details->standby_onfail = TRUE;
break;
case action_fail_block:
/* is_managed == FALSE will prevent any
* actions being sent for the resource
*/
pe__clear_resource_flags(rsc, pe_rsc_managed);
pe__set_resource_flags(rsc, pe_rsc_block);
break;
case action_fail_migrate:
/* make sure it comes up somewhere else
* or not at all
*/
resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
break;
case action_fail_stop:
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "on-fail=stop");
break;
case action_fail_recover:
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
stop_action(rsc, node, FALSE);
}
break;
case action_fail_restart_container:
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
if (rsc->container && pe_rsc_is_bundled(rsc)) {
/* A bundle's remote connection can run on a different node than
* the bundle's container. We don't necessarily know where the
* container is running yet, so remember it and add a stop
* action for it later.
*/
data_set->stop_needed = g_list_prepend(data_set->stop_needed,
rsc->container);
} else if (rsc->container) {
stop_action(rsc->container, node, FALSE);
} else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
stop_action(rsc, node, FALSE);
}
break;
case action_fail_reset_remote:
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
tmpnode = NULL;
if (rsc->is_remote_node) {
tmpnode = pe_find_node(data_set->nodes, rsc->id);
}
if (tmpnode &&
pe__is_remote_node(tmpnode) &&
tmpnode->details->remote_was_fenced == 0) {
/* The remote connection resource failed in a way that
* should result in fencing the remote node.
*/
pe_fence_node(data_set, tmpnode,
"remote connection is unrecoverable", FALSE);
}
}
/* require the stop action regardless if fencing is occurring or not. */
if (rsc->role > RSC_ROLE_STOPPED) {
stop_action(rsc, node, FALSE);
}
/* if reconnect delay is in use, prevent the connection from exiting the
* "STOPPED" role until the failure is cleared by the delay timeout. */
if (rsc->remote_reconnect_ms) {
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "remote reset");
}
break;
}
/* ensure a remote-node connection failure forces an unclean remote-node
* to be fenced. By setting unseen = FALSE, the remote-node failure will
* result in a fencing operation regardless if we're going to attempt to
* reconnect to the remote-node in this transition or not. */
if (pcmk_is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
tmpnode = pe_find_node(data_set->nodes, rsc->id);
if (tmpnode && tmpnode->details->unclean) {
tmpnode->details->unseen = FALSE;
}
}
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pcmk__config_warn("Detected active orphan %s running on %s",
rsc->id, pe__node_name(node));
} else {
pcmk__config_warn("Resource '%s' must be stopped manually on "
"%s because cluster is configured not to "
"stop active orphans",
rsc->id, pe__node_name(node));
}
}
native_add_running(rsc, node, data_set, (save_on_fail != action_fail_ignore));
switch (on_fail) {
case action_fail_ignore:
break;
case action_fail_demote:
case action_fail_block:
pe__set_resource_flags(rsc, pe_rsc_failed);
break;
default:
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
break;
}
} else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
/* Only do this for older status sections that included instance numbers
* Otherwise stopped instances will appear as orphans
*/
pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
free(rsc->clone_name);
rsc->clone_name = NULL;
} else {
GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP,
FALSE);
GList *gIter = possible_matches;
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *stop = (pe_action_t *) gIter->data;
pe__set_action_flags(stop, pe_action_optional);
}
g_list_free(possible_matches);
}
/* A successful stop after migrate_to on the migration source doesn't make
* the partially migrated resource stopped on the migration target.
*/
if (rsc->role == RSC_ROLE_STOPPED
&& rsc->partial_migration_source
&& rsc->partial_migration_source->details == node->details
&& rsc->partial_migration_target
&& rsc->running_on) {
rsc->role = RSC_ROLE_STARTED;
}
}
/* create active recurring operations as optional */
static void
process_recurring(pe_node_t * node, pe_resource_t * rsc,
int start_index, int stop_index,
GList *sorted_op_list, pe_working_set_t * data_set)
{
int counter = -1;
const char *task = NULL;
const char *status = NULL;
GList *gIter = sorted_op_list;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
guint interval_ms = 0;
char *key = NULL;
const char *id = ID(rsc_op);
counter++;
if (node->details->online == FALSE) {
pe_rsc_trace(rsc, "Skipping %s on %s: node is offline",
rsc->id, pe__node_name(node));
break;
/* Need to check if there's a monitor for role="Stopped" */
} else if (start_index < stop_index && counter <= stop_index) {
pe_rsc_trace(rsc, "Skipping %s on %s: resource is not active",
id, pe__node_name(node));
continue;
} else if (counter < start_index) {
pe_rsc_trace(rsc, "Skipping %s on %s: old %d",
id, pe__node_name(node), counter);
continue;
}
crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if (interval_ms == 0) {
pe_rsc_trace(rsc, "Skipping %s on %s: non-recurring",
id, pe__node_name(node));
continue;
}
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
pe_rsc_trace(rsc, "Skipping %s on %s: status",
id, pe__node_name(node));
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
/* create the action */
key = pcmk__op_key(rsc->id, task, interval_ms);
pe_rsc_trace(rsc, "Creating %s on %s", key, pe__node_name(node));
custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
}
}
void
calculate_active_ops(GList *sorted_op_list, int *start_index, int *stop_index)
{
int counter = -1;
int implied_monitor_start = -1;
int implied_clone_start = -1;
const char *task = NULL;
const char *status = NULL;
GList *gIter = sorted_op_list;
*stop_index = -1;
*start_index = -1;
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)
&& pcmk__str_eq(status, "0", pcmk__str_casei)) {
*stop_index = counter;
} else if (pcmk__strcase_any_of(task, CRMD_ACTION_START, CRMD_ACTION_MIGRATED, NULL)) {
*start_index = counter;
} else if ((implied_monitor_start <= *stop_index) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
implied_monitor_start = counter;
}
} else if (pcmk__strcase_any_of(task, CRMD_ACTION_PROMOTE, CRMD_ACTION_DEMOTE, NULL)) {
implied_clone_start = counter;
}
}
if (*start_index == -1) {
if (implied_clone_start != -1) {
*start_index = implied_clone_start;
} else if (implied_monitor_start != -1) {
*start_index = implied_monitor_start;
}
}
}
// If resource history entry has shutdown lock, remember lock node and time
static void
unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set)
{
time_t lock_time = 0; // When lock started (i.e. node shutdown time)
if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
&lock_time) == pcmk_ok) && (lock_time != 0)) {
if ((data_set->shutdown_lock > 0)
&& (get_effective_time(data_set)
> (lock_time + data_set->shutdown_lock))) {
pe_rsc_info(rsc, "Shutdown lock for %s on %s expired",
rsc->id, pe__node_name(node));
pe__clear_resource_history(rsc, node, data_set);
} else {
rsc->lock_node = node;
rsc->lock_time = lock_time;
}
}
}
/*!
* \internal
* \brief Unpack one lrm_resource entry from a node's CIB status
*
* \param[in] node Node whose status is being unpacked
* \param[in] rsc_entry lrm_resource XML being unpacked
* \param[in] data_set Cluster working set
*
* \return Resource corresponding to the entry, or NULL if no operation history
*/
static pe_resource_t *
unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
int stop_index = -1;
int start_index = -1;
enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
const char *task = NULL;
const char *rsc_id = ID(lrm_resource);
pe_resource_t *rsc = NULL;
GList *op_list = NULL;
GList *sorted_op_list = NULL;
xmlNode *migrate_op = NULL;
xmlNode *rsc_op = NULL;
xmlNode *last_failure = NULL;
enum action_fail_response on_fail = action_fail_ignore;
enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
if (rsc_id == NULL) {
crm_warn("Ignoring malformed " XML_LRM_TAG_RESOURCE
" entry without id");
return NULL;
}
crm_trace("Unpacking " XML_LRM_TAG_RESOURCE " for %s on %s",
rsc_id, pe__node_name(node));
// Build a list of individual lrm_rsc_op entries, so we can sort them
for (rsc_op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
op_list = g_list_prepend(op_list, rsc_op);
}
if (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) {
if (op_list == NULL) {
// If there are no operations, there is nothing to do
return NULL;
}
}
/* find the resource */
rsc = unpack_find_resource(data_set, node, rsc_id, lrm_resource);
if (rsc == NULL) {
if (op_list == NULL) {
// If there are no operations, there is nothing to do
return NULL;
} else {
rsc = process_orphan_resource(lrm_resource, node, data_set);
}
}
CRM_ASSERT(rsc != NULL);
// Check whether the resource is "shutdown-locked" to this node
if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) {
unpack_shutdown_lock(lrm_resource, rsc, node, data_set);
}
/* process operations */
saved_role = rsc->role;
rsc->role = RSC_ROLE_UNKNOWN;
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
migrate_op = rsc_op;
}
unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
}
/* create active recurring operations as optional */
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
/* no need to free the contents */
g_list_free(sorted_op_list);
process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
if (get_target_role(rsc, &req_role)) {
if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
pe__set_next_role(rsc, req_role, XML_RSC_ATTR_TARGET_ROLE);
} else if (req_role > rsc->next_role) {
pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role), role2text(req_role));
}
}
if (saved_role > rsc->role) {
rsc->role = saved_role;
}
return rsc;
}
static void
handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
{
xmlNode *rsc_entry = NULL;
for (rsc_entry = pcmk__xe_first_child(lrm_rsc_list); rsc_entry != NULL;
rsc_entry = pcmk__xe_next(rsc_entry)) {
pe_resource_t *rsc;
pe_resource_t *container;
const char *rsc_id;
const char *container_id;
if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) {
continue;
}
container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
if (container_id == NULL || rsc_id == NULL) {
continue;
}
container = pe_find_resource(data_set->resources, container_id);
if (container == NULL) {
continue;
}
rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL ||
!pcmk_is_set(rsc->flags, pe_rsc_orphan_container_filler) ||
rsc->container != NULL) {
continue;
}
pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
rsc->id, container_id);
rsc->container = container;
container->fillers = g_list_append(container->fillers, rsc);
}
}
/*!
* \internal
* \brief Unpack one node's lrm status section
*
* \param[in] node Node whose status is being unpacked
* \param[in] xml CIB node state XML
* \param[in] data_set Cluster working set
*/
static void
unpack_node_lrm(pe_node_t *node, xmlNode *xml, pe_working_set_t *data_set)
{
bool found_orphaned_container_filler = false;
// Drill down to lrm_resources section
xml = find_xml_node(xml, XML_CIB_TAG_LRM, FALSE);
if (xml == NULL) {
return;
}
xml = find_xml_node(xml, XML_LRM_TAG_RESOURCES, FALSE);
if (xml == NULL) {
return;
}
// Unpack each lrm_resource entry
for (xmlNode *rsc_entry = first_named_child(xml, XML_LRM_TAG_RESOURCE);
rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
pe_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, data_set);
if ((rsc != NULL)
&& pcmk_is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
found_orphaned_container_filler = true;
}
}
/* Now that all resource state has been unpacked for this node, map any
* orphaned container fillers to their container resource.
*/
if (found_orphaned_container_filler) {
handle_orphaned_container_fillers(xml, data_set);
}
}
static void
set_active(pe_resource_t * rsc)
{
pe_resource_t *top = uber_parent(rsc);
if (top && pcmk_is_set(top->flags, pe_rsc_promotable)) {
rsc->role = RSC_ROLE_UNPROMOTED;
} else {
rsc->role = RSC_ROLE_STARTED;
}
}
static void
set_node_score(gpointer key, gpointer value, gpointer user_data)
{
pe_node_t *node = value;
int *score = user_data;
node->weight = *score;
}
static xmlNode *
find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
int target_rc, pe_working_set_t *data_set)
{
GString *xpath = NULL;
xmlNode *xml = NULL;
CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
return NULL);
xpath = g_string_sized_new(256);
pcmk__g_strcat(xpath,
"//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='", node, "']"
"//" XML_LRM_TAG_RESOURCE
"[@" XML_ATTR_ID "='", resource, "']"
"/" XML_LRM_TAG_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'",
NULL);
/* Need to check against transition_magic too? */
if ((source != NULL) && (strcmp(op, CRMD_ACTION_MIGRATE) == 0)) {
pcmk__g_strcat(xpath,
" and @" XML_LRM_ATTR_MIGRATE_TARGET "='", source, "']",
NULL);
} else if ((source != NULL) && (strcmp(op, CRMD_ACTION_MIGRATED) == 0)) {
pcmk__g_strcat(xpath,
" and @" XML_LRM_ATTR_MIGRATE_SOURCE "='", source, "']",
NULL);
} else {
g_string_append_c(xpath, ']');
}
xml = get_xpath_object((const char *) xpath->str, data_set->input,
LOG_DEBUG);
g_string_free(xpath, TRUE);
if (xml && target_rc >= 0) {
int rc = PCMK_OCF_UNKNOWN_ERROR;
int status = PCMK_EXEC_ERROR;
crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status);
if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
return NULL;
}
}
return xml;
}
static xmlNode *
find_lrm_resource(const char *rsc_id, const char *node_name,
pe_working_set_t *data_set)
{
GString *xpath = NULL;
xmlNode *xml = NULL;
CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
xpath = g_string_sized_new(256);
pcmk__g_strcat(xpath,
"//" XML_CIB_TAG_STATE
"[@" XML_ATTR_UNAME "='", node_name, "']"
"//" XML_LRM_TAG_RESOURCE
"[@" XML_ATTR_ID "='", rsc_id, "']",
NULL);
xml = get_xpath_object((const char *) xpath->str, data_set->input,
LOG_DEBUG);
g_string_free(xpath, TRUE);
return xml;
}
static bool
unknown_on_node(const char *rsc_id, const char *node_name,
pe_working_set_t *data_set)
{
xmlNode *lrm_resource = NULL;
lrm_resource = find_lrm_resource(rsc_id, node_name, data_set);
/* If the resource has no lrm_rsc_op history on the node, that means its
* state is unknown there.
*/
return (lrm_resource == NULL
|| first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP) == NULL);
}
/*!
* \brief Check whether a probe/monitor indicating the resource was not running
* on a node happened after some event
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] xml_op Event that monitor is being compared to
* \param[in] data_set Cluster working set
*
* \return true if such a monitor happened after event, false otherwise
*/
static bool
monitor_not_running_after(const char *rsc_id, const char *node_name,
xmlNode *xml_op, bool same_node,
pe_working_set_t *data_set)
{
/* Any probe/monitor operation on the node indicating it was not running
* there
*/
xmlNode *monitor = find_lrm_op(rsc_id, CRMD_ACTION_STATUS, node_name,
NULL, PCMK_OCF_NOT_RUNNING, data_set);
return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
}
/*!
* \brief Check whether any non-monitor operation on a node happened after some
* event
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] xml_op Event that non-monitor is being compared to
* \param[in] same_node Whether the operations are on the same node
* \param[in] data_set Cluster working set
*
* \return true if such a operation happened after event, false otherwise
*/
static bool
non_monitor_after(const char *rsc_id, const char *node_name, xmlNode *xml_op,
bool same_node, pe_working_set_t *data_set)
{
xmlNode *lrm_resource = NULL;
lrm_resource = find_lrm_resource(rsc_id, node_name, data_set);
if (lrm_resource == NULL) {
return false;
}
for (xmlNode *op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
op != NULL; op = crm_next_same_xml(op)) {
const char * task = NULL;
if (op == xml_op) {
continue;
}
task = crm_element_value(op, XML_LRM_ATTR_TASK);
if (pcmk__str_any_of(task, CRMD_ACTION_START, CRMD_ACTION_STOP,
CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)
&& pe__is_newer_op(op, xml_op, same_node) > 0) {
return true;
}
}
return false;
}
/*!
* \brief Check whether the resource has newer state on a node after a migration
* attempt
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] migrate_to Any migrate_to event that is being compared to
* \param[in] migrate_from Any migrate_from event that is being compared to
* \param[in] data_set Cluster working set
*
* \return true if such a operation happened after event, false otherwise
*/
static bool
newer_state_after_migrate(const char *rsc_id, const char *node_name,
xmlNode *migrate_to, xmlNode *migrate_from,
pe_working_set_t *data_set)
{
xmlNode *xml_op = migrate_to;
const char *source = NULL;
const char *target = NULL;
bool same_node = false;
if (migrate_from) {
xml_op = migrate_from;
}
source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
/* It's preferred to compare to the migrate event on the same node if
* existing, since call ids are more reliable.
*/
if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
if (migrate_from) {
xml_op = migrate_from;
same_node = true;
} else {
xml_op = migrate_to;
}
} else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
if (migrate_to) {
xml_op = migrate_to;
same_node = true;
} else {
xml_op = migrate_from;
}
}
/* If there's any newer non-monitor operation on the node, or any newer
* probe/monitor operation on the node indicating it was not running there,
* the migration events potentially no longer matter for the node.
*/
return non_monitor_after(rsc_id, node_name, xml_op, same_node, data_set)
|| monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
data_set);
}
static void
unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
pe_working_set_t *data_set)
{
/* A successful migration sequence is:
* migrate_to on source node
* migrate_from on target node
* stop on source node
*
* But there could be scenarios like (It's easier to produce with cluster
* property batch-limit=1):
*
* - rscA is live-migrating from node1 to node2.
*
* - Before migrate_to on node1 returns, put node2 into standby.
*
* - Transition aborts upon return of successful migrate_to on node1. New
* transition is going to stop the rscA on both nodes and start it on
* node1.
*
* - While it is stopping on node1, run something that is going to make
* the transition abort again like:
* crm_resource --resource rscA --ban --node node2
*
* - Transition aborts upon return of stop on node1.
*
* Now although there's a stop on node1, it's still a partial migration and
* rscA is still potentially active on node2.
*
* So even if a migrate_to is followed by a stop, we still need to check
* whether there's a corresponding migrate_from or any newer operation on
* the target.
*
* If no migrate_from has happened, the migration is considered to be
* "partial". If the migrate_from failed, make sure the resource gets
* stopped on both source and target (if up).
*
* If the migrate_to and migrate_from both succeeded (which also implies the
* resource is no longer running on the source), but there is no stop, the
* migration is considered to be "dangling". Schedule a stop on the source
* in this case.
*/
int from_rc = 0;
int from_status = 0;
pe_node_t *target_node = NULL;
pe_node_t *source_node = NULL;
xmlNode *migrate_from = NULL;
const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
bool source_newer_op = false;
bool target_newer_state = false;
// Sanity check
CRM_CHECK(source && target && !strcmp(source, node->details->uname), return);
/* If there's any newer non-monitor operation on the source, this migrate_to
* potentially no longer matters for the source.
*/
source_newer_op = non_monitor_after(rsc->id, source, xml_op, true,
data_set);
// Check whether there was a migrate_from action on the target
migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target,
source, -1, data_set);
/* Even if there's a newer non-monitor operation on the source, we still
* need to check how this migrate_to might matter for the target.
*/
if (source_newer_op && migrate_from) {
return;
}
/* If the resource has newer state on the target after the migration
* events, this migrate_to no longer matters for the target.
*/
target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op,
migrate_from, data_set);
if (source_newer_op && target_newer_state) {
return;
}
// Clones are not allowed to migrate, so role can't be promoted
rsc->role = RSC_ROLE_STARTED;
target_node = pe_find_node(data_set->nodes, target);
source_node = pe_find_node(data_set->nodes, source);
if (migrate_from) {
crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
ID(migrate_from), target, from_status, from_rc);
}
if (migrate_from && from_rc == PCMK_OCF_OK
&& (from_status == PCMK_EXEC_DONE)) {
/* The migrate_to and migrate_from both succeeded, so mark the migration
* as "dangling". This will be used to schedule a stop action on the
* source without affecting the target.
*/
pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
source);
rsc->role = RSC_ROLE_STOPPED;
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
} else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed
/* If the resource has newer state on the target, this migrate_to no
* longer matters for the target.
*/
if (!target_newer_state
&& target_node && target_node->details->online) {
pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node,
target_node->details->online);
native_add_running(rsc, target_node, data_set, TRUE);
} else {
/* With the earlier bail logic, migrate_from != NULL here implies
* source_newer_op is false, meaning this migrate_to still matters
* for the source.
* Consider it failed here - forces a restart, prevents migration
*/
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
pe__clear_resource_flags(rsc, pe_rsc_allow_migrate);
}
} else { // Pending, or complete but erased
/* If the resource has newer state on the target, this migrate_to no
* longer matters for the target.
*/
if (!target_newer_state
&& target_node && target_node->details->online) {
pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node,
target_node->details->online);
native_add_running(rsc, target_node, data_set, FALSE);
if (source_node && source_node->details->online) {
/* This is a partial migration: the migrate_to completed
* successfully on the source, but the migrate_from has not
* completed. Remember the source and target; if the newly
* chosen target remains the same when we schedule actions
* later, we may continue with the migration.
*/
rsc->partial_migration_target = target_node;
rsc->partial_migration_source = source_node;
}
} else if (!source_newer_op) {
/* This migrate_to matters for the source only if it's the last
* non-monitor operation here.
* Consider it failed here - forces a restart, prevents migration
*/
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
pe__clear_resource_flags(rsc, pe_rsc_allow_migrate);
}
}
}
static void
unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
pe_working_set_t *data_set)
{
xmlNode *target_migrate_from = NULL;
const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
// Sanity check
CRM_CHECK(source && target && !strcmp(source, node->details->uname), return);
/* If a migration failed, we have to assume the resource is active. Clones
* are not allowed to migrate, so role can't be promoted.
*/
rsc->role = RSC_ROLE_STARTED;
// Check for migrate_from on the target
target_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target,
source, PCMK_OCF_OK, data_set);
if (/* If the resource state is unknown on the target, it will likely be
* probed there.
* Don't just consider it running there. We will get back here anyway in
* case the probe detects it's running there.
*/
!unknown_on_node(rsc->id, target, data_set)
/* If the resource has newer state on the target after the migration
* events, this migrate_to no longer matters for the target.
*/
&& !newer_state_after_migrate(rsc->id, target, xml_op, target_migrate_from,
data_set)) {
/* The resource has no newer state on the target, so assume it's still
* active there.
* (if it is up).
*/
pe_node_t *target_node = pe_find_node(data_set->nodes, target);
if (target_node && target_node->details->online) {
native_add_running(rsc, target_node, data_set, FALSE);
}
} else if (!non_monitor_after(rsc->id, source, xml_op, true, data_set)) {
/* We know the resource has newer state on the target, but this
* migrate_to still matters for the source as long as there's no newer
* non-monitor operation there.
*/
// Mark node as having dangling migration so we can force a stop later
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
}
}
static void
unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node,
xmlNode *xml_op, pe_working_set_t *data_set)
{
xmlNode *source_migrate_to = NULL;
const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
// Sanity check
CRM_CHECK(source && target && !strcmp(target, node->details->uname), return);
/* If a migration failed, we have to assume the resource is active. Clones
* are not allowed to migrate, so role can't be promoted.
*/
rsc->role = RSC_ROLE_STARTED;
// Check for a migrate_to on the source
source_migrate_to = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE,
source, target, PCMK_OCF_OK, data_set);
if (/* If the resource state is unknown on the source, it will likely be
* probed there.
* Don't just consider it running there. We will get back here anyway in
* case the probe detects it's running there.
*/
!unknown_on_node(rsc->id, source, data_set)
/* If the resource has newer state on the source after the migration
* events, this migrate_from no longer matters for the source.
*/
&& !newer_state_after_migrate(rsc->id, source, source_migrate_to, xml_op,
data_set)) {
/* The resource has no newer state on the source, so assume it's still
* active there (if it is up).
*/
pe_node_t *source_node = pe_find_node(data_set->nodes, source);
if (source_node && source_node->details->online) {
native_add_running(rsc, source_node, data_set, TRUE);
}
}
}
static void
record_failed_op(xmlNode *op, const pe_node_t *node,
const pe_resource_t *rsc, pe_working_set_t *data_set)
{
xmlNode *xIter = NULL;
const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
if (node->details->online == FALSE) {
return;
}
for (xIter = data_set->failed->children; xIter; xIter = xIter->next) {
const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY);
const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
if(pcmk__str_eq(op_key, key, pcmk__str_casei) && pcmk__str_eq(uname, node->details->uname, pcmk__str_casei)) {
crm_trace("Skipping duplicate entry %s on %s",
op_key, pe__node_name(node));
return;
}
}
crm_trace("Adding entry %s on %s", op_key, pe__node_name(node));
crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id);
add_node_copy(data_set->failed, op);
}
static const char *get_op_key(xmlNode *xml_op)
{
const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
if(key == NULL) {
key = ID(xml_op);
}
return key;
}
static const char *
last_change_str(const xmlNode *xml_op)
{
time_t when;
const char *when_s = NULL;
if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
&when) == pcmk_ok) {
when_s = pcmk__epoch2str(&when);
if (when_s) {
// Skip day of week to make message shorter
when_s = strchr(when_s, ' ');
if (when_s) {
++when_s;
}
}
}
return ((when_s && *when_s)? when_s : "unknown time");
}
/*!
* \internal
* \brief Compare two on-fail values
*
* \param[in] first One on-fail value to compare
* \param[in] second The other on-fail value to compare
*
* \return A negative number if second is more severe than first, zero if they
* are equal, or a positive number if first is more severe than second.
* \note This is only needed until the action_fail_response values can be
* renumbered at the next API compatibility break.
*/
static int
cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
{
switch (first) {
case action_fail_demote:
switch (second) {
case action_fail_ignore:
return 1;
case action_fail_demote:
return 0;
default:
return -1;
}
break;
case action_fail_reset_remote:
switch (second) {
case action_fail_ignore:
case action_fail_demote:
case action_fail_recover:
return 1;
case action_fail_reset_remote:
return 0;
default:
return -1;
}
break;
case action_fail_restart_container:
switch (second) {
case action_fail_ignore:
case action_fail_demote:
case action_fail_recover:
case action_fail_reset_remote:
return 1;
case action_fail_restart_container:
return 0;
default:
return -1;
}
break;
default:
break;
}
switch (second) {
case action_fail_demote:
return (first == action_fail_ignore)? -1 : 1;
case action_fail_reset_remote:
switch (first) {
case action_fail_ignore:
case action_fail_demote:
case action_fail_recover:
return -1;
default:
return 1;
}
break;
case action_fail_restart_container:
switch (first) {
case action_fail_ignore:
case action_fail_demote:
case action_fail_recover:
case action_fail_reset_remote:
return -1;
default:
return 1;
}
break;
default:
break;
}
return first - second;
}
static void
unpack_rsc_op_failure(pe_resource_t * rsc, pe_node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
bool is_probe = false;
pe_action_t *action = NULL;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *exit_reason = crm_element_value(xml_op,
XML_LRM_ATTR_EXIT_REASON);
CRM_ASSERT(rsc);
CRM_CHECK(task != NULL, return);
*last_failure = xml_op;
is_probe = pcmk_xe_is_probe(xml_op);
if (exit_reason == NULL) {
exit_reason = "";
}
if (!pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)
&& (rc == PCMK_OCF_NOT_INSTALLED)) {
crm_trace("Unexpected result (%s%s%s) was recorded for "
"%s of %s on %s at %s " CRM_XS " rc=%d id=%s",
services_ocf_exitcode_str(rc),
(*exit_reason? ": " : ""), exit_reason,
(is_probe? "probe" : task), rsc->id, pe__node_name(node),
last_change_str(xml_op), rc, ID(xml_op));
} else {
crm_warn("Unexpected result (%s%s%s) was recorded for "
"%s of %s on %s at %s " CRM_XS " rc=%d id=%s",
services_ocf_exitcode_str(rc),
(*exit_reason? ": " : ""), exit_reason,
(is_probe? "probe" : task), rsc->id, pe__node_name(node),
last_change_str(xml_op), rc, ID(xml_op));
if (is_probe && (rc != PCMK_OCF_OK)
&& (rc != PCMK_OCF_NOT_RUNNING)
&& (rc != PCMK_OCF_RUNNING_PROMOTED)) {
/* A failed (not just unexpected) probe result could mean the user
* didn't know resources will be probed even where they can't run.
*/
crm_notice("If it is not possible for %s to run on %s, see "
"the resource-discovery option for location constraints",
rsc->id, pe__node_name(node));
}
record_failed_op(xml_op, node, rsc, data_set);
}
action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
if (cmp_on_fail(*on_fail, action->on_fail) < 0) {
pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
fail2text(action->on_fail), action->uuid, key);
*on_fail = action->on_fail;
}
if (!strcmp(task, CRMD_ACTION_STOP)) {
resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
} else if (!strcmp(task, CRMD_ACTION_MIGRATE)) {
unpack_migrate_to_failure(rsc, node, xml_op, data_set);
} else if (!strcmp(task, CRMD_ACTION_MIGRATED)) {
unpack_migrate_from_failure(rsc, node, xml_op, data_set);
} else if (!strcmp(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_PROMOTED;
} else if (!strcmp(task, CRMD_ACTION_DEMOTE)) {
if (action->on_fail == action_fail_block) {
rsc->role = RSC_ROLE_PROMOTED;
pe__set_next_role(rsc, RSC_ROLE_STOPPED,
"demote with on-fail=block");
} else if(rc == PCMK_OCF_NOT_RUNNING) {
rsc->role = RSC_ROLE_STOPPED;
} else {
/* Staying in the promoted role would put the scheduler and
* controller into a loop. Setting the role to unpromoted is not
* dangerous because the resource will be stopped as part of
* recovery, and any promotion will be ordered after that stop.
*/
rsc->role = RSC_ROLE_UNPROMOTED;
}
}
if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
/* leave stopped */
pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
rsc->role = RSC_ROLE_STOPPED;
} else if (rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(rsc, "Setting %s active", rsc->id);
set_active(rsc);
}
pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
rsc->id, role2text(rsc->role),
pcmk__btoa(node->details->unclean),
fail2text(action->on_fail), role2text(action->fail_role));
if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
pe__set_next_role(rsc, action->fail_role, "failure");
}
if (action->fail_role == RSC_ROLE_STOPPED) {
int score = -INFINITY;
pe_resource_t *fail_rsc = rsc;
if (fail_rsc->parent) {
pe_resource_t *parent = uber_parent(fail_rsc);
if (pe_rsc_is_clone(parent)
&& !pcmk_is_set(parent->flags, pe_rsc_unique)) {
/* For clone resources, if a child fails on an operation
* with on-fail = stop, all the resources fail. Do this by preventing
* the parent from coming up again. */
fail_rsc = parent;
}
}
crm_notice("%s will not be started under current conditions",
fail_rsc->id);
/* make sure it doesn't come up again */
if (fail_rsc->allowed_nodes != NULL) {
g_hash_table_destroy(fail_rsc->allowed_nodes);
}
fail_rsc->allowed_nodes = pe__node_list2table(data_set->nodes);
g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
}
pe_free_action(action);
}
/*!
* \internal
* \brief Check whether a resource with a failed action can be recovered
*
* If resource action is a failed stop and fencing is not possible, mark the
* resource as unmanaged and blocked, since recovery cannot be done.
*
* \param[in,out] rsc Resource with failed action
* \param[in] node Node where action failed
* \param[in] task Name of action that failed
* \param[in] exit_status Exit status of failed action (for logging only)
* \param[in] xml_op XML of failed action result (for logging only)
*/
static void
check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task,
int exit_status, const xmlNode *xml_op)
{
const char *exit_reason = NULL;
if (strcmp(task, CRMD_ACTION_STOP) != 0) {
return; // All actions besides stop are always recoverable
}
if (pe_can_fence(node->details->data_set, node)) {
return; // Failed stops are recoverable via fencing
}
exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON);
pe_proc_err("No further recovery can be attempted for %s "
"because %s on %s failed (%s%s%s) at %s "
CRM_XS " rc=%d id=%s", rsc->id, task, pe__node_name(node),
services_ocf_exitcode_str(exit_status),
((exit_reason == NULL)? "" : ": "), pcmk__s(exit_reason, ""),
last_change_str(xml_op), exit_status, ID(xml_op));
pe__clear_resource_flags(rsc, pe_rsc_managed);
pe__set_resource_flags(rsc, pe_rsc_block);
}
/*!
* \internal
* \brief Remap informational monitor results and operation status
*
* For the monitor results, certain OCF codes are for providing extended information
* to the user about services that aren't yet failed but not entirely healthy either.
* These must be treated as the "normal" result by Pacemaker.
*
* For operation status, the action result can be used to determine an appropriate
* status for the purposes of responding to the action. The status provided by the
* executor is not directly usable since the executor does not know what was expected.
*
* \param[in] xml_op Operation history entry XML from CIB status
* \param[in,out] rsc Resource that operation history entry is for
* \param[in] node Node where operation was executed
* \param[in] data_set Current cluster working set
* \param[in,out] on_fail What should be done about the result
* \param[in] target_rc Expected return code of operation
* \param[in,out] rc Actual return code of operation
* \param[in,out] status Operation execution status
*
* \note If the result is remapped and the node is not shutting down or failed,
* the operation will be recorded in the data set's list of failed operations
* to highlight it for the user.
*
* \note This may update the resource's current and next role.
*/
static void
remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set, enum action_fail_response *on_fail,
int target_rc, int *rc, int *status) {
bool is_probe = false;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *key = get_op_key(xml_op);
const char *exit_reason = crm_element_value(xml_op,
XML_LRM_ATTR_EXIT_REASON);
if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_none)) {
int remapped_rc = pcmk__effective_rc(*rc);
if (*rc != remapped_rc) {
crm_trace("Remapping monitor result %d to %d", *rc, remapped_rc);
if (!node->details->shutdown || node->details->online) {
record_failed_op(xml_op, node, rsc, data_set);
}
*rc = remapped_rc;
}
}
if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
*status = PCMK_EXEC_DONE;
*rc = PCMK_OCF_NOT_RUNNING;
}
/* If the executor reported an operation status of anything but done or
* error, consider that final. But for done or error, we know better whether
* it should be treated as a failure or not, because we know the expected
* result.
*/
if (*status != PCMK_EXEC_DONE && *status != PCMK_EXEC_ERROR) {
return;
}
CRM_ASSERT(rsc);
CRM_CHECK(task != NULL,
*status = PCMK_EXEC_ERROR; return);
*status = PCMK_EXEC_DONE;
if (exit_reason == NULL) {
exit_reason = "";
}
is_probe = pcmk_xe_is_probe(xml_op);
if (is_probe) {
task = "probe";
}
if (target_rc < 0) {
/* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
* Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
* target_rc in the transition key, which (along with the similar case
* of a corrupted transition key in the CIB) will be reported to this
* function as -1. Pacemaker 2.0+ does not support rolling upgrades from
* those versions or processing of saved CIB files from those versions,
* so we do not need to care much about this case.
*/
*status = PCMK_EXEC_ERROR;
crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)",
key, pe__node_name(node));
} else if (target_rc != *rc) {
*status = PCMK_EXEC_ERROR;
pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)",
key, pe__node_name(node),
target_rc, services_ocf_exitcode_str(target_rc),
*rc, services_ocf_exitcode_str(*rc),
(*exit_reason? ": " : ""), exit_reason);
}
switch (*rc) {
case PCMK_OCF_OK:
if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) {
*status = PCMK_EXEC_DONE;
pe_rsc_info(rsc, "Probe found %s active on %s at %s",
rsc->id, pe__node_name(node),
last_change_str(xml_op));
}
break;
case PCMK_OCF_NOT_RUNNING:
if (is_probe || (target_rc == *rc)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
*status = PCMK_EXEC_DONE;
rsc->role = RSC_ROLE_STOPPED;
/* clear any previous failure actions */
*on_fail = action_fail_ignore;
pe__set_next_role(rsc, RSC_ROLE_UNKNOWN, "not running");
}
break;
case PCMK_OCF_RUNNING_PROMOTED:
if (is_probe && (*rc != target_rc)) {
*status = PCMK_EXEC_DONE;
pe_rsc_info(rsc,
"Probe found %s active and promoted on %s at %s",
rsc->id, pe__node_name(node),
last_change_str(xml_op));
}
rsc->role = RSC_ROLE_PROMOTED;
break;
case PCMK_OCF_DEGRADED_PROMOTED:
case PCMK_OCF_FAILED_PROMOTED:
rsc->role = RSC_ROLE_PROMOTED;
*status = PCMK_EXEC_ERROR;
break;
case PCMK_OCF_NOT_CONFIGURED:
*status = PCMK_EXEC_ERROR_FATAL;
break;
case PCMK_OCF_UNIMPLEMENT_FEATURE:
{
guint interval_ms = 0;
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS,
&interval_ms);
if (interval_ms == 0) {
check_recoverable(rsc, node, task, *rc, xml_op);
*status = PCMK_EXEC_ERROR_HARD;
} else {
*status = PCMK_EXEC_NOT_SUPPORTED;
}
}
break;
case PCMK_OCF_NOT_INSTALLED:
case PCMK_OCF_INVALID_PARAM:
case PCMK_OCF_INSUFFICIENT_PRIV:
check_recoverable(rsc, node, task, *rc, xml_op);
*status = PCMK_EXEC_ERROR_HARD;
break;
default:
if (*status == PCMK_EXEC_DONE) {
crm_info("Treating unknown exit status %d from %s of %s "
"on %s at %s as failure",
*rc, task, rsc->id, pe__node_name(node),
last_change_str(xml_op));
*status = PCMK_EXEC_ERROR;
}
break;
}
pe_rsc_trace(rsc, "Remapped %s status to '%s'",
key, pcmk_exec_status_str(*status));
}
// return TRUE if start or monitor last failure but parameters changed
static bool
should_clear_for_param_change(xmlNode *xml_op, const char *task,
pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set)
{
if (!strcmp(task, "start") || !strcmp(task, "monitor")) {
if (pe__bundle_needs_remote_name(rsc, data_set)) {
/* We haven't allocated resources yet, so we can't reliably
* substitute addr parameters for the REMOTE_CONTAINER_HACK.
* When that's needed, defer the check until later.
*/
pe__add_param_check(xml_op, rsc, node, pe_check_last_failure,
data_set);
} else {
op_digest_cache_t *digest_data = NULL;
digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
switch (digest_data->rc) {
case RSC_DIGEST_UNKNOWN:
crm_trace("Resource %s history entry %s on %s"
" has no digest to compare",
rsc->id, get_op_key(xml_op), node->details->id);
break;
case RSC_DIGEST_MATCH:
break;
default:
return TRUE;
}
}
}
return FALSE;
}
// Order action after fencing of remote node, given connection rsc
static void
order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn,
pe_working_set_t *data_set)
{
pe_node_t *remote_node = pe_find_node(data_set->nodes, remote_conn->id);
if (remote_node) {
pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
FALSE, data_set);
order_actions(fence, action, pe_order_implies_then);
}
}
static bool
should_ignore_failure_timeout(pe_resource_t *rsc, xmlNode *xml_op,
const char *task, guint interval_ms,
bool is_last_failure, pe_working_set_t *data_set)
{
/* Clearing failures of recurring monitors has special concerns. The
* executor reports only changes in the monitor result, so if the
* monitor is still active and still getting the same failure result,
* that will go undetected after the failure is cleared.
*
* Also, the operation history will have the time when the recurring
* monitor result changed to the given code, not the time when the
* result last happened.
*
* @TODO We probably should clear such failures only when the failure
* timeout has passed since the last occurrence of the failed result.
* However we don't record that information. We could maybe approximate
* that by clearing only if there is a more recent successful monitor or
* stop result, but we don't even have that information at this point
* since we are still unpacking the resource's operation history.
*
* This is especially important for remote connection resources with a
* reconnect interval, so in that case, we skip clearing failures
* if the remote node hasn't been fenced.
*/
if (rsc->remote_reconnect_ms
&& pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)
&& (interval_ms != 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
if (remote_node && !remote_node->details->remote_was_fenced) {
if (is_last_failure) {
crm_info("Waiting to clear monitor failure for remote node %s"
" until fencing has occurred", rsc->id);
}
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Check operation age and schedule failure clearing when appropriate
*
* This function has two distinct purposes. The first is to check whether an
* operation history entry is expired (i.e. the resource has a failure timeout,
* the entry is older than the timeout, and the resource either has no fail
* count or its fail count is entirely older than the timeout). The second is to
* schedule fail count clearing when appropriate (i.e. the operation is expired
* and either the resource has an expired fail count or the operation is a
* last_failure for a remote connection resource with a reconnect interval,
* or the operation is a last_failure for a start or monitor operation and the
* resource's parameters have changed since the operation).
*
* \param[in] rsc Resource that operation happened to
* \param[in] node Node that operation happened on
* \param[in] rc Actual result of operation
* \param[in] xml_op Operation history entry XML
* \param[in] data_set Current working set
*
* \return TRUE if operation history entry is expired, FALSE otherwise
*/
static bool
check_operation_expiry(pe_resource_t *rsc, pe_node_t *node, int rc,
xmlNode *xml_op, pe_working_set_t *data_set)
{
bool expired = FALSE;
bool is_last_failure = pcmk__ends_with(ID(xml_op), "_last_failure_0");
time_t last_run = 0;
guint interval_ms = 0;
int unexpired_fail_count = 0;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *clear_reason = NULL;
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if ((rsc->failure_timeout > 0)
&& (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
&last_run) == 0)) {
// Resource has a failure-timeout, and history entry has a timestamp
time_t now = get_effective_time(data_set);
time_t last_failure = 0;
// Is this particular operation history older than the failure timeout?
if ((now >= (last_run + rsc->failure_timeout))
&& !should_ignore_failure_timeout(rsc, xml_op, task, interval_ms,
is_last_failure, data_set)) {
expired = TRUE;
}
// Does the resource as a whole have an unexpired fail count?
unexpired_fail_count = pe_get_failcount(node, rsc, &last_failure,
pe_fc_effective, xml_op,
data_set);
// Update scheduler recheck time according to *last* failure
crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
" last-failure@%lld",
ID(xml_op), (long long) last_run, (expired? "" : "not "),
(long long) now, unexpired_fail_count, rsc->failure_timeout,
(long long) last_failure);
last_failure += rsc->failure_timeout + 1;
if (unexpired_fail_count && (now < last_failure)) {
pe__update_recheck_time(last_failure, data_set);
}
}
if (expired) {
if (pe_get_failcount(node, rsc, NULL, pe_fc_default, xml_op, data_set)) {
// There is a fail count ignoring timeout
if (unexpired_fail_count == 0) {
// There is no fail count considering timeout
clear_reason = "it expired";
} else {
/* This operation is old, but there is an unexpired fail count.
* In a properly functioning cluster, this should only be
* possible if this operation is not a failure (otherwise the
* fail count should be expired too), so this is really just a
* failsafe.
*/
expired = FALSE;
}
} else if (is_last_failure && rsc->remote_reconnect_ms) {
/* Clear any expired last failure when reconnect interval is set,
* even if there is no fail count.
*/
clear_reason = "reconnect interval is set";
}
}
if (!expired && is_last_failure
&& should_clear_for_param_change(xml_op, task, rsc, node, data_set)) {
clear_reason = "resource parameters have changed";
}
if (clear_reason != NULL) {
// Schedule clearing of the fail count
pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason,
data_set);
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)
&& rsc->remote_reconnect_ms) {
/* If we're clearing a remote connection due to a reconnect
* interval, we want to wait until any scheduled fencing
* completes.
*
* We could limit this to remote_node->details->unclean, but at
* this point, that's always true (it won't be reliable until
* after unpack_node_history() is done).
*/
crm_info("Clearing %s failure will wait until any scheduled "
"fencing of %s completes", task, rsc->id);
order_after_remote_fencing(clear_op, rsc, data_set);
}
}
if (expired && (interval_ms == 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
switch(rc) {
case PCMK_OCF_OK:
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_PROMOTED:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_PROMOTED:
// Don't expire probes that return these values
expired = FALSE;
break;
}
}
return expired;
}
int pe__target_rc_from_xml(xmlNode *xml_op)
{
int target_rc = 0;
const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
if (key == NULL) {
return -1;
}
decode_transition_key(key, NULL, NULL, NULL, &target_rc);
return target_rc;
}
static enum action_fail_response
get_action_on_fail(pe_resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
{
enum action_fail_response result = action_fail_recover;
pe_action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
result = action->on_fail;
pe_free_action(action);
return result;
}
static void
update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, const char * task, int rc,
xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
gboolean clear_past_failure = FALSE;
CRM_ASSERT(rsc);
CRM_ASSERT(xml_op);
if (rc == PCMK_OCF_NOT_INSTALLED || (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op))) {
rsc->role = RSC_ROLE_STOPPED;
} else if (rc == PCMK_OCF_NOT_RUNNING) {
clear_past_failure = TRUE;
} else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
if (last_failure) {
const char *op_key = get_op_key(xml_op);
const char *last_failure_key = get_op_key(last_failure);
if (pcmk__str_eq(op_key, last_failure_key, pcmk__str_casei)) {
clear_past_failure = TRUE;
}
}
if (rsc->role < RSC_ROLE_STARTED) {
set_active(rsc);
}
} else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
} else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
rsc->role = RSC_ROLE_STOPPED;
clear_past_failure = TRUE;
} else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) {
rsc->role = RSC_ROLE_PROMOTED;
clear_past_failure = TRUE;
} else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
if (*on_fail == action_fail_demote) {
// Demote clears an error only if on-fail=demote
clear_past_failure = TRUE;
}
rsc->role = RSC_ROLE_UNPROMOTED;
} else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
} else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
unpack_migrate_to_success(rsc, node, xml_op, data_set);
} else if (rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(rsc, "%s active on %s", rsc->id, pe__node_name(node));
set_active(rsc);
}
/* clear any previous failure actions */
if (clear_past_failure) {
switch (*on_fail) {
case action_fail_stop:
case action_fail_fence:
case action_fail_migrate:
case action_fail_standby:
pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
rsc->id, fail2text(*on_fail));
break;
case action_fail_block:
case action_fail_ignore:
case action_fail_demote:
case action_fail_recover:
case action_fail_restart_container:
*on_fail = action_fail_ignore;
pe__set_next_role(rsc, RSC_ROLE_UNKNOWN, "clear past failures");
break;
case action_fail_reset_remote:
if (rsc->remote_reconnect_ms == 0) {
/* With no reconnect interval, the connection is allowed to
* start again after the remote node is fenced and
* completely stopped. (With a reconnect interval, we wait
* for the failure to be cleared entirely before attempting
* to reconnect.)
*/
*on_fail = action_fail_ignore;
pe__set_next_role(rsc, RSC_ROLE_UNKNOWN,
"clear past failures and reset remote");
}
break;
}
}
}
static void
unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
xmlNode **last_failure, enum action_fail_response *on_fail,
pe_working_set_t *data_set)
{
int rc = 0;
int old_rc = 0;
int task_id = 0;
int target_rc = 0;
int old_target_rc = 0;
int status = PCMK_EXEC_UNKNOWN;
guint interval_ms = 0;
const char *task = NULL;
const char *task_key = NULL;
const char *exit_reason = NULL;
bool expired = false;
pe_resource_t *parent = rsc;
enum action_fail_response failure_strategy = action_fail_recover;
bool maskable_probe_failure = false;
CRM_CHECK(rsc && node && xml_op, return);
target_rc = pe__target_rc_from_xml(xml_op);
task_key = get_op_key(xml_op);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON);
if (exit_reason == NULL) {
exit_reason = "";
}
crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
CRM_CHECK(task != NULL, return);
CRM_CHECK((status >= PCMK_EXEC_PENDING) && (status <= PCMK_EXEC_MAX),
return);
if (!strcmp(task, CRMD_ACTION_NOTIFY) ||
!strcmp(task, CRMD_ACTION_METADATA)) {
/* safe to ignore these */
return;
}
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
parent = uber_parent(rsc);
}
pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
task_key, task, task_id, status, rc, pe__node_name(node),
role2text(rsc->role));
if (node->details->unclean) {
pe_rsc_trace(rsc,
"%s is running on %s, which is unclean (further action "
"depends on value of stop's on-fail attribute)",
rsc->id, pe__node_name(node));
}
/* It should be possible to call remap_operation() first then call
* check_operation_expiry() only if rc != target_rc, because there should
* never be a fail count without at least one unexpected result in the
* resource history. That would be more efficient by avoiding having to call
* check_operation_expiry() for expected results.
*
* However, we do have such configurations in the scheduler regression
* tests, even if it shouldn't be possible with the current code. It's
* probably a good idea anyway, but that would require updating the test
* inputs to something currently possible.
*/
if ((status != PCMK_EXEC_NOT_INSTALLED)
&& check_operation_expiry(rsc, node, rc, xml_op, data_set)) {
expired = true;
}
old_rc = rc;
old_target_rc = target_rc;
remap_operation(xml_op, rsc, node, data_set, on_fail, target_rc,
&rc, &status);
maskable_probe_failure = !pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op);
if (expired && maskable_probe_failure && old_rc != old_target_rc) {
if (rsc->role <= RSC_ROLE_STOPPED) {
rsc->role = RSC_ROLE_UNKNOWN;
}
goto done;
} else if (expired && (rc != target_rc)) {
const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
if (interval_ms == 0) {
crm_notice("Ignoring expired %s failure on %s "
CRM_XS " actual=%d expected=%d magic=%s",
task_key, pe__node_name(node), rc, target_rc, magic);
goto done;
} else if(node->details->online && node->details->unclean == FALSE) {
/* Reschedule the recurring monitor. schedule_cancel() won't work at
* this stage, so as a hacky workaround, forcibly change the restart
* digest so pcmk__check_action_config() does what we want later.
*
* @TODO We should skip this if there is a newer successful monitor.
* Also, this causes rescheduling only if the history entry
* has an op-digest (which the expire-non-blocked-failure
* scheduler regression test doesn't, but that may not be a
* realistic scenario in production).
*/
crm_notice("Rescheduling %s after failure expired on %s "
CRM_XS " actual=%d expected=%d magic=%s",
task_key, pe__node_name(node), rc, target_rc, magic);
crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
goto done;
}
}
if (maskable_probe_failure) {
crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
services_ocf_exitcode_str(old_rc), rsc->id,
pe__node_name(node));
update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure,
on_fail, data_set);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
record_failed_op(xml_op, node, rsc, data_set);
resource_location(parent, node, -INFINITY, "masked-probe-failure", data_set);
goto done;
}
switch (status) {
case PCMK_EXEC_CANCELLED:
// Should never happen
pe_err("Resource history contains cancellation '%s' "
"(%s of %s on %s at %s)",
ID(xml_op), task, rsc->id, pe__node_name(node),
last_change_str(xml_op));
goto done;
case PCMK_EXEC_PENDING:
if (!strcmp(task, CRMD_ACTION_START)) {
pe__set_resource_flags(rsc, pe_rsc_start_pending);
set_active(rsc);
} else if (!strcmp(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_PROMOTED;
} else if (!strcmp(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
/* If a pending migrate_to action is out on a unclean node,
* we have to force the stop action on the target. */
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
pe_node_t *target = pe_find_node(data_set->nodes, migrate_target);
if (target) {
stop_action(rsc, target, FALSE);
}
}
if (rsc->pending_task == NULL) {
if ((interval_ms != 0) || strcmp(task, CRMD_ACTION_STATUS)) {
rsc->pending_task = strdup(task);
rsc->pending_node = node;
} else {
/* Pending probes are not printed, even if pending
* operations are requested. If someone ever requests that
* behavior, enable the below and the corresponding part of
* native.c:native_pending_task().
*/
#if 0
rsc->pending_task = strdup("probe");
rsc->pending_node = node;
#endif
}
}
goto done;
case PCMK_EXEC_DONE:
pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s",
task, rsc->id, pe__node_name(node),
last_change_str(xml_op), ID(xml_op));
update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
goto done;
case PCMK_EXEC_NOT_INSTALLED:
failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
if (failure_strategy == action_fail_ignore) {
crm_warn("Cannot ignore failed %s of %s on %s: "
"Resource agent doesn't exist "
CRM_XS " status=%d rc=%d id=%s",
task, rsc->id, pe__node_name(node), status, rc,
ID(xml_op));
/* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
*on_fail = action_fail_migrate;
}
resource_location(parent, node, -INFINITY, "hard-error", data_set);
unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
goto done;
case PCMK_EXEC_NOT_CONNECTED:
if (pe__is_guest_or_remote_node(node)
&& pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_managed)) {
/* We should never get into a situation where a managed remote
* connection resource is considered OK but a resource action
* behind the connection gets a "not connected" status. But as a
* fail-safe in case a bug or unusual circumstances do lead to
* that, ensure the remote connection is considered failed.
*/
pe__set_resource_flags(node->details->remote_rsc,
pe_rsc_failed|pe_rsc_stop);
}
break; // Not done, do error handling
case PCMK_EXEC_ERROR:
case PCMK_EXEC_ERROR_HARD:
case PCMK_EXEC_ERROR_FATAL:
case PCMK_EXEC_TIMEOUT:
case PCMK_EXEC_NOT_SUPPORTED:
case PCMK_EXEC_INVALID:
break; // Not done, do error handling
case PCMK_EXEC_NO_FENCE_DEVICE:
case PCMK_EXEC_NO_SECRETS:
status = PCMK_EXEC_ERROR_HARD;
break; // Not done, do error handling
}
failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
if ((failure_strategy == action_fail_ignore)
|| (failure_strategy == action_fail_restart_container
&& !strcmp(task, CRMD_ACTION_STOP))) {
crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s "
"succeeded " CRM_XS " rc=%d id=%s",
task, services_ocf_exitcode_str(rc),
(*exit_reason? ": " : ""), exit_reason, rsc->id,
pe__node_name(node), last_change_str(xml_op), rc,
ID(xml_op));
update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure,
on_fail, data_set);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
pe__set_resource_flags(rsc, pe_rsc_failure_ignored);
record_failed_op(xml_op, node, rsc, data_set);
if ((failure_strategy == action_fail_restart_container)
&& cmp_on_fail(*on_fail, action_fail_recover) <= 0) {
*on_fail = failure_strategy;
}
} else {
unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail,
data_set);
if (status == PCMK_EXEC_ERROR_HARD) {
do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
"Preventing %s from restarting on %s because "
"of hard failure (%s%s%s)" CRM_XS " rc=%d id=%s",
parent->id, pe__node_name(node),
services_ocf_exitcode_str(rc),
(*exit_reason? ": " : ""), exit_reason,
rc, ID(xml_op));
resource_location(parent, node, -INFINITY, "hard-error", data_set);
} else if (status == PCMK_EXEC_ERROR_FATAL) {
crm_err("Preventing %s from restarting anywhere because "
"of fatal failure (%s%s%s) " CRM_XS " rc=%d id=%s",
parent->id, services_ocf_exitcode_str(rc),
(*exit_reason? ": " : ""), exit_reason,
rc, ID(xml_op));
resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
}
}
done:
pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s",
rsc->id, task, role2text(rsc->role),
role2text(rsc->next_role));
}
static void
add_node_attrs(xmlNode *xml_obj, pe_node_t *node, bool overwrite,
pe_working_set_t *data_set)
{
const char *cluster_name = NULL;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
strdup(node->details->id));
if (pcmk__str_eq(node->details->id, data_set->dc_uuid, pcmk__str_casei)) {
data_set->dc_node = node;
node->details->is_dc = TRUE;
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
} else {
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
}
cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
if (cluster_name) {
g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
strdup(cluster_name));
}
pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data,
node->details->attrs, NULL, overwrite, data_set);
if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
const char *site_name = pe_node_attribute_raw(node, "site-name");
if (site_name) {
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_SITE_NAME),
strdup(site_name));
} else if (cluster_name) {
/* Default to cluster-name if unset */
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_SITE_NAME),
strdup(cluster_name));
}
}
}
static GList *
extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
{
int counter = -1;
int stop_index = -1;
int start_index = -1;
xmlNode *rsc_op = NULL;
GList *gIter = NULL;
GList *op_list = NULL;
GList *sorted_op_list = NULL;
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for (rsc_op = pcmk__xe_first_child(rsc_entry);
rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP,
pcmk__str_none)) {
crm_xml_add(rsc_op, "resource", rsc);
crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
op_list = g_list_prepend(op_list, rsc_op);
}
}
if (op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
/* create active recurring operations as optional */
if (active_filter == FALSE) {
return sorted_op_list;
}
op_list = NULL;
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
if (start_index < stop_index) {
crm_trace("Skipping %s: not active", ID(rsc_entry));
break;
} else if (counter < start_index) {
crm_trace("Skipping %s: old", ID(rsc_op));
continue;
}
op_list = g_list_append(op_list, rsc_op);
}
g_list_free(sorted_op_list);
return op_list;
}
GList *
find_operations(const char *rsc, const char *node, gboolean active_filter,
pe_working_set_t * data_set)
{
GList *output = NULL;
GList *intermediate = NULL;
xmlNode *tmp = NULL;
xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
pe_node_t *this_node = NULL;
xmlNode *node_state = NULL;
for (node_state = pcmk__xe_first_child(status); node_state != NULL;
node_state = pcmk__xe_next(node_state)) {
if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
continue;
}
this_node = pe_find_node(data_set->nodes, uname);
if(this_node == NULL) {
CRM_LOG_ASSERT(this_node != NULL);
continue;
} else if (pe__is_guest_or_remote_node(this_node)) {
determine_remote_online_status(data_set, this_node);
} else {
determine_online_status(node_state, this_node, data_set);
}
if (this_node->details->online
|| pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
/* offline nodes run no resources...
* unless stonith is enabled in which case we need to
* make sure rsc start events happen after the stonith
*/
xmlNode *lrm_rsc = NULL;
tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
for (lrm_rsc = pcmk__xe_first_child(tmp); lrm_rsc != NULL;
lrm_rsc = pcmk__xe_next(lrm_rsc)) {
if (pcmk__str_eq((const char *)lrm_rsc->name,
XML_LRM_TAG_RESOURCE, pcmk__str_none)) {
const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
continue;
}
intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
output = g_list_concat(output, intermediate);
}
}
}
}
}
return output;
}