diff --git a/cts/scheduler/scores/asymmetric.scores b/cts/scheduler/scores/asymmetric.scores index 69310bf9e8..93ed82f3ec 100644 --- a/cts/scheduler/scores/asymmetric.scores +++ b/cts/scheduler/scores/asymmetric.scores @@ -1,14 +1,13 @@ ebe3fb6e-7778-426e-be58-190ab1ff3dd3:0 promotion score on puma1: 5 ebe3fb6e-7778-426e-be58-190ab1ff3dd3:1 promotion score on puma3: 100 pcmk__clone_assign: ebe3fb6e-7778-426e-be58-190ab1ff3dd3:0 allocation score on puma1: 105 pcmk__clone_assign: ebe3fb6e-7778-426e-be58-190ab1ff3dd3:0 allocation score on puma3: 0 pcmk__clone_assign: ebe3fb6e-7778-426e-be58-190ab1ff3dd3:1 allocation score on puma1: 0 pcmk__clone_assign: ebe3fb6e-7778-426e-be58-190ab1ff3dd3:1 allocation score on puma3: 200 pcmk__clone_assign: ms_drbd_poolA allocation score on puma1: 0 pcmk__clone_assign: ms_drbd_poolA allocation score on puma3: 0 pcmk__primitive_assign: ebe3fb6e-7778-426e-be58-190ab1ff3dd3:0 allocation score on puma1: 105 pcmk__primitive_assign: ebe3fb6e-7778-426e-be58-190ab1ff3dd3:0 allocation score on puma3: -INFINITY pcmk__primitive_assign: ebe3fb6e-7778-426e-be58-190ab1ff3dd3:1 allocation score on puma1: 0 pcmk__primitive_assign: ebe3fb6e-7778-426e-be58-190ab1ff3dd3:1 allocation score on puma3: 200 -pcmk__primitive_assign: vpool_ip_poolA allocation score on puma3: -INFINITY diff --git a/doc/sphinx/Pacemaker_Explained/constraints.rst b/doc/sphinx/Pacemaker_Explained/constraints.rst index 75585c12d6..902f1d069e 100644 --- a/doc/sphinx/Pacemaker_Explained/constraints.rst +++ b/doc/sphinx/Pacemaker_Explained/constraints.rst @@ -1,1120 +1,1120 @@ .. index:: single: constraint single: resource; constraint .. _constraints: Resource Constraints -------------------- .. index:: single: resource; score single: node; score Scores ###### Scores of all kinds are integral to how the cluster works. Practically everything from moving a resource to deciding which resource to stop in a degraded cluster is achieved by manipulating scores in some way. Scores are calculated per resource and node. Any node with a negative score for a resource can't run that resource. The cluster places a resource on the node with the highest score for it. Infinity Math _____________ Pacemaker implements **INFINITY** (or equivalently, **+INFINITY**) internally as a score of 1,000,000. Addition and subtraction with it follow these three basic rules: * Any value + **INFINITY** = **INFINITY** * Any value - **INFINITY** = -**INFINITY** * **INFINITY** - **INFINITY** = **-INFINITY** .. note:: What if you want to use a score higher than 1,000,000? Typically this possibility arises when someone wants to base the score on some external metric that might go above 1,000,000. The short answer is you can't. The long answer is it is sometimes possible work around this limitation creatively. You may be able to set the score to some computed value based on the external metric rather than use the metric directly. For nodes, you can store the metric as a node attribute, and query the attribute when computing the score (possibly as part of a custom resource agent). .. _location-constraint: .. index:: single: location constraint single: constraint; location Deciding Which Nodes a Resource Can Run On ########################################## *Location constraints* tell the cluster which nodes a resource can run on. There are two alternative strategies. One way is to say that, by default, resources can run anywhere, and then the location constraints specify nodes that are not allowed (an *opt-out* cluster). The other way is to start with nothing able to run anywhere, and use location constraints to selectively enable allowed nodes (an *opt-in* cluster). Whether you should choose opt-in or opt-out depends on your personal preference and the make-up of your cluster. If most of your resources can run on most of the nodes, then an opt-out arrangement is likely to result in a simpler configuration. On the other-hand, if most resources can only run on a small subset of nodes, an opt-in configuration might be simpler. .. index:: pair: XML element; rsc_location single: constraint; rsc_location Location Properties ___________________ .. table:: **Attributes of a rsc_location Element** :class: longtable :widths: 1 1 4 +--------------------+---------+----------------------------------------------------------------------------------------------+ | Attribute | Default | Description | +====================+=========+==============================================================================================+ | id | | .. index:: | | | | single: rsc_location; attribute, id | | | | single: attribute; id (rsc_location) | | | | single: id; rsc_location attribute | | | | | | | | A unique name for the constraint (required) | +--------------------+---------+----------------------------------------------------------------------------------------------+ | rsc | | .. index:: | | | | single: rsc_location; attribute, rsc | | | | single: attribute; rsc (rsc_location) | | | | single: rsc; rsc_location attribute | | | | | | | | The name of the resource to which this constraint | | | | applies. A location constraint must either have a | | | | ``rsc``, have a ``rsc-pattern``, or contain at | | | | least one resource set. | +--------------------+---------+----------------------------------------------------------------------------------------------+ | rsc-pattern | | .. index:: | | | | single: rsc_location; attribute, rsc-pattern | | | | single: attribute; rsc-pattern (rsc_location) | | | | single: rsc-pattern; rsc_location attribute | | | | | | | | A pattern matching the names of resources to which | | | | this constraint applies. The syntax is the same as | | | | `POSIX `_ | | | | extended regular expressions, with the addition of an | | | | initial ``!`` indicating that resources *not* matching | | | | the pattern are selected. If the regular expression | | | | contains submatches, and the constraint is governed by | | | | a :ref:`rule `, the submatches can be | | | | referenced as ``%1`` through ``%9`` in the rule's | | | | ``score-attribute`` or a rule expression's ``attribute`` | | | | (see :ref:`s-rsc-pattern-rules`). A location constraint | | | | must either have a ``rsc``, have a ``rsc-pattern``, or | | | | contain at least one resource set. | +--------------------+---------+----------------------------------------------------------------------------------------------+ | node | | .. index:: | | | | single: rsc_location; attribute, node | | | | single: attribute; node (rsc_location) | | | | single: node; rsc_location attribute | | | | | | | | The name of the node to which this constraint applies. | | | | A location constraint must either have a ``node`` and | | | | ``score``, or contain at least one rule. | +--------------------+---------+----------------------------------------------------------------------------------------------+ | score | | .. index:: | | | | single: rsc_location; attribute, score | | | | single: attribute; score (rsc_location) | | | | single: score; rsc_location attribute | | | | | | | | Positive values indicate a preference for running the | | | | affected resource(s) on ``node`` -- the higher the value, | | | | the stronger the preference. Negative values indicate | | | | the resource(s) should avoid this node (a value of | | | | **-INFINITY** changes "should" to "must"). A location | | | | constraint must either have a ``node`` and ``score``, | | | | or contain at least one rule. | +--------------------+---------+----------------------------------------------------------------------------------------------+ | resource-discovery | always | .. index:: | | | | single: rsc_location; attribute, resource-discovery | | | | single: attribute; resource-discovery (rsc_location) | | | | single: resource-discovery; rsc_location attribute | | | | | | | | Whether Pacemaker should perform resource discovery | | | | (that is, check whether the resource is already running) | | | | for this resource on this node. This should normally be | | | | left as the default, so that rogue instances of a | | | | service can be stopped when they are running where they | | | | are not supposed to be. However, there are two | | | | situations where disabling resource discovery is a good | | | | idea: when a service is not installed on a node, | | | | discovery might return an error (properly written OCF | | | | agents will not, so this is usually only seen with other | | | | agent types); and when Pacemaker Remote is used to scale | | | | a cluster to hundreds of nodes, limiting resource | | | | discovery to allowed nodes can significantly boost | | | | performance. | | | | | | | | * ``always:`` Always perform resource discovery for | | | | the specified resource on this node. | | | | | | | | * ``never:`` Never perform resource discovery for the | | | | specified resource on this node. This option should | | | | generally be used with a -INFINITY score, although | | | | that is not strictly required. | | | | | | | | * ``exclusive:`` Perform resource discovery for the | | | | specified resource only on this node (and other nodes | | | | similarly marked as ``exclusive``). Multiple location | | | | constraints using ``exclusive`` discovery for the | | | | same resource across different nodes creates a subset | | | | of nodes resource-discovery is exclusive to. If a | | | | resource is marked for ``exclusive`` discovery on one | | | | or more nodes, that resource is only allowed to be | | | | placed within that subset of nodes. | +--------------------+---------+----------------------------------------------------------------------------------------------+ .. warning:: Setting ``resource-discovery`` to ``never`` or ``exclusive`` removes Pacemaker's ability to detect and stop unwanted instances of a service running where it's not supposed to be. It is up to the system administrator (you!) to make sure that the service can *never* be active on nodes without ``resource-discovery`` (such as by leaving the relevant software uninstalled). .. index:: single: Asymmetrical Clusters single: Opt-In Clusters Asymmetrical "Opt-In" Clusters ______________________________ To create an opt-in cluster, start by preventing resources from running anywhere by default: .. code-block:: none # crm_attribute --name symmetric-cluster --update false Then start enabling nodes. The following fragment says that the web server prefers **sles-1**, the database prefers **sles-2** and both can fail over to **sles-3** if their most preferred node fails. .. topic:: Opt-in location constraints for two resources .. code-block:: xml .. index:: single: Symmetrical Clusters single: Opt-Out Clusters Symmetrical "Opt-Out" Clusters ______________________________ To create an opt-out cluster, start by allowing resources to run anywhere by default: .. code-block:: none # crm_attribute --name symmetric-cluster --update true Then start disabling nodes. The following fragment is the equivalent of the above opt-in configuration. .. topic:: Opt-out location constraints for two resources .. code-block:: xml .. _node-score-equal: What if Two Nodes Have the Same Score _____________________________________ If two nodes have the same score, then the cluster will choose one. This choice may seem random and may not be what was intended, however the cluster was not given enough information to know any better. .. topic:: Constraints where a resource prefers two nodes equally .. code-block:: xml In the example above, assuming no other constraints and an inactive cluster, **Webserver** would probably be placed on **sles-1** and **Database** on **sles-2**. It would likely have placed **Webserver** based on the node's uname and **Database** based on the desire to spread the resource load evenly across the cluster. However other factors can also be involved in more complex configurations. .. _s-rsc-pattern: Specifying locations using pattern matching ___________________________________________ A location constraint can affect all resources whose IDs match a given pattern. The following example bans resources named **ip-httpd**, **ip-asterisk**, **ip-gateway**, etc., from **node1**. .. topic:: Location constraint banning all resources matching a pattern from one node .. code-block:: xml .. index:: single: constraint; ordering single: resource; start order .. _s-resource-ordering: Specifying the Order in which Resources Should Start/Stop ######################################################### *Ordering constraints* tell the cluster the order in which certain resource actions should occur. .. important:: Ordering constraints affect *only* the ordering of resource actions; they do *not* require that the resources be placed on the same node. If you want resources to be started on the same node *and* in a specific order, you need both an ordering constraint *and* a colocation constraint (see :ref:`s-resource-colocation`), or alternatively, a group (see :ref:`group-resources`). .. index:: pair: XML element; rsc_order pair: constraint; rsc_order Ordering Properties ___________________ .. table:: **Attributes of a rsc_order Element** :class: longtable :widths: 1 2 4 +--------------+----------------------------+-------------------------------------------------------------------+ | Field | Default | Description | +==============+============================+===================================================================+ | id | | .. index:: | | | | single: rsc_order; attribute, id | | | | single: attribute; id (rsc_order) | | | | single: id; rsc_order attribute | | | | | | | | A unique name for the constraint | +--------------+----------------------------+-------------------------------------------------------------------+ | first | | .. index:: | | | | single: rsc_order; attribute, first | | | | single: attribute; first (rsc_order) | | | | single: first; rsc_order attribute | | | | | | | | Name of the resource that the ``then`` resource | | | | depends on | +--------------+----------------------------+-------------------------------------------------------------------+ | then | | .. index:: | | | | single: rsc_order; attribute, then | | | | single: attribute; then (rsc_order) | | | | single: then; rsc_order attribute | | | | | | | | Name of the dependent resource | +--------------+----------------------------+-------------------------------------------------------------------+ | first-action | start | .. index:: | | | | single: rsc_order; attribute, first-action | | | | single: attribute; first-action (rsc_order) | | | | single: first-action; rsc_order attribute | | | | | | | | The action that the ``first`` resource must complete | | | | before ``then-action`` can be initiated for the ``then`` | | | | resource. Allowed values: ``start``, ``stop``, | | | | ``promote``, ``demote``. | +--------------+----------------------------+-------------------------------------------------------------------+ | then-action | value of ``first-action`` | .. index:: | | | | single: rsc_order; attribute, then-action | | | | single: attribute; then-action (rsc_order) | | | | single: first-action; rsc_order attribute | | | | | | | | The action that the ``then`` resource can execute only | | | | after the ``first-action`` on the ``first`` resource has | | | | completed. Allowed values: ``start``, ``stop``, | | | | ``promote``, ``demote``. | +--------------+----------------------------+-------------------------------------------------------------------+ | kind | Mandatory | .. index:: | | | | single: rsc_order; attribute, kind | | | | single: attribute; kind (rsc_order) | | | | single: kind; rsc_order attribute | | | | | | | | How to enforce the constraint. Allowed values: | | | | | | | | * ``Mandatory:`` ``then-action`` will never be initiated | | | | for the ``then`` resource unless and until ``first-action`` | | | | successfully completes for the ``first`` resource. | | | | | | | | * ``Optional:`` The constraint applies only if both specified | | | | resource actions are scheduled in the same transition | | | | (that is, in response to the same cluster state). This | | | | means that ``then-action`` is allowed on the ``then`` | | | | resource regardless of the state of the ``first`` resource, | | | | but if both actions happen to be scheduled at the same time, | | | | they will be ordered. | | | | | | | | * ``Serialize:`` Ensure that the specified actions are never | | | | performed concurrently for the specified resources. | | | | ``First-action`` and ``then-action`` can be executed in either | | | | order, but one must complete before the other can be initiated. | | | | An example use case is when resource start-up puts a high load | | | | on the host. | +--------------+----------------------------+-------------------------------------------------------------------+ | symmetrical | TRUE for ``Mandatory`` and | .. index:: | | | ``Optional`` kinds. FALSE | single: rsc_order; attribute, symmetrical | | | for ``Serialize`` kind. | single: attribute; symmetrical (rsc)order) | | | | single: symmetrical; rsc_order attribute | | | | | | | | If true, the reverse of the constraint applies for the | | | | opposite action (for example, if B starts after A starts, | | | | then B stops before A stops). ``Serialize`` orders cannot | | | | be symmetrical. | +--------------+----------------------------+-------------------------------------------------------------------+ ``Promote`` and ``demote`` apply to :ref:`promotable ` clone resources. Optional and mandatory ordering _______________________________ Here is an example of ordering constraints where **Database** *must* start before **Webserver**, and **IP** *should* start before **Webserver** if they both need to be started: .. topic:: Optional and mandatory ordering constraints .. code-block:: xml Because the above example lets ``symmetrical`` default to TRUE, **Webserver** must be stopped before **Database** can be stopped, and **Webserver** should be stopped before **IP** if they both need to be stopped. Symmetric and asymmetric ordering _________________________________ A mandatory symmetric ordering of "start A then start B" implies not only that the start actions must be ordered, but that B is not allowed to be active unless A is active. For example, if the ordering is added to the configuration when A is stopped (due to target-role, failure, etc.) and B is already active, then B will be stopped. By contrast, asymmetric ordering of "start A then start B" means the stops can occur in either order, which implies that B *can* remain active in the same situation. .. index:: single: colocation single: constraint; colocation single: resource; location relative to other resources .. _s-resource-colocation: Placing Resources Relative to other Resources ############################################# *Colocation constraints* tell the cluster that the location of one resource depends on the location of another one. Colocation has an important side-effect: it affects the order in which resources are assigned to a node. Think about it: You can't place A relative to B unless you know where B is [#]_. So when you are creating colocation constraints, it is important to consider whether you should colocate A with B, or B with A. .. important:: Colocation constraints affect *only* the placement of resources; they do *not* require that the resources be started in a particular order. If you want resources to be started on the same node *and* in a specific order, you need both an ordering constraint (see :ref:`s-resource-ordering`) *and* a colocation constraint, or alternatively, a group (see :ref:`group-resources`). .. index:: pair: XML element; rsc_colocation single: constraint; rsc_colocation Colocation Properties _____________________ .. table:: **Attributes of a rsc_colocation Constraint** :class: longtable :widths: 2 2 5 +----------------+----------------+--------------------------------------------------------+ | Field | Default | Description | +================+================+========================================================+ | id | | .. index:: | | | | single: rsc_colocation; attribute, id | | | | single: attribute; id (rsc_colocation) | | | | single: id; rsc_colocation attribute | | | | | | | | A unique name for the constraint (required). | +----------------+----------------+--------------------------------------------------------+ | rsc | | .. index:: | | | | single: rsc_colocation; attribute, rsc | | | | single: attribute; rsc (rsc_colocation) | | | | single: rsc; rsc_colocation attribute | | | | | | | | The name of a resource that should be located | | | | relative to ``with-rsc``. A colocation constraint must | | | | either contain at least one | | | | :ref:`resource set `, or specify both | | | | ``rsc`` and ``with-rsc``. | +----------------+----------------+--------------------------------------------------------+ | with-rsc | | .. index:: | | | | single: rsc_colocation; attribute, with-rsc | | | | single: attribute; with-rsc (rsc_colocation) | | | | single: with-rsc; rsc_colocation attribute | | | | | | | | The name of the resource used as the colocation | | | | target. The cluster will decide where to put this | | | | resource first and then decide where to put ``rsc``. | | | | A colocation constraint must either contain at least | | | | one :ref:`resource set `, or specify | | | | both ``rsc`` and ``with-rsc``. | +----------------+----------------+--------------------------------------------------------+ | node-attribute | #uname | .. index:: | | | | single: rsc_colocation; attribute, node-attribute | | | | single: attribute; node-attribute (rsc_colocation) | | | | single: node-attribute; rsc_colocation attribute | | | | | | | | If ``rsc`` and ``with-rsc`` are specified, this node | | | | attribute must be the same on the node running ``rsc`` | | | | and the node running ``with-rsc`` for the constraint | | | | to be satisfied. (For details, see | | | | :ref:`s-coloc-attribute`.) | +----------------+----------------+--------------------------------------------------------+ | score | 0 | .. index:: | | | | single: rsc_colocation; attribute, score | | | | single: attribute; score (rsc_colocation) | | | | single: score; rsc_colocation attribute | | | | | | | | Positive values indicate the resources should run on | | | | the same node. Negative values indicate the resources | | | | should run on different nodes. Values of | | | | +/- ``INFINITY`` change "should" to "must". | +----------------+----------------+--------------------------------------------------------+ | rsc-role | Started | .. index:: | | | | single: clone; ordering constraint, rsc-role | | | | single: ordering constraint; rsc-role (clone) | | | | single: rsc-role; clone ordering constraint | | | | | | | | If ``rsc`` and ``with-rsc`` are specified, and ``rsc`` | | | | is a :ref:`promotable clone `, | | | | the constraint applies only to ``rsc`` instances in | - | | | this role. Allowed values: ``Started``, ``Promoted``, | - | | | ``Unpromoted``. For details, see | + | | | this role. Allowed values: ``Started``, ``Stopped``, | + | | | ``Promoted``, ``Unpromoted``. For details, see | | | | :ref:`promotable-clone-constraints`. | +----------------+----------------+--------------------------------------------------------+ | with-rsc-role | Started | .. index:: | | | | single: clone; ordering constraint, with-rsc-role | | | | single: ordering constraint; with-rsc-role (clone) | | | | single: with-rsc-role; clone ordering constraint | | | | | | | | If ``rsc`` and ``with-rsc`` are specified, and | | | | ``with-rsc`` is a | | | | :ref:`promotable clone `, the | | | | constraint applies only to ``with-rsc`` instances in | - | | | this role. Allowed values: ``Started``, ``Promoted``, | - | | | ``Unpromoted``. For details, see | + | | | this role. Allowed values: ``Started``, ``Stopped``, | + | | | ``Promoted``, ``Unpromoted``. For details, see | | | | :ref:`promotable-clone-constraints`. | +----------------+----------------+--------------------------------------------------------+ | influence | value of | .. index:: | | | ``critical`` | single: rsc_colocation; attribute, influence | | | meta-attribute | single: attribute; influence (rsc_colocation) | | | for ``rsc`` | single: influence; rsc_colocation attribute | | | | | | | | Whether to consider the location preferences of | | | | ``rsc`` when ``with-rsc`` is already active. Allowed | | | | values: ``true``, ``false``. For details, see | | | | :ref:`s-coloc-influence`. *(since 2.1.0)* | +----------------+----------------+--------------------------------------------------------+ Mandatory Placement ___________________ Mandatory placement occurs when the constraint's score is **+INFINITY** or **-INFINITY**. In such cases, if the constraint can't be satisfied, then the **rsc** resource is not permitted to run. For ``score=INFINITY``, this includes cases where the ``with-rsc`` resource is not active. If you need resource **A** to always run on the same machine as resource **B**, you would add the following constraint: .. topic:: Mandatory colocation constraint for two resources .. code-block:: xml Remember, because **INFINITY** was used, if **B** can't run on any of the cluster nodes (for whatever reason) then **A** will not be allowed to run. Whether **A** is running or not has no effect on **B**. Alternatively, you may want the opposite -- that **A** *cannot* run on the same machine as **B**. In this case, use ``score="-INFINITY"``. .. topic:: Mandatory anti-colocation constraint for two resources .. code-block:: xml Again, by specifying **-INFINITY**, the constraint is binding. So if the only place left to run is where **B** already is, then **A** may not run anywhere. As with **INFINITY**, **B** can run even if **A** is stopped. However, in this case **A** also can run if **B** is stopped, because it still meets the constraint of **A** and **B** not running on the same node. Advisory Placement __________________ If mandatory placement is about "must" and "must not", then advisory placement is the "I'd prefer if" alternative. For colocation constraints with scores greater than **-INFINITY** and less than **INFINITY**, the cluster will try to accommodate your wishes, but may ignore them if other factors outweigh the colocation score. Those factors might include other constraints, resource stickiness, failure thresholds, whether other resources would be prevented from being active, etc. .. topic:: Advisory colocation constraint for two resources .. code-block:: xml .. _s-coloc-attribute: Colocation by Node Attribute ____________________________ The ``node-attribute`` property of a colocation constraints allows you to express the requirement, "these resources must be on similar nodes". As an example, imagine that you have two Storage Area Networks (SANs) that are not controlled by the cluster, and each node is connected to one or the other. You may have two resources **r1** and **r2** such that **r2** needs to use the same SAN as **r1**, but doesn't necessarily have to be on the same exact node. In such a case, you could define a :ref:`node attribute ` named **san**, with the value **san1** or **san2** on each node as appropriate. Then, you could colocate **r2** with **r1** using ``node-attribute`` set to **san**. .. _s-coloc-influence: Colocation Influence ____________________ By default, if A is colocated with B, the cluster will take into account A's preferences when deciding where to place B, to maximize the chance that both resources can run. For a detailed look at exactly how this occurs, see `Colocation Explained `_. However, if ``influence`` is set to ``false`` in the colocation constraint, this will happen only if B is inactive and needing to be started. If B is already active, A's preferences will have no effect on placing B. An example of what effect this would have and when it would be desirable would be a nonessential reporting tool colocated with a resource-intensive service that takes a long time to start. If the reporting tool fails enough times to reach its migration threshold, by default the cluster will want to move both resources to another node if possible. Setting ``influence`` to ``false`` on the colocation constraint would mean that the reporting tool would be stopped in this situation instead, to avoid forcing the service to move. The ``critical`` resource meta-attribute is a convenient way to specify the default for all colocation constraints and groups involving a particular resource. .. note:: If a noncritical resource is a member of a group, all later members of the group will be treated as noncritical, even if they are marked as (or left to default to) critical. .. _s-resource-sets: Resource Sets ############# .. index:: single: constraint; resource set single: resource; resource set *Resource sets* allow multiple resources to be affected by a single constraint. .. topic:: A set of 3 resources .. code-block:: xml Resource sets are valid inside ``rsc_location``, ``rsc_order`` (see :ref:`s-resource-sets-ordering`), ``rsc_colocation`` (see :ref:`s-resource-sets-colocation`), and ``rsc_ticket`` (see :ref:`ticket-constraints`) constraints. A resource set has a number of properties that can be set, though not all have an effect in all contexts. .. index:: pair: XML element; resource_set .. table:: **Attributes of a resource_set Element** :class: longtable :widths: 2 2 5 +-------------+------------------+--------------------------------------------------------+ | Field | Default | Description | +=============+==================+========================================================+ | id | | .. index:: | | | | single: resource_set; attribute, id | | | | single: attribute; id (resource_set) | | | | single: id; resource_set attribute | | | | | | | | A unique name for the set (required) | +-------------+------------------+--------------------------------------------------------+ | sequential | true | .. index:: | | | | single: resource_set; attribute, sequential | | | | single: attribute; sequential (resource_set) | | | | single: sequential; resource_set attribute | | | | | | | | Whether the members of the set must be acted on in | | | | order. Meaningful within ``rsc_order`` and | | | | ``rsc_colocation``. | +-------------+------------------+--------------------------------------------------------+ | require-all | true | .. index:: | | | | single: resource_set; attribute, require-all | | | | single: attribute; require-all (resource_set) | | | | single: require-all; resource_set attribute | | | | | | | | Whether all members of the set must be active before | | | | continuing. With the current implementation, the | | | | cluster may continue even if only one member of the | | | | set is started, but if more than one member of the set | | | | is starting at the same time, the cluster will still | | | | wait until all of those have started before continuing | | | | (this may change in future versions). Meaningful | | | | within ``rsc_order``. | +-------------+------------------+--------------------------------------------------------+ | role | | .. index:: | | | | single: resource_set; attribute, role | | | | single: attribute; role (resource_set) | | | | single: role; resource_set attribute | | | | | | | | The constraint applies only to resource set members | | | | that are :ref:`s-resource-promotable` in this | | | | role. Meaningful within ``rsc_location``, | | | | ``rsc_colocation`` and ``rsc_ticket``. | | | | Allowed values: ``Started``, ``Promoted``, | | | | ``Unpromoted``. For details, see | | | | :ref:`promotable-clone-constraints`. | +-------------+------------------+--------------------------------------------------------+ | action | value of | .. index:: | | | ``first-action`` | single: resource_set; attribute, action | | | in the enclosing | single: attribute; action (resource_set) | | | ordering | single: action; resource_set attribute | | | constraint | | | | | The action that applies to *all members* of the set. | | | | Meaningful within ``rsc_order``. Allowed values: | | | | ``start``, ``stop``, ``promote``, ``demote``. | +-------------+------------------+--------------------------------------------------------+ | score | | .. index:: | | | | single: resource_set; attribute, score | | | | single: attribute; score (resource_set) | | | | single: score; resource_set attribute | | | | | | | | *Advanced use only.* Use a specific score for this | | | | set within the constraint. | +-------------+------------------+--------------------------------------------------------+ .. _s-resource-sets-ordering: Ordering Sets of Resources ########################## A common situation is for an administrator to create a chain of ordered resources, such as: .. topic:: A chain of ordered resources .. code-block:: xml .. topic:: Visual representation of the four resources' start order for the above constraints .. image:: images/resource-set.png :alt: Ordered set Ordered Set ___________ To simplify this situation, :ref:`s-resource-sets` can be used within ordering constraints: .. topic:: A chain of ordered resources expressed as a set .. code-block:: xml While the set-based format is not less verbose, it is significantly easier to get right and maintain. .. important:: If you use a higher-level tool, pay attention to how it exposes this functionality. Depending on the tool, creating a set **A B** may be equivalent to **A then B**, or **B then A**. Ordering Multiple Sets ______________________ The syntax can be expanded to allow sets of resources to be ordered relative to each other, where the members of each individual set may be ordered or unordered (controlled by the ``sequential`` property). In the example below, **A** and **B** can both start in parallel, as can **C** and **D**, however **C** and **D** can only start once *both* **A** *and* **B** are active. .. topic:: Ordered sets of unordered resources .. code-block:: xml .. topic:: Visual representation of the start order for two ordered sets of unordered resources .. image:: images/two-sets.png :alt: Two ordered sets Of course either set -- or both sets -- of resources can also be internally ordered (by setting ``sequential="true"``) and there is no limit to the number of sets that can be specified. .. topic:: Advanced use of set ordering - Three ordered sets, two of which are internally unordered .. code-block:: xml .. topic:: Visual representation of the start order for the three sets defined above .. image:: images/three-sets.png :alt: Three ordered sets .. important:: An ordered set with ``sequential=false`` makes sense only if there is another set in the constraint. Otherwise, the constraint has no effect. Resource Set OR Logic _____________________ The unordered set logic discussed so far has all been "AND" logic. To illustrate this take the 3 resource set figure in the previous section. Those sets can be expressed, **(A and B) then (C) then (D) then (E and F)**. Say for example we want to change the first set, **(A and B)**, to use "OR" logic so the sets look like this: **(A or B) then (C) then (D) then (E and F)**. This functionality can be achieved through the use of the ``require-all`` option. This option defaults to TRUE which is why the "AND" logic is used by default. Setting ``require-all=false`` means only one resource in the set needs to be started before continuing on to the next set. .. topic:: Resource Set "OR" logic: Three ordered sets, where the first set is internally unordered with "OR" logic .. code-block:: xml .. important:: An ordered set with ``require-all=false`` makes sense only in conjunction with ``sequential=false``. Think of it like this: ``sequential=false`` modifies the set to be an unordered set using "AND" logic by default, and adding ``require-all=false`` flips the unordered set's "AND" logic to "OR" logic. .. _s-resource-sets-colocation: Colocating Sets of Resources ############################ Another common situation is for an administrator to create a set of colocated resources. The simplest way to do this is to define a resource group (see :ref:`group-resources`), but that cannot always accurately express the desired relationships. For example, maybe the resources do not need to be ordered. Another way would be to define each relationship as an individual constraint, but that causes a difficult-to-follow constraint explosion as the number of resources and combinations grow. .. topic:: Colocation chain as individual constraints, where A is placed first, then B, then C, then D .. code-block:: xml To express complicated relationships with a simplified syntax [#]_, :ref:`resource sets ` can be used within colocation constraints. .. topic:: Equivalent colocation chain expressed using **resource_set** .. code-block:: xml .. note:: Within a ``resource_set``, the resources are listed in the order they are *placed*, which is the reverse of the order in which they are *colocated*. In the above example, resource **A** is placed before resource **B**, which is the same as saying resource **B** is colocated with resource **A**. As with individual constraints, a resource that can't be active prevents any resource that must be colocated with it from being active. In both of the two previous examples, if **B** is unable to run, then both **C** and by inference **D** must remain stopped. .. important:: If you use a higher-level tool, pay attention to how it exposes this functionality. Depending on the tool, creating a set **A B** may be equivalent to **A with B**, or **B with A**. Resource sets can also be used to tell the cluster that entire *sets* of resources must be colocated relative to each other, while the individual members within any one set may or may not be colocated relative to each other (determined by the set's ``sequential`` property). In the following example, resources **B**, **C**, and **D** will each be colocated with **A** (which will be placed first). **A** must be able to run in order for any of the resources to run, but any of **B**, **C**, or **D** may be stopped without affecting any of the others. .. topic:: Using colocated sets to specify a shared dependency .. code-block:: xml .. note:: Pay close attention to the order in which resources and sets are listed. While the members of any one sequential set are placed first to last (i.e., the colocation dependency is last with first), multiple sets are placed last to first (i.e. the colocation dependency is first with last). .. important:: A colocated set with ``sequential="false"`` makes sense only if there is another set in the constraint. Otherwise, the constraint has no effect. There is no inherent limit to the number and size of the sets used. The only thing that matters is that in order for any member of one set in the constraint to be active, all members of sets listed after it must also be active (and naturally on the same node); and if a set has ``sequential="true"``, then in order for one member of that set to be active, all members listed before it must also be active. If desired, you can restrict the dependency to instances of promotable clone resources that are in a specific role, using the set's ``role`` property. .. topic:: Colocation in which the members of the middle set have no interdependencies, and the last set listed applies only to promoted instances .. code-block:: xml .. topic:: Visual representation of the above example (resources are placed from left to right) .. image:: ../shared/images/pcmk-colocated-sets.png :alt: Colocation chain .. note:: Unlike ordered sets, colocated sets do not use the ``require-all`` option. External Resource Dependencies ############################## Sometimes, a resource will depend on services that are not managed by the cluster. An example might be a resource that requires a file system that is not managed by the cluster but mounted by systemd at boot time. To accommodate this, the pacemaker systemd service depends on a normally empty target called ``resource-agents-deps.target``. The system administrator may create a unit drop-in for that target specifying the dependencies, to ensure that the services are started before Pacemaker starts and stopped after Pacemaker stops. Typically, this is accomplished by placing a unit file in the ``/etc/systemd/system/resource-agents-deps.target.d`` directory, with directives such as ``Requires`` and ``After`` specifying the dependencies as needed. .. [#] While the human brain is sophisticated enough to read the constraint in any order and choose the correct one depending on the situation, the cluster is not quite so smart. Yet. .. [#] which is not the same as saying easy to follow diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index d1157ae3ce..69bd3607de 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -1,762 +1,751 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PE_INTERNAL__H # define PE_INTERNAL__H # include # include # include # include # include # include # include # include # include const char *pe__resource_description(const pe_resource_t *rsc, uint32_t show_opts); enum pe__clone_flags { // Whether instances should be started sequentially pe__clone_ordered = (1 << 0), // Whether promotion scores have been added pe__clone_promotion_added = (1 << 1), // Whether promotion constraints have been added pe__clone_promotion_constrained = (1 << 2), }; bool pe__clone_is_ordered(const pe_resource_t *clone); int pe__set_clone_flag(pe_resource_t *clone, enum pe__clone_flags flag); enum pe__group_flags { pe__group_ordered = (1 << 0), // Members start sequentially pe__group_colocated = (1 << 1), // Members must be on same node }; bool pe__group_flag_is_set(const pe_resource_t *group, uint32_t flags); pe_resource_t *pe__last_group_member(const pe_resource_t *group); # define pe_rsc_info(rsc, fmt, args...) crm_log_tag(LOG_INFO, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_debug(rsc, fmt, args...) crm_log_tag(LOG_DEBUG, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_trace(rsc, fmt, args...) crm_log_tag(LOG_TRACE, rsc ? rsc->id : "", fmt, ##args) # define pe_err(fmt...) do { \ was_processing_error = TRUE; \ pcmk__config_err(fmt); \ } while (0) # define pe_warn(fmt...) do { \ was_processing_warning = TRUE; \ pcmk__config_warn(fmt); \ } while (0) # define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); } # define pe_proc_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); } #define pe__set_working_set_flags(working_set, flags_to_set) do { \ (working_set)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Working set", crm_system_name, \ (working_set)->flags, (flags_to_set), #flags_to_set); \ } while (0) #define pe__clear_working_set_flags(working_set, flags_to_clear) do { \ (working_set)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Working set", crm_system_name, \ (working_set)->flags, (flags_to_clear), #flags_to_clear); \ } while (0) #define pe__set_resource_flags(resource, flags_to_set) do { \ (resource)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Resource", (resource)->id, (resource)->flags, \ (flags_to_set), #flags_to_set); \ } while (0) #define pe__clear_resource_flags(resource, flags_to_clear) do { \ (resource)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Resource", (resource)->id, (resource)->flags, \ (flags_to_clear), #flags_to_clear); \ } while (0) #define pe__set_action_flags(action, flags_to_set) do { \ (action)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_action_flags(action, flags_to_clear) do { \ (action)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define pe__set_raw_action_flags(action_flags, action_name, flags_to_set) do { \ action_flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Action", action_name, \ (action_flags), \ (flags_to_set), #flags_to_set); \ } while (0) #define pe__clear_raw_action_flags(action_flags, action_name, flags_to_clear) do { \ action_flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", action_name, \ (action_flags), \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define pe__set_action_flags_as(function, line, action, flags_to_set) do { \ (action)->flags = pcmk__set_flags_as((function), (line), \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_action_flags_as(function, line, action, flags_to_clear) do { \ (action)->flags = pcmk__clear_flags_as((function), (line), \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define pe__set_order_flags(order_flags, flags_to_set) do { \ order_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Ordering", "constraint", \ order_flags, (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_order_flags(order_flags, flags_to_clear) do { \ order_flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Ordering", "constraint", \ order_flags, (flags_to_clear), \ #flags_to_clear); \ } while (0) // Some warnings we don't want to print every transition enum pe_warn_once_e { pe_wo_blind = (1 << 0), pe_wo_restart_type = (1 << 1), pe_wo_role_after = (1 << 2), pe_wo_poweroff = (1 << 3), pe_wo_require_all = (1 << 4), pe_wo_order_score = (1 << 5), pe_wo_neg_threshold = (1 << 6), pe_wo_remove_after = (1 << 7), pe_wo_ping_node = (1 << 8), pe_wo_order_inst = (1 << 9), pe_wo_coloc_inst = (1 << 10), pe_wo_group_order = (1 << 11), pe_wo_group_coloc = (1 << 12), pe_wo_upstart = (1 << 13), pe_wo_nagios = (1 << 14), pe_wo_set_ordering = (1 << 15), }; extern uint32_t pe_wo; #define pe_warn_once(pe_wo_bit, fmt...) do { \ if (!pcmk_is_set(pe_wo, pe_wo_bit)) { \ if (pe_wo_bit == pe_wo_blind) { \ crm_warn(fmt); \ } else { \ pe_warn(fmt); \ } \ pe_wo = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Warn-once", "logging", pe_wo, \ (pe_wo_bit), #pe_wo_bit); \ } \ } while (0); typedef struct pe__location_constraint_s { char *id; // Constraint XML ID pe_resource_t *rsc_lh; // Resource being located enum rsc_role_e role_filter; // Role to locate enum pe_discover_e discover_mode; // Resource discovery GList *node_list_rh; // List of pe_node_t* } pe__location_t; typedef struct pe__order_constraint_s { int id; uint32_t flags; // Group of enum pe_ordering flags void *lh_opaque; pe_resource_t *lh_rsc; pe_action_t *lh_action; char *lh_action_task; void *rh_opaque; pe_resource_t *rh_rsc; pe_action_t *rh_action; char *rh_action_task; } pe__ordering_t; const pe_resource_t *pe__const_top_resource(const pe_resource_t *rsc, bool include_bundle); int pe__clone_max(const pe_resource_t *clone); int pe__clone_node_max(const pe_resource_t *clone); int pe__clone_promoted_max(const pe_resource_t *clone); int pe__clone_promoted_node_max(const pe_resource_t *clone); void pe__create_clone_notifications(pe_resource_t *clone); void pe__free_clone_notification_data(pe_resource_t *clone); void pe__create_clone_notif_pseudo_ops(pe_resource_t *clone, pe_action_t *start, pe_action_t *started, pe_action_t *stop, pe_action_t *stopped); pe_action_t *pe__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task, bool optional, bool runnable); void pe__create_promotable_pseudo_ops(pe_resource_t *clone, bool any_promoting, bool any_demoting); bool pe_can_fence(const pe_working_set_t *data_set, const pe_node_t *node); void add_hash_param(GHashTable * hash, const char *name, const char *value); /*! * \internal * \enum pe__rsc_node * \brief Type of resource location lookup to perform */ enum pe__rsc_node { pe__rsc_node_assigned = 0, //!< Where resource is assigned pe__rsc_node_current = 1, //!< Where resource is running // @COMPAT: Use in native_location() at a compatibility break pe__rsc_node_pending = 2, //!< Where resource is pending }; char *native_parameter(pe_resource_t * rsc, pe_node_t * node, gboolean create, const char *name, pe_working_set_t * data_set); pe_node_t *native_location(const pe_resource_t *rsc, GList **list, int current); void pe_metadata(pcmk__output_t *out); void verify_pe_options(GHashTable * options); void native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set, gboolean failed); gboolean native_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean group_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean clone_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set); pe_resource_t *native_find_rsc(pe_resource_t *rsc, const char *id, const pe_node_t *node, int flags); gboolean native_active(pe_resource_t * rsc, gboolean all); gboolean group_active(pe_resource_t * rsc, gboolean all); gboolean clone_active(pe_resource_t * rsc, gboolean all); gboolean pe__bundle_active(pe_resource_t *rsc, gboolean all); //! \deprecated This function will be removed in a future release void native_print(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); //! \deprecated This function will be removed in a future release void group_print(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); //! \deprecated This function will be removed in a future release void clone_print(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); //! \deprecated This function will be removed in a future release void pe__print_bundle(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); gchar *pcmk__native_output_string(const pe_resource_t *rsc, const char *name, const pe_node_t *node, uint32_t show_opts, const char *target_role, bool show_nodes); int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name , size_t pairs_count, ...); char *pe__node_display_name(pe_node_t *node, bool print_detail); // Clone notifications (pe_notif.c) void pe__order_notifs_after_fencing(const pe_action_t *action, pe_resource_t *rsc, pe_action_t *stonith_op); static inline const char * pe__rsc_bool_str(const pe_resource_t *rsc, uint64_t rsc_flag) { return pcmk__btoa(pcmk_is_set(rsc->flags, rsc_flag)); } int pe__clone_xml(pcmk__output_t *out, va_list args); int pe__clone_default(pcmk__output_t *out, va_list args); int pe__group_xml(pcmk__output_t *out, va_list args); int pe__group_default(pcmk__output_t *out, va_list args); int pe__bundle_xml(pcmk__output_t *out, va_list args); int pe__bundle_html(pcmk__output_t *out, va_list args); int pe__bundle_text(pcmk__output_t *out, va_list args); int pe__node_html(pcmk__output_t *out, va_list args); int pe__node_text(pcmk__output_t *out, va_list args); int pe__node_xml(pcmk__output_t *out, va_list args); int pe__resource_xml(pcmk__output_t *out, va_list args); int pe__resource_html(pcmk__output_t *out, va_list args); int pe__resource_text(pcmk__output_t *out, va_list args); void native_free(pe_resource_t * rsc); void group_free(pe_resource_t * rsc); void clone_free(pe_resource_t * rsc); void pe__free_bundle(pe_resource_t *rsc); enum rsc_role_e native_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e group_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e clone_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e pe__bundle_resource_state(const pe_resource_t *rsc, gboolean current); void pe__count_common(pe_resource_t *rsc); void pe__count_bundle(pe_resource_t *rsc); void common_free(pe_resource_t * rsc); pe_node_t *pe__copy_node(const pe_node_t *this_node); extern time_t get_effective_time(pe_working_set_t * data_set); /* Failure handling utilities (from failcounts.c) */ // bit flags for fail count handling options enum pe_fc_flags_e { pe_fc_default = (1 << 0), pe_fc_effective = (1 << 1), // don't count expired failures pe_fc_fillers = (1 << 2), // if container, include filler failures in count }; int pe_get_failcount(const pe_node_t *node, pe_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op); pe_action_t *pe__clear_failcount(pe_resource_t *rsc, const pe_node_t *node, const char *reason, pe_working_set_t *data_set); /* Functions for finding/counting a resource's active nodes */ bool pe__count_active_node(const pe_resource_t *rsc, pe_node_t *node, pe_node_t **active, unsigned int *count_all, unsigned int *count_clean); pe_node_t *pe__find_active_requires(const pe_resource_t *rsc, unsigned int *count); static inline pe_node_t * pe__current_node(const pe_resource_t *rsc) { return (rsc == NULL)? NULL : rsc->fns->active_node(rsc, NULL, NULL); } /* Binary like operators for lists of nodes */ -extern void node_list_exclude(GHashTable * list, GList *list2, gboolean merge_scores); - GHashTable *pe__node_list2table(const GList *list); -static inline gpointer -pe_hash_table_lookup(GHashTable * hash, gconstpointer key) -{ - if (hash) { - return g_hash_table_lookup(hash, key); - } - return NULL; -} - extern pe_action_t *get_pseudo_op(const char *name, pe_working_set_t * data_set); extern gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order); void pe__show_node_scores_as(const char *file, const char *function, int line, bool to_log, const pe_resource_t *rsc, const char *comment, GHashTable *nodes, pe_working_set_t *data_set); #define pe__show_node_scores(level, rsc, text, nodes, data_set) \ pe__show_node_scores_as(__FILE__, __func__, __LINE__, \ (level), (rsc), (text), (nodes), (data_set)) xmlNode *find_rsc_op_entry(const pe_resource_t *rsc, const char *key); pe_action_t *custom_action(pe_resource_t *rsc, char *key, const char *task, const pe_node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set); # define delete_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DELETE, 0) # define delete_action(rsc, node, optional) custom_action( \ rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \ optional, TRUE, rsc->cluster); # define stopped_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOPPED, 0) # define stopped_action(rsc, node, optional) custom_action( \ rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \ optional, TRUE, rsc->cluster); # define stop_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOP, 0) # define stop_action(rsc, node, optional) custom_action( \ rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \ optional, TRUE, rsc->cluster); # define reload_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_RELOAD_AGENT, 0) # define start_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_START, 0) # define start_action(rsc, node, optional) custom_action( \ rsc, start_key(rsc), CRMD_ACTION_START, node, \ optional, TRUE, rsc->cluster) # define started_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STARTED, 0) # define started_action(rsc, node, optional) custom_action( \ rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \ optional, TRUE, rsc->cluster) # define promote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTE, 0) # define promote_action(rsc, node, optional) custom_action( \ rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \ optional, TRUE, rsc->cluster) # define promoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTED, 0) # define promoted_action(rsc, node, optional) custom_action( \ rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \ optional, TRUE, rsc->cluster) # define demote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTE, 0) # define demote_action(rsc, node, optional) custom_action( \ rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \ optional, TRUE, rsc->cluster) # define demoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTED, 0) # define demoted_action(rsc, node, optional) custom_action( \ rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \ optional, TRUE, rsc->cluster) extern int pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set); pe_action_t *find_first_action(const GList *input, const char *uuid, const char *task, const pe_node_t *on_node); enum action_tasks get_complex_task(const pe_resource_t *rsc, const char *name); extern GList *find_actions(GList *input, const char *key, const pe_node_t *on_node); GList *find_actions_exact(GList *input, const char *key, const pe_node_t *on_node); GList *pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node); extern void pe_free_action(pe_action_t * action); void resource_location(pe_resource_t *rsc, const pe_node_t *node, int score, const char *tag, pe_working_set_t *data_set); extern int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default); extern gint sort_op_by_callid(gconstpointer a, gconstpointer b); gboolean get_target_role(const pe_resource_t *rsc, enum rsc_role_e *role); void pe__set_next_role(pe_resource_t *rsc, enum rsc_role_e role, const char *why); pe_resource_t *find_clone_instance(const pe_resource_t *rsc, const char *sub_id); extern void destroy_ticket(gpointer data); extern pe_ticket_t *ticket_new(const char *ticket_id, pe_working_set_t * data_set); // Resources for manipulating resource names const char *pe_base_name_end(const char *id); char *clone_strip(const char *last_rsc_id); char *clone_zero(const char *last_rsc_id); static inline bool pe_base_name_eq(const pe_resource_t *rsc, const char *id) { if (id && rsc && rsc->id) { // Number of characters in rsc->id before any clone suffix size_t base_len = pe_base_name_end(rsc->id) - rsc->id + 1; return (strlen(id) == base_len) && !strncmp(id, rsc->id, base_len); } return false; } int pe__target_rc_from_xml(const xmlNode *xml_op); gint pe__cmp_node_name(gconstpointer a, gconstpointer b); bool is_set_recursive(const pe_resource_t *rsc, long long flag, bool any); enum rsc_digest_cmp_val { /*! Digests are the same */ RSC_DIGEST_MATCH = 0, /*! Params that require a restart changed */ RSC_DIGEST_RESTART, /*! Some parameter changed. */ RSC_DIGEST_ALL, /*! rsc op didn't have a digest associated with it, so * it is unknown if parameters changed or not. */ RSC_DIGEST_UNKNOWN, }; typedef struct op_digest_cache_s { enum rsc_digest_cmp_val rc; xmlNode *params_all; xmlNode *params_secure; xmlNode *params_restart; char *digest_all_calc; char *digest_secure_calc; char *digest_restart_calc; } op_digest_cache_t; op_digest_cache_t *pe__calculate_digests(pe_resource_t *rsc, const char *task, guint *interval_ms, const pe_node_t *node, const xmlNode *xml_op, GHashTable *overrides, bool calc_secure, pe_working_set_t *data_set); void pe__free_digests(gpointer ptr); op_digest_cache_t *rsc_action_digest_cmp(pe_resource_t *rsc, const xmlNode *xml_op, pe_node_t *node, pe_working_set_t *data_set); pe_action_t *pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set); void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set); char *pe__action2reason(const pe_action_t *action, enum pe_action_flags flag); void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite); void pe__add_action_expected_result(pe_action_t *action, int expected_result); void pe__set_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags); void pe__clear_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags); void pe__clear_resource_flags_on_all(pe_working_set_t *data_set, uint64_t flag); gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref); //! \deprecated This function will be removed in a future release void print_rscs_brief(GList *rsc_list, const char * pre_text, long options, void * print_data, gboolean print_all); int pe__rscs_brief_output(pcmk__output_t *out, GList *rsc_list, unsigned int options); void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay); pe_node_t *pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); //! \deprecated This function will be removed in a future release void common_print(pe_resource_t *rsc, const char *pre_text, const char *name, const pe_node_t *node, long options, void *print_data); int pe__common_output_text(pcmk__output_t *out, const pe_resource_t *rsc, const char *name, const pe_node_t *node, unsigned int options); int pe__common_output_html(pcmk__output_t *out, const pe_resource_t *rsc, const char *name, const pe_node_t *node, unsigned int options); //! A single instance of a bundle typedef struct { int offset; //!< 0-origin index of this instance in bundle char *ipaddr; //!< IP address associated with this instance pe_node_t *node; //!< Node created for this instance pe_resource_t *ip; //!< IP address resource for ipaddr pe_resource_t *child; //!< Instance of bundled resource pe_resource_t *container; //!< Container associated with this instance pe_resource_t *remote; //!< Pacemaker Remote connection into container } pe__bundle_replica_t; GList *pe__bundle_containers(const pe_resource_t *bundle); int pe__bundle_max(const pe_resource_t *rsc); bool pe__node_is_bundle_instance(const pe_resource_t *bundle, const pe_node_t *node); pe_resource_t *pe__bundled_resource(const pe_resource_t *rsc); const pe_resource_t *pe__get_rsc_in_container(const pe_resource_t *instance); pe_resource_t *pe__first_container(const pe_resource_t *bundle); void pe__foreach_bundle_replica(pe_resource_t *bundle, bool (*fn)(pe__bundle_replica_t *, void *), void *user_data); void pe__foreach_const_bundle_replica(const pe_resource_t *bundle, bool (*fn)(const pe__bundle_replica_t *, void *), void *user_data); pe_resource_t *pe__find_bundle_replica(const pe_resource_t *bundle, const pe_node_t *node); bool pe__bundle_needs_remote_name(pe_resource_t *rsc); const char *pe__add_bundle_remote_name(pe_resource_t *rsc, pe_working_set_t *data_set, xmlNode *xml, const char *field); const char *pe_node_attribute_calculated(const pe_node_t *node, const char *name, const pe_resource_t *rsc, enum pe__rsc_node node_type); const char *pe_node_attribute_raw(const pe_node_t *node, const char *name); bool pe__is_universal_clone(const pe_resource_t *rsc, const pe_working_set_t *data_set); void pe__add_param_check(const xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set); void pe__foreach_param_check(pe_working_set_t *data_set, void (*cb)(pe_resource_t*, pe_node_t*, const xmlNode*, enum pe_check_parameters)); void pe__free_param_checks(pe_working_set_t *data_set); bool pe__shutdown_requested(const pe_node_t *node); void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set); /*! * \internal * \brief Register xml formatting message functions. * * \param[in,out] out Output object to register messages with */ void pe__register_messages(pcmk__output_t *out); void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pe_working_set_t *data_set); bool pe__resource_is_disabled(const pe_resource_t *rsc); pe_action_t *pe__clear_resource_history(pe_resource_t *rsc, const pe_node_t *node, pe_working_set_t *data_set); GList *pe__rscs_with_tag(pe_working_set_t *data_set, const char *tag_name); GList *pe__unames_with_tag(pe_working_set_t *data_set, const char *tag_name); bool pe__rsc_has_tag(pe_working_set_t *data_set, const char *rsc, const char *tag); bool pe__uname_has_tag(pe_working_set_t *data_set, const char *node, const char *tag); bool pe__rsc_running_on_only(const pe_resource_t *rsc, const pe_node_t *node); bool pe__rsc_running_on_any(pe_resource_t *rsc, GList *node_list); GList *pe__filter_rsc_list(GList *rscs, GList *filter); GList * pe__build_node_name_list(pe_working_set_t *data_set, const char *s); GList * pe__build_rsc_list(pe_working_set_t *data_set, const char *s); bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GList *only_node); gboolean pe__bundle_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); gboolean pe__clone_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); gboolean pe__group_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); gboolean pe__native_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); xmlNode *pe__failed_probe_for_rsc(const pe_resource_t *rsc, const char *name); const char *pe__clone_child_id(const pe_resource_t *rsc); int pe__sum_node_health_scores(const pe_node_t *node, int base_health); int pe__node_health(pe_node_t *node); static inline enum pcmk__health_strategy pe__health_strategy(pe_working_set_t *data_set) { return pcmk__parse_health_strategy(pe_pref(data_set->config_hash, PCMK__OPT_NODE_HEALTH_STRATEGY)); } static inline int pe__health_score(const char *option, pe_working_set_t *data_set) { return char2score(pe_pref(data_set->config_hash, option)); } /*! * \internal * \brief Return a string suitable for logging as a node name * * \param[in] node Node to return a node name string for * * \return Node name if available, otherwise node ID if available, * otherwise "unspecified node" if node is NULL or "unidentified node" * if node has neither a name nor ID. */ static inline const char * pe__node_name(const pe_node_t *node) { if (node == NULL) { return "unspecified node"; } else if (node->details->uname != NULL) { return node->details->uname; } else if (node->details->id != NULL) { return node->details->id; } else { return "unidentified node"; } } /*! * \internal * \brief Check whether two node objects refer to the same node * * \param[in] node1 First node object to compare * \param[in] node2 Second node object to compare * * \return true if \p node1 and \p node2 refer to the same node */ static inline bool pe__same_node(const pe_node_t *node1, const pe_node_t *node2) { return (node1 != NULL) && (node2 != NULL) && (node1->details == node2->details); } /*! * \internal * \brief Get the operation key from an action history entry * * \param[in] xml Action history entry * * \return Entry's operation key */ static inline const char * pe__xe_history_key(const xmlNode *xml) { if (xml == NULL) { return NULL; } else { /* @COMPAT Pacemaker <= 1.1.5 did not add the key, and used the ID * instead. Checking for that allows us to process old saved CIBs, * including some regression tests. */ const char *key = crm_element_value(xml, XML_LRM_ATTR_TASK_KEY); return pcmk__str_empty(key)? ID(xml) : key; } } #endif diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h index 44bfb99e68..0973226f64 100644 --- a/lib/pacemaker/libpacemaker_private.h +++ b/lib/pacemaker/libpacemaker_private.h @@ -1,1083 +1,1090 @@ /* * Copyright 2021-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__LIBPACEMAKER_PRIVATE__H # define PCMK__LIBPACEMAKER_PRIVATE__H /* This header is for the sole use of libpacemaker, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. */ #include // pe_action_t, pe_node_t, pe_working_set_t #include // pe__location_t // Colocation flags enum pcmk__coloc_flags { pcmk__coloc_none = 0U, // Primary is affected even if already active pcmk__coloc_influence = (1U << 0), // Colocation was explicitly configured in CIB pcmk__coloc_explicit = (1U << 1), }; // Flags to modify the behavior of add_colocated_node_scores() enum pcmk__coloc_select { // With no other flags, apply all "with this" colocations pcmk__coloc_select_default = 0, // Apply "this with" colocations instead of "with this" colocations pcmk__coloc_select_this_with = (1 << 0), // Apply only colocations with non-negative scores pcmk__coloc_select_nonnegative = (1 << 1), // Apply only colocations with at least one matching node pcmk__coloc_select_active = (1 << 2), }; // Flags the update_ordered_actions() method can return enum pcmk__updated { pcmk__updated_none = 0, // Nothing changed pcmk__updated_first = (1 << 0), // First action was updated pcmk__updated_then = (1 << 1), // Then action was updated }; #define pcmk__set_updated_flags(au_flags, action, flags_to_set) do { \ au_flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Action update", \ (action)->uuid, au_flags, \ (flags_to_set), #flags_to_set); \ } while (0) #define pcmk__clear_updated_flags(au_flags, action, flags_to_clear) do { \ au_flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Action update", \ (action)->uuid, au_flags, \ (flags_to_clear), #flags_to_clear); \ } while (0) // Resource assignment methods struct resource_alloc_functions_s { /*! * \internal * \brief Assign a resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * * \return Node that \p rsc is assigned to, if assigned entirely to one node */ pe_node_t *(*assign)(pe_resource_t *rsc, const pe_node_t *prefer); /*! * \internal * \brief Create all actions needed for a given resource * * \param[in,out] rsc Resource to create actions for */ void (*create_actions)(pe_resource_t *rsc); /*! * \internal * \brief Schedule any probes needed for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool (*create_probe)(pe_resource_t *rsc, pe_node_t *node); /*! * \internal * \brief Create implicit constraints needed for a resource * * \param[in,out] rsc Resource to create implicit constraints for */ void (*internal_constraints)(pe_resource_t *rsc); /*! * \internal * \brief Apply a colocation's score to node scores or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node scores (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void (*apply_coloc_score)(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); /*! * \internal * \brief Create list of all resources in colocations with a given resource * * Given a resource, create a list of all resources involved in mandatory * colocations with it, whether directly or via chained colocations. * * \param[in] rsc Resource to add to colocated list * \param[in] orig_rsc Resource originally requested * \param[in,out] colocated_rscs Existing list * * \return List of given resource and all resources involved in colocations * * \note This function is recursive; top-level callers should pass NULL as * \p colocated_rscs and \p orig_rsc, and the desired resource as * \p rsc. The recursive calls will use other values. */ GList *(*colocated_resources)(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *colocated_rscs); /*! * \internal * \brief Add colocations affecting a resource as primary to a list * * Given a resource being assigned (\p orig_rsc) and a resource somewhere in * its chain of ancestors (\p rsc, which may be \p orig_rsc), get * colocations that affect the ancestor as primary and should affect the * resource, and add them to a given list. * * \param[in] rsc Resource whose colocations should be added * \param[in] orig_rsc Affected resource (\p rsc or a descendant) * \param[in,out] list List of colocations to add to * * \note All arguments should be non-NULL. * \note The pcmk__with_this_colocations() wrapper should usually be used * instead of using this method directly. */ void (*with_this_colocations)(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list); /*! * \internal * \brief Add colocations affecting a resource as dependent to a list * * Given a resource being assigned (\p orig_rsc) and a resource somewhere in * its chain of ancestors (\p rsc, which may be \p orig_rsc), get * colocations that affect the ancestor as dependent and should affect the * resource, and add them to a given list. * * * \param[in] rsc Resource whose colocations should be added * \param[in] orig_rsc Affected resource (\p rsc or a descendant) * \param[in,out] list List of colocations to add to * * \note All arguments should be non-NULL. * \note The pcmk__this_with_colocations() wrapper should usually be used * instead of using this method directly. */ void (*this_with_colocations)(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list); /*! * \internal * \brief Update nodes with scores of colocated resources' nodes * * Given a table of nodes and a resource, update the nodes' scores with the * scores of the best nodes matching the attribute used for each of the * resource's relevant colocations. * * \param[in,out] rsc Resource to check colocations for * \param[in] log_id Resource ID for logs (if NULL, use \p rsc ID) * \param[in,out] nodes Nodes to update (set initial contents to NULL * to copy \p rsc's allowed nodes) * \param[in] colocation Original colocation constraint (used to get * configured primary resource's stickiness, and * to get colocation node attribute; if NULL, * \p rsc's own matching node scores will not be * added, and *nodes must be NULL as well) * \param[in] factor Incorporate scores multiplied by this factor * \param[in] flags Bitmask of enum pcmk__coloc_select values * * \note NULL *nodes, NULL colocation, and the pcmk__coloc_select_this_with * flag are used together (and only by cmp_resources()). * \note The caller remains responsible for freeing \p *nodes. */ void (*add_colocated_node_scores)(pe_resource_t *rsc, const char *log_id, GHashTable **nodes, pcmk__colocation_t *colocation, float factor, uint32_t flags); /*! * \internal * \brief Apply a location constraint to a resource's allowed node scores * * \param[in,out] rsc Resource to apply constraint to * \param[in,out] location Location constraint to apply */ void (*apply_location)(pe_resource_t *rsc, pe__location_t *location); /*! * \internal * \brief Return action flags for a given resource action * * \param[in,out] action Action to get flags for * \param[in] node If not NULL, limit effects to this node * * \return Flags appropriate to \p action on \p node * \note For primitives, this will be the same as action->flags regardless * of node. For collective resources, the flags can differ due to * multiple instances possibly being involved. */ uint32_t (*action_flags)(pe_action_t *action, const pe_node_t *node); /*! * \internal * \brief Update two actions according to an ordering between them * * Given information about an ordering of two actions, update the actions' * flags (and runnable_before members if appropriate) as appropriate for the * ordering. Effects may cascade to other orderings involving the actions as * well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this * node (only used when interleaving instances) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates * (may include pe_action_optional to affect only * mandatory actions, and pe_action_runnable to * affect only runnable actions) * \param[in] type Group of enum pe_ordering flags to apply * \param[in,out] data_set Cluster working set * * \return Group of enum pcmk__updated flags indicating what was updated */ uint32_t (*update_ordered_actions)(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set); /*! * \internal * \brief Output a summary of scheduled actions for a resource * * \param[in,out] rsc Resource to output actions for */ void (*output_actions)(pe_resource_t *rsc); /*! * \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void (*add_actions_to_graph)(pe_resource_t *rsc); /*! * \internal * \brief Add meta-attributes relevant to transition graph actions to XML * * If a given resource supports variant-specific meta-attributes that are * needed for transition graph actions, add them to a given XML element. * * \param[in] rsc Resource whose meta-attributes should be added * \param[in,out] xml Transition graph action attributes XML to add to */ void (*add_graph_meta)(const pe_resource_t *rsc, xmlNode *xml); /*! * \internal * \brief Add a resource's utilization to a table of utilization values * * This function is used when summing the utilization of a resource and all * resources colocated with it, to determine whether a node has sufficient * capacity. Given a resource and a table of utilization values, it will add * the resource's utilization to the existing values, if the resource has * not yet been assigned to a node. * * \param[in] rsc Resource with utilization to add * \param[in] orig_rsc Resource being assigned (for logging only) * \param[in] all_rscs List of all resources that will be summed * \param[in,out] utilization Table of utilization values to add to */ void (*add_utilization)(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization); /*! * \internal * \brief Apply a shutdown lock for a resource, if appropriate * * \param[in,out] rsc Resource to check for shutdown lock */ void (*shutdown_lock)(pe_resource_t *rsc); }; // Actions (pcmk_sched_actions.c) G_GNUC_INTERNAL void pcmk__update_action_for_orderings(pe_action_t *action, pe_working_set_t *data_set); G_GNUC_INTERNAL uint32_t pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__log_action(const char *pre_text, const pe_action_t *action, bool details); G_GNUC_INTERNAL pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name, guint interval_ms, const pe_node_t *node); G_GNUC_INTERNAL pe_action_t *pcmk__new_shutdown_action(pe_node_t *node); G_GNUC_INTERNAL bool pcmk__action_locks_rsc_to_node(const pe_action_t *action); G_GNUC_INTERNAL void pcmk__deduplicate_action_inputs(pe_action_t *action); G_GNUC_INTERNAL void pcmk__output_actions(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node, const xmlNode *xml_op); G_GNUC_INTERNAL void pcmk__handle_rsc_config_changes(pe_working_set_t *data_set); // Recurring actions (pcmk_sched_recurring.c) G_GNUC_INTERNAL void pcmk__create_recurring_actions(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id, const char *task, guint interval_ms, const pe_node_t *node, const char *reason); G_GNUC_INTERNAL void pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task, guint interval_ms, pe_node_t *node); G_GNUC_INTERNAL bool pcmk__action_is_recurring(const pe_action_t *action); // Producing transition graphs (pcmk_graph_producer.c) G_GNUC_INTERNAL bool pcmk__graph_has_loop(const pe_action_t *init_action, const pe_action_t *action, pe_action_wrapper_t *input); G_GNUC_INTERNAL void pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__create_graph(pe_working_set_t *data_set); // Fencing (pcmk_sched_fencing.c) G_GNUC_INTERNAL void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order); G_GNUC_INTERNAL void pcmk__fence_guest(pe_node_t *node); G_GNUC_INTERNAL bool pcmk__node_unfenced(const pe_node_t *node); G_GNUC_INTERNAL void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data); // Injected scheduler inputs (pcmk_sched_injections.c) void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib, const pcmk_injections_t *injections); // Constraints of any type (pcmk_sched_constraints.c) G_GNUC_INTERNAL pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id); G_GNUC_INTERNAL xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj, const pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__valid_resource_or_tag(const pe_working_set_t *data_set, const char *id, pe_resource_t **rsc, pe_tag_t **tag); G_GNUC_INTERNAL bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr, bool convert_rsc, const pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__create_internal_constraints(pe_working_set_t *data_set); // Location constraints G_GNUC_INTERNAL void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc, int node_score, const char *discover_mode, pe_node_t *foo_node); G_GNUC_INTERNAL void pcmk__apply_locations(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_location(pe_resource_t *rsc, pe__location_t *constraint); // Colocation constraints (pcmk_sched_colocation.c) enum pcmk__coloc_affects { pcmk__coloc_affects_nothing = 0, pcmk__coloc_affects_location, pcmk__coloc_affects_role, }; G_GNUC_INTERNAL enum pcmk__coloc_affects pcmk__colocation_affects(const pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool preview); G_GNUC_INTERNAL void pcmk__apply_coloc_to_scores(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__apply_coloc_to_priority(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id, GHashTable **nodes, pcmk__colocation_t *colocation, float factor, uint32_t flags); G_GNUC_INTERNAL void pcmk__add_dependent_scores(gpointer data, gpointer user_data); +G_GNUC_INTERNAL +void pcmk__colocation_intersect_nodes(pe_resource_t *dependent, + const pe_resource_t *primary, + const pcmk__colocation_t *colocation, + const GList *primary_nodes, + bool merge_scores); + G_GNUC_INTERNAL void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__add_this_with(GList **list, const pcmk__colocation_t *colocation, const pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__add_this_with_list(GList **list, GList *addition, const pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__add_with_this(GList **list, const pcmk__colocation_t *colocation, const pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__add_with_this_list(GList **list, GList *addition, const pe_resource_t *rsc); G_GNUC_INTERNAL GList *pcmk__with_this_colocations(const pe_resource_t *rsc); G_GNUC_INTERNAL GList *pcmk__this_with_colocations(const pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__new_colocation(const char *id, const char *node_attr, int score, pe_resource_t *dependent, pe_resource_t *primary, const char *dependent_role, const char *primary_role, uint32_t flags); G_GNUC_INTERNAL void pcmk__block_colocation_dependents(pe_action_t *action); /*! * \internal * \brief Check whether colocation's dependent preferences should be considered * * \param[in] colocation Colocation constraint * \param[in] rsc Primary instance (normally this will be * colocation->primary, which NULL will be treated as, * but for clones or bundles with multiple instances * this can be a particular instance) * * \return true if colocation influence should be effective, otherwise false */ static inline bool pcmk__colocation_has_influence(const pcmk__colocation_t *colocation, const pe_resource_t *rsc) { if (rsc == NULL) { rsc = colocation->primary; } /* A bundle replica colocates its remote connection with its container, * using a finite score so that the container can run on Pacemaker Remote * nodes. * * Moving a connection is lightweight and does not interrupt the service, * while moving a container is heavyweight and does interrupt the service, * so don't move a clean, active container based solely on the preferences * of its connection. * * This also avoids problematic scenarios where two containers want to * perpetually swap places. */ if (pcmk_is_set(colocation->dependent->flags, pe_rsc_allow_remote_remotes) && !pcmk_is_set(rsc->flags, pe_rsc_failed) && pcmk__list_of_1(rsc->running_on)) { return false; } /* The dependent in a colocation influences the primary's location * if the influence option is true or the primary is not yet active. */ return pcmk_is_set(colocation->flags, pcmk__coloc_influence) || (rsc->running_on == NULL); } // Ordering constraints (pcmk_sched_ordering.c) G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task, pe_action_t *first_action, pe_resource_t *then_rsc, char *then_task, pe_action_t *then_action, uint32_t flags, pe_working_set_t *sched); G_GNUC_INTERNAL void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__disable_invalid_orderings(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_stops_before_shutdown(pe_node_t *node, pe_action_t *shutdown_op); G_GNUC_INTERNAL void pcmk__apply_orderings(pe_working_set_t *sched); G_GNUC_INTERNAL void pcmk__order_after_each(pe_action_t *after, GList *list); /*! * \internal * \brief Create a new ordering between two resource actions * * \param[in,out] first_rsc Resource for 'first' action * \param[in,out] first_task Action key for 'first' action * \param[in] then_rsc Resource for 'then' action * \param[in,out] then_task Action key for 'then' action * \param[in] flags Bitmask of enum pe_ordering flags */ #define pcmk__order_resource_actions(first_rsc, first_task, \ then_rsc, then_task, flags) \ pcmk__new_ordering((first_rsc), \ pcmk__op_key((first_rsc)->id, (first_task), 0), \ NULL, \ (then_rsc), \ pcmk__op_key((then_rsc)->id, (then_task), 0), \ NULL, (flags), (first_rsc)->cluster) #define pcmk__order_starts(rsc1, rsc2, flags) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \ (rsc2), CRMD_ACTION_START, (flags)) #define pcmk__order_stops(rsc1, rsc2, flags) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \ (rsc2), CRMD_ACTION_STOP, (flags)) // Ticket constraints (pcmk_sched_tickets.c) G_GNUC_INTERNAL void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set); // Promotable clone resources (pcmk_sched_promotable.c) G_GNUC_INTERNAL void pcmk__add_promotion_scores(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__require_promotion_tickets(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__set_instance_roles(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__create_promotable_actions(pe_resource_t *clone); G_GNUC_INTERNAL void pcmk__promotable_restart_ordering(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__order_promotable_instances(pe_resource_t *clone); G_GNUC_INTERNAL void pcmk__update_dependent_with_promotable(const pe_resource_t *primary, pe_resource_t *dependent, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__update_promotable_dependent_priority(const pe_resource_t *primary, pe_resource_t *dependent, const pcmk__colocation_t *colocation); // Pacemaker Remote nodes (pcmk_sched_remote.c) G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(const pe_node_t *node); G_GNUC_INTERNAL void pcmk__order_remote_connection_actions(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_corresponds_to_guest(const pe_resource_t *rsc, const pe_node_t *node); G_GNUC_INTERNAL pe_node_t *pcmk__connection_host_for_action(const pe_action_t *action); G_GNUC_INTERNAL void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params); G_GNUC_INTERNAL void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, const pe_action_t *action); // Primitives (pcmk_sched_primitive.c) G_GNUC_INTERNAL pe_node_t *pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer); G_GNUC_INTERNAL void pcmk__primitive_create_actions(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__primitive_internal_constraints(pe_resource_t *rsc); G_GNUC_INTERNAL uint32_t pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node); G_GNUC_INTERNAL void pcmk__primitive_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); G_GNUC_INTERNAL void pcmk__with_primitive_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list); G_GNUC_INTERNAL void pcmk__primitive_with_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list); G_GNUC_INTERNAL void pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional); G_GNUC_INTERNAL void pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml); G_GNUC_INTERNAL void pcmk__primitive_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization); G_GNUC_INTERNAL void pcmk__primitive_shutdown_lock(pe_resource_t *rsc); // Groups (pcmk_sched_group.c) G_GNUC_INTERNAL pe_node_t *pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer); G_GNUC_INTERNAL void pcmk__group_create_actions(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__group_internal_constraints(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__group_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); G_GNUC_INTERNAL void pcmk__with_group_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list); G_GNUC_INTERNAL void pcmk__group_with_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list); G_GNUC_INTERNAL void pcmk__group_add_colocated_node_scores(pe_resource_t *rsc, const char *log_id, GHashTable **nodes, pcmk__colocation_t *colocation, float factor, uint32_t flags); G_GNUC_INTERNAL void pcmk__group_apply_location(pe_resource_t *rsc, pe__location_t *location); G_GNUC_INTERNAL uint32_t pcmk__group_action_flags(pe_action_t *action, const pe_node_t *node); G_GNUC_INTERNAL uint32_t pcmk__group_update_ordered_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set); G_GNUC_INTERNAL GList *pcmk__group_colocated_resources(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *colocated_rscs); G_GNUC_INTERNAL void pcmk__group_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization); G_GNUC_INTERNAL void pcmk__group_shutdown_lock(pe_resource_t *rsc); // Clones (pcmk_sched_clone.c) G_GNUC_INTERNAL pe_node_t *pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer); G_GNUC_INTERNAL void pcmk__clone_create_actions(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__clone_create_probe(pe_resource_t *rsc, pe_node_t *node); G_GNUC_INTERNAL void pcmk__clone_internal_constraints(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__clone_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); G_GNUC_INTERNAL void pcmk__with_clone_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list); G_GNUC_INTERNAL void pcmk__clone_with_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list); G_GNUC_INTERNAL void pcmk__clone_apply_location(pe_resource_t *rsc, pe__location_t *constraint); G_GNUC_INTERNAL uint32_t pcmk__clone_action_flags(pe_action_t *action, const pe_node_t *node); G_GNUC_INTERNAL void pcmk__clone_add_actions_to_graph(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__clone_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml); G_GNUC_INTERNAL void pcmk__clone_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization); G_GNUC_INTERNAL void pcmk__clone_shutdown_lock(pe_resource_t *rsc); // Bundles (pcmk_sched_bundle.c) G_GNUC_INTERNAL pe_node_t *pcmk__bundle_assign(pe_resource_t *rsc, const pe_node_t *prefer); G_GNUC_INTERNAL void pcmk__bundle_create_actions(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node); G_GNUC_INTERNAL void pcmk__bundle_internal_constraints(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); G_GNUC_INTERNAL void pcmk__with_bundle_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list); G_GNUC_INTERNAL void pcmk__bundle_with_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list); G_GNUC_INTERNAL void pcmk__bundle_apply_location(pe_resource_t *rsc, pe__location_t *constraint); G_GNUC_INTERNAL uint32_t pcmk__bundle_action_flags(pe_action_t *action, const pe_node_t *node); G_GNUC_INTERNAL void pcmk__output_bundle_actions(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__bundle_add_actions_to_graph(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__bundle_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization); G_GNUC_INTERNAL void pcmk__bundle_shutdown_lock(pe_resource_t *rsc); // Clone instances or bundle replica containers (pcmk_sched_instances.c) G_GNUC_INTERNAL void pcmk__assign_instances(pe_resource_t *collective, GList *instances, int max_total, int max_per_node); G_GNUC_INTERNAL void pcmk__create_instance_actions(pe_resource_t *rsc, GList *instances); G_GNUC_INTERNAL bool pcmk__instance_matches(const pe_resource_t *instance, const pe_node_t *node, enum rsc_role_e role, bool current); G_GNUC_INTERNAL pe_resource_t *pcmk__find_compatible_instance(const pe_resource_t *match_rsc, const pe_resource_t *rsc, enum rsc_role_e role, bool current); G_GNUC_INTERNAL uint32_t pcmk__instance_update_ordered_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set); G_GNUC_INTERNAL uint32_t pcmk__collective_action_flags(pe_action_t *action, const GList *instances, const pe_node_t *node); G_GNUC_INTERNAL void pcmk__add_collective_constraints(GList **list, const pe_resource_t *instance, const pe_resource_t *collective, bool with_this); // Injections (pcmk_injections.c) G_GNUC_INTERNAL xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid); G_GNUC_INTERNAL xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node, bool up); G_GNUC_INTERNAL xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *lrm_name, const char *rclass, const char *rtype, const char *rprovider); G_GNUC_INTERNAL void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *task, guint interval_ms, int rc); G_GNUC_INTERNAL xmlNode *pcmk__inject_action_result(xmlNode *cib_resource, lrmd_event_data_t *op, int target_rc); // Nodes (pcmk_sched_nodes.c) G_GNUC_INTERNAL bool pcmk__node_available(const pe_node_t *node, bool consider_score, bool consider_guest); G_GNUC_INTERNAL bool pcmk__any_node_available(GHashTable *nodes); G_GNUC_INTERNAL GHashTable *pcmk__copy_node_table(GHashTable *nodes); G_GNUC_INTERNAL void pcmk__copy_node_tables(const pe_resource_t *rsc, GHashTable **copy); G_GNUC_INTERNAL void pcmk__restore_node_tables(pe_resource_t *rsc, GHashTable *backup); G_GNUC_INTERNAL GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node); G_GNUC_INTERNAL void pcmk__apply_node_health(pe_working_set_t *data_set); G_GNUC_INTERNAL pe_node_t *pcmk__top_allowed_node(const pe_resource_t *rsc, const pe_node_t *node); // Functions applying to more than one variant (pcmk_sched_resource.c) G_GNUC_INTERNAL void pcmk__set_assignment_methods(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node, const xmlNode *rsc_entry, bool active_on_node); G_GNUC_INTERNAL GList *pcmk__rscs_matching_id(const char *id, const pe_working_set_t *data_set); G_GNUC_INTERNAL GList *pcmk__colocated_resources(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *colocated_rscs); G_GNUC_INTERNAL void pcmk__noop_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml); G_GNUC_INTERNAL void pcmk__output_resource_actions(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force); G_GNUC_INTERNAL void pcmk__unassign_resource(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__threshold_reached(pe_resource_t *rsc, const pe_node_t *node, pe_resource_t **failed); G_GNUC_INTERNAL void pcmk__sort_resources(pe_working_set_t *data_set); G_GNUC_INTERNAL gint pcmk__cmp_instance(gconstpointer a, gconstpointer b); G_GNUC_INTERNAL gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b); // Functions related to probes (pcmk_sched_probes.c) G_GNUC_INTERNAL bool pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node); G_GNUC_INTERNAL void pcmk__order_probes(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__probe_resource_list(GList *rscs, pe_node_t *node); G_GNUC_INTERNAL void pcmk__schedule_probes(pe_working_set_t *data_set); // Functions related to live migration (pcmk_sched_migration.c) void pcmk__create_migration_actions(pe_resource_t *rsc, const pe_node_t *current); void pcmk__abort_dangling_migration(void *data, void *user_data); bool pcmk__rsc_can_migrate(const pe_resource_t *rsc, const pe_node_t *current); void pcmk__order_migration_equivalents(pe__ordering_t *order); // Functions related to node utilization (pcmk_sched_utilization.c) G_GNUC_INTERNAL int pcmk__compare_node_capacities(const pe_node_t *node1, const pe_node_t *node2); G_GNUC_INTERNAL void pcmk__consume_node_capacity(GHashTable *current_utilization, const pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__release_node_capacity(GHashTable *current_utilization, const pe_resource_t *rsc); G_GNUC_INTERNAL const pe_node_t *pcmk__ban_insufficient_capacity(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__create_utilization_constraints(pe_resource_t *rsc, const GList *allowed_nodes); G_GNUC_INTERNAL void pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set); #endif // PCMK__LIBPACEMAKER_PRIVATE__H diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c index dd64e49a0b..f13e477d6f 100644 --- a/lib/pacemaker/pcmk_sched_bundle.c +++ b/lib/pacemaker/pcmk_sched_bundle.c @@ -1,941 +1,941 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Assign a single bundle replica's resources (other than container) * * \param[in,out] replica Replica to assign * \param[in] user_data Preferred node, if any * * \return true (to indicate that any further replicas should be processed) */ static bool assign_replica(pe__bundle_replica_t *replica, void *user_data) { pe_node_t *container_host = NULL; const pe_node_t *prefer = user_data; const pe_resource_t *bundle = pe__const_top_resource(replica->container, true); if (replica->ip != NULL) { pe_rsc_trace(bundle, "Assigning bundle %s IP %s", bundle->id, replica->ip->id); replica->ip->cmds->assign(replica->ip, prefer); } container_host = replica->container->allocated_to; if (replica->remote != NULL) { if (pe__is_guest_or_remote_node(container_host)) { /* REMOTE_CONTAINER_HACK: "Nested" connection resources must be on * the same host because Pacemaker Remote only supports a single * active connection. */ pcmk__new_colocation("#replica-remote-with-host-remote", NULL, INFINITY, replica->remote, container_host->details->remote_rsc, NULL, NULL, pcmk__coloc_influence); } pe_rsc_trace(bundle, "Assigning bundle %s connection %s", bundle->id, replica->remote->id); replica->remote->cmds->assign(replica->remote, prefer); } if (replica->child != NULL) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, replica->child->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (!pe__same_node(node, replica->node)) { node->weight = -INFINITY; } else if (!pcmk__threshold_reached(replica->child, node, NULL)) { node->weight = INFINITY; } } pe__set_resource_flags(replica->child->parent, pe_rsc_allocating); pe_rsc_trace(bundle, "Assigning bundle %s replica child %s", bundle->id, replica->child->id); replica->child->cmds->assign(replica->child, replica->node); pe__clear_resource_flags(replica->child->parent, pe_rsc_allocating); } return true; } /*! * \internal * \brief Assign a bundle resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * * \return Node that \p rsc is assigned to, if assigned entirely to one node */ pe_node_t * pcmk__bundle_assign(pe_resource_t *rsc, const pe_node_t *prefer) { GList *containers = NULL; pe_resource_t *bundled_resource = NULL; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container)); pe_rsc_trace(rsc, "Assigning bundle %s", rsc->id); pe__set_resource_flags(rsc, pe_rsc_allocating); pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, rsc->cluster); // Assign all containers first, so we know what nodes the bundle will be on containers = g_list_sort(pe__bundle_containers(rsc), pcmk__cmp_instance); pcmk__assign_instances(rsc, containers, pe__bundle_max(rsc), rsc->fns->max_per_node(rsc)); g_list_free(containers); // Then assign remaining replica resources pe__foreach_bundle_replica(rsc, assign_replica, (void *) prefer); // Finally, assign the bundled resources to each bundle node bundled_resource = pe__bundled_resource(rsc); if (bundled_resource != NULL) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, bundled_resource->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (pe__node_is_bundle_instance(rsc, node)) { node->weight = 0; } else { node->weight = -INFINITY; } } bundled_resource->cmds->assign(bundled_resource, prefer); } pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional); return NULL; } /*! * \internal * \brief Create actions for a bundle replica's resources (other than child) * * \param[in,out] replica Replica to create actions for * \param[in] user_data Unused * * \return true (to indicate that any further replicas should be processed) */ static bool create_replica_actions(pe__bundle_replica_t *replica, void *user_data) { if (replica->ip != NULL) { replica->ip->cmds->create_actions(replica->ip); } if (replica->container != NULL) { replica->container->cmds->create_actions(replica->container); } if (replica->remote != NULL) { replica->remote->cmds->create_actions(replica->remote); } return true; } /*! * \internal * \brief Create all actions needed for a given bundle resource * * \param[in,out] rsc Bundle resource to create actions for */ void pcmk__bundle_create_actions(pe_resource_t *rsc) { pe_action_t *action = NULL; GList *containers = NULL; pe_resource_t *bundled_resource = NULL; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container)); pe__foreach_bundle_replica(rsc, create_replica_actions, NULL); containers = pe__bundle_containers(rsc); pcmk__create_instance_actions(rsc, containers); g_list_free(containers); bundled_resource = pe__bundled_resource(rsc); if (bundled_resource != NULL) { bundled_resource->cmds->create_actions(bundled_resource); if (pcmk_is_set(bundled_resource->flags, pe_rsc_promotable)) { pe__new_rsc_pseudo_action(rsc, RSC_PROMOTE, true, true); action = pe__new_rsc_pseudo_action(rsc, RSC_PROMOTED, true, true); action->priority = INFINITY; pe__new_rsc_pseudo_action(rsc, RSC_DEMOTE, true, true); action = pe__new_rsc_pseudo_action(rsc, RSC_DEMOTED, true, true); action->priority = INFINITY; } } } /*! * \internal * \brief Create internal constraints for a bundle replica's resources * * \param[in,out] replica Replica to create internal constraints for * \param[in,out] user_data Replica's parent bundle * * \return true (to indicate that any further replicas should be processed) */ static bool replica_internal_constraints(pe__bundle_replica_t *replica, void *user_data) { pe_resource_t *bundle = user_data; replica->container->cmds->internal_constraints(replica->container); // Start bundle -> start replica container pcmk__order_starts(bundle, replica->container, pe_order_runnable_left|pe_order_implies_first_printed); // Stop bundle -> stop replica child and container if (replica->child != NULL) { pcmk__order_stops(bundle, replica->child, pe_order_implies_first_printed); } pcmk__order_stops(bundle, replica->container, pe_order_implies_first_printed); // Start replica container -> bundle is started pcmk__order_resource_actions(replica->container, RSC_START, bundle, RSC_STARTED, pe_order_implies_then_printed); // Stop replica container -> bundle is stopped pcmk__order_resource_actions(replica->container, RSC_STOP, bundle, RSC_STOPPED, pe_order_implies_then_printed); if (replica->ip != NULL) { replica->ip->cmds->internal_constraints(replica->ip); // Replica IP address -> replica container (symmetric) pcmk__order_starts(replica->ip, replica->container, pe_order_runnable_left|pe_order_preserve); pcmk__order_stops(replica->container, replica->ip, pe_order_implies_first|pe_order_preserve); pcmk__new_colocation("#ip-with-container", NULL, INFINITY, replica->ip, replica->container, NULL, NULL, pcmk__coloc_influence); } if (replica->remote != NULL) { /* This handles ordering and colocating remote relative to container * (via "#resource-with-container"). Since IP is also ordered and * colocated relative to the container, we don't need to do anything * explicit here with IP. */ replica->remote->cmds->internal_constraints(replica->remote); } if (replica->child != NULL) { CRM_ASSERT(replica->remote != NULL); // "Start remote then child" is implicit in scheduler's remote logic } return true; } /*! * \internal * \brief Create implicit constraints needed for a bundle resource * * \param[in,out] rsc Bundle resource to create implicit constraints for */ void pcmk__bundle_internal_constraints(pe_resource_t *rsc) { pe_resource_t *bundled_resource = NULL; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container)); pe__foreach_bundle_replica(rsc, replica_internal_constraints, rsc); bundled_resource = pe__bundled_resource(rsc); if (bundled_resource == NULL) { return; } // Start bundle -> start bundled clone pcmk__order_resource_actions(rsc, RSC_START, bundled_resource, RSC_START, pe_order_implies_first_printed); // Bundled clone is started -> bundle is started pcmk__order_resource_actions(bundled_resource, RSC_STARTED, rsc, RSC_STARTED, pe_order_implies_then_printed); // Stop bundle -> stop bundled clone pcmk__order_resource_actions(rsc, RSC_STOP, bundled_resource, RSC_STOP, pe_order_implies_first_printed); // Bundled clone is stopped -> bundle is stopped pcmk__order_resource_actions(bundled_resource, RSC_STOPPED, rsc, RSC_STOPPED, pe_order_implies_then_printed); bundled_resource->cmds->internal_constraints(bundled_resource); if (!pcmk_is_set(bundled_resource->flags, pe_rsc_promotable)) { return; } pcmk__promotable_restart_ordering(rsc); // Demote bundle -> demote bundled clone pcmk__order_resource_actions(rsc, RSC_DEMOTE, bundled_resource, RSC_DEMOTE, pe_order_implies_first_printed); // Bundled clone is demoted -> bundle is demoted pcmk__order_resource_actions(bundled_resource, RSC_DEMOTED, rsc, RSC_DEMOTED, pe_order_implies_then_printed); // Promote bundle -> promote bundled clone pcmk__order_resource_actions(rsc, RSC_PROMOTE, bundled_resource, RSC_PROMOTE, pe_order_implies_first_printed); // Bundled clone is promoted -> bundle is promoted pcmk__order_resource_actions(bundled_resource, RSC_PROMOTED, rsc, RSC_PROMOTED, pe_order_implies_then_printed); } struct match_data { const pe_node_t *node; // Node to compare against replica pe_resource_t *container; // Replica container corresponding to node }; /*! * \internal * \brief Check whether a replica container is assigned to a given node * * \param[in] replica Replica to check * \param[in,out] user_data struct match_data with node to compare against * * \return true if the replica does not match (to indicate further replicas * should be processed), otherwise false */ static bool match_replica_container(const pe__bundle_replica_t *replica, void *user_data) { struct match_data *match_data = user_data; if (pcmk__instance_matches(replica->container, match_data->node, RSC_ROLE_UNKNOWN, false)) { match_data->container = replica->container; return false; // Match found, don't bother searching further replicas } return true; // No match, keep searching } /*! * \internal * \brief Find a bundle container compatible with a dependent resource * * \param[in] dependent Dependent resource in colocation with bundle * \param[in] bundle Bundle that \p dependent is colocated with * * \return A container from \p bundle assigned to the same node as \p dependent * if assigned, otherwise assigned to any of dependent's allowed nodes, * otherwise NULL. */ static pe_resource_t * compatible_container(const pe_resource_t *dependent, const pe_resource_t *bundle) { GList *scratch = NULL; struct match_data match_data = { NULL, NULL }; // If dependent is assigned, only check there match_data.node = dependent->fns->location(dependent, NULL, 0); if (match_data.node != NULL) { pe__foreach_const_bundle_replica(bundle, match_replica_container, &match_data); return match_data.container; } // Otherwise, check for any of the dependent's allowed nodes scratch = g_hash_table_get_values(dependent->allowed_nodes); scratch = pcmk__sort_nodes(scratch, NULL); for (const GList *iter = scratch; iter != NULL; iter = iter->next) { match_data.node = iter->data; pe__foreach_const_bundle_replica(bundle, match_replica_container, &match_data); if (match_data.container != NULL) { break; } } g_list_free(scratch); return match_data.container; } struct coloc_data { const pcmk__colocation_t *colocation; pe_resource_t *dependent; GList *container_hosts; }; /*! * \internal * \brief Apply a colocation score to replica node scores or resource priority * * \param[in] replica Replica of primary bundle resource in colocation * \param[in,out] user_data struct coloc_data for colocation being applied * * \return true (to indicate that any further replicas should be processed) */ static bool replica_apply_coloc_score(const pe__bundle_replica_t *replica, void *user_data) { struct coloc_data *coloc_data = user_data; pe_node_t *chosen = NULL; if (coloc_data->colocation->score < INFINITY) { replica->container->cmds->apply_coloc_score(coloc_data->dependent, replica->container, coloc_data->colocation, false); return true; } chosen = replica->container->fns->location(replica->container, NULL, 0); if ((chosen == NULL) || is_set_recursive(replica->container, pe_rsc_block, true)) { return true; } if ((coloc_data->colocation->primary_role >= RSC_ROLE_PROMOTED) && ((replica->child == NULL) || (replica->child->next_role < RSC_ROLE_PROMOTED))) { return true; } pe_rsc_trace(pe__const_top_resource(replica->container, true), "Allowing mandatory colocation %s using %s @%d", coloc_data->colocation->id, pe__node_name(chosen), chosen->weight); coloc_data->container_hosts = g_list_prepend(coloc_data->container_hosts, chosen); return true; } /*! * \internal * \brief Apply a colocation's score to node scores or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node scores (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { struct coloc_data coloc_data = { colocation, dependent, NULL }; /* This should never be called for the bundle itself as a dependent. * Instead, we add its colocation constraints to its containers and call the * apply_coloc_score() method for the containers as dependents. */ CRM_ASSERT((primary != NULL) && (primary->variant == pe_container) && (dependent != NULL) && (dependent->variant == pe_native) && (colocation != NULL) && !for_dependent); if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "Skipping applying colocation %s " "because %s is still provisional", colocation->id, primary->id); return; } pe_rsc_trace(primary, "Applying colocation %s (%s with %s at %s)", colocation->id, dependent->id, primary->id, pcmk_readable_score(colocation->score)); /* If the constraint dependent is a clone or bundle, "dependent" here is one * of its instances. Look for a compatible instance of this bundle. */ if (colocation->dependent->variant > pe_group) { const pe_resource_t *primary_container = compatible_container(dependent, primary); if (primary_container != NULL) { // Success, we found one pe_rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_container->id); dependent->cmds->apply_coloc_score(dependent, primary_container, colocation, true); } else if (colocation->score >= INFINITY) { // Failure, and it's fatal crm_notice("%s cannot run because there is no compatible " "instance of %s to colocate with", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true); } else { // Failure, but we can ignore it pe_rsc_debug(primary, "%s cannot be colocated with any instance of %s", dependent->id, primary->id); } return; } pe__foreach_const_bundle_replica(primary, replica_apply_coloc_score, &coloc_data); if (colocation->score >= INFINITY) { - node_list_exclude(dependent->allowed_nodes, coloc_data.container_hosts, - FALSE); + pcmk__colocation_intersect_nodes(dependent, primary, colocation, + coloc_data.container_hosts, false); } g_list_free(coloc_data.container_hosts); } // Bundle implementation of resource_alloc_functions_t:with_this_colocations() void pcmk__with_bundle_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list) { CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container) && (orig_rsc != NULL) && (list != NULL)); if (rsc == orig_rsc) { // Colocations are wanted for bundle itself pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc); // Only the bundle replicas' containers get the bundle's constraints } else if (pcmk_is_set(orig_rsc->flags, pe_rsc_replica_container)) { pcmk__add_collective_constraints(list, orig_rsc, rsc, true); } } // Bundle implementation of resource_alloc_functions_t:this_with_colocations() void pcmk__bundle_with_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list) { CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container) && (orig_rsc != NULL) && (list != NULL)); if (rsc == orig_rsc) { // Colocations are wanted for bundle itself pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc); // Only the bundle replicas' containers get the bundle's constraints } else if (pcmk_is_set(orig_rsc->flags, pe_rsc_replica_container)) { pcmk__add_collective_constraints(list, orig_rsc, rsc, false); } } /*! * \internal * \brief Return action flags for a given bundle resource action * * \param[in,out] action Bundle resource action to get flags for * \param[in] node If not NULL, limit effects to this node * * \return Flags appropriate to \p action on \p node */ uint32_t pcmk__bundle_action_flags(pe_action_t *action, const pe_node_t *node) { GList *containers = NULL; uint32_t flags = 0; pe_resource_t *bundled_resource = NULL; CRM_ASSERT((action != NULL) && (action->rsc != NULL) && (action->rsc->variant == pe_container)); bundled_resource = pe__bundled_resource(action->rsc); if (bundled_resource != NULL) { // Clone actions are done on the bundled clone resource, not container switch (get_complex_task(bundled_resource, action->task)) { case no_action: case action_notify: case action_notified: case action_promote: case action_promoted: case action_demote: case action_demoted: return pcmk__collective_action_flags(action, bundled_resource->children, node); default: break; } } containers = pe__bundle_containers(action->rsc); flags = pcmk__collective_action_flags(action, containers, node); g_list_free(containers); return flags; } /*! * \internal * \brief Apply a location constraint to a bundle replica * * \param[in,out] replica Replica to apply constraint to * \param[in,out] user_data Location constraint to apply * * \return true (to indicate that any further replicas should be processed) */ static bool apply_location_to_replica(pe__bundle_replica_t *replica, void *user_data) { pe__location_t *location = user_data; if (replica->container != NULL) { replica->container->cmds->apply_location(replica->container, location); } if (replica->ip != NULL) { replica->ip->cmds->apply_location(replica->ip, location); } return true; } /*! * \internal * \brief Apply a location constraint to a bundle resource's allowed node scores * * \param[in,out] rsc Bundle resource to apply constraint to * \param[in,out] location Location constraint to apply */ void pcmk__bundle_apply_location(pe_resource_t *rsc, pe__location_t *location) { pe_resource_t *bundled_resource = NULL; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container) && (location != NULL)); pcmk__apply_location(rsc, location); pe__foreach_bundle_replica(rsc, apply_location_to_replica, location); bundled_resource = pe__bundled_resource(rsc); if ((bundled_resource != NULL) && ((location->role_filter == RSC_ROLE_UNPROMOTED) || (location->role_filter == RSC_ROLE_PROMOTED))) { bundled_resource->cmds->apply_location(bundled_resource, location); bundled_resource->rsc_location = g_list_prepend( bundled_resource->rsc_location, location); } } #define XPATH_REMOTE "//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']" /*! * \internal * \brief Add a bundle replica's actions to transition graph * * \param[in,out] replica Replica to add to graph * \param[in] user_data Bundle that replica belongs to (for logging only) * * \return true (to indicate that any further replicas should be processed) */ static bool add_replica_actions_to_graph(pe__bundle_replica_t *replica, void *user_data) { if ((replica->remote != NULL) && (replica->container != NULL) && pe__bundle_needs_remote_name(replica->remote)) { /* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that * run pacemaker-remoted inside, without needing a separate IP for * the container. This is done by configuring the inner remote's * connection host as the magic string "#uname", then * replacing it with the underlying host when needed. */ xmlNode *nvpair = get_xpath_object(XPATH_REMOTE, replica->remote->xml, LOG_ERR); const char *calculated_addr = NULL; // Replace the value in replica->remote->xml (if appropriate) calculated_addr = pe__add_bundle_remote_name(replica->remote, replica->remote->cluster, nvpair, "value"); if (calculated_addr != NULL) { /* Since this is for the bundle as a resource, and not any * particular action, replace the value in the default * parameters (not evaluated for node). create_graph_action() * will grab it from there to replace it in node-evaluated * parameters. */ GHashTable *params = pe_rsc_params(replica->remote, NULL, replica->remote->cluster); g_hash_table_replace(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR), strdup(calculated_addr)); } else { pe_resource_t *bundle = user_data; /* The only way to get here is if the remote connection is * neither currently running nor scheduled to run. That means we * won't be doing any operations that require addr (only start * requires it; we additionally use it to compare digests when * unpacking status, promote, and migrate_from history, but * that's already happened by this point). */ pe_rsc_info(bundle, "Unable to determine address for bundle %s " "remote connection", bundle->id); } } if (replica->ip != NULL) { replica->ip->cmds->add_actions_to_graph(replica->ip); } if (replica->container != NULL) { replica->container->cmds->add_actions_to_graph(replica->container); } if (replica->remote != NULL) { replica->remote->cmds->add_actions_to_graph(replica->remote); } return true; } /*! * \internal * \brief Add a bundle resource's actions to the transition graph * * \param[in,out] rsc Bundle resource whose actions should be added */ void pcmk__bundle_add_actions_to_graph(pe_resource_t *rsc) { pe_resource_t *bundled_resource = NULL; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container)); bundled_resource = pe__bundled_resource(rsc); if (bundled_resource != NULL) { bundled_resource->cmds->add_actions_to_graph(bundled_resource); } pe__foreach_bundle_replica(rsc, add_replica_actions_to_graph, rsc); } struct probe_data { pe_resource_t *bundle; // Bundle being probed pe_node_t *node; // Node to create probes on bool any_created; // Whether any probes have been created }; /*! * \internal * \brief Order a bundle replica's start after another replica's probe * * \param[in,out] replica Replica to order start for * \param[in,out] user_data Replica with probe to order after * * \return true (to indicate that any further replicas should be processed) */ static bool order_replica_start_after(pe__bundle_replica_t *replica, void *user_data) { pe__bundle_replica_t *probed_replica = user_data; if ((replica == probed_replica) || (replica->container == NULL)) { return true; } pcmk__new_ordering(probed_replica->container, pcmk__op_key(probed_replica->container->id, RSC_STATUS, 0), NULL, replica->container, pcmk__op_key(replica->container->id, RSC_START, 0), NULL, pe_order_optional|pe_order_same_node, replica->container->cluster); return true; } /*! * \internal * \brief Create probes for a bundle replica's resources * * \param[in,out] replica Replica to create probes for * \param[in,out] user_data struct probe_data * * \return true (to indicate that any further replicas should be processed) */ static bool create_replica_probes(pe__bundle_replica_t *replica, void *user_data) { struct probe_data *probe_data = user_data; if ((replica->ip != NULL) && replica->ip->cmds->create_probe(replica->ip, probe_data->node)) { probe_data->any_created = true; } if ((replica->child != NULL) && pe__same_node(probe_data->node, replica->node) && replica->child->cmds->create_probe(replica->child, probe_data->node)) { probe_data->any_created = true; } if ((replica->container != NULL) && replica->container->cmds->create_probe(replica->container, probe_data->node)) { probe_data->any_created = true; /* If we're limited to one replica per host (due to * the lack of an IP range probably), then we don't * want any of our peer containers starting until * we've established that no other copies are already * running. * * Partly this is to ensure that the maximum replicas per host is * observed, but also to ensure that the containers * don't fail to start because the necessary port * mappings (which won't include an IP for uniqueness) * are already taken */ if (probe_data->bundle->fns->max_per_node(probe_data->bundle) == 1) { pe__foreach_bundle_replica(probe_data->bundle, order_replica_start_after, replica); } } if ((replica->container != NULL) && (replica->remote != NULL) && replica->remote->cmds->create_probe(replica->remote, probe_data->node)) { /* Do not probe the remote resource until we know where the container is * running. This is required for REMOTE_CONTAINER_HACK to correctly * probe remote resources. */ char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS, 0); pe_action_t *probe = find_first_action(replica->remote->actions, probe_uuid, NULL, probe_data->node); free(probe_uuid); if (probe != NULL) { probe_data->any_created = true; pe_rsc_trace(probe_data->bundle, "Ordering %s probe on %s", replica->remote->id, pe__node_name(probe_data->node)); pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, RSC_START, 0), NULL, replica->remote, NULL, probe, pe_order_probe, probe_data->bundle->cluster); } } return true; } /*! * \internal * * \brief Schedule any probes needed for a bundle resource on a node * * \param[in,out] rsc Bundle resource to create probes for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node) { struct probe_data probe_data = { rsc, node, false }; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container)); pe__foreach_bundle_replica(rsc, create_replica_probes, &probe_data); return probe_data.any_created; } /*! * \internal * \brief Output actions for one bundle replica * * \param[in,out] replica Replica to output actions for * \param[in] user_data Unused * * \return true (to indicate that any further replicas should be processed) */ static bool output_replica_actions(pe__bundle_replica_t *replica, void *user_data) { if (replica->ip != NULL) { replica->ip->cmds->output_actions(replica->ip); } if (replica->container != NULL) { replica->container->cmds->output_actions(replica->container); } if (replica->remote != NULL) { replica->remote->cmds->output_actions(replica->remote); } if (replica->child != NULL) { replica->child->cmds->output_actions(replica->child); } return true; } /*! * \internal * \brief Output a summary of scheduled actions for a bundle resource * * \param[in,out] rsc Bundle resource to output actions for */ void pcmk__output_bundle_actions(pe_resource_t *rsc) { CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container)); pe__foreach_bundle_replica(rsc, output_replica_actions, NULL); } // Bundle implementation of resource_alloc_functions_t:add_utilization() void pcmk__bundle_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { pe_resource_t *container = NULL; CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container)); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } /* All bundle replicas are identical, so using the utilization of the first * is sufficient for any. Only the implicit container resource can have * utilization values. */ container = pe__first_container(rsc); if (container != NULL) { container->cmds->add_utilization(container, orig_rsc, all_rscs, utilization); } } // Bundle implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__bundle_shutdown_lock(pe_resource_t *rsc) { CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_container)); // Bundles currently don't support shutdown locks } diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c index 014369e47d..a1b3bd92fe 100644 --- a/lib/pacemaker/pcmk_sched_clone.c +++ b/lib/pacemaker/pcmk_sched_clone.c @@ -1,687 +1,688 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Assign a clone resource's instances to nodes * * \param[in,out] rsc Clone resource to assign * \param[in] prefer Node to prefer, if all else is equal * * \return NULL (clones are not assigned to a single node) */ pe_node_t * pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer) { CRM_ASSERT(pe_rsc_is_clone(rsc)); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return NULL; // Assignment has already been done } // Detect assignment loops if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id); return NULL; } pe__set_resource_flags(rsc, pe_rsc_allocating); // If this clone is promotable, consider nodes' promotion scores if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__add_promotion_scores(rsc); } /* If this clone is colocated with any other resources, assign those first. * Since the this_with_colocations() method boils down to a copy of rsc_cons * for clones, we can use that here directly for efficiency. */ for (GList *iter = rsc->rsc_cons; iter != NULL; iter = iter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) iter->data; pe_rsc_trace(rsc, "%s: Assigning colocation %s primary %s first", rsc->id, constraint->id, constraint->primary->id); constraint->primary->cmds->assign(constraint->primary, prefer); } /* If any resources are colocated with this one, consider their preferences. * Because the with_this_colocations() method boils down to a copy of * rsc_cons_lhs for clones, we can use that here directly for efficiency. */ g_list_foreach(rsc->rsc_cons_lhs, pcmk__add_dependent_scores, rsc); pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, rsc->cluster); rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance); pcmk__assign_instances(rsc, rsc->children, pe__clone_max(rsc), pe__clone_node_max(rsc)); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__set_instance_roles(rsc); } pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating); pe_rsc_trace(rsc, "Assigned clone %s", rsc->id); return NULL; } /*! * \internal * \brief Create all actions needed for a given clone resource * * \param[in,out] rsc Clone resource to create actions for */ void pcmk__clone_create_actions(pe_resource_t *rsc) { CRM_ASSERT(pe_rsc_is_clone(rsc)); pe_rsc_trace(rsc, "Creating actions for clone %s", rsc->id); pcmk__create_instance_actions(rsc, rsc->children); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__create_promotable_actions(rsc); } } /*! * \internal * \brief Create implicit constraints needed for a clone resource * * \param[in,out] rsc Clone resource to create implicit constraints for */ void pcmk__clone_internal_constraints(pe_resource_t *rsc) { bool ordered = false; CRM_ASSERT(pe_rsc_is_clone(rsc)); pe_rsc_trace(rsc, "Creating internal constraints for clone %s", rsc->id); // Restart ordering: Stop -> stopped -> start -> started pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional); pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left); pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left); // Demoted -> stop and started -> promote if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional); pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left); } ordered = pe__clone_is_ordered(rsc); if (ordered) { /* Ordered clone instances must start and stop by instance number. The * instances might have been previously shuffled for assignment or * promotion purposes, so re-sort them. */ rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); } for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; instance->cmds->internal_constraints(instance); // Start clone -> start instance -> clone started pcmk__order_starts(rsc, instance, pe_order_runnable_left |pe_order_implies_first_printed); pcmk__order_resource_actions(instance, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed); // Stop clone -> stop instance -> clone stopped pcmk__order_stops(rsc, instance, pe_order_implies_first_printed); pcmk__order_resource_actions(instance, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed); /* Instances of ordered clones must be started and stopped by instance * number. Since only some instances may be starting or stopping, order * each instance relative to every later instance. */ if (ordered) { for (GList *later = iter->next; later != NULL; later = later->next) { pcmk__order_starts(instance, (pe_resource_t *) later->data, pe_order_optional); pcmk__order_stops((pe_resource_t *) later->data, instance, pe_order_optional); } } } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__order_promotable_instances(rsc); } } /*! * \internal * \brief Check whether colocated resources can be interleaved * * \param[in] colocation Colocation constraint with clone as primary * * \return true if colocated resources can be interleaved, otherwise false */ static bool can_interleave(const pcmk__colocation_t *colocation) { const pe_resource_t *dependent = colocation->dependent; // Only colocations between clone or bundle resources use interleaving if (dependent->variant <= pe_group) { return false; } // Only the dependent needs to be marked for interleaving if (!crm_is_true(g_hash_table_lookup(dependent->meta, XML_RSC_ATTR_INTERLEAVE))) { return false; } /* @TODO Do we actually care about multiple primary instances sharing a * dependent instance? */ if (dependent->fns->max_per_node(dependent) != colocation->primary->fns->max_per_node(colocation->primary)) { pcmk__config_err("Cannot interleave %s and %s because they do not " "support the same number of instances per node", dependent->id, colocation->primary->id); return false; } return true; } /*! * \internal * \brief Apply a colocation's score to node scores or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node scores (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void pcmk__clone_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { const GList *iter = NULL; /* This should never be called for the clone itself as a dependent. Instead, * we add its colocation constraints to its instances and call the * apply_coloc_score() method for the instances as dependents. */ CRM_ASSERT(!for_dependent); CRM_ASSERT((colocation != NULL) && pe_rsc_is_clone(primary) && (dependent != NULL) && (dependent->variant == pe_native)); if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "Delaying processing colocation %s " "because cloned primary %s is still provisional", colocation->id, primary->id); return; } pe_rsc_trace(primary, "Processing colocation %s (%s with clone %s @%s)", colocation->id, dependent->id, primary->id, pcmk_readable_score(colocation->score)); // Apply role-specific colocations if (pcmk_is_set(primary->flags, pe_rsc_promotable) && (colocation->primary_role != RSC_ROLE_UNKNOWN)) { if (pcmk_is_set(dependent->flags, pe_rsc_provisional)) { // We're assigning the dependent to a node pcmk__update_dependent_with_promotable(primary, dependent, colocation); return; } if (colocation->dependent_role == RSC_ROLE_PROMOTED) { // We're choosing a role for the dependent pcmk__update_promotable_dependent_priority(primary, dependent, colocation); return; } } // Apply interleaved colocations if (can_interleave(colocation)) { const pe_resource_t *primary_instance = NULL; primary_instance = pcmk__find_compatible_instance(dependent, primary, RSC_ROLE_UNKNOWN, false); if (primary_instance != NULL) { pe_rsc_debug(primary, "Interleaving %s with %s", dependent->id, primary_instance->id); dependent->cmds->apply_coloc_score(dependent, primary_instance, colocation, true); } else if (colocation->score >= INFINITY) { crm_notice("%s cannot run because it cannot interleave with " "any instance of %s", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true); } else { pe_rsc_debug(primary, "%s will not colocate with %s " "because no instance can interleave with it", dependent->id, primary->id); } return; } // Apply mandatory colocations if (colocation->score >= INFINITY) { GList *primary_nodes = NULL; // Dependent can run only where primary will have unblocked instances for (iter = primary->children; iter != NULL; iter = iter->next) { const pe_resource_t *instance = iter->data; pe_node_t *chosen = instance->fns->location(instance, NULL, 0); if ((chosen != NULL) && !is_set_recursive(instance, pe_rsc_block, TRUE)) { pe_rsc_trace(primary, "Allowing %s: %s %d", colocation->id, pe__node_name(chosen), chosen->weight); primary_nodes = g_list_prepend(primary_nodes, chosen); } } - node_list_exclude(dependent->allowed_nodes, primary_nodes, FALSE); + pcmk__colocation_intersect_nodes(dependent, primary, colocation, + primary_nodes, false); g_list_free(primary_nodes); return; } // Apply optional colocations for (iter = primary->children; iter != NULL; iter = iter->next) { const pe_resource_t *instance = iter->data; instance->cmds->apply_coloc_score(dependent, instance, colocation, false); } } // Clone implementation of resource_alloc_functions_t:with_this_colocations() void pcmk__with_clone_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list) { CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return); if (rsc == orig_rsc) { // Colocations are wanted for clone itself pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc); } else { pcmk__add_collective_constraints(list, orig_rsc, rsc, true); } } // Clone implementation of resource_alloc_functions_t:this_with_colocations() void pcmk__clone_with_colocations(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList **list) { CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return); if (rsc == orig_rsc) { // Colocations are wanted for clone itself pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc); } else { pcmk__add_collective_constraints(list, orig_rsc, rsc, false); } } /*! * \internal * \brief Return action flags for a given clone resource action * * \param[in,out] action Action to get flags for * \param[in] node If not NULL, limit effects to this node * * \return Flags appropriate to \p action on \p node */ uint32_t pcmk__clone_action_flags(pe_action_t *action, const pe_node_t *node) { CRM_ASSERT((action != NULL) && pe_rsc_is_clone(action->rsc)); return pcmk__collective_action_flags(action, action->rsc->children, node); } /*! * \internal * \brief Apply a location constraint to a clone resource's allowed node scores * * \param[in,out] rsc Clone resource to apply constraint to * \param[in,out] location Location constraint to apply */ void pcmk__clone_apply_location(pe_resource_t *rsc, pe__location_t *location) { CRM_CHECK((location != NULL) && pe_rsc_is_clone(rsc), return); pcmk__apply_location(rsc, location); for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; instance->cmds->apply_location(instance, location); } } // GFunc wrapper for calling the action_flags() resource method static void call_action_flags(gpointer data, gpointer user_data) { pe_resource_t *rsc = user_data; rsc->cmds->action_flags((pe_action_t *) data, NULL); } /*! * \internal * \brief Add a clone resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__clone_add_actions_to_graph(pe_resource_t *rsc) { CRM_ASSERT(pe_rsc_is_clone(rsc)); g_list_foreach(rsc->actions, call_action_flags, rsc); pe__create_clone_notifications(rsc); for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child_rsc = (pe_resource_t *) iter->data; child_rsc->cmds->add_actions_to_graph(child_rsc); } pcmk__add_rsc_actions_to_graph(rsc); pe__free_clone_notification_data(rsc); } /*! * \internal * \brief Check whether a resource or any children have been probed on a node * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return true if \p node is in the known_on table of \p rsc or any of its * children, otherwise false */ static bool rsc_probed_on(const pe_resource_t *rsc, const pe_node_t *node) { if (rsc->children != NULL) { for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; if (rsc_probed_on(child, node)) { return true; } } return false; } if (rsc->known_on != NULL) { GHashTableIter iter; pe_node_t *known_node = NULL; g_hash_table_iter_init(&iter, rsc->known_on); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) { if (pe__same_node(node, known_node)) { return true; } } } return false; } /*! * \internal * \brief Find clone instance that has been probed on given node * * \param[in] clone Clone resource to check * \param[in] node Node to check * * \return Instance of \p clone that has been probed on \p node if any, * otherwise NULL */ static pe_resource_t * find_probed_instance_on(const pe_resource_t *clone, const pe_node_t *node) { for (GList *iter = clone->children; iter != NULL; iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; if (rsc_probed_on(instance, node)) { return instance; } } return NULL; } /*! * \internal * \brief Probe an anonymous clone on a node * * \param[in,out] clone Anonymous clone to probe * \param[in,out] node Node to probe \p clone on */ static bool probe_anonymous_clone(pe_resource_t *clone, pe_node_t *node) { // Check whether we already probed an instance on this node pe_resource_t *child = find_probed_instance_on(clone, node); // Otherwise, check if we plan to start an instance on this node for (GList *iter = clone->children; (iter != NULL) && (child == NULL); iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; const pe_node_t *instance_node = NULL; instance_node = instance->fns->location(instance, NULL, 0); if (pe__same_node(instance_node, node)) { child = instance; } } // Otherwise, use the first clone instance if (child == NULL) { child = clone->children->data; } // Anonymous clones only need to probe a single instance return child->cmds->create_probe(child, node); } /*! * \internal * \brief Schedule any probes needed for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool pcmk__clone_create_probe(pe_resource_t *rsc, pe_node_t *node) { CRM_ASSERT((node != NULL) && pe_rsc_is_clone(rsc)); if (rsc->exclusive_discover) { /* The clone is configured to be probed only where a location constraint * exists with resource-discovery set to exclusive. * * This check is not strictly necessary here since the instance's * create_probe() method would also check, but doing it here is more * efficient (especially for unique clones with a large number of * instances), and affects the CRM_meta_notify_available_uname variable * passed with notify actions. */ pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if ((allowed == NULL) || (allowed->rsc_discover_mode != pe_discover_exclusive)) { /* This node is not marked for resource discovery. Remove it from * allowed_nodes so that notifications contain only nodes that the * clone can possibly run on. */ pe_rsc_trace(rsc, "Skipping probe for %s on %s because resource has " "exclusive discovery but is not allowed on node", rsc->id, pe__node_name(node)); g_hash_table_remove(rsc->allowed_nodes, node->details->id); return false; } } rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); if (pcmk_is_set(rsc->flags, pe_rsc_unique)) { return pcmk__probe_resource_list(rsc->children, node); } else { return probe_anonymous_clone(rsc, node); } } /*! * \internal * \brief Add meta-attributes relevant to transition graph actions to XML * * Add clone-specific meta-attributes needed for transition graph actions. * * \param[in] rsc Clone resource whose meta-attributes should be added * \param[in,out] xml Transition graph action attributes XML to add to */ void pcmk__clone_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml) { char *name = NULL; CRM_ASSERT(pe_rsc_is_clone(rsc) && (xml != NULL)); name = crm_meta_name(XML_RSC_ATTR_UNIQUE); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique)); free(name); name = crm_meta_name(XML_RSC_ATTR_NOTIFY); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify)); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX); crm_xml_add_int(xml, name, pe__clone_max(rsc)); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX); crm_xml_add_int(xml, name, pe__clone_node_max(rsc)); free(name); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { int promoted_max = pe__clone_promoted_max(rsc); int promoted_node_max = pe__clone_promoted_node_max(rsc); name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX); crm_xml_add_int(xml, name, promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX); crm_xml_add_int(xml, name, promoted_node_max); free(name); /* @COMPAT Maintain backward compatibility with resource agents that * expect the old names (deprecated since 2.0.0). */ name = crm_meta_name(PCMK_XA_PROMOTED_MAX_LEGACY); crm_xml_add_int(xml, name, promoted_max); free(name); name = crm_meta_name(PCMK_XA_PROMOTED_NODE_MAX_LEGACY); crm_xml_add_int(xml, name, promoted_node_max); free(name); } } // Clone implementation of resource_alloc_functions_t:add_utilization() void pcmk__clone_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { bool existing = false; pe_resource_t *child = NULL; CRM_ASSERT(pe_rsc_is_clone(rsc) && (orig_rsc != NULL) && (utilization != NULL)); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } // Look for any child already existing in the list for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { child = (pe_resource_t *) iter->data; if (g_list_find(all_rscs, child)) { existing = true; // Keep checking remaining children } else { // If this is a clone of a group, look for group's members for (GList *member_iter = child->children; member_iter != NULL; member_iter = member_iter->next) { pe_resource_t *member = (pe_resource_t *) member_iter->data; if (g_list_find(all_rscs, member) != NULL) { // Add *child's* utilization, not group member's child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); existing = true; break; } } } } if (!existing && (rsc->children != NULL)) { // If nothing was found, still add first child's utilization child = (pe_resource_t *) rsc->children->data; child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); } } // Clone implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__clone_shutdown_lock(pe_resource_t *rsc) { CRM_ASSERT(pe_rsc_is_clone(rsc)); return; // Clones currently don't support shutdown locks } diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c index e47185455d..42490c8189 100644 --- a/lib/pacemaker/pcmk_sched_colocation.c +++ b/lib/pacemaker/pcmk_sched_colocation.c @@ -1,1789 +1,1845 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include "crm/common/util.h" #include "crm/common/xml_internal.h" #include "crm/msg_xml.h" #include "libpacemaker_private.h" // Used to temporarily mark a node as unusable #define INFINITY_HACK (INFINITY * -100) /*! * \internal * \brief Compare two colocations according to priority * * Compare two colocations according to the order in which they should be * considered, based on either their dependent resources or their primary * resources -- preferring (in order): * * Colocation that is not \c NULL * * Colocation whose resource has higher priority * * Colocation whose resource is of a higher-level variant * (bundle > clone > group > primitive) * * Colocation whose resource is promotable, if both are clones * * Colocation whose resource has lower ID in lexicographic order * * \param[in] colocation1 First colocation to compare * \param[in] colocation2 Second colocation to compare * \param[in] dependent If \c true, compare colocations by dependent * priority; otherwise compare them by primary priority * * \return A negative number if \p colocation1 should be considered first, * a positive number if \p colocation2 should be considered first, * or 0 if order doesn't matter */ static gint cmp_colocation_priority(const pcmk__colocation_t *colocation1, const pcmk__colocation_t *colocation2, bool dependent) { const pe_resource_t *rsc1 = NULL; const pe_resource_t *rsc2 = NULL; if (colocation1 == NULL) { return 1; } if (colocation2 == NULL) { return -1; } if (dependent) { rsc1 = colocation1->dependent; rsc2 = colocation2->dependent; CRM_ASSERT(colocation1->primary != NULL); } else { rsc1 = colocation1->primary; rsc2 = colocation2->primary; CRM_ASSERT(colocation1->dependent != NULL); } CRM_ASSERT((rsc1 != NULL) && (rsc2 != NULL)); if (rsc1->priority > rsc2->priority) { return -1; } if (rsc1->priority < rsc2->priority) { return 1; } // Process clones before primitives and groups if (rsc1->variant > rsc2->variant) { return -1; } if (rsc1->variant < rsc2->variant) { return 1; } /* @COMPAT scheduler <2.0.0: Process promotable clones before nonpromotable * clones (probably unnecessary, but avoids having to update regression * tests) */ if (rsc1->variant == pe_clone) { if (pcmk_is_set(rsc1->flags, pe_rsc_promotable) && !pcmk_is_set(rsc2->flags, pe_rsc_promotable)) { return -1; } if (!pcmk_is_set(rsc1->flags, pe_rsc_promotable) && pcmk_is_set(rsc2->flags, pe_rsc_promotable)) { return 1; } } return strcmp(rsc1->id, rsc2->id); } /*! * \internal * \brief Compare two colocations according to priority based on dependents * * Compare two colocations according to the order in which they should be * considered, based on their dependent resources -- preferring (in order): * * Colocation that is not \c NULL * * Colocation whose resource has higher priority * * Colocation whose resource is of a higher-level variant * (bundle > clone > group > primitive) * * Colocation whose resource is promotable, if both are clones * * Colocation whose resource has lower ID in lexicographic order * * \param[in] a First colocation to compare * \param[in] b Second colocation to compare * * \return A negative number if \p a should be considered first, * a positive number if \p b should be considered first, * or 0 if order doesn't matter */ static gint cmp_dependent_priority(gconstpointer a, gconstpointer b) { return cmp_colocation_priority(a, b, true); } /*! * \internal * \brief Compare two colocations according to priority based on primaries * * Compare two colocations according to the order in which they should be * considered, based on their primary resources -- preferring (in order): * * Colocation that is not \c NULL * * Colocation whose primary has higher priority * * Colocation whose primary is of a higher-level variant * (bundle > clone > group > primitive) * * Colocation whose primary is promotable, if both are clones * * Colocation whose primary has lower ID in lexicographic order * * \param[in] a First colocation to compare * \param[in] b Second colocation to compare * * \return A negative number if \p a should be considered first, * a positive number if \p b should be considered first, * or 0 if order doesn't matter */ static gint cmp_primary_priority(gconstpointer a, gconstpointer b) { return cmp_colocation_priority(a, b, false); } /*! * \internal * \brief Add a "this with" colocation constraint to a sorted list * * \param[in,out] list List of constraints to add \p colocation to * \param[in] colocation Colocation constraint to add to \p list * \param[in] rsc Resource whose colocations we're getting (for * logging only) * * \note The list will be sorted using cmp_primary_priority(). */ void pcmk__add_this_with(GList **list, const pcmk__colocation_t *colocation, const pe_resource_t *rsc) { CRM_ASSERT((list != NULL) && (colocation != NULL) && (rsc != NULL)); pe_rsc_trace(rsc, "Adding colocation %s (%s with %s using %s @%s) to " "'this with' list for %s", colocation->id, colocation->dependent->id, colocation->primary->id, colocation->node_attribute, pcmk_readable_score(colocation->score), rsc->id); *list = g_list_insert_sorted(*list, (gpointer) colocation, cmp_primary_priority); } /*! * \internal * \brief Add a list of "this with" colocation constraints to a list * * \param[in,out] list List of constraints to add \p addition to * \param[in] addition List of colocation constraints to add to \p list * \param[in] rsc Resource whose colocations we're getting (for * logging only) * * \note The lists must be pre-sorted by cmp_primary_priority(). */ void pcmk__add_this_with_list(GList **list, GList *addition, const pe_resource_t *rsc) { CRM_ASSERT((list != NULL) && (rsc != NULL)); pcmk__if_tracing( {}, // Always add each colocation individually if tracing { if (*list == NULL) { // Trivial case for efficiency if not tracing *list = g_list_copy(addition); return; } } ); for (const GList *iter = addition; iter != NULL; iter = iter->next) { pcmk__add_this_with(list, addition->data, rsc); } } /*! * \internal * \brief Add a "with this" colocation constraint to a sorted list * * \param[in,out] list List of constraints to add \p colocation to * \param[in] colocation Colocation constraint to add to \p list * \param[in] rsc Resource whose colocations we're getting (for * logging only) * * \note The list will be sorted using cmp_dependent_priority(). */ void pcmk__add_with_this(GList **list, const pcmk__colocation_t *colocation, const pe_resource_t *rsc) { CRM_ASSERT((list != NULL) && (colocation != NULL) && (rsc != NULL)); pe_rsc_trace(rsc, "Adding colocation %s (%s with %s using %s @%s) to " "'with this' list for %s", colocation->id, colocation->dependent->id, colocation->primary->id, colocation->node_attribute, pcmk_readable_score(colocation->score), rsc->id); *list = g_list_insert_sorted(*list, (gpointer) colocation, cmp_dependent_priority); } /*! * \internal * \brief Add a list of "with this" colocation constraints to a list * * \param[in,out] list List of constraints to add \p addition to * \param[in] addition List of colocation constraints to add to \p list * \param[in] rsc Resource whose colocations we're getting (for * logging only) * * \note The lists must be pre-sorted by cmp_dependent_priority(). */ void pcmk__add_with_this_list(GList **list, GList *addition, const pe_resource_t *rsc) { CRM_ASSERT((list != NULL) && (rsc != NULL)); pcmk__if_tracing( {}, // Always add each colocation individually if tracing { if (*list == NULL) { // Trivial case for efficiency if not tracing *list = g_list_copy(addition); return; } } ); for (const GList *iter = addition; iter != NULL; iter = iter->next) { pcmk__add_with_this(list, addition->data, rsc); } } /*! * \internal * \brief Add orderings necessary for an anti-colocation constraint * * \param[in,out] first_rsc One resource in an anti-colocation * \param[in] first_role Anti-colocation role of \p first_rsc * \param[in] then_rsc Other resource in the anti-colocation * \param[in] then_role Anti-colocation role of \p then_rsc */ static void anti_colocation_order(pe_resource_t *first_rsc, int first_role, pe_resource_t *then_rsc, int then_role) { const char *first_tasks[] = { NULL, NULL }; const char *then_tasks[] = { NULL, NULL }; /* Actions to make first_rsc lose first_role */ if (first_role == RSC_ROLE_PROMOTED) { first_tasks[0] = CRMD_ACTION_DEMOTE; } else { first_tasks[0] = CRMD_ACTION_STOP; if (first_role == RSC_ROLE_UNPROMOTED) { first_tasks[1] = CRMD_ACTION_PROMOTE; } } /* Actions to make then_rsc gain then_role */ if (then_role == RSC_ROLE_PROMOTED) { then_tasks[0] = CRMD_ACTION_PROMOTE; } else { then_tasks[0] = CRMD_ACTION_START; if (then_role == RSC_ROLE_UNPROMOTED) { then_tasks[1] = CRMD_ACTION_DEMOTE; } } for (int first_lpc = 0; (first_lpc <= 1) && (first_tasks[first_lpc] != NULL); first_lpc++) { for (int then_lpc = 0; (then_lpc <= 1) && (then_tasks[then_lpc] != NULL); then_lpc++) { pcmk__order_resource_actions(first_rsc, first_tasks[first_lpc], then_rsc, then_tasks[then_lpc], pe_order_anti_colocation); } } } /*! * \internal * \brief Add a new colocation constraint to a cluster working set * * \param[in] id XML ID for this constraint * \param[in] node_attr Colocate by this attribute (NULL for #uname) * \param[in] score Constraint score * \param[in,out] dependent Resource to be colocated * \param[in,out] primary Resource to colocate \p dependent with * \param[in] dependent_role Current role of \p dependent * \param[in] primary_role Current role of \p primary * \param[in] flags Group of enum pcmk__coloc_flags */ void pcmk__new_colocation(const char *id, const char *node_attr, int score, pe_resource_t *dependent, pe_resource_t *primary, const char *dependent_role, const char *primary_role, uint32_t flags) { pcmk__colocation_t *new_con = NULL; CRM_CHECK(id != NULL, return); if ((dependent == NULL) || (primary == NULL)) { pcmk__config_err("Ignoring colocation '%s' because resource " "does not exist", id); return; } if (score == 0) { pe_rsc_trace(dependent, "Ignoring colocation '%s' (%s with %s) because score is 0", id, dependent->id, primary->id); return; } new_con = calloc(1, sizeof(pcmk__colocation_t)); CRM_ASSERT(new_con != NULL); if (pcmk__str_eq(dependent_role, RSC_ROLE_STARTED_S, pcmk__str_null_matches|pcmk__str_casei)) { dependent_role = RSC_ROLE_UNKNOWN_S; } if (pcmk__str_eq(primary_role, RSC_ROLE_STARTED_S, pcmk__str_null_matches|pcmk__str_casei)) { primary_role = RSC_ROLE_UNKNOWN_S; } new_con->id = id; new_con->dependent = dependent; new_con->primary = primary; new_con->score = score; new_con->dependent_role = text2role(dependent_role); new_con->primary_role = text2role(primary_role); new_con->node_attribute = pcmk__s(node_attr, CRM_ATTR_UNAME); new_con->flags = flags; pcmk__add_this_with(&(dependent->rsc_cons), new_con, dependent); pcmk__add_with_this(&(primary->rsc_cons_lhs), new_con, primary); dependent->cluster->colocation_constraints = g_list_prepend( dependent->cluster->colocation_constraints, new_con); if (score <= -INFINITY) { anti_colocation_order(dependent, new_con->dependent_role, primary, new_con->primary_role); anti_colocation_order(primary, new_con->primary_role, dependent, new_con->dependent_role); } } /*! * \internal * \brief Return the boolean influence corresponding to configuration * * \param[in] coloc_id Colocation XML ID (for error logging) * \param[in] rsc Resource involved in constraint (for default) * \param[in] influence_s String value of influence option * * \return pcmk__coloc_influence if string evaluates true, or string is NULL or * invalid and resource's critical option evaluates true, otherwise * pcmk__coloc_none */ static uint32_t unpack_influence(const char *coloc_id, const pe_resource_t *rsc, const char *influence_s) { if (influence_s != NULL) { int influence_i = 0; if (crm_str_to_boolean(influence_s, &influence_i) < 0) { pcmk__config_err("Constraint '%s' has invalid value for " XML_COLOC_ATTR_INFLUENCE " (using default)", coloc_id); } else { return (influence_i == 0)? pcmk__coloc_none : pcmk__coloc_influence; } } if (pcmk_is_set(rsc->flags, pe_rsc_critical)) { return pcmk__coloc_influence; } return pcmk__coloc_none; } static void unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, const char *influence_s, pe_working_set_t *data_set) { xmlNode *xml_rsc = NULL; pe_resource_t *other = NULL; pe_resource_t *resource = NULL; const char *set_id = ID(set); const char *role = crm_element_value(set, "role"); bool with_previous = false; int local_score = score; bool sequential = false; uint32_t flags = pcmk__coloc_none; const char *xml_rsc_id = NULL; const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE); if (score_s) { local_score = char2score(score_s); } if (local_score == 0) { crm_trace("Ignoring colocation '%s' for set '%s' because score is 0", coloc_id, set_id); return; } /* @COMPAT The deprecated "ordering" attribute specifies whether resources * in a positive-score set are colocated with the previous or next resource. */ if (pcmk__str_eq(crm_element_value(set, "ordering"), "group", pcmk__str_null_matches|pcmk__str_casei)) { with_previous = true; } else { pe_warn_once(pe_wo_set_ordering, "Support for 'ordering' other than 'group' in " XML_CONS_TAG_RSC_SET " (such as %s) is deprecated and " "will be removed in a future release", set_id); } if ((pcmk__xe_get_bool_attr(set, "sequential", &sequential) == pcmk_rc_ok) && !sequential) { return; } if (local_score > 0) { for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { xml_rsc_id = ID(xml_rsc); resource = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); if (resource == NULL) { // Should be possible only with validation disabled pcmk__config_err("Ignoring %s and later resources in set %s: " "No such resource", xml_rsc_id, set_id); return; } if (other != NULL) { flags = pcmk__coloc_explicit | unpack_influence(coloc_id, resource, influence_s); if (with_previous) { pe_rsc_trace(resource, "Colocating %s with %s in set %s", resource->id, other->id, set_id); pcmk__new_colocation(set_id, NULL, local_score, resource, other, role, role, flags); } else { pe_rsc_trace(resource, "Colocating %s with %s in set %s", other->id, resource->id, set_id); pcmk__new_colocation(set_id, NULL, local_score, other, resource, role, role, flags); } } other = resource; } } else { /* Anti-colocating with every prior resource is * the only way to ensure the intuitive result * (i.e. that no one in the set can run with anyone else in the set) */ for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { xmlNode *xml_rsc_with = NULL; xml_rsc_id = ID(xml_rsc); resource = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); if (resource == NULL) { // Should be possible only with validation disabled pcmk__config_err("Ignoring %s and later resources in set %s: " "No such resource", xml_rsc_id, set_id); return; } flags = pcmk__coloc_explicit | unpack_influence(coloc_id, resource, influence_s); for (xml_rsc_with = first_named_child(set, XML_TAG_RESOURCE_REF); xml_rsc_with != NULL; xml_rsc_with = crm_next_same_xml(xml_rsc_with)) { xml_rsc_id = ID(xml_rsc_with); if (pcmk__str_eq(resource->id, xml_rsc_id, pcmk__str_none)) { break; } other = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); CRM_ASSERT(other != NULL); // We already processed it pcmk__new_colocation(set_id, NULL, local_score, resource, other, role, role, flags); } } } } /*! * \internal * \brief Colocate two resource sets relative to each other * * \param[in] id Colocation XML ID * \param[in] set1 Dependent set * \param[in] set2 Primary set * \param[in] score Colocation score * \param[in] influence_s Value of colocation's "influence" attribute * \param[in,out] data_set Cluster working set */ static void colocate_rsc_sets(const char *id, const xmlNode *set1, const xmlNode *set2, int score, const char *influence_s, pe_working_set_t *data_set) { xmlNode *xml_rsc = NULL; pe_resource_t *rsc_1 = NULL; pe_resource_t *rsc_2 = NULL; const char *xml_rsc_id = NULL; const char *role_1 = crm_element_value(set1, "role"); const char *role_2 = crm_element_value(set2, "role"); int rc = pcmk_rc_ok; bool sequential = false; uint32_t flags = pcmk__coloc_none; if (score == 0) { crm_trace("Ignoring colocation '%s' between sets %s and %s " "because score is 0", id, ID(set1), ID(set2)); return; } rc = pcmk__xe_get_bool_attr(set1, "sequential", &sequential); if ((rc != pcmk_rc_ok) || sequential) { // Get the first one xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); if (xml_rsc != NULL) { xml_rsc_id = ID(xml_rsc); rsc_1 = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); if (rsc_1 == NULL) { // Should be possible only with validation disabled pcmk__config_err("Ignoring colocation of set %s with set %s " "because first resource %s not found", ID(set1), ID(set2), xml_rsc_id); return; } } } rc = pcmk__xe_get_bool_attr(set2, "sequential", &sequential); if ((rc != pcmk_rc_ok) || sequential) { // Get the last one for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { xml_rsc_id = ID(xml_rsc); } rsc_2 = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); if (rsc_2 == NULL) { // Should be possible only with validation disabled pcmk__config_err("Ignoring colocation of set %s with set %s " "because last resource %s not found", ID(set1), ID(set2), xml_rsc_id); return; } } if ((rsc_1 != NULL) && (rsc_2 != NULL)) { // Both sets are sequential flags = pcmk__coloc_explicit | unpack_influence(id, rsc_1, influence_s); pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, flags); } else if (rsc_1 != NULL) { // Only set1 is sequential flags = pcmk__coloc_explicit | unpack_influence(id, rsc_1, influence_s); for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { xml_rsc_id = ID(xml_rsc); rsc_2 = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); if (rsc_2 == NULL) { // Should be possible only with validation disabled pcmk__config_err("Ignoring set %s colocation with resource %s " "in set %s: No such resource", ID(set1), xml_rsc_id, ID(set2)); continue; } pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, flags); } } else if (rsc_2 != NULL) { // Only set2 is sequential for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { xml_rsc_id = ID(xml_rsc); rsc_1 = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); if (rsc_1 == NULL) { // Should be possible only with validation disabled pcmk__config_err("Ignoring colocation of set %s resource %s " "with set %s: No such resource", ID(set1), xml_rsc_id, ID(set2)); continue; } flags = pcmk__coloc_explicit | unpack_influence(id, rsc_1, influence_s); pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, flags); } } else { // Neither set is sequential for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { xmlNode *xml_rsc_2 = NULL; xml_rsc_id = ID(xml_rsc); rsc_1 = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); if (rsc_1 == NULL) { // Should be possible only with validation disabled pcmk__config_err("Ignoring colocation of set %s resource %s " "with set %s: No such resource", ID(set1), xml_rsc_id, ID(set2)); continue; } flags = pcmk__coloc_explicit | unpack_influence(id, rsc_1, influence_s); for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF); xml_rsc_2 != NULL; xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) { xml_rsc_id = ID(xml_rsc_2); rsc_2 = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); if (rsc_2 == NULL) { // Should be possible only with validation disabled pcmk__config_err("Ignoring colocation of set %s resource " "%s with set %s resource %s: No such " "resource", ID(set1), ID(xml_rsc), ID(set2), xml_rsc_id); continue; } pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, flags); } } } } static void unpack_simple_colocation(xmlNode *xml_obj, const char *id, const char *influence_s, pe_working_set_t *data_set) { int score_i = 0; uint32_t flags = pcmk__coloc_none; const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); const char *dependent_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); const char *primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET); const char *dependent_role = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE); const char *primary_role = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE); const char *attr = crm_element_value(xml_obj, XML_COLOC_ATTR_NODE_ATTR); const char *primary_instance = NULL; const char *dependent_instance = NULL; pe_resource_t *primary = NULL; pe_resource_t *dependent = NULL; primary = pcmk__find_constraint_resource(data_set->resources, primary_id); dependent = pcmk__find_constraint_resource(data_set->resources, dependent_id); // @COMPAT: Deprecated since 2.1.5 primary_instance = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_INSTANCE); dependent_instance = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_INSTANCE); if (dependent_instance != NULL) { pe_warn_once(pe_wo_coloc_inst, "Support for " XML_COLOC_ATTR_SOURCE_INSTANCE " is " "deprecated and will be removed in a future release."); } if (primary_instance != NULL) { pe_warn_once(pe_wo_coloc_inst, "Support for " XML_COLOC_ATTR_TARGET_INSTANCE " is " "deprecated and will be removed in a future release."); } if (dependent == NULL) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "does not exist", id, dependent_id); return; } else if (primary == NULL) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "does not exist", id, primary_id); return; } else if ((dependent_instance != NULL) && !pe_rsc_is_clone(dependent)) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "is not a clone but instance '%s' was requested", id, dependent_id, dependent_instance); return; } else if ((primary_instance != NULL) && !pe_rsc_is_clone(primary)) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "is not a clone but instance '%s' was requested", id, primary_id, primary_instance); return; } if (dependent_instance != NULL) { dependent = find_clone_instance(dependent, dependent_instance); if (dependent == NULL) { pcmk__config_warn("Ignoring constraint '%s' because resource '%s' " "does not have an instance '%s'", id, dependent_id, dependent_instance); return; } } if (primary_instance != NULL) { primary = find_clone_instance(primary, primary_instance); if (primary == NULL) { pcmk__config_warn("Ignoring constraint '%s' because resource '%s' " "does not have an instance '%s'", "'%s'", id, primary_id, primary_instance); return; } } if (pcmk__xe_attr_is_true(xml_obj, XML_CONS_ATTR_SYMMETRICAL)) { pcmk__config_warn("The colocation constraint '" XML_CONS_ATTR_SYMMETRICAL "' attribute has been removed"); } if (score) { score_i = char2score(score); } flags = pcmk__coloc_explicit | unpack_influence(id, dependent, influence_s); pcmk__new_colocation(id, attr, score_i, dependent, primary, dependent_role, primary_role, flags); } // \return Standard Pacemaker return code static int unpack_colocation_tags(xmlNode *xml_obj, xmlNode **expanded_xml, pe_working_set_t *data_set) { const char *id = NULL; const char *dependent_id = NULL; const char *primary_id = NULL; const char *dependent_role = NULL; const char *primary_role = NULL; pe_resource_t *dependent = NULL; pe_resource_t *primary = NULL; pe_tag_t *dependent_tag = NULL; pe_tag_t *primary_tag = NULL; xmlNode *dependent_set = NULL; xmlNode *primary_set = NULL; bool any_sets = false; *expanded_xml = NULL; CRM_CHECK(xml_obj != NULL, return EINVAL); id = ID(xml_obj); if (id == NULL) { pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, crm_element_name(xml_obj)); return pcmk_rc_unpack_error; } // Check whether there are any resource sets with template or tag references *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set); if (*expanded_xml != NULL) { crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation"); return pcmk_rc_ok; } dependent_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET); if ((dependent_id == NULL) || (primary_id == NULL)) { return pcmk_rc_ok; } if (!pcmk__valid_resource_or_tag(data_set, dependent_id, &dependent, &dependent_tag)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", id, dependent_id); return pcmk_rc_unpack_error; } if (!pcmk__valid_resource_or_tag(data_set, primary_id, &primary, &primary_tag)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", id, primary_id); return pcmk_rc_unpack_error; } if ((dependent != NULL) && (primary != NULL)) { /* Neither side references any template/tag. */ return pcmk_rc_ok; } if ((dependent_tag != NULL) && (primary_tag != NULL)) { // A colocation constraint between two templates/tags makes no sense pcmk__config_err("Ignoring constraint '%s' because two templates or " "tags cannot be colocated", id); return pcmk_rc_unpack_error; } dependent_role = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE); primary_role = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE); *expanded_xml = copy_xml(xml_obj); // Convert dependent's template/tag reference into constraint resource_set if (!pcmk__tag_to_set(*expanded_xml, &dependent_set, XML_COLOC_ATTR_SOURCE, true, data_set)) { free_xml(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_unpack_error; } if (dependent_set != NULL) { if (dependent_role != NULL) { // Move "rsc-role" into converted resource_set as "role" crm_xml_add(dependent_set, "role", dependent_role); xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_SOURCE_ROLE); } any_sets = true; } // Convert primary's template/tag reference into constraint resource_set if (!pcmk__tag_to_set(*expanded_xml, &primary_set, XML_COLOC_ATTR_TARGET, true, data_set)) { free_xml(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_unpack_error; } if (primary_set != NULL) { if (primary_role != NULL) { // Move "with-rsc-role" into converted resource_set as "role" crm_xml_add(primary_set, "role", primary_role); xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_TARGET_ROLE); } any_sets = true; } if (any_sets) { crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation"); } else { free_xml(*expanded_xml); *expanded_xml = NULL; } return pcmk_rc_ok; } /*! * \internal * \brief Parse a colocation constraint from XML into a cluster working set * * \param[in,out] xml_obj Colocation constraint XML to unpack * \param[in,out] data_set Cluster working set to add constraint to */ void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set) { int score_i = 0; xmlNode *set = NULL; xmlNode *last = NULL; xmlNode *orig_xml = NULL; xmlNode *expanded_xml = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *score = NULL; const char *influence_s = NULL; if (pcmk__str_empty(id)) { pcmk__config_err("Ignoring " XML_CONS_TAG_RSC_DEPEND " without " CRM_ATTR_ID); return; } if (unpack_colocation_tags(xml_obj, &expanded_xml, data_set) != pcmk_rc_ok) { return; } if (expanded_xml != NULL) { orig_xml = xml_obj; xml_obj = expanded_xml; } score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); if (score != NULL) { score_i = char2score(score); } influence_s = crm_element_value(xml_obj, XML_COLOC_ATTR_INFLUENCE); for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL; set = crm_next_same_xml(set)) { set = expand_idref(set, data_set->input); if (set == NULL) { // Configuration error, message already logged if (expanded_xml != NULL) { free_xml(expanded_xml); } return; } if (pcmk__str_empty(ID(set))) { pcmk__config_err("Ignoring " XML_CONS_TAG_RSC_SET " without " CRM_ATTR_ID); continue; } unpack_colocation_set(set, score_i, id, influence_s, data_set); if (last != NULL) { colocate_rsc_sets(id, last, set, score_i, influence_s, data_set); } last = set; } if (expanded_xml) { free_xml(expanded_xml); xml_obj = orig_xml; } if (last == NULL) { unpack_simple_colocation(xml_obj, id, influence_s, data_set); } } /*! * \internal * \brief Make actions of a given type unrunnable for a given resource * * \param[in,out] rsc Resource whose actions should be blocked * \param[in] task Name of action to block * \param[in] reason Unrunnable start action causing the block */ static void mark_action_blocked(pe_resource_t *rsc, const char *task, const pe_resource_t *reason) { GList *iter = NULL; char *reason_text = crm_strdup_printf("colocation with %s", reason->id); for (iter = rsc->actions; iter != NULL; iter = iter->next) { pe_action_t *action = iter->data; if (pcmk_is_set(action->flags, pe_action_runnable) && pcmk__str_eq(action->task, task, pcmk__str_none)) { pe__clear_action_flags(action, pe_action_runnable); pe_action_set_reason(action, reason_text, false); pcmk__block_colocation_dependents(action); pcmk__update_action_for_orderings(action, rsc->cluster); } } // If parent resource can't perform an action, neither can any children for (iter = rsc->children; iter != NULL; iter = iter->next) { mark_action_blocked((pe_resource_t *) (iter->data), task, reason); } free(reason_text); } /*! * \internal * \brief If an action is unrunnable, block any relevant dependent actions * * If a given action is an unrunnable start or promote, block the start or * promote actions of resources colocated with it, as appropriate to the * colocations' configured roles. * * \param[in,out] action Action to check */ void pcmk__block_colocation_dependents(pe_action_t *action) { GList *iter = NULL; GList *colocations = NULL; pe_resource_t *rsc = NULL; bool is_start = false; if (pcmk_is_set(action->flags, pe_action_runnable)) { return; // Only unrunnable actions block dependents } is_start = pcmk__str_eq(action->task, RSC_START, pcmk__str_none); if (!is_start && !pcmk__str_eq(action->task, RSC_PROMOTE, pcmk__str_none)) { return; // Only unrunnable starts and promotes block dependents } CRM_ASSERT(action->rsc != NULL); // Start and promote are resource actions /* If this resource is part of a collective resource, dependents are blocked * only if all instances of the collective are unrunnable, so check the * collective resource. */ rsc = uber_parent(action->rsc); if (rsc->parent != NULL) { rsc = rsc->parent; // Bundle } // Colocation fails only if entire primary can't reach desired role for (iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child = iter->data; pe_action_t *child_action = find_first_action(child->actions, NULL, action->task, NULL); if ((child_action == NULL) || pcmk_is_set(child_action->flags, pe_action_runnable)) { crm_trace("Not blocking %s colocation dependents because " "at least %s has runnable %s", rsc->id, child->id, action->task); return; // At least one child can reach desired role } } crm_trace("Blocking %s colocation dependents due to unrunnable %s %s", rsc->id, action->rsc->id, action->task); // Check each colocation where this resource is primary colocations = pcmk__with_this_colocations(rsc); for (iter = colocations; iter != NULL; iter = iter->next) { pcmk__colocation_t *colocation = iter->data; if (colocation->score < INFINITY) { continue; // Only mandatory colocations block dependent } /* If the primary can't start, the dependent can't reach its colocated * role, regardless of what the primary or dependent colocation role is. * * If the primary can't be promoted, the dependent can't reach its * colocated role if the primary's colocation role is promoted. */ if (!is_start && (colocation->primary_role != RSC_ROLE_PROMOTED)) { continue; } // Block the dependent from reaching its colocated role if (colocation->dependent_role == RSC_ROLE_PROMOTED) { mark_action_blocked(colocation->dependent, RSC_PROMOTE, action->rsc); } else { mark_action_blocked(colocation->dependent, RSC_START, action->rsc); } } g_list_free(colocations); } /*! * \internal * \brief Determine how a colocation constraint should affect a resource * * Colocation constraints have different effects at different points in the * scheduler sequence. Initially, they affect a resource's location; once that * is determined, then for promotable clones they can affect a resource * instance's role; after both are determined, the constraints no longer matter. * Given a specific colocation constraint, check what has been done so far to * determine what should be affected at the current point in the scheduler. * * \param[in] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint * \param[in] preview If true, pretend resources have already been assigned * * \return How colocation constraint should be applied at this point */ enum pcmk__coloc_affects pcmk__colocation_affects(const pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool preview) { if (!preview && pcmk_is_set(primary->flags, pe_rsc_provisional)) { // Primary resource has not been assigned yet, so we can't do anything return pcmk__coloc_affects_nothing; } if ((colocation->dependent_role >= RSC_ROLE_UNPROMOTED) && (dependent->parent != NULL) && pcmk_is_set(dependent->parent->flags, pe_rsc_promotable) && !pcmk_is_set(dependent->flags, pe_rsc_provisional)) { /* This is a colocation by role, and the dependent is a promotable clone * that has already been assigned, so the colocation should now affect * the role. */ return pcmk__coloc_affects_role; } if (!preview && !pcmk_is_set(dependent->flags, pe_rsc_provisional)) { /* The dependent resource has already been through assignment, so the * constraint no longer has any effect. Log an error if a mandatory * colocation constraint has been violated. */ const pe_node_t *primary_node = primary->allocated_to; if (dependent->allocated_to == NULL) { crm_trace("Skipping colocation '%s': %s will not run anywhere", colocation->id, dependent->id); } else if (colocation->score >= INFINITY) { // Dependent resource must colocate with primary resource if (!pe__same_node(primary_node, dependent->allocated_to)) { crm_err("%s must be colocated with %s but is not (%s vs. %s)", dependent->id, primary->id, pe__node_name(dependent->allocated_to), pe__node_name(primary_node)); } } else if (colocation->score <= -CRM_SCORE_INFINITY) { // Dependent resource must anti-colocate with primary resource if (pe__same_node(dependent->allocated_to, primary_node)) { crm_err("%s and %s must be anti-colocated but are assigned " "to the same node (%s)", dependent->id, primary->id, pe__node_name(primary_node)); } } return pcmk__coloc_affects_nothing; } if ((colocation->score > 0) && (colocation->dependent_role != RSC_ROLE_UNKNOWN) && (colocation->dependent_role != dependent->next_role)) { crm_trace("Skipping colocation '%s': dependent limited to %s role " "but %s next role is %s", colocation->id, role2text(colocation->dependent_role), dependent->id, role2text(dependent->next_role)); return pcmk__coloc_affects_nothing; } if ((colocation->score > 0) && (colocation->primary_role != RSC_ROLE_UNKNOWN) && (colocation->primary_role != primary->next_role)) { crm_trace("Skipping colocation '%s': primary limited to %s role " "but %s next role is %s", colocation->id, role2text(colocation->primary_role), primary->id, role2text(primary->next_role)); return pcmk__coloc_affects_nothing; } if ((colocation->score < 0) && (colocation->dependent_role != RSC_ROLE_UNKNOWN) && (colocation->dependent_role == dependent->next_role)) { crm_trace("Skipping anti-colocation '%s': dependent role %s matches", colocation->id, role2text(colocation->dependent_role)); return pcmk__coloc_affects_nothing; } if ((colocation->score < 0) && (colocation->primary_role != RSC_ROLE_UNKNOWN) && (colocation->primary_role == primary->next_role)) { crm_trace("Skipping anti-colocation '%s': primary role %s matches", colocation->id, role2text(colocation->primary_role)); return pcmk__coloc_affects_nothing; } return pcmk__coloc_affects_location; } /*! * \internal * \brief Apply colocation to dependent for assignment purposes * * Update the allowed node scores of the dependent resource in a colocation, * for the purposes of assigning it to a node. * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint */ void pcmk__apply_coloc_to_scores(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation) { const char *attribute = colocation->node_attribute; const char *value = NULL; GHashTable *work = NULL; GHashTableIter iter; pe_node_t *node = NULL; if (primary->allocated_to != NULL) { value = pe_node_attribute_raw(primary->allocated_to, attribute); } else if (colocation->score < 0) { // Nothing to do (anti-colocation with something that is not running) return; } work = pcmk__copy_node_table(dependent->allowed_nodes); g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (primary->allocated_to == NULL) { node->weight = pcmk__add_scores(-colocation->score, node->weight); pe_rsc_trace(dependent, "Applied %s to %s score on %s (now %s after " "subtracting %s because primary %s inactive)", colocation->id, dependent->id, pe__node_name(node), pcmk_readable_score(node->weight), pcmk_readable_score(colocation->score), primary->id); } else if (pcmk__str_eq(pe_node_attribute_raw(node, attribute), value, pcmk__str_casei)) { /* Add colocation score only if optional (or minus infinity). A * mandatory colocation is a requirement rather than a preference, * so we don't need to consider it for relative assignment purposes. * The resource will simply be forbidden from running on the node if * the primary isn't active there (via the condition above). */ if (colocation->score < CRM_SCORE_INFINITY) { node->weight = pcmk__add_scores(colocation->score, node->weight); pe_rsc_trace(dependent, "Applied %s to %s score on %s (now %s after " "adding %s)", colocation->id, dependent->id, pe__node_name(node), pcmk_readable_score(node->weight), pcmk_readable_score(colocation->score)); } } else if (colocation->score >= CRM_SCORE_INFINITY) { /* Only mandatory colocations are relevant when the colocation * attribute doesn't match, because an attribute not matching is not * a negative preference -- the colocation is simply relevant only * where it matches. */ node->weight = -CRM_SCORE_INFINITY; pe_rsc_trace(dependent, "Banned %s from %s because colocation %s attribute %s " "does not match", dependent->id, pe__node_name(node), colocation->id, attribute); } } if ((colocation->score <= -INFINITY) || (colocation->score >= INFINITY) || pcmk__any_node_available(work)) { g_hash_table_destroy(dependent->allowed_nodes); dependent->allowed_nodes = work; work = NULL; } else { pe_rsc_info(dependent, "%s: Rolling back scores from %s (no available nodes)", dependent->id, primary->id); } if (work != NULL) { g_hash_table_destroy(work); } } /*! * \internal * \brief Apply colocation to dependent for role purposes * * Update the priority of the dependent resource in a colocation, for the * purposes of selecting its role * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint */ void pcmk__apply_coloc_to_priority(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation) { const char *dependent_value = NULL; const char *primary_value = NULL; const char *attribute = colocation->node_attribute; int score_multiplier = 1; if ((primary->allocated_to == NULL) || (dependent->allocated_to == NULL)) { return; } dependent_value = pe_node_attribute_raw(dependent->allocated_to, attribute); primary_value = pe_node_attribute_raw(primary->allocated_to, attribute); if (!pcmk__str_eq(dependent_value, primary_value, pcmk__str_casei)) { if ((colocation->score == INFINITY) && (colocation->dependent_role == RSC_ROLE_PROMOTED)) { dependent->priority = -INFINITY; } return; } if ((colocation->primary_role != RSC_ROLE_UNKNOWN) && (colocation->primary_role != primary->next_role)) { return; } if (colocation->dependent_role == RSC_ROLE_UNPROMOTED) { score_multiplier = -1; } dependent->priority = pcmk__add_scores(score_multiplier * colocation->score, dependent->priority); pe_rsc_trace(dependent, "Applied %s to %s promotion priority (now %s after %s %s)", colocation->id, dependent->id, pcmk_readable_score(dependent->priority), ((score_multiplier == 1)? "adding" : "subtracting"), pcmk_readable_score(colocation->score)); } /*! * \internal * \brief Find score of highest-scored node that matches colocation attribute * * \param[in] rsc Resource whose allowed nodes should be searched * \param[in] attr Colocation attribute name (must not be NULL) * \param[in] value Colocation attribute value to require */ static int best_node_score_matching_attr(const pe_resource_t *rsc, const char *attr, const char *value) { GHashTableIter iter; pe_node_t *node = NULL; int best_score = -INFINITY; const char *best_node = NULL; // Find best allowed node with matching attribute g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if ((node->weight > best_score) && pcmk__node_available(node, false, false) && pcmk__str_eq(value, pe_node_attribute_raw(node, attr), pcmk__str_casei)) { best_score = node->weight; best_node = node->details->uname; } } if (!pcmk__str_eq(attr, CRM_ATTR_UNAME, pcmk__str_none)) { if (best_node == NULL) { crm_info("No allowed node for %s matches node attribute %s=%s", rsc->id, attr, value); } else { crm_info("Allowed node %s for %s had best score (%d) " "of those matching node attribute %s=%s", best_node, rsc->id, best_score, attr, value); } } return best_score; } /*! * \internal * \brief Check whether a resource is allowed only on a single node * * \param[in] rsc Resource to check * * \return \c true if \p rsc is allowed only on one node, otherwise \c false */ static bool allowed_on_one(const pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *allowed_node = NULL; int allowed_nodes = 0; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &allowed_node)) { if ((allowed_node->weight >= 0) && (++allowed_nodes > 1)) { pe_rsc_trace(rsc, "%s is allowed on multiple nodes", rsc->id); return false; } } pe_rsc_trace(rsc, "%s is allowed %s", rsc->id, ((allowed_nodes == 1)? "on a single node" : "nowhere")); return (allowed_nodes == 1); } /*! * \internal * \brief Add resource's colocation matches to current node assignment scores * * For each node in a given table, if any of a given resource's allowed nodes * have a matching value for the colocation attribute, add the highest of those * nodes' scores to the node's score. * * \param[in,out] nodes Table of nodes with assignment scores so far * \param[in] rsc Resource whose allowed nodes should be compared * \param[in] colocation Original colocation constraint (used to get * configured primary resource's stickiness, and * to get colocation node attribute; pass NULL to * ignore stickiness and use default attribute) * \param[in] factor Factor by which to multiply scores being added * \param[in] only_positive Whether to add only positive scores */ static void add_node_scores_matching_attr(GHashTable *nodes, const pe_resource_t *rsc, pcmk__colocation_t *colocation, float factor, bool only_positive) { GHashTableIter iter; pe_node_t *node = NULL; const char *attr = colocation->node_attribute; // Iterate through each node g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { float delta_f = 0; int delta = 0; int score = 0; int new_score = 0; const char *value = pe_node_attribute_raw(node, attr); score = best_node_score_matching_attr(rsc, attr, value); if ((factor < 0) && (score < 0)) { /* If the dependent is anti-colocated, we generally don't want the * primary to prefer nodes that the dependent avoids. That could * lead to unnecessary shuffling of the primary when the dependent * hits its migration threshold somewhere, for example. * * However, there are cases when it is desirable. If the dependent * can't run anywhere but where the primary is, it would be * worthwhile to move the primary for the sake of keeping the * dependent active. * * We can't know that exactly at this point since we don't know * where the primary will be assigned, but we can limit considering * the preference to when the dependent is allowed only on one node. * This is less than ideal for multiple reasons: * * - the dependent could be allowed on more than one node but have * anti-colocation primaries on each; * - the dependent could be a clone or bundle with multiple * instances, and the dependent as a whole is allowed on multiple * nodes but some instance still can't run * - the dependent has considered node-specific criteria such as * location constraints and stickiness by this point, but might * have other factors that end up disallowing a node * * but the alternative is making the primary move when it doesn't * need to. * * We also consider the primary's stickiness and influence, so the * user has some say in the matter. (This is the configured primary, * not a particular instance of the primary, but that doesn't matter * unless stickiness uses a rule to vary by node, and that seems * acceptable to ignore.) */ if ((colocation->primary->stickiness >= -score) || !pcmk__colocation_has_influence(colocation, NULL) || !allowed_on_one(colocation->dependent)) { crm_trace("%s: Filtering %d + %f * %d " "(double negative disallowed)", pe__node_name(node), node->weight, factor, score); continue; } } if (node->weight == INFINITY_HACK) { crm_trace("%s: Filtering %d + %f * %d (node was marked unusable)", pe__node_name(node), node->weight, factor, score); continue; } delta_f = factor * score; // Round the number; see http://c-faq.com/fp/round.html delta = (int) ((delta_f < 0)? (delta_f - 0.5) : (delta_f + 0.5)); /* Small factors can obliterate the small scores that are often actually * used in configurations. If the score and factor are nonzero, ensure * that the result is nonzero as well. */ if ((delta == 0) && (score != 0)) { if (factor > 0.0) { delta = 1; } else if (factor < 0.0) { delta = -1; } } new_score = pcmk__add_scores(delta, node->weight); if (only_positive && (new_score < 0) && (node->weight > 0)) { crm_trace("%s: Filtering %d + %f * %d = %d " "(negative disallowed, marking node unusable)", pe__node_name(node), node->weight, factor, score, new_score); node->weight = INFINITY_HACK; continue; } if (only_positive && (new_score < 0) && (node->weight == 0)) { crm_trace("%s: Filtering %d + %f * %d = %d (negative disallowed)", pe__node_name(node), node->weight, factor, score, new_score); continue; } crm_trace("%s: %d + %f * %d = %d", pe__node_name(node), node->weight, factor, score, new_score); node->weight = new_score; } } /*! * \internal * \brief Update nodes with scores of colocated resources' nodes * * Given a table of nodes and a resource, update the nodes' scores with the * scores of the best nodes matching the attribute used for each of the * resource's relevant colocations. * * \param[in,out] rsc Resource to check colocations for * \param[in] log_id Resource ID for logs (if NULL, use \p rsc ID) * \param[in,out] nodes Nodes to update (set initial contents to NULL * to copy \p rsc's allowed nodes) * \param[in] colocation Original colocation constraint (used to get * configured primary resource's stickiness, and * to get colocation node attribute; if NULL, * \p rsc's own matching node scores will not be * added, and *nodes must be NULL as well) * \param[in] factor Incorporate scores multiplied by this factor * \param[in] flags Bitmask of enum pcmk__coloc_select values * * \note NULL *nodes, NULL colocation, and the pcmk__coloc_select_this_with * flag are used together (and only by cmp_resources()). * \note The caller remains responsible for freeing \p *nodes. */ void pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id, GHashTable **nodes, pcmk__colocation_t *colocation, float factor, uint32_t flags) { GHashTable *work = NULL; CRM_ASSERT((rsc != NULL) && (nodes != NULL) && ((colocation != NULL) || (*nodes == NULL))); if (log_id == NULL) { log_id = rsc->id; } // Avoid infinite recursion if (pcmk_is_set(rsc->flags, pe_rsc_merging)) { pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", log_id, rsc->id); return; } pe__set_resource_flags(rsc, pe_rsc_merging); if (*nodes == NULL) { work = pcmk__copy_node_table(rsc->allowed_nodes); } else { const bool pos = pcmk_is_set(flags, pcmk__coloc_select_nonnegative); pe_rsc_trace(rsc, "%s: Merging %s scores from %s (at %.6f)", log_id, (pos? "positive" : "all"), rsc->id, factor); work = pcmk__copy_node_table(*nodes); add_node_scores_matching_attr(work, rsc, colocation, factor, pos); } if (work == NULL) { pe__clear_resource_flags(rsc, pe_rsc_merging); return; } if (pcmk__any_node_available(work)) { GList *colocations = NULL; if (pcmk_is_set(flags, pcmk__coloc_select_this_with)) { colocations = pcmk__this_with_colocations(rsc); pe_rsc_trace(rsc, "Checking additional %d optional '%s with' " "constraints", g_list_length(colocations), rsc->id); } else { colocations = pcmk__with_this_colocations(rsc); pe_rsc_trace(rsc, "Checking additional %d optional 'with %s' " "constraints", g_list_length(colocations), rsc->id); } flags |= pcmk__coloc_select_active; for (GList *iter = colocations; iter != NULL; iter = iter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) iter->data; pe_resource_t *other = NULL; float other_factor = factor * constraint->score / (float) INFINITY; if (pcmk_is_set(flags, pcmk__coloc_select_this_with)) { other = constraint->primary; } else if (!pcmk__colocation_has_influence(constraint, NULL)) { continue; } else { other = constraint->dependent; } pe_rsc_trace(rsc, "Optionally merging score of '%s' constraint " "(%s with %s)", constraint->id, constraint->dependent->id, constraint->primary->id); other->cmds->add_colocated_node_scores(other, log_id, &work, constraint, other_factor, flags); pe__show_node_scores(true, NULL, log_id, work, rsc->cluster); } g_list_free(colocations); } else if (pcmk_is_set(flags, pcmk__coloc_select_active)) { pe_rsc_info(rsc, "%s: Rolling back optional scores from %s", log_id, rsc->id); g_hash_table_destroy(work); pe__clear_resource_flags(rsc, pe_rsc_merging); return; } if (pcmk_is_set(flags, pcmk__coloc_select_nonnegative)) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->weight == INFINITY_HACK) { node->weight = 1; } } } if (*nodes != NULL) { g_hash_table_destroy(*nodes); } *nodes = work; pe__clear_resource_flags(rsc, pe_rsc_merging); } /*! * \internal * \brief Apply a "with this" colocation to a resource's allowed node scores * * \param[in,out] data Colocation to apply * \param[in,out] user_data Resource being assigned */ void pcmk__add_dependent_scores(gpointer data, gpointer user_data) { pcmk__colocation_t *colocation = (pcmk__colocation_t *) data; pe_resource_t *rsc = (pe_resource_t *) user_data; pe_resource_t *other = colocation->dependent; const float factor = colocation->score / (float) INFINITY; uint32_t flags = pcmk__coloc_select_active; if (!pcmk__colocation_has_influence(colocation, NULL)) { return; } if (rsc->variant == pe_clone) { flags |= pcmk__coloc_select_nonnegative; } pe_rsc_trace(rsc, "%s: Incorporating attenuated %s assignment scores due " "to colocation %s", rsc->id, other->id, colocation->id); other->cmds->add_colocated_node_scores(other, rsc->id, &rsc->allowed_nodes, colocation, factor, flags); } +/*! + * \internal + * \brief Exclude nodes from a dependent's node table if not in a given list + * + * Given a dependent resource in a colocation and a list of nodes where the + * primary resource will run, set a node's score to \c -INFINITY in the + * dependent's node table if not found in the primary nodes list. + * + * \param[in,out] dependent Dependent resource + * \param[in] primary Primary resource (for logging only) + * \param[in] colocation Colocation constraint (for logging only) + * \param[in] primary_nodes List of nodes where the primary will have + * unblocked instances in a suitable role + * \param[in] merge_scores If \c true and a node is found in both \p table + * and \p list, add the node's score in \p list to + * the node's score in \p table + */ +void +pcmk__colocation_intersect_nodes(pe_resource_t *dependent, + const pe_resource_t *primary, + const pcmk__colocation_t *colocation, + const GList *primary_nodes, bool merge_scores) +{ + GHashTableIter iter; + pe_node_t *dependent_node = NULL; + + CRM_ASSERT((dependent != NULL) && (primary != NULL) + && (colocation != NULL)); + + g_hash_table_iter_init(&iter, dependent->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &dependent_node)) { + const pe_node_t *primary_node = NULL; + + primary_node = pe_find_node_id(primary_nodes, + dependent_node->details->id); + if (primary_node == NULL) { + dependent_node->weight = -INFINITY; + pe_rsc_trace(dependent, + "Banning %s from %s (no primary instance) for %s", + dependent->id, pe__node_name(dependent_node), + colocation->id); + + } else if (merge_scores) { + dependent_node->weight = pcmk__add_scores(dependent_node->weight, + primary_node->weight); + pe_rsc_trace(dependent, + "Added %s's score %s to %s's score for %s (now %s) " + "for colocation %s", + primary->id, pcmk_readable_score(primary_node->weight), + dependent->id, pe__node_name(dependent_node), + pcmk_readable_score(dependent_node->weight), + colocation->id); + } + } +} + /*! * \internal * \brief Get all colocations affecting a resource as the primary * * \param[in] rsc Resource to get colocations for * * \return Newly allocated list of colocations affecting \p rsc as primary * * \note This is a convenience wrapper for the with_this_colocations() method. */ GList * pcmk__with_this_colocations(const pe_resource_t *rsc) { GList *list = NULL; rsc->cmds->with_this_colocations(rsc, rsc, &list); return list; } /*! * \internal * \brief Get all colocations affecting a resource as the dependent * * \param[in] rsc Resource to get colocations for * * \return Newly allocated list of colocations affecting \p rsc as dependent * * \note This is a convenience wrapper for the this_with_colocations() method. */ GList * pcmk__this_with_colocations(const pe_resource_t *rsc) { GList *list = NULL; rsc->cmds->this_with_colocations(rsc, rsc, &list); return list; } diff --git a/lib/pacemaker/pcmk_sched_fencing.c b/lib/pacemaker/pcmk_sched_fencing.c index a62b8e95f7..7caa2960a4 100644 --- a/lib/pacemaker/pcmk_sched_fencing.c +++ b/lib/pacemaker/pcmk_sched_fencing.c @@ -1,494 +1,494 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Check whether a resource is known on a particular node * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return TRUE if resource (or parent if an anonymous clone) is known */ static bool rsc_is_known_on(const pe_resource_t *rsc, const pe_node_t *node) { - if (pe_hash_table_lookup(rsc->known_on, node->details->id)) { + if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) { return TRUE; } else if ((rsc->variant == pe_native) && pe_rsc_is_anon_clone(rsc->parent) - && pe_hash_table_lookup(rsc->parent->known_on, - node->details->id)) { + && (g_hash_table_lookup(rsc->parent->known_on, + node->details->id) != NULL)) { /* We check only the parent, not the uber-parent, because we cannot * assume that the resource is known if it is in an anonymously cloned * group (which may be only partially known). */ return TRUE; } return FALSE; } /*! * \internal * \brief Order a resource's start and promote actions relative to fencing * * \param[in,out] rsc Resource to be ordered * \param[in,out] stonith_op Fence action */ static void order_start_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op) { pe_node_t *target; CRM_CHECK(stonith_op && stonith_op->node, return); target = stonith_op->node; for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) { pe_action_t *action = iter->data; switch (action->needs) { case rsc_req_nothing: // Anything other than start or promote requires nothing break; case rsc_req_stonith: order_actions(stonith_op, action, pe_order_optional); break; case rsc_req_quorum: if (pcmk__str_eq(action->task, RSC_START, pcmk__str_none) - && pe_hash_table_lookup(rsc->allowed_nodes, - target->details->id) + && (g_hash_table_lookup(rsc->allowed_nodes, + target->details->id) != NULL) && !rsc_is_known_on(rsc, target)) { /* If we don't know the status of the resource on the node * we're about to shoot, we have to assume it may be active * there. Order the resource start after the fencing. This * is analogous to waiting for all the probes for a resource * to complete before starting it. * * The most likely explanation is that the DC died and took * its status with it. */ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, pe__node_name(target)); order_actions(stonith_op, action, pe_order_optional | pe_order_runnable_left); } break; } } } /*! * \internal * \brief Order a resource's stop and demote actions relative to fencing * * \param[in,out] rsc Resource to be ordered * \param[in,out] stonith_op Fence action */ static void order_stop_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op) { GList *iter = NULL; GList *action_list = NULL; bool order_implicit = false; pe_resource_t *top = uber_parent(rsc); pe_action_t *parent_stop = NULL; pe_node_t *target; CRM_CHECK(stonith_op && stonith_op->node, return); target = stonith_op->node; /* Get a list of stop actions potentially implied by the fencing */ action_list = pe__resource_actions(rsc, target, RSC_STOP, FALSE); /* If resource requires fencing, implicit actions must occur after fencing. * * Implied stops and demotes of resources running on guest nodes are always * ordered after fencing, even if the resource does not require fencing, * because guest node "fencing" is actually just a resource stop. */ if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing) || pe__is_guest_node(target)) { order_implicit = true; } if (action_list && order_implicit) { parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); } for (iter = action_list; iter != NULL; iter = iter->next) { pe_action_t *action = iter->data; // The stop would never complete, so convert it into a pseudo-action. pe__set_action_flags(action, pe_action_pseudo|pe_action_runnable); if (order_implicit) { pe__set_action_flags(action, pe_action_implied_by_stonith); /* Order the stonith before the parent stop (if any). * * Also order the stonith before the resource stop, unless the * resource is inside a bundle -- that would cause a graph loop. * We can rely on the parent stop's ordering instead. * * User constraints must not order a resource in a guest node * relative to the guest node container resource. The * pe_order_preserve flag marks constraints as generated by the * cluster and thus immune to that check (and is irrelevant if * target is not a guest). */ if (!pe_rsc_is_bundled(rsc)) { order_actions(stonith_op, action, pe_order_preserve); } order_actions(stonith_op, parent_stop, pe_order_preserve); } if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { crm_notice("Stop of failed resource %s is implicit %s %s is fenced", rsc->id, (order_implicit? "after" : "because"), pe__node_name(target)); } else { crm_info("%s is implicit %s %s is fenced", action->uuid, (order_implicit? "after" : "because"), pe__node_name(target)); } if (pcmk_is_set(rsc->flags, pe_rsc_notify)) { pe__order_notifs_after_fencing(action, rsc, stonith_op); } #if 0 /* It might be a good idea to stop healthy resources on a node about to * be fenced, when possible. * * However, fencing must be done before a failed resource's * (pseudo-)stop action, so that could create a loop. For example, given * a group of A and B running on node N with a failed stop of B: * * fence N -> stop B (pseudo-op) -> stop A -> fence N * * The block below creates the stop A -> fence N ordering and therefore * must (at least for now) be disabled. Instead, run the block above and * treat all resources on N as B would be (i.e., as a pseudo-op after * the fencing). * * @TODO Maybe break the "A requires B" dependency in * pcmk__update_action_for_orderings() and use this block for healthy * resources instead of the above. */ crm_info("Moving healthy resource %s off %s before fencing", rsc->id, pe__node_name(node)); pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL, strdup(CRM_OP_FENCE), stonith_op, pe_order_optional, rsc->cluster); #endif } g_list_free(action_list); /* Get a list of demote actions potentially implied by the fencing */ action_list = pe__resource_actions(rsc, target, RSC_DEMOTE, FALSE); for (iter = action_list; iter != NULL; iter = iter->next) { pe_action_t *action = iter->data; if (!(action->node->details->online) || action->node->details->unclean || pcmk_is_set(rsc->flags, pe_rsc_failed)) { if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_info(rsc, "Demote of failed resource %s is implicit " "after %s is fenced", rsc->id, pe__node_name(target)); } else { pe_rsc_info(rsc, "%s is implicit after %s is fenced", action->uuid, pe__node_name(target)); } /* The demote would never complete and is now implied by the * fencing, so convert it into a pseudo-action. */ pe__set_action_flags(action, pe_action_pseudo|pe_action_runnable); if (pe_rsc_is_bundled(rsc)) { // Recovery will be ordered as usual after parent's implied stop } else if (order_implicit) { order_actions(stonith_op, action, pe_order_preserve|pe_order_optional); } } } g_list_free(action_list); } /*! * \internal * \brief Order resource actions properly relative to fencing * * \param[in,out] rsc Resource whose actions should be ordered * \param[in,out] stonith_op Fencing operation to be ordered against */ static void rsc_stonith_ordering(pe_resource_t *rsc, pe_action_t *stonith_op) { if (rsc->children) { for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child_rsc = iter->data; rsc_stonith_ordering(child_rsc, stonith_op); } } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); } else { order_start_vs_fencing(rsc, stonith_op); order_stop_vs_fencing(rsc, stonith_op); } } /*! * \internal * \brief Order all actions appropriately relative to a fencing operation * * Ensure start operations of affected resources are ordered after fencing, * imply stop and demote operations of affected resources by marking them as * pseudo-actions, etc. * * \param[in,out] stonith_op Fencing operation * \param[in,out] data_set Working set of cluster */ void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set) { CRM_CHECK(stonith_op && data_set, return); for (GList *r = data_set->resources; r != NULL; r = r->next) { rsc_stonith_ordering((pe_resource_t *) r->data, stonith_op); } } /*! * \internal * \brief Order an action after unfencing * * \param[in] rsc Resource that action is for * \param[in,out] node Node that action is on * \param[in,out] action Action to be ordered after unfencing * \param[in] order Ordering flags */ void pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order) { /* When unfencing is in use, we order unfence actions before any probe or * start of resources that require unfencing, and also of fence devices. * * This might seem to violate the principle that fence devices require * only quorum. However, fence agents that unfence often don't have enough * information to even probe or start unless the node is first unfenced. */ if ((pcmk_is_set(rsc->flags, pe_rsc_fence_device) && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing)) || pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) { /* Start with an optional ordering. Requiring unfencing would result in * the node being unfenced, and all its resources being stopped, * whenever a new resource is added -- which would be highly suboptimal. */ pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, node->details->data_set); order_actions(unfence, action, order); if (!pcmk__node_unfenced(node)) { // But unfencing is required if it has never been done char *reason = crm_strdup_printf("required by %s %s", rsc->id, action->task); trigger_unfencing(NULL, node, reason, NULL, node->details->data_set); free(reason); } } } /*! * \internal * \brief Create pseudo-op for guest node fence, and order relative to it * * \param[in,out] node Guest node to fence */ void pcmk__fence_guest(pe_node_t *node) { pe_resource_t *container = NULL; pe_action_t *stop = NULL; pe_action_t *stonith_op = NULL; /* The fence action is just a label; we don't do anything differently for * off vs. reboot. We specify it explicitly, rather than let it default to * cluster's default action, because we are not _initiating_ fencing -- we * are creating a pseudo-event to describe fencing that is already occurring * by other means (container recovery). */ const char *fence_action = "off"; CRM_ASSERT(node != NULL); /* Check whether guest's container resource has any explicit stop or * start (the stop may be implied by fencing of the guest's host). */ container = node->details->remote_rsc->container; if (container) { stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL); if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) { fence_action = "reboot"; } } /* Create a fence pseudo-event, so we have an event to order actions * against, and the controller can always detect it. */ stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, node->details->data_set); pe__set_action_flags(stonith_op, pe_action_pseudo|pe_action_runnable); /* We want to imply stops/demotes after the guest is stopped, not wait until * it is restarted, so we always order pseudo-fencing after stop, not start * (even though start might be closer to what is done for a real reboot). */ if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) { pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, node->details->data_set); crm_info("Implying guest %s is down (action %d) after %s fencing", pe__node_name(node), stonith_op->id, pe__node_name(stop->node)); order_actions(parent_stonith_op, stonith_op, pe_order_runnable_left|pe_order_implies_then); } else if (stop) { order_actions(stop, stonith_op, pe_order_runnable_left|pe_order_implies_then); crm_info("Implying guest %s is down (action %d) " "after container %s is stopped (action %d)", pe__node_name(node), stonith_op->id, container->id, stop->id); } else { /* If we're fencing the guest node but there's no stop for the guest * resource, we must think the guest is already stopped. However, we may * think so because its resource history was just cleaned. To avoid * unnecessarily considering the guest node down if it's really up, * order the pseudo-fencing after any stop of the connection resource, * which will be ordered after any container (re-)probe. */ stop = find_first_action(node->details->remote_rsc->actions, NULL, RSC_STOP, NULL); if (stop) { order_actions(stop, stonith_op, pe_order_optional); crm_info("Implying guest %s is down (action %d) " "after connection is stopped (action %d)", pe__node_name(node), stonith_op->id, stop->id); } else { /* Not sure why we're fencing, but everything must already be * cleanly stopped. */ crm_info("Implying guest %s is down (action %d) ", pe__node_name(node), stonith_op->id); } } // Order/imply other actions relative to pseudo-fence as with real fence pcmk__order_vs_fence(stonith_op, node->details->data_set); } /*! * \internal * \brief Check whether node has already been unfenced * * \param[in] node Node to check * * \return true if node has a nonzero #node-unfenced attribute (or none), * otherwise false */ bool pcmk__node_unfenced(const pe_node_t *node) { const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED); return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches); } /*! * \internal * \brief Order a resource's start and stop relative to unfencing of a node * * \param[in,out] data Node that could be unfenced * \param[in,out] user_data Resource to order */ void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data) { pe_node_t *node = (pe_node_t *) data; pe_resource_t *rsc = (pe_resource_t *) user_data; pe_action_t *unfence = pe_fence_op(node, "on", true, NULL, false, rsc->cluster); crm_debug("Ordering any stops of %s before %s, and any starts after", rsc->id, unfence->uuid); /* * It would be more efficient to order clone resources once, * rather than order each instance, but ordering the instance * allows us to avoid unnecessary dependencies that might conflict * with user constraints. * * @TODO: This constraint can still produce a transition loop if the * resource has a stop scheduled on the node being unfenced, and * there is a user ordering constraint to start some other resource * (which will be ordered after the unfence) before stopping this * resource. An example is "start some slow-starting cloned service * before stopping an associated virtual IP that may be moving to * it": * stop this -> unfencing -> start that -> stop this */ pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL, strdup(unfence->uuid), unfence, pe_order_optional|pe_order_same_node, rsc->cluster); pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence, rsc, start_key(rsc), NULL, pe_order_implies_then_on_node|pe_order_same_node, rsc->cluster); } diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c index b2a2e6dca7..8e23290d9d 100644 --- a/lib/pacemaker/pcmk_sched_instances.c +++ b/lib/pacemaker/pcmk_sched_instances.c @@ -1,1656 +1,1657 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ /* This file is intended for code usable with both clone instances and bundle * replica containers. */ #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Check whether a clone or bundle has instances for all available nodes * * \param[in] collective Clone or bundle to check * * \return true if \p collective has enough instances for all of its available * allowed nodes, otherwise false */ static bool can_run_everywhere(const pe_resource_t *collective) { GHashTableIter iter; pe_node_t *node = NULL; int available_nodes = 0; int max_instances = 0; switch (collective->variant) { case pe_clone: max_instances = pe__clone_max(collective); break; case pe_container: max_instances = pe__bundle_max(collective); break; default: return false; // Not actually possible } g_hash_table_iter_init(&iter, collective->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (pcmk__node_available(node, false, false) && (max_instances < ++available_nodes)) { return false; } } return true; } /*! * \internal * \brief Check whether a node is allowed to run an instance * * \param[in] instance Clone instance or bundle container to check * \param[in] node Node to check * \param[in] max_per_node Maximum number of instances allowed to run on a node * * \return true if \p node is allowed to run \p instance, otherwise false */ static bool can_run_instance(const pe_resource_t *instance, const pe_node_t *node, int max_per_node) { pe_node_t *allowed_node = NULL; if (pcmk_is_set(instance->flags, pe_rsc_orphan)) { pe_rsc_trace(instance, "%s cannot run on %s: orphaned", instance->id, pe__node_name(node)); return false; } if (!pcmk__node_available(node, false, false)) { pe_rsc_trace(instance, "%s cannot run on %s: node cannot run resources", instance->id, pe__node_name(node)); return false; } allowed_node = pcmk__top_allowed_node(instance, node); if (allowed_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", instance->id, pe__node_name(node)); return false; } if (allowed_node->weight < 0) { pe_rsc_trace(instance, "%s cannot run on %s: parent score is %s there", instance->id, pe__node_name(node), pcmk_readable_score(allowed_node->weight)); return false; } if (allowed_node->count >= max_per_node) { pe_rsc_trace(instance, "%s cannot run on %s: node already has %d instance%s", instance->id, pe__node_name(node), max_per_node, pcmk__plural_s(max_per_node)); return false; } pe_rsc_trace(instance, "%s can run on %s (%d already running)", instance->id, pe__node_name(node), allowed_node->count); return true; } /*! * \internal * \brief Ban a clone instance or bundle replica from unavailable allowed nodes * * \param[in,out] instance Clone instance or bundle replica to ban * \param[in] max_per_node Maximum instances allowed to run on a node */ static void ban_unavailable_allowed_nodes(pe_resource_t *instance, int max_per_node) { if (instance->allowed_nodes != NULL) { GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, instance->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if (!can_run_instance(instance, node, max_per_node)) { pe_rsc_trace(instance, "Banning %s from unavailable node %s", instance->id, pe__node_name(node)); node->weight = -INFINITY; for (GList *child_iter = instance->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; pe_node_t *child_node = NULL; - child_node = pe_hash_table_lookup(child->allowed_nodes, - node->details->id); + child_node = g_hash_table_lookup(child->allowed_nodes, + node->details->id); if (child_node != NULL) { pe_rsc_trace(instance, "Banning %s child %s " "from unavailable node %s", instance->id, child->id, pe__node_name(node)); child_node->weight = -INFINITY; } } } } } } /*! * \internal * \brief Create a hash table with a single node in it * * \param[in] node Node to copy into new table * * \return Newly created hash table containing a copy of \p node * \note The caller is responsible for freeing the result with * g_hash_table_destroy(). */ static GHashTable * new_node_table(pe_node_t *node) { GHashTable *table = pcmk__strkey_table(NULL, free); node = pe__copy_node(node); g_hash_table_insert(table, (gpointer) node->details->id, node); return table; } /*! * \internal * \brief Apply a resource's parent's colocation scores to a node table * * \param[in] rsc Resource whose colocations should be applied * \param[in,out] nodes Node table to apply colocations to */ static void apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes) { GList *iter = NULL; pcmk__colocation_t *colocation = NULL; pe_resource_t *other = NULL; float factor = 0.0; /* Because the this_with_colocations() and with_this_colocations() methods * boil down to copies of rsc_cons and rsc_cons_lhs for clones and bundles, * we can use those here directly for efficiency. */ for (iter = rsc->parent->rsc_cons; iter != NULL; iter = iter->next) { colocation = (pcmk__colocation_t *) iter->data; other = colocation->primary; factor = colocation->score / (float) INFINITY, other->cmds->add_colocated_node_scores(other, rsc->id, nodes, colocation, factor, pcmk__coloc_select_default); } for (iter = rsc->parent->rsc_cons_lhs; iter != NULL; iter = iter->next) { colocation = (pcmk__colocation_t *) iter->data; if (!pcmk__colocation_has_influence(colocation, rsc)) { continue; } other = colocation->dependent; factor = colocation->score / (float) INFINITY, other->cmds->add_colocated_node_scores(other, rsc->id, nodes, colocation, factor, pcmk__coloc_select_nonnegative); } } /*! * \internal * \brief Compare clone or bundle instances based on colocation scores * * Determine the relative order in which two clone or bundle instances should be * assigned to nodes, considering the scores of colocation constraints directly * or indirectly involving them. * * \param[in] instance1 First instance to compare * \param[in] instance2 Second instance to compare * * \return A negative number if \p instance1 should be assigned first, * a positive number if \p instance2 should be assigned first, * or 0 if assignment order doesn't matter */ static int cmp_instance_by_colocation(const pe_resource_t *instance1, const pe_resource_t *instance2) { int rc = 0; pe_node_t *node1 = NULL; pe_node_t *node2 = NULL; pe_node_t *current_node1 = pe__current_node(instance1); pe_node_t *current_node2 = pe__current_node(instance2); GHashTable *colocated_scores1 = NULL; GHashTable *colocated_scores2 = NULL; CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL) && (instance2 != NULL) && (instance2->parent != NULL) && (current_node1 != NULL) && (current_node2 != NULL)); // Create node tables initialized with each node colocated_scores1 = new_node_table(current_node1); colocated_scores2 = new_node_table(current_node2); // Apply parental colocations apply_parent_colocations(instance1, &colocated_scores1); apply_parent_colocations(instance2, &colocated_scores2); // Find original nodes again, with scores updated for colocations node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id); node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id); // Compare nodes by updated scores if (node1->weight < node2->weight) { crm_trace("Assign %s (%d on %s) after %s (%d on %s)", instance1->id, node1->weight, pe__node_name(node1), instance2->id, node2->weight, pe__node_name(node2)); rc = 1; } else if (node1->weight > node2->weight) { crm_trace("Assign %s (%d on %s) before %s (%d on %s)", instance1->id, node1->weight, pe__node_name(node1), instance2->id, node2->weight, pe__node_name(node2)); rc = -1; } g_hash_table_destroy(colocated_scores1); g_hash_table_destroy(colocated_scores2); return rc; } /*! * \internal * \brief Check whether a resource or any of its children are failed * * \param[in] rsc Resource to check * * \return true if \p rsc or any of its children are failed, otherwise false */ static bool did_fail(const pe_resource_t *rsc) { if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { return true; } for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { if (did_fail((const pe_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether a node is allowed to run a resource * * \param[in] rsc Resource to check * \param[in,out] node Node to check (will be set NULL if not allowed) * * \return true if *node is either NULL or allowed for \p rsc, otherwise false */ static bool node_is_allowed(const pe_resource_t *rsc, pe_node_t **node) { if (*node != NULL) { - pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes, - (*node)->details->id); + pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, + (*node)->details->id); + if ((allowed == NULL) || (allowed->weight < 0)) { pe_rsc_trace(rsc, "%s: current location (%s) is unavailable", rsc->id, pe__node_name(*node)); *node = NULL; return false; } } return true; } /*! * \internal * \brief Compare two clone or bundle instances' instance numbers * * \param[in] a First instance to compare * \param[in] b Second instance to compare * * \return A negative number if \p a's instance number is lower, * a positive number if \p b's instance number is lower, * or 0 if their instance numbers are the same */ gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b) { const pe_resource_t *instance1 = (const pe_resource_t *) a; const pe_resource_t *instance2 = (const pe_resource_t *) b; char *div1 = NULL; char *div2 = NULL; CRM_ASSERT((instance1 != NULL) && (instance2 != NULL)); // Clone numbers are after a colon, bundle numbers after a dash div1 = strrchr(instance1->id, ':'); if (div1 == NULL) { div1 = strrchr(instance1->id, '-'); } div2 = strrchr(instance2->id, ':'); if (div2 == NULL) { div2 = strrchr(instance2->id, '-'); } CRM_ASSERT((div1 != NULL) && (div2 != NULL)); return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10)); } /*! * \internal * \brief Compare clone or bundle instances according to assignment order * * Compare two clone or bundle instances according to the order they should be * assigned to nodes, preferring (in order): * * - Active instance that is less multiply active * - Instance that is not active on a disallowed node * - Instance with higher configured priority * - Active instance whose current node can run resources * - Active instance whose parent is allowed on current node * - Active instance whose current node has fewer other instances * - Active instance * - Instance that isn't failed * - Instance whose colocations result in higher score on current node * - Instance with lower ID in lexicographic order * * \param[in] a First instance to compare * \param[in] b Second instance to compare * * \return A negative number if \p a should be assigned first, * a positive number if \p b should be assigned first, * or 0 if assignment order doesn't matter */ gint pcmk__cmp_instance(gconstpointer a, gconstpointer b) { int rc = 0; pe_node_t *node1 = NULL; pe_node_t *node2 = NULL; unsigned int nnodes1 = 0; unsigned int nnodes2 = 0; bool can1 = true; bool can2 = true; const pe_resource_t *instance1 = (const pe_resource_t *) a; const pe_resource_t *instance2 = (const pe_resource_t *) b; CRM_ASSERT((instance1 != NULL) && (instance2 != NULL)); node1 = instance1->fns->active_node(instance1, &nnodes1, NULL); node2 = instance2->fns->active_node(instance2, &nnodes2, NULL); /* If both instances are running and at least one is multiply * active, prefer instance that's running on fewer nodes. */ if ((nnodes1 > 0) && (nnodes2 > 0)) { if (nnodes1 < nnodes2) { crm_trace("Assign %s (active on %d) before %s (active on %d): " "less multiply active", instance1->id, nnodes1, instance2->id, nnodes2); return -1; } else if (nnodes1 > nnodes2) { crm_trace("Assign %s (active on %d) after %s (active on %d): " "more multiply active", instance1->id, nnodes1, instance2->id, nnodes2); return 1; } } /* An instance that is either inactive or active on an allowed node is * preferred over an instance that is active on a no-longer-allowed node. */ can1 = node_is_allowed(instance1, &node1); can2 = node_is_allowed(instance2, &node2); if (can1 && !can2) { crm_trace("Assign %s before %s: not active on a disallowed node", instance1->id, instance2->id); return -1; } else if (!can1 && can2) { crm_trace("Assign %s after %s: active on a disallowed node", instance1->id, instance2->id); return 1; } // Prefer instance with higher configured priority if (instance1->priority > instance2->priority) { crm_trace("Assign %s before %s: priority (%d > %d)", instance1->id, instance2->id, instance1->priority, instance2->priority); return -1; } else if (instance1->priority < instance2->priority) { crm_trace("Assign %s after %s: priority (%d < %d)", instance1->id, instance2->id, instance1->priority, instance2->priority); return 1; } // Prefer active instance if ((node1 == NULL) && (node2 == NULL)) { crm_trace("No assignment preference for %s vs. %s: inactive", instance1->id, instance2->id); return 0; } else if (node1 == NULL) { crm_trace("Assign %s after %s: active", instance1->id, instance2->id); return 1; } else if (node2 == NULL) { crm_trace("Assign %s before %s: active", instance1->id, instance2->id); return -1; } // Prefer instance whose current node can run resources can1 = pcmk__node_available(node1, false, false); can2 = pcmk__node_available(node2, false, false); if (can1 && !can2) { crm_trace("Assign %s before %s: current node can run resources", instance1->id, instance2->id); return -1; } else if (!can1 && can2) { crm_trace("Assign %s after %s: current node can't run resources", instance1->id, instance2->id); return 1; } // Prefer instance whose parent is allowed to run on instance's current node node1 = pcmk__top_allowed_node(instance1, node1); node2 = pcmk__top_allowed_node(instance2, node2); if ((node1 == NULL) && (node2 == NULL)) { crm_trace("No assignment preference for %s vs. %s: " "parent not allowed on either instance's current node", instance1->id, instance2->id); return 0; } else if (node1 == NULL) { crm_trace("Assign %s after %s: parent not allowed on current node", instance1->id, instance2->id); return 1; } else if (node2 == NULL) { crm_trace("Assign %s before %s: parent allowed on current node", instance1->id, instance2->id); return -1; } // Prefer instance whose current node is running fewer other instances if (node1->count < node2->count) { crm_trace("Assign %s before %s: fewer active instances on current node", instance1->id, instance2->id); return -1; } else if (node1->count > node2->count) { crm_trace("Assign %s after %s: more active instances on current node", instance1->id, instance2->id); return 1; } // Prefer instance that isn't failed can1 = did_fail(instance1); can2 = did_fail(instance2); if (!can1 && can2) { crm_trace("Assign %s before %s: not failed", instance1->id, instance2->id); return -1; } else if (can1 && !can2) { crm_trace("Assign %s after %s: failed", instance1->id, instance2->id); return 1; } // Prefer instance with higher cumulative colocation score on current node rc = cmp_instance_by_colocation(instance1, instance2); if (rc != 0) { return rc; } // Prefer instance with lower instance number rc = pcmk__cmp_instance_number(instance1, instance2); if (rc < 0) { crm_trace("Assign %s before %s: instance number", instance1->id, instance2->id); } else if (rc > 0) { crm_trace("Assign %s after %s: instance number", instance1->id, instance2->id); } else { crm_trace("No assignment preference for %s vs. %s", instance1->id, instance2->id); } return rc; } /*! * \internal * \brief Choose a node for an instance * * \param[in,out] instance Clone instance or bundle replica container * \param[in] prefer If not NULL, attempt early assignment to this * node, if still the best choice; otherwise, * perform final assignment * \param[in] max_per_node Assign at most this many instances to one node * * \return true if \p instance could be assigned to a node, otherwise false */ static bool assign_instance(pe_resource_t *instance, const pe_node_t *prefer, int max_per_node) { pe_node_t *chosen = NULL; pe_node_t *allowed = NULL; CRM_ASSERT(instance != NULL); pe_rsc_trace(instance, "Assigning %s (preferring %s)", instance->id, ((prefer == NULL)? "no node" : prefer->details->uname)); CRM_CHECK(pcmk_is_set(instance->flags, pe_rsc_provisional), return (instance->fns->location(instance, NULL, FALSE) != NULL)); if (pcmk_is_set(instance->flags, pe_rsc_allocating)) { pe_rsc_debug(instance, "Assignment loop detected involving %s colocations", instance->id); return false; } if (prefer != NULL) { // Possible early assignment to preferred node // Get preferred node with instance's scores allowed = g_hash_table_lookup(instance->allowed_nodes, prefer->details->id); if ((allowed == NULL) || (allowed->weight < 0)) { pe_rsc_trace(instance, "Not assigning %s to preferred node %s: unavailable", instance->id, pe__node_name(prefer)); return false; } } ban_unavailable_allowed_nodes(instance, max_per_node); if (prefer == NULL) { // Final assignment chosen = instance->cmds->assign(instance, NULL); } else { // Possible early assignment to preferred node GHashTable *backup = NULL; pcmk__copy_node_tables(instance, &backup); chosen = instance->cmds->assign(instance, prefer); // Revert nodes if preferred node won't be assigned if ((chosen != NULL) && !pe__same_node(chosen, prefer)) { crm_info("Not assigning %s to preferred node %s: %s is better", instance->id, pe__node_name(prefer), pe__node_name(chosen)); pcmk__restore_node_tables(instance, backup); pcmk__unassign_resource(instance); chosen = NULL; } g_hash_table_destroy(backup); } // The parent tracks how many instances have been assigned to each node if (chosen != NULL) { allowed = pcmk__top_allowed_node(instance, chosen); if (allowed == NULL) { /* The instance is allowed on the node, but its parent isn't. This * shouldn't be possible if the resource is managed, and we won't be * able to limit the number of instances assigned to the node. */ CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pe_rsc_managed)); } else { allowed->count++; } } return chosen != NULL; } /*! * \internal * \brief Reset the node counts of a resource's allowed nodes to zero * * \param[in,out] rsc Resource to reset * * \return Number of nodes that are available to run resources */ static unsigned int reset_allowed_node_counts(pe_resource_t *rsc) { unsigned int available_nodes = 0; pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { node->count = 0; if (pcmk__node_available(node, false, false)) { available_nodes++; } } return available_nodes; } /*! * \internal * \brief Check whether an instance has a preferred node * * \param[in] rsc Clone or bundle being assigned (for logs only) * \param[in] instance Clone instance or bundle replica container * \param[in] optimal_per_node Optimal number of instances per node * * \return Instance's current node if still available, otherwise NULL */ static const pe_node_t * preferred_node(const pe_resource_t *rsc, const pe_resource_t *instance, int optimal_per_node) { const pe_node_t *node = NULL; const pe_node_t *parent_node = NULL; // Check whether instance is active, healthy, and not yet assigned if ((instance->running_on == NULL) || !pcmk_is_set(instance->flags, pe_rsc_provisional) || pcmk_is_set(instance->flags, pe_rsc_failed)) { return NULL; } // Check whether instance's current node can run resources node = pe__current_node(instance); if (!pcmk__node_available(node, true, false)) { pe_rsc_trace(rsc, "Not assigning %s to %s early (unavailable)", instance->id, pe__node_name(node)); return NULL; } // Check whether node already has optimal number of instances assigned parent_node = pcmk__top_allowed_node(instance, node); if ((parent_node != NULL) && (parent_node->count >= optimal_per_node)) { pe_rsc_trace(rsc, "Not assigning %s to %s early " "(optimal instances already assigned)", instance->id, pe__node_name(node)); return NULL; } return node; } /*! * \internal * \brief Assign collective instances to nodes * * \param[in,out] collective Clone or bundle resource being assigned * \param[in,out] instances List of clone instances or bundle containers * \param[in] max_total Maximum instances to assign in total * \param[in] max_per_node Maximum instances to assign to any one node */ void pcmk__assign_instances(pe_resource_t *collective, GList *instances, int max_total, int max_per_node) { // Reuse node count to track number of assigned instances unsigned int available_nodes = reset_allowed_node_counts(collective); int optimal_per_node = 0; int assigned = 0; GList *iter = NULL; pe_resource_t *instance = NULL; const pe_node_t *current = NULL; if (available_nodes > 0) { optimal_per_node = max_total / available_nodes; } if (optimal_per_node < 1) { optimal_per_node = 1; } pe_rsc_debug(collective, "Assigning up to %d %s instance%s to up to %u node%s " "(at most %d per host, %d optimal)", max_total, collective->id, pcmk__plural_s(max_total), available_nodes, pcmk__plural_s(available_nodes), max_per_node, optimal_per_node); // Assign as many instances as possible to their current location for (iter = instances; (iter != NULL) && (assigned < max_total); iter = iter->next) { instance = (pe_resource_t *) iter->data; current = preferred_node(collective, instance, optimal_per_node); if ((current != NULL) && assign_instance(instance, current, max_per_node)) { pe_rsc_trace(collective, "Assigned %s to current node %s", instance->id, pe__node_name(current)); assigned++; } } pe_rsc_trace(collective, "Assigned %d of %d instance%s to current node", assigned, max_total, pcmk__plural_s(max_total)); for (iter = instances; iter != NULL; iter = iter->next) { instance = (pe_resource_t *) iter->data; if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) { continue; // Already assigned } if (instance->running_on != NULL) { current = pe__current_node(instance); if (pcmk__top_allowed_node(instance, current) == NULL) { const char *unmanaged = ""; if (!pcmk_is_set(instance->flags, pe_rsc_managed)) { unmanaged = "Unmanaged resource "; } crm_notice("%s%s is running on %s which is no longer allowed", unmanaged, instance->id, pe__node_name(current)); } } if (assigned >= max_total) { pe_rsc_debug(collective, "Not assigning %s because maximum %d instances " "already assigned", instance->id, max_total); resource_location(instance, NULL, -INFINITY, "collective_limit_reached", collective->cluster); } else if (assign_instance(instance, NULL, max_per_node)) { assigned++; } } pe_rsc_debug(collective, "Assigned %d of %d possible instance%s of %s", assigned, max_total, pcmk__plural_s(max_total), collective->id); } enum instance_state { instance_starting = (1 << 0), instance_stopping = (1 << 1), /* This indicates that some instance is restarting. It's not the same as * instance_starting|instance_stopping, which would indicate that some * instance is starting, and some instance (not necessarily the same one) is * stopping. */ instance_restarting = (1 << 2), instance_active = (1 << 3), instance_all = instance_starting|instance_stopping |instance_restarting|instance_active, }; /*! * \internal * \brief Check whether an instance is active, starting, and/or stopping * * \param[in] instance Clone instance or bundle replica container * \param[in,out] state Whether any instance is starting, stopping, etc. */ static void check_instance_state(const pe_resource_t *instance, uint32_t *state) { const GList *iter = NULL; uint32_t instance_state = 0; // State of just this instance // No need to check further if all conditions have already been detected if (pcmk_all_flags_set(*state, instance_all)) { return; } // If instance is a collective (a cloned group), check its children instead if (instance->variant > pe_native) { for (iter = instance->children; (iter != NULL) && !pcmk_all_flags_set(*state, instance_all); iter = iter->next) { check_instance_state((const pe_resource_t *) iter->data, state); } return; } // If we get here, instance is a primitive if (instance->running_on != NULL) { instance_state |= instance_active; } // Check each of the instance's actions for runnable start or stop for (iter = instance->actions; (iter != NULL) && !pcmk_all_flags_set(instance_state, instance_starting |instance_stopping); iter = iter->next) { const pe_action_t *action = (const pe_action_t *) iter->data; const bool optional = pcmk_is_set(action->flags, pe_action_optional); if (pcmk__str_eq(RSC_START, action->task, pcmk__str_none)) { if (!optional && pcmk_is_set(action->flags, pe_action_runnable)) { pe_rsc_trace(instance, "Instance is starting due to %s", action->uuid); instance_state |= instance_starting; } else { pe_rsc_trace(instance, "%s doesn't affect %s state (%s)", action->uuid, instance->id, (optional? "optional" : "unrunnable")); } } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_none)) { /* Only stop actions can be pseudo-actions for primitives. That * indicates that the node they are on is being fenced, so the stop * is implied rather than actually executed. */ if (!optional && pcmk_any_flags_set(action->flags, pe_action_pseudo|pe_action_runnable)) { pe_rsc_trace(instance, "Instance is stopping due to %s", action->uuid); instance_state |= instance_stopping; } else { pe_rsc_trace(instance, "%s doesn't affect %s state (%s)", action->uuid, instance->id, (optional? "optional" : "unrunnable")); } } } if (pcmk_all_flags_set(instance_state, instance_starting|instance_stopping)) { instance_state |= instance_restarting; } *state |= instance_state; } /*! * \internal * \brief Create actions for collective resource instances * * \param[in,out] collective Clone or bundle resource to create actions for * \param[in,out] instances List of clone instances or bundle containers */ void pcmk__create_instance_actions(pe_resource_t *collective, GList *instances) { uint32_t state = 0; pe_action_t *stop = NULL; pe_action_t *stopped = NULL; pe_action_t *start = NULL; pe_action_t *started = NULL; pe_rsc_trace(collective, "Creating collective instance actions for %s", collective->id); // Create actions for each instance appropriate to its variant for (GList *iter = instances; iter != NULL; iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; instance->cmds->create_actions(instance); check_instance_state(instance, &state); } // Create pseudo-actions for rsc start and started start = pe__new_rsc_pseudo_action(collective, RSC_START, !pcmk_is_set(state, instance_starting), true); started = pe__new_rsc_pseudo_action(collective, RSC_STARTED, !pcmk_is_set(state, instance_starting), false); started->priority = INFINITY; if (pcmk_any_flags_set(state, instance_active|instance_starting)) { pe__set_action_flags(started, pe_action_runnable); } // Create pseudo-actions for rsc stop and stopped stop = pe__new_rsc_pseudo_action(collective, RSC_STOP, !pcmk_is_set(state, instance_stopping), true); stopped = pe__new_rsc_pseudo_action(collective, RSC_STOPPED, !pcmk_is_set(state, instance_stopping), true); stopped->priority = INFINITY; if (!pcmk_is_set(state, instance_restarting)) { pe__set_action_flags(stop, pe_action_migrate_runnable); } if (collective->variant == pe_clone) { pe__create_clone_notif_pseudo_ops(collective, start, started, stop, stopped); } } /*! * \internal * \brief Get a list of clone instances or bundle replica containers * * \param[in] rsc Clone or bundle resource * * \return Clone instances if \p rsc is a clone, or a newly created list of * \p rsc's replica containers if \p rsc is a bundle * \note The caller must call free_instance_list() on the result when the list * is no longer needed. */ static inline GList * get_instance_list(const pe_resource_t *rsc) { if (rsc->variant == pe_container) { return pe__bundle_containers(rsc); } else { return rsc->children; } } /*! * \internal * \brief Free any memory created by get_instance_list() * * \param[in] rsc Clone or bundle resource passed to get_instance_list() * \param[in,out] list Return value of get_instance_list() for \p rsc */ static inline void free_instance_list(const pe_resource_t *rsc, GList *list) { if (list != rsc->children) { g_list_free(list); } } /*! * \internal * \brief Check whether an instance is compatible with a role and node * * \param[in] instance Clone instance or bundle replica container * \param[in] node Instance must match this node * \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role * \param[in] current If true, compare instance's original node and role, * otherwise compare assigned next node and role * * \return true if \p instance is compatible with \p node and \p role, * otherwise false */ bool pcmk__instance_matches(const pe_resource_t *instance, const pe_node_t *node, enum rsc_role_e role, bool current) { pe_node_t *instance_node = NULL; CRM_CHECK((instance != NULL) && (node != NULL), return false); if ((role != RSC_ROLE_UNKNOWN) && (role != instance->fns->state(instance, current))) { pe_rsc_trace(instance, "%s is not a compatible instance (role is not %s)", instance->id, role2text(role)); return false; } if (!is_set_recursive(instance, pe_rsc_block, true)) { // We only want instances that haven't failed instance_node = instance->fns->location(instance, NULL, current); } if (instance_node == NULL) { pe_rsc_trace(instance, "%s is not a compatible instance (not assigned to a node)", instance->id); return false; } if (!pe__same_node(instance_node, node)) { pe_rsc_trace(instance, "%s is not a compatible instance (assigned to %s not %s)", instance->id, pe__node_name(instance_node), pe__node_name(node)); return false; } return true; } /*! * \internal * \brief Find an instance that matches a given resource by node and role * * \param[in] match_rsc Resource that instance must match (for logging only) * \param[in] rsc Clone or bundle resource to check for matching instance * \param[in] node Instance must match this node * \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role * \param[in] current If true, compare instance's original node and role, * otherwise compare assigned next node and role * * \return \p rsc instance matching \p node and \p role if any, otherwise NULL */ static pe_resource_t * find_compatible_instance_on_node(const pe_resource_t *match_rsc, const pe_resource_t *rsc, const pe_node_t *node, enum rsc_role_e role, bool current) { GList *instances = NULL; instances = get_instance_list(rsc); for (GList *iter = instances; iter != NULL; iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; if (pcmk__instance_matches(instance, node, role, current)) { pe_rsc_trace(match_rsc, "Found %s %s instance %s compatible with %s on %s", role == RSC_ROLE_UNKNOWN? "matching" : role2text(role), rsc->id, instance->id, match_rsc->id, pe__node_name(node)); free_instance_list(rsc, instances); // Only frees list, not contents return instance; } } free_instance_list(rsc, instances); pe_rsc_trace(match_rsc, "No %s %s instance found compatible with %s on %s", ((role == RSC_ROLE_UNKNOWN)? "matching" : role2text(role)), rsc->id, match_rsc->id, pe__node_name(node)); return NULL; } /*! * \internal * \brief Find a clone instance or bundle container compatible with a resource * * \param[in] match_rsc Resource that instance must match * \param[in] rsc Clone or bundle resource to check for matching instance * \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role * \param[in] current If true, compare instance's original node and role, * otherwise compare assigned next node and role * * \return Compatible (by \p role and \p match_rsc location) instance of \p rsc * if any, otherwise NULL */ pe_resource_t * pcmk__find_compatible_instance(const pe_resource_t *match_rsc, const pe_resource_t *rsc, enum rsc_role_e role, bool current) { pe_resource_t *instance = NULL; GList *nodes = NULL; const pe_node_t *node = match_rsc->fns->location(match_rsc, NULL, current); // If match_rsc has a node, check only that node if (node != NULL) { return find_compatible_instance_on_node(match_rsc, rsc, node, role, current); } // Otherwise check for an instance matching any of match_rsc's allowed nodes nodes = pcmk__sort_nodes(g_hash_table_get_values(match_rsc->allowed_nodes), NULL); for (GList *iter = nodes; (iter != NULL) && (instance == NULL); iter = iter->next) { instance = find_compatible_instance_on_node(match_rsc, rsc, (pe_node_t *) iter->data, role, current); } if (instance == NULL) { pe_rsc_debug(rsc, "No %s instance found compatible with %s", rsc->id, match_rsc->id); } g_list_free(nodes); return instance; } /*! * \internal * \brief Unassign an instance if mandatory ordering has no interleave match * * \param[in] first 'First' action in an ordering * \param[in] then 'Then' action in an ordering * \param[in,out] then_instance 'Then' instance that has no interleave match * \param[in] type Group of enum pe_ordering flags to apply * \param[in] current If true, "then" action is stopped or demoted * * \return true if \p then_instance was unassigned, otherwise false */ static bool unassign_if_mandatory(const pe_action_t *first, const pe_action_t *then, pe_resource_t *then_instance, uint32_t type, bool current) { // Allow "then" instance to go down even without an interleave match if (current) { pe_rsc_trace(then->rsc, "%s has no instance to order before stopping " "or demoting %s", first->rsc->id, then_instance->id); /* If the "first" action must be runnable, but there is no "first" * instance, the "then" instance must not be allowed to come up. */ } else if (pcmk_any_flags_set(type, pe_order_runnable_left |pe_order_implies_then)) { pe_rsc_info(then->rsc, "Inhibiting %s from being active " "because there is no %s instance to interleave", then_instance->id, first->rsc->id); return pcmk__assign_resource(then_instance, NULL, true); } return false; } /*! * \internal * \brief Find first matching action for a clone instance or bundle container * * \param[in] action Action in an interleaved ordering * \param[in] instance Clone instance or bundle container being interleaved * \param[in] action_name Action to look for * \param[in] node If not NULL, require action to be on this node * \param[in] for_first If true, \p instance is the 'first' resource in the * ordering, otherwise it is the 'then' resource * * \return First action for \p instance (or in some cases if \p instance is a * bundle container, its containerized resource) that matches * \p action_name and \p node if any, otherwise NULL */ static pe_action_t * find_instance_action(const pe_action_t *action, const pe_resource_t *instance, const char *action_name, const pe_node_t *node, bool for_first) { const pe_resource_t *rsc = NULL; pe_action_t *matching_action = NULL; /* If instance is a bundle container, sometimes we should interleave the * action for the container itself, and sometimes for the containerized * resource. * * For example, given "start bundle A then bundle B", B likely requires the * service inside A's container to be active, rather than just the * container, so we should interleave the action for A's containerized * resource. On the other hand, it's possible B's container itself requires * something from A, so we should interleave the action for B's container. * * Essentially, for 'first', we should use the containerized resource for * everything except stop, and for 'then', we should use the container for * everything except promote and demote (which can only be performed on the * containerized resource). */ if ((for_first && !pcmk__str_any_of(action->task, CRMD_ACTION_STOP, CRMD_ACTION_STOPPED, NULL)) || (!for_first && pcmk__str_any_of(action->task, CRMD_ACTION_PROMOTE, CRMD_ACTION_PROMOTED, CRMD_ACTION_DEMOTE, CRMD_ACTION_DEMOTED, NULL))) { rsc = pe__get_rsc_in_container(instance); } if (rsc == NULL) { rsc = instance; // No containerized resource, use instance itself } else { node = NULL; // Containerized actions are on bundle-created guest } matching_action = find_first_action(rsc->actions, NULL, action_name, node); if (matching_action != NULL) { return matching_action; } if (pcmk_is_set(instance->flags, pe_rsc_orphan) || pcmk__str_any_of(action_name, RSC_STOP, RSC_DEMOTE, NULL)) { crm_trace("No %s action found for %s%s", action_name, pcmk_is_set(instance->flags, pe_rsc_orphan)? "orphan " : "", instance->id); } else { crm_err("No %s action found for %s to interleave (bug?)", action_name, instance->id); } return NULL; } /*! * \internal * \brief Get the original action name of a bundle or clone action * * Given an action for a bundle or clone, get the original action name, * mapping notify to the action being notified, and if the instances are * primitives, mapping completion actions to the action that was completed * (for example, stopped to stop). * * \param[in] action Clone or bundle action to check * * \return Original action name for \p action */ static const char * orig_action_name(const pe_action_t *action) { const pe_resource_t *instance = action->rsc->children->data; // Any instance char *action_type = NULL; const char *action_name = action->task; enum action_tasks orig_task = no_action; if (pcmk__strcase_any_of(action->task, CRMD_ACTION_NOTIFY, CRMD_ACTION_NOTIFIED, NULL)) { // action->uuid is RSC_(confirmed-){pre,post}_notify_ACTION_INTERVAL CRM_CHECK(parse_op_key(action->uuid, NULL, &action_type, NULL), return task2text(no_action)); action_name = strstr(action_type, "_notify_"); CRM_CHECK(action_name != NULL, return task2text(no_action)); action_name += strlen("_notify_"); } orig_task = get_complex_task(instance, action_name); free(action_type); return task2text(orig_task); } /*! * \internal * \brief Update two interleaved actions according to an ordering between them * * Given information about an ordering of two interleaved actions, update the * actions' flags (and runnable_before members if appropriate) as appropriate * for the ordering. Effects may cascade to other orderings involving the * actions as well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this node * \param[in] filter Action flags to limit scope of certain updates (may * include pe_action_optional to affect only mandatory * actions, and pe_action_runnable to affect only * runnable actions) * \param[in] type Group of enum pe_ordering flags to apply * * \return Group of enum pcmk__updated flags indicating what was updated */ static uint32_t update_interleaved_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t filter, uint32_t type) { GList *instances = NULL; uint32_t changed = pcmk__updated_none; const char *orig_first_task = orig_action_name(first); // Stops and demotes must be interleaved with instance on current node bool current = pcmk__ends_with(first->uuid, "_" CRMD_ACTION_STOPPED "_0") || pcmk__ends_with(first->uuid, "_" CRMD_ACTION_DEMOTED "_0"); // Update the specified actions for each "then" instance individually instances = get_instance_list(then->rsc); for (GList *iter = instances; iter != NULL; iter = iter->next) { pe_resource_t *first_instance = NULL; pe_resource_t *then_instance = iter->data; pe_action_t *first_action = NULL; pe_action_t *then_action = NULL; // Find a "first" instance to interleave with this "then" instance first_instance = pcmk__find_compatible_instance(then_instance, first->rsc, RSC_ROLE_UNKNOWN, current); if (first_instance == NULL) { // No instance can be interleaved if (unassign_if_mandatory(first, then, then_instance, type, current)) { pcmk__set_updated_flags(changed, first, pcmk__updated_then); } continue; } first_action = find_instance_action(first, first_instance, orig_first_task, node, true); if (first_action == NULL) { continue; } then_action = find_instance_action(then, then_instance, then->task, node, false); if (then_action == NULL) { continue; } if (order_actions(first_action, then_action, type)) { pcmk__set_updated_flags(changed, first, pcmk__updated_first|pcmk__updated_then); } changed |= then_instance->cmds->update_ordered_actions( first_action, then_action, node, first_instance->cmds->action_flags(first_action, node), filter, type, then->rsc->cluster); } free_instance_list(then->rsc, instances); return changed; } /*! * \internal * \brief Check whether two actions in an ordering can be interleaved * * \param[in] first 'First' action in the ordering * \param[in] then 'Then' action in the ordering * * \return true if \p first and \p then can be interleaved, otherwise false */ static bool can_interleave_actions(const pe_action_t *first, const pe_action_t *then) { bool interleave = false; pe_resource_t *rsc = NULL; if ((first->rsc == NULL) || (then->rsc == NULL)) { crm_trace("Not interleaving %s with %s: not resource actions", first->uuid, then->uuid); return false; } if (first->rsc == then->rsc) { crm_trace("Not interleaving %s with %s: same resource", first->uuid, then->uuid); return false; } if ((first->rsc->variant < pe_clone) || (then->rsc->variant < pe_clone)) { crm_trace("Not interleaving %s with %s: not clones or bundles", first->uuid, then->uuid); return false; } if (pcmk__ends_with(then->uuid, "_stop_0") || pcmk__ends_with(then->uuid, "_demote_0")) { rsc = first->rsc; } else { rsc = then->rsc; } interleave = crm_is_true(g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE)); pe_rsc_trace(rsc, "'%s then %s' will %sbe interleaved (based on %s)", first->uuid, then->uuid, (interleave? "" : "not "), rsc->id); return interleave; } /*! * \internal * \brief Update non-interleaved instance actions according to an ordering * * Given information about an ordering of two non-interleaved actions, update * the actions' flags (and runnable_before members if appropriate) as * appropriate for the ordering. Effects may cascade to other orderings * involving the actions as well. * * \param[in,out] instance Clone instance or bundle container * \param[in,out] first "First" action in ordering * \param[in] then "Then" action in ordering (for \p instance's parent) * \param[in] node If not NULL, limit scope of ordering to this node * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates (may * include pe_action_optional to affect only mandatory * actions, and pe_action_runnable to affect only * runnable actions) * \param[in] type Group of enum pe_ordering flags to apply * * \return Group of enum pcmk__updated flags indicating what was updated */ static uint32_t update_noninterleaved_actions(pe_resource_t *instance, pe_action_t *first, const pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type) { pe_action_t *instance_action = NULL; uint32_t instance_flags = 0; uint32_t changed = pcmk__updated_none; // Check whether instance has an equivalent of "then" action instance_action = find_first_action(instance->actions, NULL, then->task, node); if (instance_action == NULL) { return changed; } // Check whether action is runnable instance_flags = instance->cmds->action_flags(instance_action, node); if (!pcmk_is_set(instance_flags, pe_action_runnable)) { return changed; } // If so, update actions for the instance changed = instance->cmds->update_ordered_actions(first, instance_action, node, flags, filter, type, instance->cluster); // Propagate any changes to later actions if (pcmk_is_set(changed, pcmk__updated_then)) { for (GList *after_iter = instance_action->actions_after; after_iter != NULL; after_iter = after_iter->next) { pe_action_wrapper_t *after = after_iter->data; pcmk__update_action_for_orderings(after->action, instance->cluster); } } return changed; } /*! * \internal * \brief Update two actions according to an ordering between them * * Given information about an ordering of two clone or bundle actions, update * the actions' flags (and runnable_before members if appropriate) as * appropriate for the ordering. Effects may cascade to other orderings * involving the actions as well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this node * (only used when interleaving instances) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates (may * include pe_action_optional to affect only mandatory * actions, and pe_action_runnable to affect only * runnable actions) * \param[in] type Group of enum pe_ordering flags to apply * \param[in,out] data_set Cluster working set * * \return Group of enum pcmk__updated flags indicating what was updated */ uint32_t pcmk__instance_update_ordered_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set) { CRM_ASSERT((first != NULL) && (then != NULL) && (data_set != NULL)); if (then->rsc == NULL) { return pcmk__updated_none; } else if (can_interleave_actions(first, then)) { return update_interleaved_actions(first, then, node, filter, type); } else { uint32_t changed = pcmk__updated_none; GList *instances = get_instance_list(then->rsc); // Update actions for the clone or bundle resource itself changed |= pcmk__update_ordered_actions(first, then, node, flags, filter, type, data_set); // Update the 'then' clone instances or bundle containers individually for (GList *iter = instances; iter != NULL; iter = iter->next) { pe_resource_t *instance = iter->data; changed |= update_noninterleaved_actions(instance, first, then, node, flags, filter, type); } free_instance_list(then->rsc, instances); return changed; } } #define pe__clear_action_summary_flags(flags, action, flag) do { \ flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Action summary", action->rsc->id, \ flags, flag, #flag); \ } while (0) /*! * \internal * \brief Return action flags for a given clone or bundle action * * \param[in,out] action Action for a clone or bundle * \param[in] instances Clone instances or bundle containers * \param[in] node If not NULL, limit effects to this node * * \return Flags appropriate to \p action on \p node */ uint32_t pcmk__collective_action_flags(pe_action_t *action, const GList *instances, const pe_node_t *node) { bool any_runnable = false; const char *action_name = orig_action_name(action); // Set original assumptions (optional and runnable may be cleared below) uint32_t flags = pe_action_optional|pe_action_runnable|pe_action_pseudo; for (const GList *iter = instances; iter != NULL; iter = iter->next) { const pe_resource_t *instance = iter->data; const pe_node_t *instance_node = NULL; pe_action_t *instance_action = NULL; uint32_t instance_flags; // Node is relevant only to primitive instances if (instance->variant == pe_native) { instance_node = node; } instance_action = find_first_action(instance->actions, NULL, action_name, instance_node); if (instance_action == NULL) { pe_rsc_trace(action->rsc, "%s has no %s action on %s", instance->id, action_name, pe__node_name(node)); continue; } pe_rsc_trace(action->rsc, "%s has %s for %s on %s", instance->id, instance_action->uuid, action_name, pe__node_name(node)); instance_flags = instance->cmds->action_flags(instance_action, node); // If any instance action is mandatory, so is the collective action if (pcmk_is_set(flags, pe_action_optional) && !pcmk_is_set(instance_flags, pe_action_optional)) { pe_rsc_trace(instance, "%s is mandatory because %s is", action->uuid, instance_action->uuid); pe__clear_action_summary_flags(flags, action, pe_action_optional); pe__clear_action_flags(action, pe_action_optional); } // If any instance action is runnable, so is the collective action if (pcmk_is_set(instance_flags, pe_action_runnable)) { any_runnable = true; } } if (!any_runnable) { pe_rsc_trace(action->rsc, "%s is not runnable because no instance can run %s", action->uuid, action_name); pe__clear_action_summary_flags(flags, action, pe_action_runnable); if (node == NULL) { pe__clear_action_flags(action, pe_action_runnable); } } return flags; } /*! * \internal * \brief Add a collective resource's colocations to a list for an instance * * \param[in,out] list Colocation list to add to * \param[in] instance Clone or bundle instance or instance group member * \param[in] collective Clone or bundle resource with colocations to add * \param[in] with_this If true, add collective's "with this" colocations, * otherwise add its "this with" colocations */ void pcmk__add_collective_constraints(GList **list, const pe_resource_t *instance, const pe_resource_t *collective, bool with_this) { const GList *colocations = NULL; bool everywhere = false; CRM_CHECK((list != NULL) && (instance != NULL), return); if (collective == NULL) { return; } switch (collective->variant) { case pe_clone: case pe_container: break; default: return; } everywhere = can_run_everywhere(collective); if (with_this) { colocations = collective->rsc_cons_lhs; } else { colocations = collective->rsc_cons; } for (const GList *iter = colocations; iter != NULL; iter = iter->next) { const pcmk__colocation_t *colocation = iter->data; if (with_this && !pcmk__colocation_has_influence(colocation, instance)) { continue; } if (!everywhere || (colocation->score < 0) || (!with_this && (colocation->score == INFINITY))) { if (with_this) { pcmk__add_with_this(list, colocation, instance); } else { pcmk__add_this_with(list, colocation, instance); } } } } diff --git a/lib/pacemaker/pcmk_sched_location.c b/lib/pacemaker/pcmk_sched_location.c index 1b3ab3bd93..6f2e8dd6ba 100644 --- a/lib/pacemaker/pcmk_sched_location.c +++ b/lib/pacemaker/pcmk_sched_location.c @@ -1,674 +1,673 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include "libpacemaker_private.h" static int get_node_score(const char *rule, const char *score, bool raw, pe_node_t *node, pe_resource_t *rsc) { int score_f = 0; if (score == NULL) { pe_err("Rule %s: no score specified. Assuming 0.", rule); } else if (raw) { score_f = char2score(score); } else { const char *attr_score = NULL; attr_score = pe_node_attribute_calculated(node, score, rsc, pe__rsc_node_current); if (attr_score == NULL) { crm_debug("Rule %s: %s did not have a value for %s", rule, pe__node_name(node), score); score_f = -INFINITY; } else { crm_debug("Rule %s: %s had value %s for %s", rule, pe__node_name(node), attr_score, score); score_f = char2score(attr_score); } } return score_f; } static pe__location_t * generate_location_rule(pe_resource_t *rsc, xmlNode *rule_xml, const char *discovery, crm_time_t *next_change, pe_re_match_data_t *re_match_data) { const char *rule_id = NULL; const char *score = NULL; const char *boolean = NULL; const char *role = NULL; GList *iter = NULL; GList *nodes = NULL; bool do_and = true; bool accept = true; bool raw_score = true; bool score_allocated = false; pe__location_t *location_rule = NULL; rule_xml = expand_idref(rule_xml, rsc->cluster->input); if (rule_xml == NULL) { return NULL; } rule_id = crm_element_value(rule_xml, XML_ATTR_ID); boolean = crm_element_value(rule_xml, XML_RULE_ATTR_BOOLEAN_OP); role = crm_element_value(rule_xml, XML_RULE_ATTR_ROLE); crm_trace("Processing rule: %s", rule_id); if ((role != NULL) && (text2role(role) == RSC_ROLE_UNKNOWN)) { pe_err("Bad role specified for %s: %s", rule_id, role); return NULL; } score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE); if (score == NULL) { score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE_ATTRIBUTE); if (score != NULL) { raw_score = false; } } if (pcmk__str_eq(boolean, "or", pcmk__str_casei)) { do_and = false; } location_rule = pcmk__new_location(rule_id, rsc, 0, discovery, NULL); if (location_rule == NULL) { return NULL; } if ((re_match_data != NULL) && (re_match_data->nregs > 0) && (re_match_data->pmatch[0].rm_so != -1) && !raw_score) { char *result = pe_expand_re_matches(score, re_match_data); if (result != NULL) { score = result; score_allocated = true; } } if (role != NULL) { crm_trace("Setting role filter: %s", role); location_rule->role_filter = text2role(role); if (location_rule->role_filter == RSC_ROLE_UNPROMOTED) { /* Any promotable clone cannot be promoted without being in the * unpromoted role first. Ergo, any constraint for the unpromoted * role applies to every role. */ location_rule->role_filter = RSC_ROLE_UNKNOWN; } } if (do_and) { nodes = pcmk__copy_node_list(rsc->cluster->nodes, true); for (iter = nodes; iter != NULL; iter = iter->next) { pe_node_t *node = iter->data; node->weight = get_node_score(rule_id, score, raw_score, node, rsc); } } for (iter = rsc->cluster->nodes; iter != NULL; iter = iter->next) { int score_f = 0; pe_node_t *node = iter->data; pe_match_data_t match_data = { .re = re_match_data, .params = pe_rsc_params(rsc, node, rsc->cluster), .meta = rsc->meta, }; accept = pe_test_rule(rule_xml, node->details->attrs, RSC_ROLE_UNKNOWN, rsc->cluster->now, next_change, &match_data); crm_trace("Rule %s %s on %s", ID(rule_xml), accept? "passed" : "failed", pe__node_name(node)); score_f = get_node_score(rule_id, score, raw_score, node, rsc); if (accept) { pe_node_t *local = pe_find_node_id(nodes, node->details->id); if ((local == NULL) && do_and) { continue; } else if (local == NULL) { local = pe__copy_node(node); nodes = g_list_append(nodes, local); } if (!do_and) { local->weight = pcmk__add_scores(local->weight, score_f); } crm_trace("%s has score %s after %s", pe__node_name(node), pcmk_readable_score(local->weight), rule_id); } else if (do_and && !accept) { // Remove it pe_node_t *delete = pe_find_node_id(nodes, node->details->id); if (delete != NULL) { nodes = g_list_remove(nodes, delete); crm_trace("%s did not match", pe__node_name(node)); } free(delete); } } if (score_allocated) { free((char *)score); } location_rule->node_list_rh = nodes; if (location_rule->node_list_rh == NULL) { crm_trace("No matching nodes for rule %s", rule_id); return NULL; } crm_trace("%s: %d nodes matched", rule_id, g_list_length(location_rule->node_list_rh)); return location_rule; } static void unpack_rsc_location(xmlNode *xml_obj, pe_resource_t *rsc, const char *role, const char *score, pe_re_match_data_t *re_match_data) { pe__location_t *location = NULL; const char *rsc_id = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE); const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *node = crm_element_value(xml_obj, XML_CIB_TAG_NODE); const char *discovery = crm_element_value(xml_obj, XML_LOCATION_ATTR_DISCOVERY); if (rsc == NULL) { pcmk__config_warn("Ignoring constraint '%s' because resource '%s' " "does not exist", id, rsc_id); return; } if (score == NULL) { score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); } if ((node != NULL) && (score != NULL)) { int score_i = char2score(score); pe_node_t *match = pe_find_node(rsc->cluster->nodes, node); if (!match) { return; } location = pcmk__new_location(id, rsc, score_i, discovery, match); } else { bool empty = true; crm_time_t *next_change = crm_time_new_undefined(); /* This loop is logically parallel to pe_evaluate_rules(), except * instead of checking whether any rule is active, we set up location * constraints for each active rule. */ for (xmlNode *rule_xml = first_named_child(xml_obj, XML_TAG_RULE); rule_xml != NULL; rule_xml = crm_next_same_xml(rule_xml)) { empty = false; crm_trace("Unpacking %s/%s", id, ID(rule_xml)); generate_location_rule(rsc, rule_xml, discovery, next_change, re_match_data); } if (empty) { pcmk__config_err("Ignoring constraint '%s' because it contains " "no rules", id); } /* If there is a point in the future when the evaluation of a rule will * change, make sure the scheduler is re-run by that time. */ if (crm_time_is_defined(next_change)) { time_t t = (time_t) crm_time_get_seconds_since_epoch(next_change); pe__update_recheck_time(t, rsc->cluster); } crm_time_free(next_change); return; } if (role == NULL) { role = crm_element_value(xml_obj, XML_RULE_ATTR_ROLE); } if ((location != NULL) && (role != NULL)) { if (text2role(role) == RSC_ROLE_UNKNOWN) { pe_err("Invalid constraint %s: Bad role %s", id, role); return; } else { enum rsc_role_e r = text2role(role); switch (r) { case RSC_ROLE_UNKNOWN: case RSC_ROLE_STARTED: case RSC_ROLE_UNPROMOTED: /* Applies to all */ location->role_filter = RSC_ROLE_UNKNOWN; break; default: location->role_filter = r; break; } } } } static void unpack_simple_location(xmlNode *xml_obj, pe_working_set_t *data_set) { const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *value = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE); if (value) { pe_resource_t *rsc; rsc = pcmk__find_constraint_resource(data_set->resources, value); unpack_rsc_location(xml_obj, rsc, NULL, NULL, NULL); } value = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE_PATTERN); if (value) { regex_t *r_patt = calloc(1, sizeof(regex_t)); bool invert = false; if (value[0] == '!') { value++; invert = true; } if (regcomp(r_patt, value, REG_EXTENDED) != 0) { pcmk__config_err("Ignoring constraint '%s' because " XML_LOC_ATTR_SOURCE_PATTERN " has invalid value '%s'", id, value); free(r_patt); return; } for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *r = iter->data; int nregs = 0; regmatch_t *pmatch = NULL; int status; if (r_patt->re_nsub > 0) { nregs = r_patt->re_nsub + 1; } else { nregs = 1; } pmatch = calloc(nregs, sizeof(regmatch_t)); status = regexec(r_patt, r->id, nregs, pmatch, 0); if (!invert && (status == 0)) { pe_re_match_data_t re_match_data = { .string = r->id, .nregs = nregs, .pmatch = pmatch }; crm_debug("'%s' matched '%s' for %s", r->id, value, id); unpack_rsc_location(xml_obj, r, NULL, NULL, &re_match_data); } else if (invert && (status != 0)) { crm_debug("'%s' is an inverted match of '%s' for %s", r->id, value, id); unpack_rsc_location(xml_obj, r, NULL, NULL, NULL); } else { crm_trace("'%s' does not match '%s' for %s", r->id, value, id); } free(pmatch); } regfree(r_patt); free(r_patt); } } // \return Standard Pacemaker return code static int unpack_location_tags(xmlNode *xml_obj, xmlNode **expanded_xml, pe_working_set_t *data_set) { const char *id = NULL; const char *rsc_id = NULL; const char *state = NULL; pe_resource_t *rsc = NULL; pe_tag_t *tag = NULL; xmlNode *rsc_set = NULL; *expanded_xml = NULL; CRM_CHECK(xml_obj != NULL, return EINVAL); id = ID(xml_obj); if (id == NULL) { pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, crm_element_name(xml_obj)); return pcmk_rc_unpack_error; } // Check whether there are any resource sets with template or tag references *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set); if (*expanded_xml != NULL) { crm_log_xml_trace(*expanded_xml, "Expanded rsc_location"); return pcmk_rc_ok; } rsc_id = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE); if (rsc_id == NULL) { return pcmk_rc_ok; } if (!pcmk__valid_resource_or_tag(data_set, rsc_id, &rsc, &tag)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", id, rsc_id); return pcmk_rc_unpack_error; } else if (rsc != NULL) { // No template is referenced return pcmk_rc_ok; } state = crm_element_value(xml_obj, XML_RULE_ATTR_ROLE); *expanded_xml = copy_xml(xml_obj); // Convert any template or tag reference into constraint resource_set if (!pcmk__tag_to_set(*expanded_xml, &rsc_set, XML_LOC_ATTR_SOURCE, false, data_set)) { free_xml(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_unpack_error; } if (rsc_set != NULL) { if (state != NULL) { // Move "rsc-role" into converted resource_set as "role" attribute crm_xml_add(rsc_set, "role", state); xml_remove_prop(*expanded_xml, XML_RULE_ATTR_ROLE); } crm_log_xml_trace(*expanded_xml, "Expanded rsc_location"); } else { // No sets free_xml(*expanded_xml); *expanded_xml = NULL; } return pcmk_rc_ok; } // \return Standard Pacemaker return code static int unpack_location_set(xmlNode *location, xmlNode *set, pe_working_set_t *data_set) { xmlNode *xml_rsc = NULL; pe_resource_t *resource = NULL; const char *set_id; const char *role; const char *local_score; CRM_CHECK(set != NULL, return EINVAL); set_id = ID(set); if (set_id == NULL) { pcmk__config_err("Ignoring " XML_CONS_TAG_RSC_SET " without " XML_ATTR_ID " in constraint '%s'", pcmk__s(ID(location), "(missing ID)")); return pcmk_rc_unpack_error; } role = crm_element_value(set, "role"); local_score = crm_element_value(set, XML_RULE_ATTR_SCORE); for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { resource = pcmk__find_constraint_resource(data_set->resources, ID(xml_rsc)); if (resource == NULL) { pcmk__config_err("%s: No resource found for %s", set_id, ID(xml_rsc)); return pcmk_rc_unpack_error; } unpack_rsc_location(location, resource, role, local_score, NULL); } return pcmk_rc_ok; } void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set) { xmlNode *set = NULL; bool any_sets = false; xmlNode *orig_xml = NULL; xmlNode *expanded_xml = NULL; if (unpack_location_tags(xml_obj, &expanded_xml, data_set) != pcmk_rc_ok) { return; } if (expanded_xml) { orig_xml = xml_obj; xml_obj = expanded_xml; } for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL; set = crm_next_same_xml(set)) { any_sets = true; set = expand_idref(set, data_set->input); if ((set == NULL) // Configuration error, message already logged || (unpack_location_set(xml_obj, set, data_set) != pcmk_rc_ok)) { if (expanded_xml) { free_xml(expanded_xml); } return; } } if (expanded_xml) { free_xml(expanded_xml); xml_obj = orig_xml; } if (!any_sets) { unpack_simple_location(xml_obj, data_set); } } /*! * \internal * \brief Add a new location constraint to a cluster working set * * \param[in] id XML ID of location constraint * \param[in,out] rsc Resource in location constraint * \param[in] node_score Constraint score * \param[in] discover_mode Resource discovery option for constraint * \param[in] node Node in constraint (or NULL if rule-based) * * \return Newly allocated location constraint * \note The result will be added to the cluster (via \p rsc) and should not be * freed separately. */ pe__location_t * pcmk__new_location(const char *id, pe_resource_t *rsc, int node_score, const char *discover_mode, pe_node_t *node) { pe__location_t *new_con = NULL; if (id == NULL) { pe_err("Invalid constraint: no ID specified"); return NULL; } else if (rsc == NULL) { pe_err("Invalid constraint %s: no resource specified", id); return NULL; } else if (node == NULL) { CRM_CHECK(node_score == 0, return NULL); } new_con = calloc(1, sizeof(pe__location_t)); if (new_con != NULL) { new_con->id = strdup(id); new_con->rsc_lh = rsc; new_con->node_list_rh = NULL; new_con->role_filter = RSC_ROLE_UNKNOWN; if (pcmk__str_eq(discover_mode, "always", pcmk__str_null_matches|pcmk__str_casei)) { new_con->discover_mode = pe_discover_always; } else if (pcmk__str_eq(discover_mode, "never", pcmk__str_casei)) { new_con->discover_mode = pe_discover_never; } else if (pcmk__str_eq(discover_mode, "exclusive", pcmk__str_casei)) { new_con->discover_mode = pe_discover_exclusive; rsc->exclusive_discover = TRUE; } else { pe_err("Invalid " XML_LOCATION_ATTR_DISCOVERY " value %s " "in location constraint", discover_mode); } if (node != NULL) { pe_node_t *copy = pe__copy_node(node); copy->weight = node_score; new_con->node_list_rh = g_list_prepend(NULL, copy); } rsc->cluster->placement_constraints = g_list_prepend( rsc->cluster->placement_constraints, new_con); rsc->rsc_location = g_list_prepend(rsc->rsc_location, new_con); } return new_con; } /*! * \internal * \brief Apply all location constraints * * \param[in,out] data_set Cluster working set */ void pcmk__apply_locations(pe_working_set_t *data_set) { for (GList *iter = data_set->placement_constraints; iter != NULL; iter = iter->next) { pe__location_t *location = iter->data; location->rsc_lh->cmds->apply_location(location->rsc_lh, location); } } /*! * \internal * \brief Apply a location constraint to a resource's allowed node scores * * \param[in,out] rsc Resource to apply constraint to * \param[in,out] location Location constraint to apply * * \note This does not consider the resource's children, so the resource's * apply_location() method should be used instead in most cases. */ void pcmk__apply_location(pe_resource_t *rsc, pe__location_t *location) { bool need_role = false; CRM_ASSERT((rsc != NULL) && (location != NULL)); // If a role was specified, ensure constraint is applicable need_role = (location->role_filter > RSC_ROLE_UNKNOWN); if (need_role && (location->role_filter != rsc->next_role)) { pe_rsc_trace(rsc, "Not applying %s to %s because role will be %s not %s", location->id, rsc->id, role2text(rsc->next_role), role2text(location->role_filter)); return; } if (location->node_list_rh == NULL) { pe_rsc_trace(rsc, "Not applying %s to %s because no nodes match", location->id, rsc->id); return; } pe_rsc_trace(rsc, "Applying %s%s%s to %s", location->id, (need_role? " for role " : ""), (need_role? role2text(location->role_filter) : ""), rsc->id); for (GList *iter = location->node_list_rh; iter != NULL; iter = iter->next) { pe_node_t *node = iter->data; - pe_node_t *allowed_node = NULL; + pe_node_t *allowed_node = g_hash_table_lookup(rsc->allowed_nodes, + node->details->id); - allowed_node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes, - node->details->id); if (allowed_node == NULL) { pe_rsc_trace(rsc, "* = %d on %s", node->weight, pe__node_name(node)); allowed_node = pe__copy_node(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) allowed_node->details->id, allowed_node); } else { pe_rsc_trace(rsc, "* + %d on %s", node->weight, pe__node_name(node)); allowed_node->weight = pcmk__add_scores(allowed_node->weight, node->weight); } if (allowed_node->rsc_discover_mode < location->discover_mode) { if (location->discover_mode == pe_discover_exclusive) { rsc->exclusive_discover = TRUE; } /* exclusive > never > always... always is default */ allowed_node->rsc_discover_mode = location->discover_mode; } } } diff --git a/lib/pacemaker/pcmk_sched_nodes.c b/lib/pacemaker/pcmk_sched_nodes.c index 92230d1e32..cf9e3a6182 100644 --- a/lib/pacemaker/pcmk_sched_nodes.c +++ b/lib/pacemaker/pcmk_sched_nodes.c @@ -1,430 +1,430 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // lrmd_event_data_t #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Check whether a node is available to run resources * * \param[in] node Node to check * \param[in] consider_score If true, consider a negative score unavailable * \param[in] consider_guest If true, consider a guest node unavailable whose * resource will not be active * * \return true if node is online and not shutting down, unclean, or in standby * or maintenance mode, otherwise false */ bool pcmk__node_available(const pe_node_t *node, bool consider_score, bool consider_guest) { if ((node == NULL) || (node->details == NULL) || !node->details->online || node->details->shutdown || node->details->unclean || node->details->standby || node->details->maintenance) { return false; } if (consider_score && (node->weight < 0)) { return false; } // @TODO Go through all callers to see which should set consider_guest if (consider_guest && pe__is_guest_node(node)) { pe_resource_t *guest = node->details->remote_rsc->container; if (guest->fns->location(guest, NULL, FALSE) == NULL) { return false; } } return true; } /*! * \internal * \brief Copy a hash table of node objects * * \param[in] nodes Hash table to copy * * \return New copy of nodes (or NULL if nodes is NULL) */ GHashTable * pcmk__copy_node_table(GHashTable *nodes) { GHashTable *new_table = NULL; GHashTableIter iter; pe_node_t *node = NULL; if (nodes == NULL) { return NULL; } new_table = pcmk__strkey_table(NULL, free); g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { pe_node_t *new_node = pe__copy_node(node); g_hash_table_insert(new_table, (gpointer) new_node->details->id, new_node); } return new_table; } /*! * \internal * \brief Free a table of node tables * * \param[in,out] data Table to free * * \note This is a \c GDestroyNotify wrapper for \c g_hash_table_destroy(). */ static void destroy_node_tables(gpointer data) { g_hash_table_destroy((GHashTable *) data); } /*! * \internal * \brief Recursively copy the node tables of a resource * * Build a hash table containing copies of the allowed nodes tables of \p rsc * and its entire tree of descendants. The key is the resource ID, and the value * is a copy of the resource's node table. * * \param[in] rsc Resource whose node table to copy * \param[in,out] copy Where to store the copied node tables * * \note \p *copy should be \c NULL for the top-level call. * \note The caller is responsible for freeing \p copy using * \c g_hash_table_destroy(). */ void pcmk__copy_node_tables(const pe_resource_t *rsc, GHashTable **copy) { CRM_ASSERT((rsc != NULL) && (copy != NULL)); if (*copy == NULL) { *copy = pcmk__strkey_table(NULL, destroy_node_tables); } g_hash_table_insert(*copy, rsc->id, pcmk__copy_node_table(rsc->allowed_nodes)); for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) { pcmk__copy_node_tables((const pe_resource_t *) iter->data, copy); } } /*! * \internal * \brief Recursively restore the node tables of a resource from backup * * Given a hash table containing backup copies of the allowed nodes tables of * \p rsc and its entire tree of descendants, replace the resources' current * node tables with the backed-up copies. * * \param[in,out] rsc Resource whose node tables to restore * \param[in] backup Table of backup node tables (created by * \c pcmk__copy_node_tables()) * * \note This function frees the resources' current node tables. */ void pcmk__restore_node_tables(pe_resource_t *rsc, GHashTable *backup) { CRM_ASSERT((rsc != NULL) && (backup != NULL)); g_hash_table_destroy(rsc->allowed_nodes); // Copy to avoid danger with multiple restores rsc->allowed_nodes = g_hash_table_lookup(backup, rsc->id); rsc->allowed_nodes = pcmk__copy_node_table(rsc->allowed_nodes); for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pcmk__restore_node_tables((pe_resource_t *) iter->data, backup); } } /*! * \internal * \brief Copy a list of node objects * * \param[in] list List to copy * \param[in] reset Set copies' scores to 0 * * \return New list of shallow copies of nodes in original list */ GList * pcmk__copy_node_list(const GList *list, bool reset) { GList *result = NULL; for (const GList *iter = list; iter != NULL; iter = iter->next) { pe_node_t *new_node = NULL; pe_node_t *this_node = iter->data; new_node = pe__copy_node(this_node); if (reset) { new_node->weight = 0; } result = g_list_prepend(result, new_node); } return result; } /*! * \internal * \brief Compare two nodes for assignment preference * * Given two nodes, check which one is more preferred by assignment criteria * such as node score and utilization. * * \param[in] a First node to compare * \param[in] b Second node to compare * \param[in] data Node that resource being assigned is active on, if any * * \return -1 if \p a is preferred, +1 if \p b is preferred, or 0 if they are * equally preferred */ static gint compare_nodes(gconstpointer a, gconstpointer b, gpointer data) { const pe_node_t *node1 = (const pe_node_t *) a; const pe_node_t *node2 = (const pe_node_t *) b; const pe_node_t *active = (const pe_node_t *) data; int node1_score = -INFINITY; int node2_score = -INFINITY; int result = 0; if (a == NULL) { return 1; } if (b == NULL) { return -1; } // Compare node scores if (pcmk__node_available(node1, false, false)) { node1_score = node1->weight; } if (pcmk__node_available(node2, false, false)) { node2_score = node2->weight; } if (node1_score > node2_score) { crm_trace("%s (%d) > %s (%d) : score", pe__node_name(node1), node1_score, pe__node_name(node2), node2_score); return -1; } if (node1_score < node2_score) { crm_trace("%s (%d) < %s (%d) : score", pe__node_name(node1), node1_score, pe__node_name(node2), node2_score); return 1; } crm_trace("%s (%d) == %s (%d) : score", pe__node_name(node1), node1_score, pe__node_name(node2), node2_score); // If appropriate, compare node utilization if (pcmk__str_eq(node1->details->data_set->placement_strategy, "minimal", pcmk__str_casei)) { goto equal; } if (pcmk__str_eq(node1->details->data_set->placement_strategy, "balanced", pcmk__str_casei)) { result = pcmk__compare_node_capacities(node1, node2); if (result < 0) { crm_trace("%s > %s : capacity (%d)", pe__node_name(node1), pe__node_name(node2), result); return -1; } else if (result > 0) { crm_trace("%s < %s : capacity (%d)", pe__node_name(node1), pe__node_name(node2), result); return 1; } } // Compare number of resources already assigned to node if (node1->details->num_resources < node2->details->num_resources) { crm_trace("%s (%d) > %s (%d) : resources", pe__node_name(node1), node1->details->num_resources, pe__node_name(node2), node2->details->num_resources); return -1; } else if (node1->details->num_resources > node2->details->num_resources) { crm_trace("%s (%d) < %s (%d) : resources", pe__node_name(node1), node1->details->num_resources, pe__node_name(node2), node2->details->num_resources); return 1; } // Check whether one node is already running desired resource if (active != NULL) { if (pe__same_node(active, node1)) { crm_trace("%s (%d) > %s (%d) : active", pe__node_name(node1), node1->details->num_resources, pe__node_name(node2), node2->details->num_resources); return -1; } else if (pe__same_node(active, node2)) { crm_trace("%s (%d) < %s (%d) : active", pe__node_name(node1), node1->details->num_resources, pe__node_name(node2), node2->details->num_resources); return 1; } } // If all else is equal, prefer node with lowest-sorting name equal: crm_trace("%s = %s", pe__node_name(node1), pe__node_name(node2)); return strcmp(node1->details->uname, node2->details->uname); } /*! * \internal * \brief Sort a list of nodes by assigment preference * * \param[in,out] nodes Node list to sort * \param[in] active_node Node where resource being assigned is active * * \return New head of sorted list */ GList * pcmk__sort_nodes(GList *nodes, pe_node_t *active_node) { return g_list_sort_with_data(nodes, compare_nodes, active_node); } /*! * \internal * \brief Check whether any node is available to run resources * * \param[in] nodes Nodes to check * * \return true if any node in \p nodes is available to run resources, * otherwise false */ bool pcmk__any_node_available(GHashTable *nodes) { GHashTableIter iter; const pe_node_t *node = NULL; if (nodes == NULL) { return false; } g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if (pcmk__node_available(node, true, false)) { return true; } } return false; } /*! * \internal * \brief Apply node health values for all nodes in cluster * * \param[in,out] data_set Cluster working set */ void pcmk__apply_node_health(pe_working_set_t *data_set) { int base_health = 0; enum pcmk__health_strategy strategy; const char *strategy_str = pe_pref(data_set->config_hash, PCMK__OPT_NODE_HEALTH_STRATEGY); strategy = pcmk__parse_health_strategy(strategy_str); if (strategy == pcmk__health_strategy_none) { return; } crm_info("Applying node health strategy '%s'", strategy_str); // The progressive strategy can use a base health score if (strategy == pcmk__health_strategy_progressive) { base_health = pe__health_score(PCMK__OPT_NODE_HEALTH_BASE, data_set); } for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; int health = pe__sum_node_health_scores(node, base_health); // An overall health score of 0 has no effect if (health == 0) { continue; } crm_info("Overall system health of %s is %d", pe__node_name(node), health); // Use node health as a location score for each resource on the node for (GList *r = data_set->resources; r != NULL; r = r->next) { pe_resource_t *rsc = (pe_resource_t *) r->data; bool constrain = true; if (health < 0) { /* Negative health scores do not apply to resources with * allow-unhealthy-nodes=true. */ constrain = !crm_is_true(g_hash_table_lookup(rsc->meta, PCMK__META_ALLOW_UNHEALTHY_NODES)); } if (constrain) { pcmk__new_location(strategy_str, rsc, health, NULL, node); } else { pe_rsc_trace(rsc, "%s is immune from health ban on %s", rsc->id, pe__node_name(node)); } } } } /*! * \internal * \brief Check for a node in a resource's parent's allowed nodes * * \param[in] rsc Resource whose parent should be checked * \param[in] node Node to check for * * \return Equivalent of \p node from \p rsc's parent's allowed nodes if any, * otherwise NULL */ pe_node_t * pcmk__top_allowed_node(const pe_resource_t *rsc, const pe_node_t *node) { GHashTable *allowed_nodes = NULL; if ((rsc == NULL) || (node == NULL)) { return NULL; } else if (rsc->parent == NULL) { allowed_nodes = rsc->allowed_nodes; } else { allowed_nodes = rsc->parent->allowed_nodes; } - return pe_hash_table_lookup(allowed_nodes, node->details->id); + return g_hash_table_lookup(allowed_nodes, node->details->id); } diff --git a/lib/pacemaker/pcmk_sched_promotable.c b/lib/pacemaker/pcmk_sched_promotable.c index 1df4729af1..9857ea1c92 100644 --- a/lib/pacemaker/pcmk_sched_promotable.c +++ b/lib/pacemaker/pcmk_sched_promotable.c @@ -1,1284 +1,1282 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Add implicit promotion ordering for a promotable instance * * \param[in,out] clone Clone resource * \param[in,out] child Instance of \p clone being ordered * \param[in,out] last Previous instance ordered (NULL if \p child is first) */ static void order_instance_promotion(pe_resource_t *clone, pe_resource_t *child, pe_resource_t *last) { // "Promote clone" -> promote instance -> "clone promoted" pcmk__order_resource_actions(clone, RSC_PROMOTE, child, RSC_PROMOTE, pe_order_optional); pcmk__order_resource_actions(child, RSC_PROMOTE, clone, RSC_PROMOTED, pe_order_optional); // If clone is ordered, order this instance relative to last if ((last != NULL) && pe__clone_is_ordered(clone)) { pcmk__order_resource_actions(last, RSC_PROMOTE, child, RSC_PROMOTE, pe_order_optional); } } /*! * \internal * \brief Add implicit demotion ordering for a promotable instance * * \param[in,out] clone Clone resource * \param[in,out] child Instance of \p clone being ordered * \param[in] last Previous instance ordered (NULL if \p child is first) */ static void order_instance_demotion(pe_resource_t *clone, pe_resource_t *child, pe_resource_t *last) { // "Demote clone" -> demote instance -> "clone demoted" pcmk__order_resource_actions(clone, RSC_DEMOTE, child, RSC_DEMOTE, pe_order_implies_first_printed); pcmk__order_resource_actions(child, RSC_DEMOTE, clone, RSC_DEMOTED, pe_order_implies_then_printed); // If clone is ordered, order this instance relative to last if ((last != NULL) && pe__clone_is_ordered(clone)) { pcmk__order_resource_actions(child, RSC_DEMOTE, last, RSC_DEMOTE, pe_order_optional); } } /*! * \internal * \brief Check whether an instance will be promoted or demoted * * \param[in] rsc Instance to check * \param[out] demoting If \p rsc will be demoted, this will be set to true * \param[out] promoting If \p rsc will be promoted, this will be set to true */ static void check_for_role_change(const pe_resource_t *rsc, bool *demoting, bool *promoting) { const GList *iter = NULL; // If this is a cloned group, check group members recursively if (rsc->children != NULL) { for (iter = rsc->children; iter != NULL; iter = iter->next) { check_for_role_change((const pe_resource_t *) iter->data, demoting, promoting); } return; } for (iter = rsc->actions; iter != NULL; iter = iter->next) { const pe_action_t *action = (const pe_action_t *) iter->data; if (*promoting && *demoting) { return; } else if (pcmk_is_set(action->flags, pe_action_optional)) { continue; } else if (pcmk__str_eq(RSC_DEMOTE, action->task, pcmk__str_none)) { *demoting = true; } else if (pcmk__str_eq(RSC_PROMOTE, action->task, pcmk__str_none)) { *promoting = true; } } } /*! * \internal * \brief Add promoted-role location constraint scores to an instance's priority * * Adjust a promotable clone instance's promotion priority by the scores of any * location constraints in a list that are both limited to the promoted role and * for the node where the instance will be placed. * * \param[in,out] child Promotable clone instance * \param[in] location_constraints List of location constraints to apply * \param[in] chosen Node where \p child will be placed */ static void apply_promoted_locations(pe_resource_t *child, const GList *location_constraints, const pe_node_t *chosen) { for (const GList *iter = location_constraints; iter; iter = iter->next) { const pe__location_t *location = iter->data; const pe_node_t *constraint_node = NULL; if (location->role_filter == RSC_ROLE_PROMOTED) { constraint_node = pe_find_node_id(location->node_list_rh, chosen->details->id); } if (constraint_node != NULL) { int new_priority = pcmk__add_scores(child->priority, constraint_node->weight); pe_rsc_trace(child, "Applying location %s to %s promotion priority on %s: " "%s + %s = %s", location->id, child->id, pe__node_name(constraint_node), pcmk_readable_score(child->priority), pcmk_readable_score(constraint_node->weight), pcmk_readable_score(new_priority)); child->priority = new_priority; } } } /*! * \internal * \brief Get the node that an instance will be promoted on * * \param[in] rsc Promotable clone instance to check * * \return Node that \p rsc will be promoted on, or NULL if none */ static pe_node_t * node_to_be_promoted_on(const pe_resource_t *rsc) { pe_node_t *node = NULL; pe_node_t *local_node = NULL; const pe_resource_t *parent = NULL; // If this is a cloned group, bail if any group member can't be promoted for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child = (pe_resource_t *) iter->data; if (node_to_be_promoted_on(child) == NULL) { pe_rsc_trace(rsc, "%s can't be promoted because member %s can't", rsc->id, child->id); return NULL; } } node = rsc->fns->location(rsc, NULL, FALSE); if (node == NULL) { pe_rsc_trace(rsc, "%s can't be promoted because it won't be active", rsc->id); return NULL; } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { if (rsc->fns->state(rsc, TRUE) == RSC_ROLE_PROMOTED) { crm_notice("Unmanaged instance %s will be left promoted on %s", rsc->id, pe__node_name(node)); } else { pe_rsc_trace(rsc, "%s can't be promoted because it is unmanaged", rsc->id); return NULL; } } else if (rsc->priority < 0) { pe_rsc_trace(rsc, "%s can't be promoted because its promotion priority %d " "is negative", rsc->id, rsc->priority); return NULL; } else if (!pcmk__node_available(node, false, true)) { pe_rsc_trace(rsc, "%s can't be promoted because %s can't run resources", rsc->id, pe__node_name(node)); return NULL; } parent = pe__const_top_resource(rsc, false); - local_node = pe_hash_table_lookup(parent->allowed_nodes, node->details->id); + local_node = g_hash_table_lookup(parent->allowed_nodes, node->details->id); if (local_node == NULL) { /* It should not be possible for the scheduler to have assigned the * instance to a node where its parent is not allowed, but it's good to * have a fail-safe. */ if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { crm_warn("%s can't be promoted because %s is not allowed on %s " "(scheduler bug?)", rsc->id, parent->id, pe__node_name(node)); } // else the instance is unmanaged and already promoted return NULL; } else if ((local_node->count >= pe__clone_promoted_node_max(parent)) && pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "%s can't be promoted because %s has " "maximum promoted instances already", rsc->id, pe__node_name(node)); return NULL; } return local_node; } /*! * \internal * \brief Compare two promotable clone instances by promotion priority * * \param[in] a First instance to compare * \param[in] b Second instance to compare * * \return A negative number if \p a has higher promotion priority, * a positive number if \p b has higher promotion priority, * or 0 if promotion priorities are equal */ static gint cmp_promotable_instance(gconstpointer a, gconstpointer b) { const pe_resource_t *rsc1 = (const pe_resource_t *) a; const pe_resource_t *rsc2 = (const pe_resource_t *) b; enum rsc_role_e role1 = RSC_ROLE_UNKNOWN; enum rsc_role_e role2 = RSC_ROLE_UNKNOWN; CRM_ASSERT((rsc1 != NULL) && (rsc2 != NULL)); // Check sort index set by pcmk__set_instance_roles() if (rsc1->sort_index > rsc2->sort_index) { pe_rsc_trace(rsc1, "%s has higher promotion priority than %s " "(sort index %d > %d)", rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index); return -1; } else if (rsc1->sort_index < rsc2->sort_index) { pe_rsc_trace(rsc1, "%s has lower promotion priority than %s " "(sort index %d < %d)", rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index); return 1; } // If those are the same, prefer instance whose current role is higher role1 = rsc1->fns->state(rsc1, TRUE); role2 = rsc2->fns->state(rsc2, TRUE); if (role1 > role2) { pe_rsc_trace(rsc1, "%s has higher promotion priority than %s " "(higher current role)", rsc1->id, rsc2->id); return -1; } else if (role1 < role2) { pe_rsc_trace(rsc1, "%s has lower promotion priority than %s " "(lower current role)", rsc1->id, rsc2->id); return 1; } // Finally, do normal clone instance sorting return pcmk__cmp_instance(a, b); } /*! * \internal * \brief Add a promotable clone instance's sort index to its node's score * * Add a promotable clone instance's sort index (which sums its promotion * preferences and scores of relevant location constraints for the promoted * role) to the node score of the instance's assigned node. * * \param[in] data Promotable clone instance * \param[in,out] user_data Clone parent of \p data */ static void add_sort_index_to_node_score(gpointer data, gpointer user_data) { const pe_resource_t *child = (const pe_resource_t *) data; pe_resource_t *clone = (pe_resource_t *) user_data; pe_node_t *node = NULL; const pe_node_t *chosen = NULL; if (child->sort_index < 0) { pe_rsc_trace(clone, "Not adding sort index of %s: negative", child->id); return; } chosen = child->fns->location(child, NULL, FALSE); if (chosen == NULL) { pe_rsc_trace(clone, "Not adding sort index of %s: inactive", child->id); return; } - node = (pe_node_t *) pe_hash_table_lookup(clone->allowed_nodes, - chosen->details->id); + node = g_hash_table_lookup(clone->allowed_nodes, chosen->details->id); CRM_ASSERT(node != NULL); node->weight = pcmk__add_scores(child->sort_index, node->weight); pe_rsc_trace(clone, "Added cumulative priority of %s (%s) to score on %s (now %s)", child->id, pcmk_readable_score(child->sort_index), pe__node_name(node), pcmk_readable_score(node->weight)); } /*! * \internal * \brief Apply colocation to dependent's node scores if for promoted role * * \param[in,out] data Colocation constraint to apply * \param[in,out] user_data Promotable clone that is constraint's dependent */ static void apply_coloc_to_dependent(gpointer data, gpointer user_data) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) data; pe_resource_t *clone = (pe_resource_t *) user_data; pe_resource_t *primary = constraint->primary; uint32_t flags = pcmk__coloc_select_default; float factor = constraint->score / (float) INFINITY; if (constraint->dependent_role != RSC_ROLE_PROMOTED) { return; } if (constraint->score < INFINITY) { flags = pcmk__coloc_select_active; } pe_rsc_trace(clone, "Applying colocation %s (promoted %s with %s) @%s", constraint->id, constraint->dependent->id, constraint->primary->id, pcmk_readable_score(constraint->score)); primary->cmds->add_colocated_node_scores(primary, clone->id, &clone->allowed_nodes, constraint, factor, flags); } /*! * \internal * \brief Apply colocation to primary's node scores if for promoted role * * \param[in,out] data Colocation constraint to apply * \param[in,out] user_data Promotable clone that is constraint's primary */ static void apply_coloc_to_primary(gpointer data, gpointer user_data) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) data; pe_resource_t *clone = (pe_resource_t *) user_data; pe_resource_t *dependent = constraint->dependent; const float factor = constraint->score / (float) INFINITY; const uint32_t flags = pcmk__coloc_select_active |pcmk__coloc_select_nonnegative; if ((constraint->primary_role != RSC_ROLE_PROMOTED) || !pcmk__colocation_has_influence(constraint, NULL)) { return; } pe_rsc_trace(clone, "Applying colocation %s (%s with promoted %s) @%s", constraint->id, constraint->dependent->id, constraint->primary->id, pcmk_readable_score(constraint->score)); dependent->cmds->add_colocated_node_scores(dependent, clone->id, &clone->allowed_nodes, constraint, factor, flags); } /*! * \internal * \brief Set clone instance's sort index to its node's score * * \param[in,out] data Promotable clone instance * \param[in] user_data Parent clone of \p data */ static void set_sort_index_to_node_score(gpointer data, gpointer user_data) { pe_resource_t *child = (pe_resource_t *) data; const pe_resource_t *clone = (const pe_resource_t *) user_data; pe_node_t *chosen = child->fns->location(child, NULL, FALSE); if (!pcmk_is_set(child->flags, pe_rsc_managed) && (child->next_role == RSC_ROLE_PROMOTED)) { child->sort_index = INFINITY; pe_rsc_trace(clone, "Final sort index for %s is INFINITY (unmanaged promoted)", child->id); } else if ((chosen == NULL) || (child->sort_index < 0)) { pe_rsc_trace(clone, "Final sort index for %s is %d (ignoring node score)", child->id, child->sort_index); } else { - const pe_node_t *node = NULL; + const pe_node_t *node = g_hash_table_lookup(clone->allowed_nodes, + chosen->details->id); - node = pe_hash_table_lookup(clone->allowed_nodes, chosen->details->id); CRM_ASSERT(node != NULL); - child->sort_index = node->weight; pe_rsc_trace(clone, "Adding scores for %s: final sort index for %s is %d", clone->id, child->id, child->sort_index); } } /*! * \internal * \brief Sort a promotable clone's instances by descending promotion priority * * \param[in,out] clone Promotable clone to sort */ static void sort_promotable_instances(pe_resource_t *clone) { if (pe__set_clone_flag(clone, pe__clone_promotion_constrained) == pcmk_rc_already) { return; } pe__set_resource_flags(clone, pe_rsc_merging); for (GList *iter = clone->children; iter != NULL; iter = iter->next) { pe_resource_t *child = (pe_resource_t *) iter->data; pe_rsc_trace(clone, "Adding scores for %s: initial sort index for %s is %d", clone->id, child->id, child->sort_index); } pe__show_node_scores(true, clone, "Before", clone->allowed_nodes, clone->cluster); /* Because the this_with_colocations() and with_this_colocations() methods * boil down to copies of rsc_cons and rsc_cons_lhs for clones, we can use * those here directly for efficiency. */ g_list_foreach(clone->children, add_sort_index_to_node_score, clone); g_list_foreach(clone->rsc_cons, apply_coloc_to_dependent, clone); g_list_foreach(clone->rsc_cons_lhs, apply_coloc_to_primary, clone); // Ban resource from all nodes if it needs a ticket but doesn't have it pcmk__require_promotion_tickets(clone); pe__show_node_scores(true, clone, "After", clone->allowed_nodes, clone->cluster); // Reset sort indexes to final node scores g_list_foreach(clone->children, set_sort_index_to_node_score, clone); // Finally, sort instances in descending order of promotion priority clone->children = g_list_sort(clone->children, cmp_promotable_instance); pe__clear_resource_flags(clone, pe_rsc_merging); } /*! * \internal * \brief Find the active instance (if any) of an anonymous clone on a node * * \param[in] clone Anonymous clone to check * \param[in] id Instance ID (without instance number) to check * \param[in] node Node to check * * \return */ static pe_resource_t * find_active_anon_instance(const pe_resource_t *clone, const char *id, const pe_node_t *node) { for (GList *iter = clone->children; iter; iter = iter->next) { pe_resource_t *child = iter->data; pe_resource_t *active = NULL; // Use ->find_rsc() in case this is a cloned group active = clone->fns->find_rsc(child, id, node, pe_find_clone|pe_find_current); if (active != NULL) { return active; } } return NULL; } /* * \brief Check whether an anonymous clone instance is known on a node * * \param[in] clone Anonymous clone to check * \param[in] id Instance ID (without instance number) to check * \param[in] node Node to check * * \return true if \p id instance of \p clone is known on \p node, * otherwise false */ static bool anonymous_known_on(const pe_resource_t *clone, const char *id, const pe_node_t *node) { for (GList *iter = clone->children; iter; iter = iter->next) { pe_resource_t *child = iter->data; /* Use ->find_rsc() because this might be a cloned group, and knowing * that other members of the group are known here implies nothing. */ child = clone->fns->find_rsc(child, id, NULL, pe_find_clone); CRM_LOG_ASSERT(child != NULL); if (child != NULL) { if (g_hash_table_lookup(child->known_on, node->details->id)) { return true; } } } return false; } /*! * \internal * \brief Check whether a node is allowed to run a resource * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return true if \p node is allowed to run \p rsc, otherwise false */ static bool is_allowed(const pe_resource_t *rsc, const pe_node_t *node) { - pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes, - node->details->id); + pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, + node->details->id); return (allowed != NULL) && (allowed->weight >= 0); } /*! * \brief Check whether a clone instance's promotion score should be considered * * \param[in] rsc Promotable clone instance to check * \param[in] node Node where score would be applied * * \return true if \p rsc's promotion score should be considered on \p node, * otherwise false */ static bool promotion_score_applies(const pe_resource_t *rsc, const pe_node_t *node) { char *id = clone_strip(rsc->id); const pe_resource_t *parent = pe__const_top_resource(rsc, false); pe_resource_t *active = NULL; const char *reason = "allowed"; // Some checks apply only to anonymous clone instances if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { // If instance is active on the node, its score definitely applies active = find_active_anon_instance(parent, id, node); if (active == rsc) { reason = "active"; goto check_allowed; } /* If *no* instance is active on this node, this instance's score will * count if it has been probed on this node. */ if ((active == NULL) && anonymous_known_on(parent, id, node)) { reason = "probed"; goto check_allowed; } } /* If this clone's status is unknown on *all* nodes (e.g. cluster startup), * take all instances' scores into account, to make sure we use any * permanent promotion scores. */ if ((rsc->running_on == NULL) && (g_hash_table_size(rsc->known_on) == 0)) { reason = "none probed"; goto check_allowed; } /* Otherwise, we've probed and/or started the resource *somewhere*, so * consider promotion scores on nodes where we know the status. */ - if ((pe_hash_table_lookup(rsc->known_on, node->details->id) != NULL) + if ((g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) || (pe_find_node_id(rsc->running_on, node->details->id) != NULL)) { reason = "known"; } else { pe_rsc_trace(rsc, "Ignoring %s promotion score (for %s) on %s: not probed", rsc->id, id, pe__node_name(node)); free(id); return false; } check_allowed: if (is_allowed(rsc, node)) { pe_rsc_trace(rsc, "Counting %s promotion score (for %s) on %s: %s", rsc->id, id, pe__node_name(node), reason); free(id); return true; } pe_rsc_trace(rsc, "Ignoring %s promotion score (for %s) on %s: not allowed", rsc->id, id, pe__node_name(node)); free(id); return false; } /*! * \internal * \brief Get the value of a promotion score node attribute * * \param[in] rsc Promotable clone instance to get promotion score for * \param[in] node Node to get promotion score for * \param[in] name Resource name to use in promotion score attribute name * * \return Value of promotion score node attribute for \p rsc on \p node */ static const char * promotion_attr_value(const pe_resource_t *rsc, const pe_node_t *node, const char *name) { char *attr_name = NULL; const char *attr_value = NULL; enum pe__rsc_node node_type = pe__rsc_node_assigned; if (pcmk_is_set(rsc->flags, pe_rsc_provisional)) { // Not assigned yet node_type = pe__rsc_node_current; } attr_name = pcmk_promotion_score_name(name); attr_value = pe_node_attribute_calculated(node, attr_name, rsc, node_type); free(attr_name); return attr_value; } /*! * \internal * \brief Get the promotion score for a clone instance on a node * * \param[in] rsc Promotable clone instance to get score for * \param[in] node Node to get score for * \param[out] is_default If non-NULL, will be set true if no score available * * \return Promotion score for \p rsc on \p node (or 0 if none) */ static int promotion_score(const pe_resource_t *rsc, const pe_node_t *node, bool *is_default) { char *name = NULL; const char *attr_value = NULL; if (is_default != NULL) { *is_default = true; } CRM_CHECK((rsc != NULL) && (node != NULL), return 0); /* If this is an instance of a cloned group, the promotion score is the sum * of all members' promotion scores. */ if (rsc->children != NULL) { int score = 0; for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) { const pe_resource_t *child = (const pe_resource_t *) iter->data; bool child_default = false; int child_score = promotion_score(child, node, &child_default); if (!child_default && (is_default != NULL)) { *is_default = false; } score += child_score; } return score; } if (!promotion_score_applies(rsc, node)) { return 0; } /* For the promotion score attribute name, use the name the resource is * known as in resource history, since that's what crm_attribute --promotion * would have used. */ name = (rsc->clone_name == NULL)? rsc->id : rsc->clone_name; attr_value = promotion_attr_value(rsc, node, name); if (attr_value != NULL) { pe_rsc_trace(rsc, "Promotion score for %s on %s = %s", name, pe__node_name(node), pcmk__s(attr_value, "(unset)")); } else if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { /* If we don't have any resource history yet, we won't have clone_name. * In that case, for anonymous clones, try the resource name without * any instance number. */ name = clone_strip(rsc->id); if (strcmp(rsc->id, name) != 0) { attr_value = promotion_attr_value(rsc, node, name); pe_rsc_trace(rsc, "Promotion score for %s on %s (for %s) = %s", name, pe__node_name(node), rsc->id, pcmk__s(attr_value, "(unset)")); } free(name); } if (attr_value == NULL) { return 0; } if (is_default != NULL) { *is_default = false; } return char2score(attr_value); } /*! * \internal * \brief Include promotion scores in instances' node scores and priorities * * \param[in,out] rsc Promotable clone resource to update */ void pcmk__add_promotion_scores(pe_resource_t *rsc) { if (pe__set_clone_flag(rsc, pe__clone_promotion_added) == pcmk_rc_already) { return; } for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child_rsc = (pe_resource_t *) iter->data; GHashTableIter iter; pe_node_t *node = NULL; int score, new_score; g_hash_table_iter_init(&iter, child_rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if (!pcmk__node_available(node, false, false)) { /* This node will never be promoted, so don't apply the * promotion score, as that may lead to clone shuffling. */ continue; } score = promotion_score(child_rsc, node, NULL); if (score > 0) { new_score = pcmk__add_scores(node->weight, score); if (new_score != node->weight) { // Could remain INFINITY node->weight = new_score; pe_rsc_trace(rsc, "Added %s promotion priority (%s) to score " "on %s (now %s)", child_rsc->id, pcmk_readable_score(score), pe__node_name(node), pcmk_readable_score(new_score)); } } if (score > child_rsc->priority) { pe_rsc_trace(rsc, "Updating %s priority to promotion score (%d->%d)", child_rsc->id, child_rsc->priority, score); child_rsc->priority = score; } } } } /*! * \internal * \brief If a resource's current role is started, change it to unpromoted * * \param[in,out] data Resource to update * \param[in] user_data Ignored */ static void set_current_role_unpromoted(void *data, void *user_data) { pe_resource_t *rsc = (pe_resource_t *) data; if (rsc->role == RSC_ROLE_STARTED) { // Promotable clones should use unpromoted role instead of started rsc->role = RSC_ROLE_UNPROMOTED; } g_list_foreach(rsc->children, set_current_role_unpromoted, NULL); } /*! * \internal * \brief Set a resource's next role to unpromoted (or stopped if unassigned) * * \param[in,out] data Resource to update * \param[in] user_data Ignored */ static void set_next_role_unpromoted(void *data, void *user_data) { pe_resource_t *rsc = (pe_resource_t *) data; GList *assigned = NULL; rsc->fns->location(rsc, &assigned, FALSE); if (assigned == NULL) { pe__set_next_role(rsc, RSC_ROLE_STOPPED, "stopped instance"); } else { pe__set_next_role(rsc, RSC_ROLE_UNPROMOTED, "unpromoted instance"); g_list_free(assigned); } g_list_foreach(rsc->children, set_next_role_unpromoted, NULL); } /*! * \internal * \brief Set a resource's next role to promoted if not already set * * \param[in,out] data Resource to update * \param[in] user_data Ignored */ static void set_next_role_promoted(void *data, gpointer user_data) { pe_resource_t *rsc = (pe_resource_t *) data; if (rsc->next_role == RSC_ROLE_UNKNOWN) { pe__set_next_role(rsc, RSC_ROLE_PROMOTED, "promoted instance"); } g_list_foreach(rsc->children, set_next_role_promoted, NULL); } /*! * \internal * \brief Show instance's promotion score on node where it will be active * * \param[in,out] instance Promotable clone instance to show */ static void show_promotion_score(pe_resource_t *instance) { pe_node_t *chosen = instance->fns->location(instance, NULL, FALSE); if (pcmk_is_set(instance->cluster->flags, pe_flag_show_scores) && !pcmk__is_daemon && (instance->cluster->priv != NULL)) { pcmk__output_t *out = instance->cluster->priv; out->message(out, "promotion-score", instance, chosen, pcmk_readable_score(instance->sort_index)); } else { pe_rsc_debug(pe__const_top_resource(instance, false), "%s promotion score on %s: sort=%s priority=%s", instance->id, ((chosen == NULL)? "none" : pe__node_name(chosen)), pcmk_readable_score(instance->sort_index), pcmk_readable_score(instance->priority)); } } /*! * \internal * \brief Set a clone instance's promotion priority * * \param[in,out] data Promotable clone instance to update * \param[in] user_data Instance's parent clone */ static void set_instance_priority(gpointer data, gpointer user_data) { pe_resource_t *instance = (pe_resource_t *) data; const pe_resource_t *clone = (const pe_resource_t *) user_data; const pe_node_t *chosen = NULL; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; GList *list = NULL; pe_rsc_trace(clone, "Assigning priority for %s: %s", instance->id, role2text(instance->next_role)); if (instance->fns->state(instance, TRUE) == RSC_ROLE_STARTED) { set_current_role_unpromoted(instance, NULL); } // Only an instance that will be active can be promoted chosen = instance->fns->location(instance, &list, FALSE); if (pcmk__list_of_multiple(list)) { pcmk__config_err("Cannot promote non-colocated child %s", instance->id); } g_list_free(list); if (chosen == NULL) { return; } next_role = instance->fns->state(instance, FALSE); switch (next_role) { case RSC_ROLE_STARTED: case RSC_ROLE_UNKNOWN: // Set instance priority to its promotion score (or -1 if none) { bool is_default = false; instance->priority = promotion_score(instance, chosen, &is_default); if (is_default) { /* * Default to -1 if no value is set. This allows * instances eligible for promotion to be specified * based solely on rsc_location constraints, but * prevents any instance from being promoted if neither * a constraint nor a promotion score is present */ instance->priority = -1; } } break; case RSC_ROLE_UNPROMOTED: case RSC_ROLE_STOPPED: // Instance can't be promoted instance->priority = -INFINITY; break; case RSC_ROLE_PROMOTED: // Nothing needed (re-creating actions after scheduling fencing) break; default: CRM_CHECK(FALSE, crm_err("Unknown resource role %d for %s", next_role, instance->id)); } // Add relevant location constraint scores for promoted role apply_promoted_locations(instance, instance->rsc_location, chosen); apply_promoted_locations(instance, clone->rsc_location, chosen); // Consider instance's role-based colocations with other resources list = pcmk__this_with_colocations(instance); for (GList *iter = list; iter != NULL; iter = iter->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) iter->data; instance->cmds->apply_coloc_score(instance, cons->primary, cons, true); } g_list_free(list); instance->sort_index = instance->priority; if (next_role == RSC_ROLE_PROMOTED) { instance->sort_index = INFINITY; } pe_rsc_trace(clone, "Assigning %s priority = %d", instance->id, instance->priority); } /*! * \internal * \brief Set a promotable clone instance's role * * \param[in,out] data Promotable clone instance to update * \param[in,out] user_data Pointer to count of instances chosen for promotion */ static void set_instance_role(gpointer data, gpointer user_data) { pe_resource_t *instance = (pe_resource_t *) data; int *count = (int *) user_data; const pe_resource_t *clone = pe__const_top_resource(instance, false); pe_node_t *chosen = NULL; show_promotion_score(instance); if (instance->sort_index < 0) { pe_rsc_trace(clone, "Not supposed to promote instance %s", instance->id); } else if ((*count < pe__clone_promoted_max(instance)) || !pcmk_is_set(clone->flags, pe_rsc_managed)) { chosen = node_to_be_promoted_on(instance); } if (chosen == NULL) { set_next_role_unpromoted(instance, NULL); return; } if ((instance->role < RSC_ROLE_PROMOTED) && !pcmk_is_set(instance->cluster->flags, pe_flag_have_quorum) && (instance->cluster->no_quorum_policy == no_quorum_freeze)) { crm_notice("Clone instance %s cannot be promoted without quorum", instance->id); set_next_role_unpromoted(instance, NULL); return; } chosen->count++; pe_rsc_info(clone, "Choosing %s (%s) on %s for promotion", instance->id, role2text(instance->role), pe__node_name(chosen)); set_next_role_promoted(instance, NULL); (*count)++; } /*! * \internal * \brief Set roles for all instances of a promotable clone * * \param[in,out] rsc Promotable clone resource to update */ void pcmk__set_instance_roles(pe_resource_t *rsc) { int promoted = 0; GHashTableIter iter; pe_node_t *node = NULL; // Repurpose count to track the number of promoted instances assigned g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { node->count = 0; } // Set instances' promotion priorities and sort by highest priority first g_list_foreach(rsc->children, set_instance_priority, rsc); sort_promotable_instances(rsc); // Choose the first N eligible instances to be promoted g_list_foreach(rsc->children, set_instance_role, &promoted); pe_rsc_info(rsc, "%s: Promoted %d instances of a possible %d", rsc->id, promoted, pe__clone_promoted_max(rsc)); } /*! * * \internal * \brief Create actions for promotable clone instances * * \param[in,out] clone Promotable clone to create actions for * \param[out] any_promoting Will be set true if any instance is promoting * \param[out] any_demoting Will be set true if any instance is demoting */ static void create_promotable_instance_actions(pe_resource_t *clone, bool *any_promoting, bool *any_demoting) { for (GList *iter = clone->children; iter != NULL; iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; instance->cmds->create_actions(instance); check_for_role_change(instance, any_demoting, any_promoting); } } /*! * \internal * \brief Reset each promotable instance's resource priority * * Reset the priority of each instance of a promotable clone to the clone's * priority (after promotion actions are scheduled, when instance priorities * were repurposed as promotion scores). * * \param[in,out] clone Promotable clone to reset */ static void reset_instance_priorities(pe_resource_t *clone) { for (GList *iter = clone->children; iter != NULL; iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; instance->priority = clone->priority; } } /*! * \internal * \brief Create actions specific to promotable clones * * \param[in,out] clone Promotable clone to create actions for */ void pcmk__create_promotable_actions(pe_resource_t *clone) { bool any_promoting = false; bool any_demoting = false; // Create actions for each clone instance individually create_promotable_instance_actions(clone, &any_promoting, &any_demoting); // Create pseudo-actions for clone as a whole pe__create_promotable_pseudo_ops(clone, any_promoting, any_demoting); // Undo our temporary repurposing of resource priority for instances reset_instance_priorities(clone); } /*! * \internal * \brief Create internal orderings for a promotable clone's instances * * \param[in,out] clone Promotable clone instance to order */ void pcmk__order_promotable_instances(pe_resource_t *clone) { pe_resource_t *previous = NULL; // Needed for ordered clones pcmk__promotable_restart_ordering(clone); for (GList *iter = clone->children; iter != NULL; iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; // Demote before promote pcmk__order_resource_actions(instance, RSC_DEMOTE, instance, RSC_PROMOTE, pe_order_optional); order_instance_promotion(clone, instance, previous); order_instance_demotion(clone, instance, previous); previous = instance; } } /*! * \internal * \brief Update dependent's allowed nodes for colocation with promotable * * \param[in,out] dependent Dependent resource to update * \param[in] primary_node Node where an instance of the primary will be * \param[in] colocation Colocation constraint to apply */ static void update_dependent_allowed_nodes(pe_resource_t *dependent, const pe_node_t *primary_node, const pcmk__colocation_t *colocation) { GHashTableIter iter; pe_node_t *node = NULL; const char *primary_value = NULL; const char *attr = colocation->node_attribute; if (colocation->score >= INFINITY) { return; // Colocation is mandatory, so allowed node scores don't matter } // Get value of primary's colocation node attribute primary_value = pe_node_attribute_raw(primary_node, attr); pe_rsc_trace(colocation->primary, "Applying %s (%s with %s on %s by %s @%d) to %s", colocation->id, colocation->dependent->id, colocation->primary->id, pe__node_name(primary_node), attr, colocation->score, dependent->id); g_hash_table_iter_init(&iter, dependent->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { const char *dependent_value = pe_node_attribute_raw(node, attr); if (pcmk__str_eq(primary_value, dependent_value, pcmk__str_casei)) { node->weight = pcmk__add_scores(node->weight, colocation->score); pe_rsc_trace(colocation->primary, "Added %s score (%s) to %s (now %s)", colocation->id, pcmk_readable_score(colocation->score), pe__node_name(node), pcmk_readable_score(node->weight)); } } } /*! * \brief Update dependent for a colocation with a promotable clone * * \param[in] primary Primary resource in the colocation * \param[in,out] dependent Dependent resource in the colocation * \param[in] colocation Colocation constraint to apply */ void pcmk__update_dependent_with_promotable(const pe_resource_t *primary, pe_resource_t *dependent, const pcmk__colocation_t *colocation) { GList *affected_nodes = NULL; /* Build a list of all nodes where an instance of the primary will be, and * (for optional colocations) update the dependent's allowed node scores for * each one. */ for (GList *iter = primary->children; iter != NULL; iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; pe_node_t *node = instance->fns->location(instance, NULL, FALSE); if (node == NULL) { continue; } if (instance->fns->state(instance, FALSE) == colocation->primary_role) { update_dependent_allowed_nodes(dependent, node, colocation); affected_nodes = g_list_prepend(affected_nodes, node); } } /* For mandatory colocations, add the primary's node score to the * dependent's node score for each affected node, and ban the dependent * from all other nodes. * * However, skip this for promoted-with-promoted colocations, otherwise * inactive dependent instances can't start (in the unpromoted role). */ if ((colocation->score >= INFINITY) && ((colocation->dependent_role != RSC_ROLE_PROMOTED) || (colocation->primary_role != RSC_ROLE_PROMOTED))) { pe_rsc_trace(colocation->primary, "Applying %s (mandatory %s with %s) to %s", colocation->id, colocation->dependent->id, colocation->primary->id, dependent->id); - node_list_exclude(dependent->allowed_nodes, affected_nodes, - TRUE); + pcmk__colocation_intersect_nodes(dependent, primary, colocation, + affected_nodes, true); } g_list_free(affected_nodes); } /*! * \internal * \brief Update dependent priority for colocation with promotable * * \param[in] primary Primary resource in the colocation * \param[in,out] dependent Dependent resource in the colocation * \param[in] colocation Colocation constraint to apply */ void pcmk__update_promotable_dependent_priority(const pe_resource_t *primary, pe_resource_t *dependent, const pcmk__colocation_t *colocation) { pe_resource_t *primary_instance = NULL; // Look for a primary instance where dependent will be primary_instance = pcmk__find_compatible_instance(dependent, primary, colocation->primary_role, false); if (primary_instance != NULL) { // Add primary instance's priority to dependent's int new_priority = pcmk__add_scores(dependent->priority, colocation->score); pe_rsc_trace(colocation->primary, "Applying %s (%s with %s) to %s priority (%s + %s = %s)", colocation->id, colocation->dependent->id, colocation->primary->id, dependent->id, pcmk_readable_score(dependent->priority), pcmk_readable_score(colocation->score), pcmk_readable_score(new_priority)); dependent->priority = new_priority; } else if (colocation->score >= INFINITY) { // Mandatory colocation, but primary won't be here pe_rsc_trace(colocation->primary, "Applying %s (%s with %s) to %s: can't be promoted", colocation->id, colocation->dependent->id, colocation->primary->id, dependent->id); dependent->priority = -INFINITY; } } diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c index 5850c4dc38..849a2ee2ff 100644 --- a/lib/pacemaker/pcmk_scheduler.c +++ b/lib/pacemaker/pcmk_scheduler.c @@ -1,813 +1,813 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" CRM_TRACE_INIT_DATA(pacemaker); /*! * \internal * \brief Do deferred action checks after assignment * * When unpacking the resource history, the scheduler checks for resource * configurations that have changed since an action was run. However, at that * time, bundles using the REMOTE_CONTAINER_HACK don't have their final * parameter information, so instead they add a deferred check to a list. This * function processes one entry in that list. * * \param[in,out] rsc Resource that action history is for * \param[in,out] node Node that action history is for * \param[in] rsc_op Action history entry * \param[in] check Type of deferred check to do */ static void check_params(pe_resource_t *rsc, pe_node_t *node, const xmlNode *rsc_op, enum pe_check_parameters check) { const char *reason = NULL; op_digest_cache_t *digest_data = NULL; switch (check) { case pe_check_active: if (pcmk__check_action_config(rsc, node, rsc_op) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL)) { reason = "action definition changed"; } break; case pe_check_last_failure: digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, rsc->cluster); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s has " "no digest to compare", rsc->id, ID(rsc_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: reason = "resource parameters have changed"; break; } break; } if (reason != NULL) { pe__clear_failcount(rsc, node, reason, rsc->cluster); } } /*! * \internal * \brief Check whether a resource has failcount clearing scheduled on a node * * \param[in] node Node to check * \param[in] rsc Resource to check * * \return true if \p rsc has failcount clearing scheduled on \p node, * otherwise false */ static bool failcount_clear_action_exists(const pe_node_t *node, const pe_resource_t *rsc) { GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE); if (list != NULL) { g_list_free(list); return true; } return false; } /*! * \internal * \brief Ban a resource from a node if it reached its failure threshold there * * \param[in,out] data Resource to check failure threshold for * \param[in] user_data Node to check resource on */ static void check_failure_threshold(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; const pe_node_t *node = user_data; // If this is a collective resource, apply recursively to children instead if (rsc->children != NULL) { g_list_foreach(rsc->children, check_failure_threshold, user_data); return; } if (!failcount_clear_action_exists(node, rsc)) { /* Don't force the resource away from this node due to a failcount * that's going to be cleared. * * @TODO Failcount clearing can be scheduled in * pcmk__handle_rsc_config_changes() via process_rsc_history(), or in * schedule_resource_actions() via check_params(). This runs well before * then, so it cannot detect those, meaning we might check the migration * threshold when we shouldn't. Worst case, we stop or move the * resource, then move it back in the next transition. */ pe_resource_t *failed = NULL; if (pcmk__threshold_reached(rsc, node, &failed)) { resource_location(failed, node, -INFINITY, "__fail_limit__", rsc->cluster); } } } /*! * \internal * \brief If resource has exclusive discovery, ban node if not allowed * * Location constraints have a resource-discovery option that allows users to * specify where probes are done for the affected resource. If this is set to * exclusive, probes will only be done on nodes listed in exclusive constraints. * This function bans the resource from the node if the node is not listed. * * \param[in,out] data Resource to check * \param[in] user_data Node to check resource on */ static void apply_exclusive_discovery(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; const pe_node_t *node = user_data; if (rsc->exclusive_discover || pe__const_top_resource(rsc, false)->exclusive_discover) { pe_node_t *match = NULL; // If this is a collective resource, apply recursively to children g_list_foreach(rsc->children, apply_exclusive_discovery, user_data); match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if ((match != NULL) && (match->rsc_discover_mode != pe_discover_exclusive)) { match->weight = -INFINITY; } } } /*! * \internal * \brief Apply stickiness to a resource if appropriate * * \param[in,out] data Resource to check for stickiness * \param[in] user_data Ignored */ static void apply_stickiness(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; pe_node_t *node = NULL; // If this is a collective resource, apply recursively to children instead if (rsc->children != NULL) { g_list_foreach(rsc->children, apply_stickiness, NULL); return; } /* A resource is sticky if it is managed, has stickiness configured, and is * active on a single node. */ if (!pcmk_is_set(rsc->flags, pe_rsc_managed) || (rsc->stickiness < 1) || !pcmk__list_of_1(rsc->running_on)) { return; } node = rsc->running_on->data; /* In a symmetric cluster, stickiness can always be used. In an * asymmetric cluster, we have to check whether the resource is still * allowed on the node, so we don't keep the resource somewhere it is no * longer explicitly enabled. */ if (!pcmk_is_set(rsc->cluster->flags, pe_flag_symmetric_cluster) - && (pe_hash_table_lookup(rsc->allowed_nodes, - node->details->id) == NULL)) { + && (g_hash_table_lookup(rsc->allowed_nodes, + node->details->id) == NULL)) { pe_rsc_debug(rsc, "Ignoring %s stickiness because the cluster is " "asymmetric and %s is not explicitly allowed", rsc->id, pe__node_name(node)); return; } pe_rsc_debug(rsc, "Resource %s has %d stickiness on %s", rsc->id, rsc->stickiness, pe__node_name(node)); resource_location(rsc, node, rsc->stickiness, "stickiness", rsc->cluster); } /*! * \internal * \brief Apply shutdown locks for all resources as appropriate * * \param[in,out] data_set Cluster working set */ static void apply_shutdown_locks(pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { return; } for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; rsc->cmds->shutdown_lock(rsc); } } /*! * \internal * \brief Calculate the number of available nodes in the cluster * * \param[in,out] data_set Cluster working set */ static void count_available_nodes(pe_working_set_t *data_set) { if (pcmk_is_set(data_set->flags, pe_flag_no_compat)) { return; } // @COMPAT for API backward compatibility only (cluster does not use value) for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; if ((node != NULL) && (node->weight >= 0) && node->details->online && (node->details->type != node_ping)) { data_set->max_valid_nodes++; } } crm_trace("Online node count: %d", data_set->max_valid_nodes); } /* * \internal * \brief Apply node-specific scheduling criteria * * After the CIB has been unpacked, process node-specific scheduling criteria * including shutdown locks, location constraints, resource stickiness, * migration thresholds, and exclusive resource discovery. */ static void apply_node_criteria(pe_working_set_t *data_set) { crm_trace("Applying node-specific scheduling criteria"); apply_shutdown_locks(data_set); count_available_nodes(data_set); pcmk__apply_locations(data_set); g_list_foreach(data_set->resources, apply_stickiness, NULL); for (GList *node_iter = data_set->nodes; node_iter != NULL; node_iter = node_iter->next) { for (GList *rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { check_failure_threshold(rsc_iter->data, node_iter->data); apply_exclusive_discovery(rsc_iter->data, node_iter->data); } } } /*! * \internal * \brief Assign resources to nodes * * \param[in,out] data_set Cluster working set */ static void assign_resources(pe_working_set_t *data_set) { GList *iter = NULL; crm_trace("Assigning resources to nodes"); if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) { pcmk__sort_resources(data_set); } pcmk__show_node_capacities("Original", data_set); if (pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) { /* Assign remote connection resources first (which will also assign any * colocation dependencies). If the connection is migrating, always * prefer the partial migration target. */ for (iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Assigning remote connection resource '%s'", rsc->id); rsc->cmds->assign(rsc, rsc->partial_migration_target); } } } /* now do the rest of the resources */ for (iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; if (!rsc->is_remote_node) { pe_rsc_trace(rsc, "Assigning %s resource '%s'", crm_element_name(rsc->xml), rsc->id); rsc->cmds->assign(rsc, NULL); } } pcmk__show_node_capacities("Remaining", data_set); } /*! * \internal * \brief Schedule fail count clearing on online nodes if resource is orphaned * * \param[in,out] data Resource to check * \param[in] user_data Ignored */ static void clear_failcounts_if_orphaned(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) { return; } crm_trace("Clear fail counts for orphaned resource %s", rsc->id); /* There's no need to recurse into rsc->children because those * should just be unassigned clone instances. */ for (GList *iter = rsc->cluster->nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; pe_action_t *clear_op = NULL; if (!node->details->online) { continue; } if (pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL) == 0) { continue; } clear_op = pe__clear_failcount(rsc, node, "it is orphaned", rsc->cluster); /* We can't use order_action_then_stop() here because its * pe_order_preserve breaks things */ pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), NULL, pe_order_optional, rsc->cluster); } } /*! * \internal * \brief Schedule any resource actions needed * * \param[in,out] data_set Cluster working set */ static void schedule_resource_actions(pe_working_set_t *data_set) { // Process deferred action checks pe__foreach_param_check(data_set, check_params); pe__free_param_checks(data_set); if (pcmk_is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Scheduling probes"); pcmk__schedule_probes(data_set); } if (pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) { g_list_foreach(data_set->resources, clear_failcounts_if_orphaned, NULL); } crm_trace("Scheduling resource actions"); for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; rsc->cmds->create_actions(rsc); } } /*! * \internal * \brief Check whether a resource or any of its descendants are managed * * \param[in] rsc Resource to check * * \return true if resource or any descendant is managed, otherwise false */ static bool is_managed(const pe_resource_t *rsc) { if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { return true; } for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { if (is_managed((pe_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether any resources in the cluster are managed * * \param[in] data_set Cluster working set * * \return true if any resource is managed, otherwise false */ static bool any_managed_resources(const pe_working_set_t *data_set) { for (const GList *iter = data_set->resources; iter != NULL; iter = iter->next) { if (is_managed((const pe_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether a node requires fencing * * \param[in] node Node to check * \param[in] have_managed Whether any resource in cluster is managed * * \return true if \p node should be fenced, otherwise false */ static bool needs_fencing(const pe_node_t *node, bool have_managed) { return have_managed && node->details->unclean && pe_can_fence(node->details->data_set, node); } /*! * \internal * \brief Check whether a node requires shutdown * * \param[in] node Node to check * * \return true if \p node should be shut down, otherwise false */ static bool needs_shutdown(const pe_node_t *node) { if (pe__is_guest_or_remote_node(node)) { /* Do not send shutdown actions for Pacemaker Remote nodes. * @TODO We might come up with a good use for this in the future. */ return false; } return node->details->online && node->details->shutdown; } /*! * \internal * \brief Track and order non-DC fencing * * \param[in,out] list List of existing non-DC fencing actions * \param[in,out] action Fencing action to prepend to \p list * \param[in] data_set Cluster working set * * \return (Possibly new) head of \p list */ static GList * add_nondc_fencing(GList *list, pe_action_t *action, const pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing) && (list != NULL)) { /* Concurrent fencing is disabled, so order each non-DC * fencing in a chain. If there is any DC fencing or * shutdown, it will be ordered after the last action in the * chain later. */ order_actions((pe_action_t *) list->data, action, pe_order_optional); } return g_list_prepend(list, action); } /*! * \internal * \brief Schedule a node for fencing * * \param[in,out] node Node that requires fencing */ static pe_action_t * schedule_fencing(pe_node_t *node) { pe_action_t *fencing = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, node->details->data_set); pe_warn("Scheduling node %s for fencing", pe__node_name(node)); pcmk__order_vs_fence(fencing, node->details->data_set); return fencing; } /*! * \internal * \brief Create and order node fencing and shutdown actions * * \param[in,out] data_set Cluster working set */ static void schedule_fencing_and_shutdowns(pe_working_set_t *data_set) { pe_action_t *dc_down = NULL; bool integrity_lost = false; bool have_managed = any_managed_resources(data_set); GList *fencing_ops = NULL; GList *shutdown_ops = NULL; crm_trace("Scheduling fencing and shutdowns as needed"); if (!have_managed) { crm_notice("No fencing will be done until there are resources " "to manage"); } // Check each node for whether it needs fencing or shutdown for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; pe_action_t *fencing = NULL; /* Guest nodes are "fenced" by recovering their container resource, * so handle them separately. */ if (pe__is_guest_node(node)) { if (node->details->remote_requires_reset && have_managed && pe_can_fence(data_set, node)) { pcmk__fence_guest(node); } continue; } if (needs_fencing(node, have_managed)) { fencing = schedule_fencing(node); // Track DC and non-DC fence actions separately if (node->details->is_dc) { dc_down = fencing; } else { fencing_ops = add_nondc_fencing(fencing_ops, fencing, data_set); } } else if (needs_shutdown(node)) { pe_action_t *down_op = pcmk__new_shutdown_action(node); // Track DC and non-DC shutdown actions separately if (node->details->is_dc) { dc_down = down_op; } else { shutdown_ops = g_list_prepend(shutdown_ops, down_op); } } if ((fencing == NULL) && node->details->unclean) { integrity_lost = true; pe_warn("Node %s is unclean but cannot be fenced", pe__node_name(node)); } } if (integrity_lost) { if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { pe_warn("Resource functionality and data integrity cannot be " "guaranteed (configure, enable, and test fencing to " "correct this)"); } else if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { crm_notice("Unclean nodes will not be fenced until quorum is " "attained or no-quorum-policy is set to ignore"); } } if (dc_down != NULL) { /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated * DC elections. However, we don't want to order non-DC shutdowns before * a DC *fencing*, because even though we don't want a node that's * shutting down to become DC, the DC fencing could be ordered before a * clone stop that's also ordered before the shutdowns, thus leading to * a graph loop. */ if (pcmk__str_eq(dc_down->task, CRM_OP_SHUTDOWN, pcmk__str_none)) { pcmk__order_after_each(dc_down, shutdown_ops); } // Order any non-DC fencing before any DC fencing or shutdown if (pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)) { /* With concurrent fencing, order each non-DC fencing action * separately before any DC fencing or shutdown. */ pcmk__order_after_each(dc_down, fencing_ops); } else if (fencing_ops != NULL) { /* Without concurrent fencing, the non-DC fencing actions are * already ordered relative to each other, so we just need to order * the DC fencing after the last action in the chain (which is the * first item in the list). */ order_actions((pe_action_t *) fencing_ops->data, dc_down, pe_order_optional); } } g_list_free(fencing_ops); g_list_free(shutdown_ops); } static void log_resource_details(pe_working_set_t *data_set) { pcmk__output_t *out = data_set->priv; GList *all = NULL; /* Due to the `crm_mon --node=` feature, out->message() for all the * resource-related messages expects a list of nodes that we are allowed to * output information for. Here, we create a wildcard to match all nodes. */ all = g_list_prepend(all, (gpointer) "*"); for (GList *item = data_set->resources; item != NULL; item = item->next) { pe_resource_t *rsc = (pe_resource_t *) item->data; // Log all resources except inactive orphans if (!pcmk_is_set(rsc->flags, pe_rsc_orphan) || (rsc->role != RSC_ROLE_STOPPED)) { out->message(out, crm_map_element_name(rsc->xml), 0, rsc, all, all); } } g_list_free(all); } static void log_all_actions(pe_working_set_t *data_set) { /* This only ever outputs to the log, so ignore whatever output object was * previously set and just log instead. */ pcmk__output_t *prev_out = data_set->priv; pcmk__output_t *out = NULL; if (pcmk__log_output_new(&out) != pcmk_rc_ok) { return; } pe__register_messages(out); pcmk__register_lib_messages(out); pcmk__output_set_log_level(out, LOG_NOTICE); data_set->priv = out; out->begin_list(out, NULL, NULL, "Actions"); pcmk__output_actions(data_set); out->end_list(out); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); data_set->priv = prev_out; } /*! * \internal * \brief Log all required but unrunnable actions at trace level * * \param[in] data_set Cluster working set */ static void log_unrunnable_actions(const pe_working_set_t *data_set) { const uint64_t flags = pe_action_optional |pe_action_runnable |pe_action_pseudo; crm_trace("Required but unrunnable actions:"); for (const GList *iter = data_set->actions; iter != NULL; iter = iter->next) { const pe_action_t *action = (const pe_action_t *) iter->data; if (!pcmk_any_flags_set(action->flags, flags)) { pcmk__log_action("\t", action, true); } } } /*! * \internal * \brief Unpack the CIB for scheduling * * \param[in,out] cib CIB XML to unpack (may be NULL if already unpacked) * \param[in] flags Working set flags to set in addition to defaults * \param[in,out] data_set Cluster working set */ static void unpack_cib(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) { const char* localhost_save = NULL; if (pcmk_is_set(data_set->flags, pe_flag_have_status)) { crm_trace("Reusing previously calculated cluster status"); pe__set_working_set_flags(data_set, flags); return; } if (data_set->localhost) { localhost_save = data_set->localhost; } CRM_ASSERT(cib != NULL); crm_trace("Calculating cluster status"); /* This will zero the entire struct without freeing anything first, so * callers should never call pcmk__schedule_actions() with a populated data * set unless pe_flag_have_status is set (i.e. cluster_status() was * previously called, whether directly or via pcmk__schedule_actions()). */ set_working_set_defaults(data_set); if (localhost_save) { data_set->localhost = localhost_save; } pe__set_working_set_flags(data_set, flags); data_set->input = cib; cluster_status(data_set); // Sets pe_flag_have_status } /*! * \internal * \brief Run the scheduler for a given CIB * * \param[in,out] cib CIB XML to use as scheduler input * \param[in] flags Working set flags to set in addition to defaults * \param[in,out] data_set Cluster working set */ void pcmk__schedule_actions(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) { unpack_cib(cib, flags, data_set); pcmk__set_assignment_methods(data_set); pcmk__apply_node_health(data_set); pcmk__unpack_constraints(data_set); if (pcmk_is_set(data_set->flags, pe_flag_check_config)) { return; } if (!pcmk_is_set(data_set->flags, pe_flag_quick_location) && pcmk__is_daemon) { log_resource_details(data_set); } apply_node_criteria(data_set); if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) { return; } pcmk__create_internal_constraints(data_set); pcmk__handle_rsc_config_changes(data_set); assign_resources(data_set); schedule_resource_actions(data_set); /* Remote ordering constraints need to happen prior to calculating fencing * because it is one more place we can mark nodes as needing fencing. */ pcmk__order_remote_connection_actions(data_set); schedule_fencing_and_shutdowns(data_set); pcmk__apply_orderings(data_set); log_all_actions(data_set); pcmk__create_graph(data_set); if (get_crm_log_level() == LOG_TRACE) { log_unrunnable_actions(data_set); } } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 8fb8a3c651..8d3ca93a83 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,938 +1,897 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include "pe_status_private.h" extern bool pcmk__is_daemon; gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); /*! * \internal * \brief Check whether we can fence a particular node * * \param[in] data_set Working set for cluster * \param[in] node Name of node to check * * \return true if node can be fenced, false otherwise */ bool pe_can_fence(const pe_working_set_t *data_set, const pe_node_t *node) { if (pe__is_guest_node(node)) { /* Guest nodes are fenced by stopping their container resource. We can * do that if the container's host is either online or fenceable. */ pe_resource_t *rsc = node->details->remote_rsc->container; for (GList *n = rsc->running_on; n != NULL; n = n->next) { pe_node_t *container_node = n->data; if (!container_node->details->online && !pe_can_fence(data_set, container_node)) { return false; } } return true; } else if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { return false; /* Turned off */ } else if (!pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) { return false; /* No devices */ } else if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { return true; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return true; } else if(node == NULL) { return false; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", pe__node_name(node)); return true; } crm_trace("Cannot fence %s", pe__node_name(node)); return false; } /*! * \internal * \brief Copy a node object * * \param[in] this_node Node object to copy * * \return Newly allocated shallow copy of this_node * \note This function asserts on errors and is guaranteed to return non-NULL. */ pe_node_t * pe__copy_node(const pe_node_t *this_node) { pe_node_t *new_node = NULL; CRM_ASSERT(this_node != NULL); new_node = calloc(1, sizeof(pe_node_t)); CRM_ASSERT(new_node != NULL); new_node->rsc_discover_mode = this_node->rsc_discover_mode; new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; // @COMPAT deprecated and unused new_node->details = this_node->details; return new_node; } -/* any node in list1 or list2 and not in the other gets a score of -INFINITY */ -void -node_list_exclude(GHashTable * hash, GList *list, gboolean merge_scores) -{ - GHashTable *result = hash; - pe_node_t *other_node = NULL; - GList *gIter = list; - - GHashTableIter iter; - pe_node_t *node = NULL; - - g_hash_table_iter_init(&iter, hash); - while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { - - other_node = pe_find_node_id(list, node->details->id); - if (other_node == NULL) { - node->weight = -INFINITY; - crm_trace("Banning dependent from %s (no primary instance)", - pe__node_name(node)); - } else if (merge_scores) { - node->weight = pcmk__add_scores(node->weight, other_node->weight); - crm_trace("Added primary's score %s to dependent's score for %s " - "(now %s)", pcmk_readable_score(other_node->weight), - pe__node_name(node), pcmk_readable_score(node->weight)); - } - } - - for (; gIter != NULL; gIter = gIter->next) { - pe_node_t *node = (pe_node_t *) gIter->data; - - other_node = pe_hash_table_lookup(result, node->details->id); - - if (other_node == NULL) { - pe_node_t *new_node = pe__copy_node(node); - - new_node->weight = -INFINITY; - g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); - } - } -} - /*! * \internal * \brief Create a node hash table from a node list * * \param[in] list Node list * * \return Hash table equivalent of node list */ GHashTable * pe__node_list2table(const GList *list) { GHashTable *result = NULL; result = pcmk__strkey_table(NULL, free); for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *new_node = pe__copy_node((const pe_node_t *) gIter->data); g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } return result; } /*! * \internal * \brief Compare two nodes by name, with numeric portions sorted numerically * * Sort two node names case-insensitively like strcasecmp(), but with any * numeric portions of the name sorted numerically. For example, "node10" will * sort higher than "node9" but lower than "remotenode9". * * \param[in] a First node to compare (can be \c NULL) * \param[in] b Second node to compare (can be \c NULL) * * \retval -1 \c a comes before \c b (or \c a is \c NULL and \c b is not) * \retval 0 \c a and \c b are equal (or both are \c NULL) * \retval 1 \c a comes after \c b (or \c b is \c NULL and \c a is not) */ gint pe__cmp_node_name(gconstpointer a, gconstpointer b) { const pe_node_t *node1 = (const pe_node_t *) a; const pe_node_t *node2 = (const pe_node_t *) b; if ((node1 == NULL) && (node2 == NULL)) { return 0; } if (node1 == NULL) { return -1; } if (node2 == NULL) { return 1; } return pcmk__numeric_strcasecmp(node1->details->uname, node2->details->uname); } /*! * \internal * \brief Output node weights to stdout * * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes If rsc is not specified, use these nodes * \param[in,out] data_set Cluster working set */ static void pe__output_node_weights(const pe_resource_t *rsc, const char *comment, GHashTable *nodes, pe_working_set_t *data_set) { pcmk__output_t *out = data_set->priv; // Sort the nodes so the output is consistent for regression tests GList *list = g_list_sort(g_hash_table_get_values(nodes), pe__cmp_node_name); for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) { const pe_node_t *node = (const pe_node_t *) gIter->data; out->message(out, "node-weight", rsc, comment, node->details->uname, pcmk_readable_score(node->weight)); } g_list_free(list); } /*! * \internal * \brief Log node weights at trace level * * \param[in] file Caller's filename * \param[in] function Caller's function name * \param[in] line Caller's line number * \param[in] rsc If not NULL, include this resource's ID in logs * \param[in] comment Text description to prefix lines with * \param[in] nodes Nodes whose scores should be logged */ static void pe__log_node_weights(const char *file, const char *function, int line, const pe_resource_t *rsc, const char *comment, GHashTable *nodes) { GHashTableIter iter; pe_node_t *node = NULL; // Don't waste time if we're not tracing at this point pcmk__if_tracing({}, return); g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if (rsc) { qb_log_from_external_source(function, file, "%s: %s allocation score on %s: %s", LOG_TRACE, line, 0, comment, rsc->id, pe__node_name(node), pcmk_readable_score(node->weight)); } else { qb_log_from_external_source(function, file, "%s: %s = %s", LOG_TRACE, line, 0, comment, pe__node_name(node), pcmk_readable_score(node->weight)); } } } /*! * \internal * \brief Log or output node weights * * \param[in] file Caller's filename * \param[in] function Caller's function name * \param[in] line Caller's line number * \param[in] to_log Log if true, otherwise output * \param[in] rsc If not NULL, use this resource's ID in logs, * and show scores recursively for any children * \param[in] comment Text description to prefix lines with * \param[in] nodes Nodes whose scores should be shown * \param[in,out] data_set Cluster working set */ void pe__show_node_scores_as(const char *file, const char *function, int line, bool to_log, const pe_resource_t *rsc, const char *comment, GHashTable *nodes, pe_working_set_t *data_set) { if (rsc != NULL && pcmk_is_set(rsc->flags, pe_rsc_orphan)) { // Don't show allocation scores for orphans return; } if (nodes == NULL) { // Nothing to show return; } if (to_log) { pe__log_node_weights(file, function, line, rsc, comment, nodes); } else { pe__output_node_weights(rsc, comment, nodes, data_set); } // If this resource has children, repeat recursively for each if (rsc && rsc->children) { for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; pe__show_node_scores_as(file, function, line, to_log, child, comment, child->allowed_nodes, data_set); } } } /*! * \internal * \brief Compare two resources by priority * * \param[in] a First resource to compare (can be \c NULL) * \param[in] b Second resource to compare (can be \c NULL) * * \retval -1 \c a->priority > \c b->priority (or \c b is \c NULL and \c a is * not) * \retval 0 \c a->priority == \c b->priority (or both \c a and \c b are * \c NULL) * \retval 1 \c a->priority < \c b->priority (or \c a is \c NULL and \c b is * not) */ gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } static void resource_node_score(pe_resource_t *rsc, const pe_node_t *node, int score, const char *tag) { pe_node_t *match = NULL; if ((rsc->exclusive_discover || (node->rsc_discover_mode == pe_discover_never)) && pcmk__str_eq(tag, "symmetric_default", pcmk__str_casei)) { /* This string comparision may be fragile, but exclusive resources and * exclusive nodes should not have the symmetric_default constraint * applied to them. */ return; } else if (rsc->children) { GList *gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } - match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); + match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = pe__copy_node(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = pcmk__add_scores(match->weight, score); pe_rsc_trace(rsc, "Enabling %s preference (%s) for %s on %s (now %s)", tag, pcmk_readable_score(score), rsc->id, pe__node_name(node), pcmk_readable_score(match->weight)); } void resource_location(pe_resource_t *rsc, const pe_node_t *node, int score, const char *tag, pe_working_set_t *data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GList *gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node_iter = (pe_node_t *) gIter->data; resource_node_score(rsc, node_iter, score, tag); } } else { GHashTableIter iter; pe_node_t *node_iter = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) { resource_node_score(rsc, node_iter, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, pe__node_name(rsc->allocated_to)); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } time_t get_effective_time(pe_working_set_t * data_set) { if(data_set) { if (data_set->now == NULL) { crm_trace("Recording a new 'now'"); data_set->now = crm_time_new(NULL); } return crm_time_get_seconds_since_epoch(data_set->now); } crm_trace("Defaulting to 'now'"); return time(NULL); } gboolean get_target_role(const pe_resource_t *rsc, enum rsc_role_e *role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (pcmk__str_eq(value, "started", pcmk__str_null_matches | pcmk__str_casei) || pcmk__str_eq("default", value, pcmk__str_casei)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s " "because '%s' is not valid", rsc->id, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags, pe_rsc_promotable)) { if (local_role > RSC_ROLE_UNPROMOTED) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s " "because '%s' only makes sense for promotable " "clones", rsc->id, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order) { GList *gIter = NULL; pe_action_wrapper_t *wrapper = NULL; GList *list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Creating action wrappers for ordering: %s then %s", lh_action->uuid, rh_action->uuid); /* Ensure we never create a dependency on ourselves... it's happened */ CRM_ASSERT(lh_action != rh_action); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *after = (pe_action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(pe_action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = calloc(1, sizeof(pe_action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } void destroy_ticket(gpointer data) { pe_ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } pe_ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { pe_ticket_t *ticket = NULL; if (pcmk__str_empty(ticket_id)) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = pcmk__strkey_table(free, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(pe_ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = pcmk__strkey_table(free, free); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } const char * rsc_printable_id(const pe_resource_t *rsc) { return pcmk_is_set(rsc->flags, pe_rsc_unique)? rsc->id : ID(rsc->xml); } void pe__clear_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags) { pe__clear_resource_flags(rsc, flags); for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe__clear_resource_flags_recursive((pe_resource_t *) gIter->data, flags); } } void pe__clear_resource_flags_on_all(pe_working_set_t *data_set, uint64_t flag) { for (GList *lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *r = (pe_resource_t *) lpc->data; pe__clear_resource_flags_recursive(r, flag); } } void pe__set_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags) { pe__set_resource_flags(rsc, flags); for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe__set_resource_flags_recursive((pe_resource_t *) gIter->data, flags); } } void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) { /* No resources require it */ return; } else if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_fence_device)) { /* Wasn't a stonith device */ return; } else if(node && node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { pe_action_t *unfence = pe_fence_op(node, "on", FALSE, reason, FALSE, data_set); if(dependency) { order_actions(unfence, dependency, pe_order_optional); } } else if(rsc) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { trigger_unfencing(rsc, node, reason, dependency, data_set); } } } } gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref) { pe_tag_t *tag = NULL; GList *gIter = NULL; gboolean is_existing = FALSE; CRM_CHECK(tags && tag_name && obj_ref, return FALSE); tag = g_hash_table_lookup(tags, tag_name); if (tag == NULL) { tag = calloc(1, sizeof(pe_tag_t)); if (tag == NULL) { return FALSE; } tag->id = strdup(tag_name); tag->refs = NULL; g_hash_table_insert(tags, strdup(tag_name), tag); } for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *existing_ref = (const char *) gIter->data; if (pcmk__str_eq(existing_ref, obj_ref, pcmk__str_none)){ is_existing = TRUE; break; } } if (is_existing == FALSE) { tag->refs = g_list_append(tag->refs, strdup(obj_ref)); crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref); } return TRUE; } /*! * \internal * \brief Check whether shutdown has been requested for a node * * \param[in] node Node to check * * \return TRUE if node has shutdown attribute set and nonzero, FALSE otherwise * \note This differs from simply using node->details->shutdown in that it can * be used before that has been determined (and in fact to determine it), * and it can also be used to distinguish requested shutdown from implicit * shutdown of remote nodes by virtue of their connection stopping. */ bool pe__shutdown_requested(const pe_node_t *node) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); return !pcmk__str_eq(shutdown, "0", pcmk__str_null_matches); } /*! * \internal * \brief Update a data set's "recheck by" time * * \param[in] recheck Epoch time when recheck should happen * \param[in,out] data_set Current working set */ void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set) { if ((recheck > get_effective_time(data_set)) && ((data_set->recheck_by == 0) || (data_set->recheck_by > recheck))) { data_set->recheck_by = recheck; } } /*! * \internal * \brief Extract nvpair blocks contained by a CIB XML element into a hash table * * \param[in] xml_obj XML element containing blocks of nvpair elements * \param[in] set_name If not NULL, only use blocks of this element * \param[in] rule_data Matching parameters to use when unpacking * \param[out] hash Where to store extracted name/value pairs * \param[in] always_first If not NULL, process block with this ID first * \param[in] overwrite Whether to replace existing values with same name * \param[in,out] data_set Cluster working set containing \p xml_obj */ void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pe_working_set_t *data_set) { crm_time_t *next_change = crm_time_new_undefined(); pe_eval_nvpairs(data_set->input, xml_obj, set_name, rule_data, hash, always_first, overwrite, next_change); if (crm_time_is_defined(next_change)) { time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change); pe__update_recheck_time(recheck, data_set); } crm_time_free(next_change); } bool pe__resource_is_disabled(const pe_resource_t *rsc) { const char *target_role = NULL; CRM_CHECK(rsc != NULL, return false); target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if (target_role) { enum rsc_role_e target_role_e = text2role(target_role); if ((target_role_e == RSC_ROLE_STOPPED) || ((target_role_e == RSC_ROLE_UNPROMOTED) && pcmk_is_set(pe__const_top_resource(rsc, false)->flags, pe_rsc_promotable))) { return true; } } return false; } /*! * \internal * \brief Check whether a resource is running only on given node * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return true if \p rsc is running only on \p node, otherwise false */ bool pe__rsc_running_on_only(const pe_resource_t *rsc, const pe_node_t *node) { return (rsc != NULL) && pcmk__list_of_1(rsc->running_on) && pe__same_node((const pe_node_t *) rsc->running_on->data, node); } bool pe__rsc_running_on_any(pe_resource_t *rsc, GList *node_list) { for (GList *ele = rsc->running_on; ele; ele = ele->next) { pe_node_t *node = (pe_node_t *) ele->data; if (pcmk__str_in_list(node->details->uname, node_list, pcmk__str_star_matches|pcmk__str_casei)) { return true; } } return false; } bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GList *only_node) { return (rsc->fns->active(rsc, FALSE) && !pe__rsc_running_on_any(rsc, only_node)); } GList * pe__filter_rsc_list(GList *rscs, GList *filter) { GList *retval = NULL; for (GList *gIter = rscs; gIter; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* I think the second condition is safe here for all callers of this * function. If not, it needs to move into pe__node_text. */ if (pcmk__str_in_list(rsc_printable_id(rsc), filter, pcmk__str_star_matches) || (rsc->parent && pcmk__str_in_list(rsc_printable_id(rsc->parent), filter, pcmk__str_star_matches))) { retval = g_list_prepend(retval, rsc); } } return retval; } GList * pe__build_node_name_list(pe_working_set_t *data_set, const char *s) { GList *nodes = NULL; if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) { /* Nothing was given so return a list of all node names. Or, '*' was * given. This would normally fall into the pe__unames_with_tag branch * where it will return an empty list. Catch it here instead. */ nodes = g_list_prepend(nodes, strdup("*")); } else { pe_node_t *node = pe_find_node(data_set->nodes, s); if (node) { /* The given string was a valid uname for a node. Return a * singleton list containing just that uname. */ nodes = g_list_prepend(nodes, strdup(s)); } else { /* The given string was not a valid uname. It's either a tag or * it's a typo or something. In the first case, we'll return a * list of all the unames of the nodes with the given tag. In the * second case, we'll return a NULL pointer and nothing will * get displayed. */ nodes = pe__unames_with_tag(data_set, s); } } return nodes; } GList * pe__build_rsc_list(pe_working_set_t *data_set, const char *s) { GList *resources = NULL; if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) { resources = g_list_prepend(resources, strdup("*")); } else { pe_resource_t *rsc = pe_find_resource_with_flags(data_set->resources, s, pe_find_renamed|pe_find_any); if (rsc) { /* A colon in the name we were given means we're being asked to filter * on a specific instance of a cloned resource. Put that exact string * into the filter list. Otherwise, use the printable ID of whatever * resource was found that matches what was asked for. */ if (strstr(s, ":") != NULL) { resources = g_list_prepend(resources, strdup(rsc->id)); } else { resources = g_list_prepend(resources, strdup(rsc_printable_id(rsc))); } } else { /* The given string was not a valid resource name. It's a tag or a * typo or something. See pe__build_node_name_list() for more * detail. */ resources = pe__rscs_with_tag(data_set, s); } } return resources; } xmlNode * pe__failed_probe_for_rsc(const pe_resource_t *rsc, const char *name) { const pe_resource_t *parent = pe__const_top_resource(rsc, false); const char *rsc_id = rsc->id; if (parent->variant == pe_clone) { rsc_id = pe__clone_child_id(parent); } for (xmlNode *xml_op = pcmk__xml_first_child(rsc->cluster->failed); xml_op != NULL; xml_op = pcmk__xml_next(xml_op)) { const char *value = NULL; char *op_id = NULL; /* This resource operation is not a failed probe. */ if (!pcmk_xe_mask_probe_failure(xml_op)) { continue; } /* This resource operation was not run on the given node. Note that if name is * NULL, this will always succeed. */ value = crm_element_value(xml_op, XML_LRM_ATTR_TARGET); if (value == NULL || !pcmk__str_eq(value, name, pcmk__str_casei|pcmk__str_null_matches)) { continue; } if (!parse_op_key(pe__xe_history_key(xml_op), &op_id, NULL, NULL)) { continue; // This history entry is missing an operation key } /* This resource operation's ID does not match the rsc_id we are looking for. */ if (!pcmk__str_eq(op_id, rsc_id, pcmk__str_none)) { free(op_id); continue; } free(op_id); return xml_op; } return NULL; }