diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
index 3fb04bdf22..53de81fb42 100644
--- a/cts/cli/regression.crm_mon.exp
+++ b/cts/cli/regression.crm_mon.exp
@@ -1,5071 +1,5071 @@
=#=#=#= Begin test: Basic output =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster01 cluster02 ]
* Fencing (stonith:fence_xvm): Started cluster01
* dummy (ocf:pacemaker:Dummy): Started cluster02
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02
* Email (lsb:exim): Started cluster02
* Clone Set: mysql-clone-group [mysql-group]:
* Started: [ cluster01 cluster02 ]
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* Promoted: [ cluster02 ]
* Unpromoted: [ cluster01 ]
=#=#=#= End test: Basic output - OK (0) =#=#=#=
* Passed: crm_mon - Basic output
=#=#=#= Begin test: Basic output (XML) =#=#=#=
=#=#=#= End test: Basic output (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Basic output (XML)
=#=#=#= Begin test: Output without node section =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Active Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster01 cluster02 ]
* Fencing (stonith:fence_xvm): Started cluster01
* dummy (ocf:pacemaker:Dummy): Started cluster02
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02
* Email (lsb:exim): Started cluster02
* Clone Set: mysql-clone-group [mysql-group]:
* Started: [ cluster01 cluster02 ]
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* Promoted: [ cluster02 ]
* Unpromoted: [ cluster01 ]
=#=#=#= End test: Output without node section - OK (0) =#=#=#=
* Passed: crm_mon - Output without node section
=#=#=#= Begin test: Output without node section (XML) =#=#=#=
=#=#=#= End test: Output without node section (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output without node section (XML)
=#=#=#= Begin test: Output with only the node section =#=#=#=
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
=#=#=#= End test: Output with only the node section - OK (0) =#=#=#=
* Passed: crm_mon - Output with only the node section
=#=#=#= Begin test: Complete text output =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster01 cluster02 ]
* Fencing (stonith:fence_xvm): Started cluster01
* dummy (ocf:pacemaker:Dummy): Started cluster02
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02
* Email (lsb:exim): Started cluster02
* Clone Set: mysql-clone-group [mysql-group]:
* Started: [ cluster01 cluster02 ]
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* Promoted: [ cluster02 ]
* Unpromoted: [ cluster01 ]
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster02:
* ping: migration-threshold=1000000:
* (11) start
* (12) monitor: interval="10000ms"
* dummy: migration-threshold=1000000:
* (18) start
* (19) monitor: interval="60000ms"
* Public-IP: migration-threshold=1000000:
* (2) start
* Email: migration-threshold=1000000:
* (2) start
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (4) monitor: interval="10000ms"
* (5) cancel: interval="10000ms"
* (6) promote
* (7) monitor: interval="5000ms"
* httpd-bundle-ip-192.168.122.132: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: cluster01:
* ping: migration-threshold=1000000:
* (17) start
* (18) monitor: interval="10000ms"
* Fencing: migration-threshold=1000000:
* (15) start
* (20) monitor: interval="60000ms"
* dummy: migration-threshold=1000000:
* (16) stop
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (2) start
* (4) monitor: interval="10000ms"
* httpd-bundle-ip-192.168.122.131: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: httpd-bundle-0:
* httpd: migration-threshold=1000000:
* (1) start
* Node: httpd-bundle-1:
* httpd: migration-threshold=1000000:
* (1) start
Negative Location Constraints:
* not-on-cluster1 prevents dummy from running on cluster01
=#=#=#= End test: Complete text output - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output
=#=#=#= Begin test: Complete text output with detail =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster01: online
* GuestNode httpd-bundle-1@cluster02: online
* GuestNode httpd-bundle-2@: OFFLINE
Active Resources:
* Clone Set: ping-clone [ping]:
* ping (ocf:pacemaker:ping): Started cluster02
* ping (ocf:pacemaker:ping): Started cluster01
* Fencing (stonith:fence_xvm): Started cluster01
* dummy (ocf:pacemaker:Dummy): Started cluster02
* Container bundle set: httpd-bundle [pcmk:http]:
* Replica[0]
* httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started cluster01
* httpd (ocf:heartbeat:apache): Started httpd-bundle-0
* httpd-bundle-docker-0 (ocf:heartbeat:docker): Started cluster01
* httpd-bundle-0 (ocf:pacemaker:remote): Started cluster01
* Replica[1]
* httpd-bundle-ip-192.168.122.132 (ocf:heartbeat:IPaddr2): Started cluster02
* httpd (ocf:heartbeat:apache): Started httpd-bundle-1
* httpd-bundle-docker-1 (ocf:heartbeat:docker): Started cluster02
* httpd-bundle-1 (ocf:pacemaker:remote): Started cluster02
* Replica[2]
* httpd-bundle-ip-192.168.122.133 (ocf:heartbeat:IPaddr2): Stopped
* httpd (ocf:heartbeat:apache): Stopped
* httpd-bundle-docker-2 (ocf:heartbeat:docker): Stopped
* httpd-bundle-2 (ocf:pacemaker:remote): Stopped
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02
* Email (lsb:exim): Started cluster02
* Clone Set: mysql-clone-group [mysql-group]:
* Resource Group: mysql-group:0:
* mysql-proxy (lsb:mysql-proxy): Started cluster02
* Resource Group: mysql-group:1:
* mysql-proxy (lsb:mysql-proxy): Started cluster01
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* promotable-rsc (ocf:pacemaker:Stateful): Promoted cluster02 (test_description)
* promotable-rsc (ocf:pacemaker:Stateful): Unpromoted cluster01 (test_description)
* promotable-rsc (ocf:pacemaker:Stateful): Stopped (test_description)
* promotable-rsc (ocf:pacemaker:Stateful): Stopped (test_description)
* promotable-rsc (ocf:pacemaker:Stateful): Stopped (test_description)
Node Attributes:
* Node: cluster01 (1):
* location : office
* pingd : 1000
* Node: cluster02 (2):
* pingd : 1000
Operations:
* Node: cluster02 (2):
* ping: migration-threshold=1000000:
* (11) start
* (12) monitor: interval="10000ms"
* dummy: migration-threshold=1000000:
* (18) start
* (19) monitor: interval="60000ms"
* Public-IP: migration-threshold=1000000:
* (2) start
* Email: migration-threshold=1000000:
* (2) start
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (4) monitor: interval="10000ms"
* (5) cancel: interval="10000ms"
* (6) promote
* (7) monitor: interval="5000ms"
* httpd-bundle-ip-192.168.122.132: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: cluster01 (1):
* ping: migration-threshold=1000000:
* (17) start
* (18) monitor: interval="10000ms"
* Fencing: migration-threshold=1000000:
* (15) start
* (20) monitor: interval="60000ms"
* dummy: migration-threshold=1000000:
* (16) stop
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (2) start
* (4) monitor: interval="10000ms"
* httpd-bundle-ip-192.168.122.131: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: httpd-bundle-0@cluster01:
* httpd: migration-threshold=1000000:
* (1) start
* Node: httpd-bundle-1@cluster02:
* httpd: migration-threshold=1000000:
* (1) start
Negative Location Constraints:
* not-on-cluster1 prevents dummy from running on cluster01 (1)
=#=#=#= End test: Complete text output with detail - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output with detail
=#=#=#= Begin test: Complete brief text output =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* 1 (ocf:pacemaker:Dummy): Active cluster02
* 1 (stonith:fence_xvm): Active cluster01
* Clone Set: ping-clone [ping]:
* Started: [ cluster01 cluster02 ]
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
* Resource Group: exim-group:
* 1/1 (lsb:exim): Active cluster02
* 1/1 (ocf:heartbeat:IPaddr): Active cluster02
* Clone Set: mysql-clone-group [mysql-group]:
* Started: [ cluster01 cluster02 ]
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* Promoted: [ cluster02 ]
* Unpromoted: [ cluster01 ]
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster02:
* ping: migration-threshold=1000000:
* (11) start
* (12) monitor: interval="10000ms"
* dummy: migration-threshold=1000000:
* (18) start
* (19) monitor: interval="60000ms"
* Public-IP: migration-threshold=1000000:
* (2) start
* Email: migration-threshold=1000000:
* (2) start
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (4) monitor: interval="10000ms"
* (5) cancel: interval="10000ms"
* (6) promote
* (7) monitor: interval="5000ms"
* httpd-bundle-ip-192.168.122.132: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: cluster01:
* ping: migration-threshold=1000000:
* (17) start
* (18) monitor: interval="10000ms"
* Fencing: migration-threshold=1000000:
* (15) start
* (20) monitor: interval="60000ms"
* dummy: migration-threshold=1000000:
* (16) stop
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (2) start
* (4) monitor: interval="10000ms"
* httpd-bundle-ip-192.168.122.131: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: httpd-bundle-0:
* httpd: migration-threshold=1000000:
* (1) start
* Node: httpd-bundle-1:
* httpd: migration-threshold=1000000:
* (1) start
Negative Location Constraints:
* not-on-cluster1 prevents dummy from running on cluster01
=#=#=#= End test: Complete brief text output - OK (0) =#=#=#=
* Passed: crm_mon - Complete brief text output
=#=#=#= Begin test: Complete text output grouped by node =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Node cluster01: online:
* Resources:
* ping (ocf:pacemaker:ping): Started
* Fencing (stonith:fence_xvm): Started
* mysql-proxy (lsb:mysql-proxy): Started
* promotable-rsc (ocf:pacemaker:Stateful): Unpromoted (test_description)
* httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started
* httpd-bundle-docker-0 (ocf:heartbeat:docker): Started
* Node cluster02: online:
* Resources:
* ping (ocf:pacemaker:ping): Started
* dummy (ocf:pacemaker:Dummy): Started
* Public-IP (ocf:heartbeat:IPaddr): Started
* Email (lsb:exim): Started
* mysql-proxy (lsb:mysql-proxy): Started
* promotable-rsc (ocf:pacemaker:Stateful): Promoted (test_description)
* httpd-bundle-ip-192.168.122.132 (ocf:heartbeat:IPaddr2): Started
* httpd-bundle-docker-1 (ocf:heartbeat:docker): Started
* GuestNode httpd-bundle-0: online:
* Resources:
* httpd (ocf:heartbeat:apache): Started
* GuestNode httpd-bundle-1: online:
* Resources:
* httpd (ocf:heartbeat:apache): Started
* GuestNode httpd-bundle-2: OFFLINE:
* Resources:
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster02:
* ping: migration-threshold=1000000:
* (11) start
* (12) monitor: interval="10000ms"
* dummy: migration-threshold=1000000:
* (18) start
* (19) monitor: interval="60000ms"
* Public-IP: migration-threshold=1000000:
* (2) start
* Email: migration-threshold=1000000:
* (2) start
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (4) monitor: interval="10000ms"
* (5) cancel: interval="10000ms"
* (6) promote
* (7) monitor: interval="5000ms"
* httpd-bundle-ip-192.168.122.132: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: cluster01:
* ping: migration-threshold=1000000:
* (17) start
* (18) monitor: interval="10000ms"
* Fencing: migration-threshold=1000000:
* (15) start
* (20) monitor: interval="60000ms"
* dummy: migration-threshold=1000000:
* (16) stop
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (2) start
* (4) monitor: interval="10000ms"
* httpd-bundle-ip-192.168.122.131: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: httpd-bundle-0:
* httpd: migration-threshold=1000000:
* (1) start
* Node: httpd-bundle-1:
* httpd: migration-threshold=1000000:
* (1) start
Negative Location Constraints:
* not-on-cluster1 prevents dummy from running on cluster01
=#=#=#= End test: Complete text output grouped by node - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output grouped by node
=#=#=#= Begin test: Complete brief text output grouped by node =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Node cluster01: online:
* Resources:
* 1 (lsb:mysql-proxy): Active
* 1 (ocf:heartbeat:IPaddr2): Active
* 1 (ocf:heartbeat:docker): Active
* 1 (ocf:pacemaker:Stateful): Active
* 1 (ocf:pacemaker:ping): Active
* 1 (ocf:pacemaker:remote): Active
* 1 (stonith:fence_xvm): Active
* Node cluster02: online:
* Resources:
* 1 (lsb:exim): Active
* 1 (lsb:mysql-proxy): Active
* 1 (ocf:heartbeat:IPaddr): Active
* 1 (ocf:heartbeat:IPaddr2): Active
* 1 (ocf:heartbeat:docker): Active
* 1 (ocf:pacemaker:Dummy): Active
* 1 (ocf:pacemaker:Stateful): Active
* 1 (ocf:pacemaker:ping): Active
* 1 (ocf:pacemaker:remote): Active
* GuestNode httpd-bundle-0: online:
* Resources:
* 1 (ocf:heartbeat:apache): Active
* GuestNode httpd-bundle-1: online:
* Resources:
* 1 (ocf:heartbeat:apache): Active
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster02:
* ping: migration-threshold=1000000:
* (11) start
* (12) monitor: interval="10000ms"
* dummy: migration-threshold=1000000:
* (18) start
* (19) monitor: interval="60000ms"
* Public-IP: migration-threshold=1000000:
* (2) start
* Email: migration-threshold=1000000:
* (2) start
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (4) monitor: interval="10000ms"
* (5) cancel: interval="10000ms"
* (6) promote
* (7) monitor: interval="5000ms"
* httpd-bundle-ip-192.168.122.132: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: cluster01:
* ping: migration-threshold=1000000:
* (17) start
* (18) monitor: interval="10000ms"
* Fencing: migration-threshold=1000000:
* (15) start
* (20) monitor: interval="60000ms"
* dummy: migration-threshold=1000000:
* (16) stop
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (2) start
* (4) monitor: interval="10000ms"
* httpd-bundle-ip-192.168.122.131: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: httpd-bundle-0:
* httpd: migration-threshold=1000000:
* (1) start
* Node: httpd-bundle-1:
* httpd: migration-threshold=1000000:
* (1) start
Negative Location Constraints:
* not-on-cluster1 prevents dummy from running on cluster01
=#=#=#= End test: Complete brief text output grouped by node - OK (0) =#=#=#=
* Passed: crm_mon - Complete brief text output grouped by node
=#=#=#= Begin test: Output grouped by node (XML) =#=#=#=
=#=#=#= End test: Output grouped by node (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output grouped by node (XML)
=#=#=#= Begin test: Complete output filtered by node =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 ]
Active Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster01 ]
* Fencing (stonith:fence_xvm): Started cluster01
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
* Clone Set: mysql-clone-group [mysql-group]:
* Started: [ cluster01 ]
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* Unpromoted: [ cluster01 ]
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
Operations:
* Node: cluster01:
* ping: migration-threshold=1000000:
* (17) start
* (18) monitor: interval="10000ms"
* Fencing: migration-threshold=1000000:
* (15) start
* (20) monitor: interval="60000ms"
* dummy: migration-threshold=1000000:
* (16) stop
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (2) start
* (4) monitor: interval="10000ms"
* httpd-bundle-ip-192.168.122.131: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
Negative Location Constraints:
* not-on-cluster1 prevents dummy from running on cluster01
=#=#=#= End test: Complete output filtered by node - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by node
=#=#=#= Begin test: Complete output filtered by node (XML) =#=#=#=
=#=#=#= End test: Complete output filtered by node (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by node (XML)
=#=#=#= Begin test: Complete output filtered by tag =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster02 ]
Active Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster02 ]
* dummy (ocf:pacemaker:Dummy): Started cluster02
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02
* Email (lsb:exim): Started cluster02
* Clone Set: mysql-clone-group [mysql-group]:
* Started: [ cluster02 ]
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* Promoted: [ cluster02 ]
Node Attributes:
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster02:
* ping: migration-threshold=1000000:
* (11) start
* (12) monitor: interval="10000ms"
* dummy: migration-threshold=1000000:
* (18) start
* (19) monitor: interval="60000ms"
* Public-IP: migration-threshold=1000000:
* (2) start
* Email: migration-threshold=1000000:
* (2) start
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* promotable-rsc: migration-threshold=1000000:
* (4) monitor: interval="10000ms"
* (5) cancel: interval="10000ms"
* (6) promote
* (7) monitor: interval="5000ms"
* httpd-bundle-ip-192.168.122.132: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
Negative Location Constraints:
* not-on-cluster1 prevents dummy from running on cluster01
=#=#=#= End test: Complete output filtered by tag - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by tag
=#=#=#= Begin test: Complete output filtered by tag (XML) =#=#=#=
=#=#=#= End test: Complete output filtered by tag (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by tag (XML)
=#=#=#= Begin test: Complete output filtered by resource tag =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Fencing (stonith:fence_xvm): Started cluster01
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster01:
* Fencing: migration-threshold=1000000:
* (15) start
* (20) monitor: interval="60000ms"
=#=#=#= End test: Complete output filtered by resource tag - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by resource tag
=#=#=#= Begin test: Complete output filtered by resource tag (XML) =#=#=#=
=#=#=#= End test: Complete output filtered by resource tag (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by resource tag (XML)
=#=#=#= Begin test: Output filtered by node that doesn't exist =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Active Resources:
* No active resources
=#=#=#= End test: Output filtered by node that doesn't exist - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by node that doesn't exist
=#=#=#= Begin test: Output filtered by node that doesn't exist (XML) =#=#=#=
=#=#=#= End test: Output filtered by node that doesn't exist (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by node that doesn't exist (XML)
=#=#=#= Begin test: Basic text output with inactive resources =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Full List of Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster01 cluster02 ]
* Fencing (stonith:fence_xvm): Started cluster01
* dummy (ocf:pacemaker:Dummy): Started cluster02
* Clone Set: inactive-clone [inactive-dhcpd] (disabled):
* Stopped (disabled): [ cluster01 cluster02 ]
* Resource Group: inactive-group (disabled):
* inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled)
* inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled)
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02
* Email (lsb:exim): Started cluster02
* Clone Set: mysql-clone-group [mysql-group]:
* Started: [ cluster01 cluster02 ]
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* Promoted: [ cluster02 ]
* Unpromoted: [ cluster01 ]
=#=#=#= End test: Basic text output with inactive resources - OK (0) =#=#=#=
* Passed: crm_mon - Basic text output with inactive resources
=#=#=#= Begin test: Basic text output with inactive resources, filtered by node =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster02 ]
Full List of Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster02 ]
* dummy (ocf:pacemaker:Dummy): Started cluster02
* Clone Set: inactive-clone [inactive-dhcpd] (disabled):
* Stopped (disabled): [ cluster02 ]
* Resource Group: inactive-group (disabled):
* inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled)
* inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled)
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02
* Email (lsb:exim): Started cluster02
* Clone Set: mysql-clone-group [mysql-group]:
* Started: [ cluster02 ]
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* Promoted: [ cluster02 ]
=#=#=#= End test: Basic text output with inactive resources, filtered by node - OK (0) =#=#=#=
* Passed: crm_mon - Basic text output with inactive resources, filtered by node
=#=#=#= Begin test: Complete output filtered by primitive resource =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Fencing (stonith:fence_xvm): Started cluster01
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster01:
* Fencing: migration-threshold=1000000:
* (15) start
* (20) monitor: interval="60000ms"
=#=#=#= End test: Complete output filtered by primitive resource - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by primitive resource
=#=#=#= Begin test: Complete output filtered by primitive resource (XML) =#=#=#=
=#=#=#= End test: Complete output filtered by primitive resource (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by primitive resource (XML)
=#=#=#= Begin test: Complete output filtered by group resource =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02
* Email (lsb:exim): Started cluster02
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster02:
* Public-IP: migration-threshold=1000000:
* (2) start
* Email: migration-threshold=1000000:
* (2) start
=#=#=#= End test: Complete output filtered by group resource - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by group resource
=#=#=#= Begin test: Complete output filtered by group resource (XML) =#=#=#=
=#=#=#= End test: Complete output filtered by group resource (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by group resource (XML)
=#=#=#= Begin test: Complete text output filtered by group resource member =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster02:
* Public-IP: migration-threshold=1000000:
* (2) start
=#=#=#= End test: Complete text output filtered by group resource member - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output filtered by group resource member
=#=#=#= Begin test: Output filtered by group resource member (XML) =#=#=#=
=#=#=#= End test: Output filtered by group resource member (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by group resource member (XML)
=#=#=#= Begin test: Complete output filtered by clone resource =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster01 cluster02 ]
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster02:
* ping: migration-threshold=1000000:
* (11) start
* (12) monitor: interval="10000ms"
* Node: cluster01:
* ping: migration-threshold=1000000:
* (17) start
* (18) monitor: interval="10000ms"
=#=#=#= End test: Complete output filtered by clone resource - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by clone resource
=#=#=#= Begin test: Complete output filtered by clone resource (XML) =#=#=#=
=#=#=#= End test: Complete output filtered by clone resource (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by clone resource (XML)
=#=#=#= Begin test: Complete output filtered by clone resource instance =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster01 cluster02 ]
Node Attributes:
* Node: cluster01:
* location : office
* pingd : 1000
* Node: cluster02:
* pingd : 1000
Operations:
* Node: cluster02:
* ping: migration-threshold=1000000:
* (11) start
* (12) monitor: interval="10000ms"
* Node: cluster01:
* ping: migration-threshold=1000000:
* (17) start
* (18) monitor: interval="10000ms"
=#=#=#= End test: Complete output filtered by clone resource instance - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by clone resource instance
=#=#=#= Begin test: Complete output filtered by clone resource instance (XML) =#=#=#=
=#=#=#= End test: Complete output filtered by clone resource instance (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Complete output filtered by clone resource instance (XML)
=#=#=#= Begin test: Complete text output filtered by exact clone resource instance =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster01: online
* GuestNode httpd-bundle-1@cluster02: online
* GuestNode httpd-bundle-2@: OFFLINE
Active Resources:
* Clone Set: ping-clone [ping]:
* ping (ocf:pacemaker:ping): Started cluster02
Node Attributes:
* Node: cluster01 (1):
* location : office
* pingd : 1000
* Node: cluster02 (2):
* pingd : 1000
Operations:
* Node: cluster02 (2):
* ping: migration-threshold=1000000:
* (11) start
* (12) monitor: interval="10000ms"
* Node: cluster01 (1):
* ping: migration-threshold=1000000:
* (17) start
* (18) monitor: interval="10000ms"
=#=#=#= End test: Complete text output filtered by exact clone resource instance - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output filtered by exact clone resource instance
=#=#=#= Begin test: Output filtered by exact clone resource instance (XML) =#=#=#=
=#=#=#= End test: Output filtered by exact clone resource instance (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by exact clone resource instance (XML)
=#=#=#= Begin test: Output filtered by resource that doesn't exist =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* No active resources
=#=#=#= End test: Output filtered by resource that doesn't exist - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by resource that doesn't exist
=#=#=#= Begin test: Output filtered by resource that doesn't exist (XML) =#=#=#=
=#=#=#= End test: Output filtered by resource that doesn't exist (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by resource that doesn't exist (XML)
=#=#=#= Begin test: Basic text output with inactive resources, filtered by tag =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Full List of Resources:
* Clone Set: inactive-clone [inactive-dhcpd] (disabled):
* Stopped (disabled): [ cluster01 cluster02 ]
* Resource Group: inactive-group (disabled):
* inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled)
* inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled)
=#=#=#= End test: Basic text output with inactive resources, filtered by tag - OK (0) =#=#=#=
* Passed: crm_mon - Basic text output with inactive resources, filtered by tag
=#=#=#= Begin test: Basic text output with inactive resources, filtered by bundle resource =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Full List of Resources:
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
=#=#=#= End test: Basic text output with inactive resources, filtered by bundle resource - OK (0) =#=#=#=
* Passed: crm_mon - Basic text output with inactive resources, filtered by bundle resource
=#=#=#= Begin test: Output filtered by inactive bundle resource (XML) =#=#=#=
=#=#=#= End test: Output filtered by inactive bundle resource (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by inactive bundle resource (XML)
=#=#=#= Begin test: Basic text output with inactive resources, filtered by bundled IP address resource =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Full List of Resources:
* Container bundle set: httpd-bundle [pcmk:http]:
* Replica[0]
* httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started cluster01
=#=#=#= End test: Basic text output with inactive resources, filtered by bundled IP address resource - OK (0) =#=#=#=
* Passed: crm_mon - Basic text output with inactive resources, filtered by bundled IP address resource
=#=#=#= Begin test: Output filtered by bundled IP address resource (XML) =#=#=#=
=#=#=#= End test: Output filtered by bundled IP address resource (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by bundled IP address resource (XML)
=#=#=#= Begin test: Basic text output with inactive resources, filtered by bundled container =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Full List of Resources:
* Container bundle set: httpd-bundle [pcmk:http]:
* Replica[1]
* httpd-bundle-docker-1 (ocf:heartbeat:docker): Started cluster02
=#=#=#= End test: Basic text output with inactive resources, filtered by bundled container - OK (0) =#=#=#=
* Passed: crm_mon - Basic text output with inactive resources, filtered by bundled container
=#=#=#= Begin test: Output filtered by bundled container (XML) =#=#=#=
=#=#=#= End test: Output filtered by bundled container (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by bundled container (XML)
=#=#=#= Begin test: Basic text output with inactive resources, filtered by bundle connection =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Full List of Resources:
* Container bundle set: httpd-bundle [pcmk:http]:
* Replica[0]
* httpd-bundle-0 (ocf:pacemaker:remote): Started cluster01
=#=#=#= End test: Basic text output with inactive resources, filtered by bundle connection - OK (0) =#=#=#=
* Passed: crm_mon - Basic text output with inactive resources, filtered by bundle connection
=#=#=#= Begin test: Output filtered by bundle connection (XML) =#=#=#=
=#=#=#= End test: Output filtered by bundle connection (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by bundle connection (XML)
=#=#=#= Begin test: Basic text output with inactive resources, filtered by bundled primitive resource =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Full List of Resources:
* Container bundle set: httpd-bundle [pcmk:http]:
* Replica[0]
* httpd (ocf:heartbeat:apache): Started httpd-bundle-0
* Replica[1]
* httpd (ocf:heartbeat:apache): Started httpd-bundle-1
* Replica[2]
* httpd (ocf:heartbeat:apache): Stopped
=#=#=#= End test: Basic text output with inactive resources, filtered by bundled primitive resource - OK (0) =#=#=#=
* Passed: crm_mon - Basic text output with inactive resources, filtered by bundled primitive resource
=#=#=#= Begin test: Output filtered by bundled primitive resource (XML) =#=#=#=
=#=#=#= End test: Output filtered by bundled primitive resource (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output filtered by bundled primitive resource (XML)
=#=#=#= Begin test: Complete text output, filtered by clone name in cloned group =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster01: online
* GuestNode httpd-bundle-1@cluster02: online
* GuestNode httpd-bundle-2@: OFFLINE
Active Resources:
* Clone Set: mysql-clone-group [mysql-group]:
* Resource Group: mysql-group:0:
* mysql-proxy (lsb:mysql-proxy): Started cluster02
* Resource Group: mysql-group:1:
* mysql-proxy (lsb:mysql-proxy): Started cluster01
Node Attributes:
* Node: cluster01 (1):
* location : office
* pingd : 1000
* Node: cluster02 (2):
* pingd : 1000
Operations:
* Node: cluster02 (2):
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* Node: cluster01 (1):
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
=#=#=#= End test: Complete text output, filtered by clone name in cloned group - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output, filtered by clone name in cloned group
=#=#=#= Begin test: Output, filtered by clone name in cloned group (XML) =#=#=#=
=#=#=#= End test: Output, filtered by clone name in cloned group (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output, filtered by clone name in cloned group (XML)
=#=#=#= Begin test: Complete text output, filtered by group name in cloned group =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster01: online
* GuestNode httpd-bundle-1@cluster02: online
* GuestNode httpd-bundle-2@: OFFLINE
Active Resources:
* Clone Set: mysql-clone-group [mysql-group]:
* Resource Group: mysql-group:0:
* mysql-proxy (lsb:mysql-proxy): Started cluster02
* Resource Group: mysql-group:1:
* mysql-proxy (lsb:mysql-proxy): Started cluster01
Node Attributes:
* Node: cluster01 (1):
* location : office
* pingd : 1000
* Node: cluster02 (2):
* pingd : 1000
Operations:
* Node: cluster02 (2):
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* Node: cluster01 (1):
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
=#=#=#= End test: Complete text output, filtered by group name in cloned group - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output, filtered by group name in cloned group
=#=#=#= Begin test: Output, filtered by group name in cloned group (XML) =#=#=#=
=#=#=#= End test: Output, filtered by group name in cloned group (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output, filtered by group name in cloned group (XML)
=#=#=#= Begin test: Complete text output, filtered by exact group instance name in cloned group =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster01: online
* GuestNode httpd-bundle-1@cluster02: online
* GuestNode httpd-bundle-2@: OFFLINE
Active Resources:
* Clone Set: mysql-clone-group [mysql-group]:
* Resource Group: mysql-group:1:
* mysql-proxy (lsb:mysql-proxy): Started cluster01
Node Attributes:
* Node: cluster01 (1):
* location : office
* pingd : 1000
* Node: cluster02 (2):
* pingd : 1000
Operations:
* Node: cluster02 (2):
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* Node: cluster01 (1):
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
=#=#=#= End test: Complete text output, filtered by exact group instance name in cloned group - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output, filtered by exact group instance name in cloned group
=#=#=#= Begin test: Output, filtered by exact group instance name in cloned group (XML) =#=#=#=
=#=#=#= End test: Output, filtered by exact group instance name in cloned group (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output, filtered by exact group instance name in cloned group (XML)
=#=#=#= Begin test: Complete text output, filtered by primitive name in cloned group =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster01: online
* GuestNode httpd-bundle-1@cluster02: online
* GuestNode httpd-bundle-2@: OFFLINE
Active Resources:
* Clone Set: mysql-clone-group [mysql-group]:
* Resource Group: mysql-group:0:
* mysql-proxy (lsb:mysql-proxy): Started cluster02
* Resource Group: mysql-group:1:
* mysql-proxy (lsb:mysql-proxy): Started cluster01
Node Attributes:
* Node: cluster01 (1):
* location : office
* pingd : 1000
* Node: cluster02 (2):
* pingd : 1000
Operations:
* Node: cluster02 (2):
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* Node: cluster01 (1):
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
=#=#=#= End test: Complete text output, filtered by primitive name in cloned group - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output, filtered by primitive name in cloned group
=#=#=#= Begin test: Output, filtered by primitive name in cloned group (XML) =#=#=#=
=#=#=#= End test: Output, filtered by primitive name in cloned group (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output, filtered by primitive name in cloned group (XML)
=#=#=#= Begin test: Complete text output, filtered by exact primitive instance name in cloned group =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster01: online
* GuestNode httpd-bundle-1@cluster02: online
* GuestNode httpd-bundle-2@: OFFLINE
Active Resources:
* Clone Set: mysql-clone-group [mysql-group]:
* Resource Group: mysql-group:1:
* mysql-proxy (lsb:mysql-proxy): Started cluster01
Node Attributes:
* Node: cluster01 (1):
* location : office
* pingd : 1000
* Node: cluster02 (2):
* pingd : 1000
Operations:
* Node: cluster02 (2):
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* Node: cluster01 (1):
* mysql-proxy: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
=#=#=#= End test: Complete text output, filtered by exact primitive instance name in cloned group - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output, filtered by exact primitive instance name in cloned group
=#=#=#= Begin test: Output, filtered by exact primitive instance name in cloned group (XML) =#=#=#=
=#=#=#= End test: Output, filtered by exact primitive instance name in cloned group (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output, filtered by exact primitive instance name in cloned group (XML)
=#=#=#= Begin test: Check that CIB_file="-" works =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster01 cluster02 ]
* Fencing (stonith:fence_xvm): Started cluster01
* dummy (ocf:pacemaker:Dummy): Started cluster02
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02
* Email (lsb:exim): Started cluster02
* Clone Set: mysql-clone-group [mysql-group]:
* Started: [ cluster01 cluster02 ]
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* Promoted: [ cluster02 ]
* Unpromoted: [ cluster01 ]
=#=#=#= End test: Check that CIB_file="-" works - OK (0) =#=#=#=
* Passed: crm_mon - Check that CIB_file="-" works
=#=#=#= Begin test: Output of partially active resources =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 16 resource instances configured (1 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster02: online
* GuestNode httpd-bundle-1@cluster01: online
Active Resources:
* Clone Set: ping-clone [ping]:
* ping (ocf:pacemaker:ping): Started cluster01
- * ping (ocf:pacemaker:ping): Stopped (not installed)
+ * ping (ocf:pacemaker:ping): Stopped (Not installed)
* Fencing (stonith:fence_xvm): Started cluster01
* Container bundle set: httpd-bundle [pcmk:http]:
* Replica[0]
* httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started cluster02
* httpd (ocf:heartbeat:apache): Started httpd-bundle-0
* httpd-bundle-docker-0 (ocf:heartbeat:docker): Started cluster02
* httpd-bundle-0 (ocf:pacemaker:remote): Started cluster02
* Replica[1]
* httpd-bundle-ip-192.168.122.132 (ocf:heartbeat:IPaddr2): Started cluster01
* httpd (ocf:heartbeat:apache): FAILED httpd-bundle-1
* httpd-bundle-docker-1 (ocf:heartbeat:docker): Started cluster01
* httpd-bundle-1 (ocf:pacemaker:remote): Started cluster01
* Resource Group: partially-active-group (2 members inactive):
* dummy-1 (ocf:pacemaker:Dummy): Started cluster02
* dummy-2 (ocf:pacemaker:Dummy): FAILED cluster02
Failed Resource Actions:
- * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='Done', queued=0ms, exec=33ms
+ * dummy-2_monitor_0 on cluster02 'Unimplemented' (3): call=2, status='Done', queued=0ms, exec=33ms
=#=#=#= End test: Output of partially active resources - OK (0) =#=#=#=
* Passed: crm_mon - Output of partially active resources
=#=#=#= Begin test: Output of partially active resources (XML) =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
=#=#=#= End test: Output of partially active resources (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output of partially active resources (XML)
=#=#=#= Begin test: Output of partially active resources, with inactive resources =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 16 resource instances configured (1 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster02: online
* GuestNode httpd-bundle-1@cluster01: online
Full List of Resources:
* Clone Set: ping-clone [ping]:
* ping (ocf:pacemaker:ping): Started cluster01
- * ping (ocf:pacemaker:ping): Stopped (not installed)
+ * ping (ocf:pacemaker:ping): Stopped (Not installed)
* Fencing (stonith:fence_xvm): Started cluster01
* Container bundle set: httpd-bundle [pcmk:http]:
* Replica[0]
* httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started cluster02
* httpd (ocf:heartbeat:apache): Started httpd-bundle-0
* httpd-bundle-docker-0 (ocf:heartbeat:docker): Started cluster02
* httpd-bundle-0 (ocf:pacemaker:remote): Started cluster02
* Replica[1]
* httpd-bundle-ip-192.168.122.132 (ocf:heartbeat:IPaddr2): Started cluster01
* httpd (ocf:heartbeat:apache): FAILED httpd-bundle-1
* httpd-bundle-docker-1 (ocf:heartbeat:docker): Started cluster01
* httpd-bundle-1 (ocf:pacemaker:remote): Started cluster01
* Resource Group: partially-active-group:
* dummy-1 (ocf:pacemaker:Dummy): Started cluster02
* dummy-2 (ocf:pacemaker:Dummy): FAILED cluster02
* dummy-3 (ocf:pacemaker:Dummy): Stopped (disabled)
- * dummy-4 (ocf:pacemaker:Dummy): Stopped (not installed)
- * smart-mon (ocf:pacemaker:HealthSMART): Stopped (not installed)
+ * dummy-4 (ocf:pacemaker:Dummy): Stopped (Not installed)
+ * smart-mon (ocf:pacemaker:HealthSMART): Stopped (Not installed)
Failed Resource Actions:
- * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='Done', queued=0ms, exec=33ms
+ * dummy-2_monitor_0 on cluster02 'Unimplemented' (3): call=2, status='Done', queued=0ms, exec=33ms
=#=#=#= End test: Output of partially active resources, with inactive resources - OK (0) =#=#=#=
* Passed: crm_mon - Output of partially active resources, with inactive resources
=#=#=#= Begin test: Complete brief text output, with inactive resources =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 16 resource instances configured (1 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster02: online
* GuestNode httpd-bundle-1@cluster01: online
Full List of Resources:
* 0/1 (ocf:pacemaker:HealthSMART): Active
* 1/1 (stonith:fence_xvm): Active cluster01
* Clone Set: ping-clone [ping]:
* ping (ocf:pacemaker:ping): Started cluster01
- * ping (ocf:pacemaker:ping): Stopped (not installed)
+ * ping (ocf:pacemaker:ping): Stopped (Not installed)
* Container bundle set: httpd-bundle [pcmk:http]:
* Replica[0]
* httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started cluster02
* httpd (ocf:heartbeat:apache): Started httpd-bundle-0
* httpd-bundle-docker-0 (ocf:heartbeat:docker): Started cluster02
* httpd-bundle-0 (ocf:pacemaker:remote): Started cluster02
* Replica[1]
* httpd-bundle-ip-192.168.122.132 (ocf:heartbeat:IPaddr2): Started cluster01
* httpd (ocf:heartbeat:apache): FAILED httpd-bundle-1
* httpd-bundle-docker-1 (ocf:heartbeat:docker): Started cluster01
* httpd-bundle-1 (ocf:pacemaker:remote): Started cluster01
* Resource Group: partially-active-group:
* 2/4 (ocf:pacemaker:Dummy): Active cluster02
Node Attributes:
* Node: cluster01 (1):
* pingd : 1000
* Node: cluster02 (2):
* pingd : 1000
Operations:
* Node: cluster02 (2):
* httpd-bundle-ip-192.168.122.131: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* dummy-1: migration-threshold=1000000:
* (2) start
* dummy-2: migration-threshold=1000000:
* (2) probe
* dummy-4: migration-threshold=1000000:
* (2) probe
* smart-mon: migration-threshold=1000000:
* (9) probe
* ping: migration-threshold=1000000:
* (6) probe
* Node: cluster01 (1):
* Fencing: migration-threshold=1000000:
* (15) start
* (20) monitor: interval="60000ms"
* ping: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* httpd-bundle-ip-192.168.122.132: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: httpd-bundle-0@cluster02:
* httpd: migration-threshold=1000000:
* (1) start
* Node: httpd-bundle-1@cluster01:
* httpd: migration-threshold=1000000:
* (1) probe
Failed Resource Actions:
- * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='Done', queued=0ms, exec=33ms
+ * dummy-2_monitor_0 on cluster02 'Unimplemented' (3): call=2, status='Done', queued=0ms, exec=33ms
=#=#=#= End test: Complete brief text output, with inactive resources - OK (0) =#=#=#=
* Passed: crm_mon - Complete brief text output, with inactive resources
=#=#=#= Begin test: Text output of partially active group =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 16 resource instances configured (1 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Resource Group: partially-active-group (2 members inactive):
* dummy-1 (ocf:pacemaker:Dummy): Started cluster02
* dummy-2 (ocf:pacemaker:Dummy): FAILED cluster02
=#=#=#= End test: Text output of partially active group - OK (0) =#=#=#=
* Passed: crm_mon - Text output of partially active group
=#=#=#= Begin test: Text output of partially active group, with inactive resources =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 16 resource instances configured (1 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Full List of Resources:
* Resource Group: partially-active-group:
* dummy-1 (ocf:pacemaker:Dummy): Started cluster02
* dummy-2 (ocf:pacemaker:Dummy): FAILED cluster02
* dummy-3 (ocf:pacemaker:Dummy): Stopped (disabled)
- * dummy-4 (ocf:pacemaker:Dummy): Stopped (not installed)
+ * dummy-4 (ocf:pacemaker:Dummy): Stopped (Not installed)
=#=#=#= End test: Text output of partially active group, with inactive resources - OK (0) =#=#=#=
* Passed: crm_mon - Text output of partially active group, with inactive resources
=#=#=#= Begin test: Text output of active member of partially active group =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 16 resource instances configured (1 DISABLED)
Node List:
* Online: [ cluster01 cluster02 ]
* GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ]
Active Resources:
* Resource Group: partially-active-group (2 members inactive):
* dummy-1 (ocf:pacemaker:Dummy): Started cluster02
=#=#=#= End test: Text output of active member of partially active group - OK (0) =#=#=#=
* Passed: crm_mon - Text output of active member of partially active group
=#=#=#= Begin test: Text output of inactive member of partially active group =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 16 resource instances configured (1 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1
* Node cluster02 (2): online, feature set <3.15.1
* GuestNode httpd-bundle-0@cluster02: online
* GuestNode httpd-bundle-1@cluster01: online
Active Resources:
* Resource Group: partially-active-group (2 members inactive):
* dummy-2 (ocf:pacemaker:Dummy): FAILED cluster02
Failed Resource Actions:
- * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='Done', queued=0ms, exec=33ms
+ * dummy-2_monitor_0 on cluster02 'Unimplemented' (3): call=2, status='Done', queued=0ms, exec=33ms
=#=#=#= End test: Text output of inactive member of partially active group - OK (0) =#=#=#=
* Passed: crm_mon - Text output of inactive member of partially active group
=#=#=#= Begin test: Complete brief text output grouped by node, with inactive resources =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (2) (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 16 resource instances configured (1 DISABLED)
Node List:
* Node cluster01 (1): online, feature set <3.15.1:
* Resources:
* 1 (ocf:heartbeat:IPaddr2): Active
* 1 (ocf:heartbeat:docker): Active
* 1 (ocf:pacemaker:ping): Active
* 1 (ocf:pacemaker:remote): Active
* 1 (stonith:fence_xvm): Active
* Node cluster02 (2): online, feature set <3.15.1:
* Resources:
* 1 (ocf:heartbeat:IPaddr2): Active
* 1 (ocf:heartbeat:docker): Active
* 2 (ocf:pacemaker:Dummy): Active
* 1 (ocf:pacemaker:remote): Active
* GuestNode httpd-bundle-0@cluster02: online:
* Resources:
* 1 (ocf:heartbeat:apache): Active
* GuestNode httpd-bundle-1@cluster01: online:
* Resources:
* 1 (ocf:heartbeat:apache): Active
Inactive Resources:
* Clone Set: ping-clone [ping]:
* ping (ocf:pacemaker:ping): Started cluster01
- * ping (ocf:pacemaker:ping): Stopped (not installed)
+ * ping (ocf:pacemaker:ping): Stopped (Not installed)
* Resource Group: partially-active-group:
* 2/4 (ocf:pacemaker:Dummy): Active cluster02
- * smart-mon (ocf:pacemaker:HealthSMART): Stopped (not installed)
+ * smart-mon (ocf:pacemaker:HealthSMART): Stopped (Not installed)
Node Attributes:
* Node: cluster01 (1):
* pingd : 1000
* Node: cluster02 (2):
* pingd : 1000
Operations:
* Node: cluster02 (2):
* httpd-bundle-ip-192.168.122.131: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-0: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* dummy-1: migration-threshold=1000000:
* (2) start
* dummy-2: migration-threshold=1000000:
* (2) probe
* dummy-4: migration-threshold=1000000:
* (2) probe
* smart-mon: migration-threshold=1000000:
* (9) probe
* ping: migration-threshold=1000000:
* (6) probe
* Node: cluster01 (1):
* Fencing: migration-threshold=1000000:
* (15) start
* (20) monitor: interval="60000ms"
* ping: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="10000ms"
* httpd-bundle-ip-192.168.122.132: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-docker-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="60000ms"
* httpd-bundle-1: migration-threshold=1000000:
* (2) start
* (3) monitor: interval="30000ms"
* Node: httpd-bundle-0@cluster02:
* httpd: migration-threshold=1000000:
* (1) start
* Node: httpd-bundle-1@cluster01:
* httpd: migration-threshold=1000000:
* (1) probe
Failed Resource Actions:
- * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='Done', queued=0ms, exec=33ms
+ * dummy-2_monitor_0 on cluster02 'Unimplemented' (3): call=2, status='Done', queued=0ms, exec=33ms
=#=#=#= End test: Complete brief text output grouped by node, with inactive resources - OK (0) =#=#=#=
* Passed: crm_mon - Complete brief text output grouped by node, with inactive resources
=#=#=#= Begin test: Text output of partially active resources, with inactive resources, filtered by node =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 16 resource instances configured (1 DISABLED)
Node List:
* Online: [ cluster01 ]
Full List of Resources:
* Clone Set: ping-clone [ping]:
* Started: [ cluster01 ]
* Fencing (stonith:fence_xvm): Started cluster01
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): FAILED cluster01
- * smart-mon (ocf:pacemaker:HealthSMART): Stopped (not installed)
+ * smart-mon (ocf:pacemaker:HealthSMART): Stopped (Not installed)
=#=#=#= End test: Text output of partially active resources, with inactive resources, filtered by node - OK (0) =#=#=#=
* Passed: crm_mon - Text output of partially active resources, with inactive resources, filtered by node
=#=#=#= Begin test: Output of partially active resources, filtered by node (XML) =#=#=#=
-unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0
-unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0
+unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (Unimplemented) | dummy-2_last_failure_0
+unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (Invalid parameter) | httpd_last_failure_0
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
=#=#=#= End test: Output of partially active resources, filtered by node (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output of partially active resources, filtered by node (XML)
=#=#=#= Begin test: Output of active unmanaged resource on offline node =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 2 nodes configured
* 3 resource instances configured
*** Resource management is DISABLED ***
The cluster will not attempt to start, stop or recover services
Node List:
* Online: [ cluster01 ]
* OFFLINE: [ cluster02 ]
Active Resources:
* Fencing (stonith:fence_xvm): Started cluster01 (maintenance)
* rsc1 (ocf:pacemaker:Dummy): Started cluster01 (maintenance)
* rsc2 (ocf:pacemaker:Dummy): Started cluster02 (maintenance)
=#=#=#= End test: Output of active unmanaged resource on offline node - OK (0) =#=#=#=
* Passed: crm_mon - Output of active unmanaged resource on offline node
=#=#=#= Begin test: Output of active unmanaged resource on offline node (XML) =#=#=#=
-
-
-
+
+
+
-
+
=#=#=#= End test: Output of active unmanaged resource on offline node (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output of active unmanaged resource on offline node (XML)
=#=#=#= Begin test: Brief text output of active unmanaged resource on offline node =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 2 nodes configured
* 3 resource instances configured
*** Resource management is DISABLED ***
The cluster will not attempt to start, stop or recover services
Node List:
* Online: [ cluster01 ]
* OFFLINE: [ cluster02 ]
Active Resources:
* 1 (ocf:pacemaker:Dummy): Active cluster01
* 1 (ocf:pacemaker:Dummy): Active cluster02
* 1 (stonith:fence_xvm): Active cluster01
=#=#=#= End test: Brief text output of active unmanaged resource on offline node - OK (0) =#=#=#=
* Passed: crm_mon - Brief text output of active unmanaged resource on offline node
=#=#=#= Begin test: Brief text output of active unmanaged resource on offline node, grouped by node =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 2 nodes configured
* 3 resource instances configured
*** Resource management is DISABLED ***
The cluster will not attempt to start, stop or recover services
Node List:
* Node cluster01: online:
* Resources:
* 1 (ocf:pacemaker:Dummy): Active
* 1 (stonith:fence_xvm): Active
* Node cluster02: OFFLINE:
* Resources:
* 1 (ocf:pacemaker:Dummy): Active
=#=#=#= End test: Brief text output of active unmanaged resource on offline node, grouped by node - OK (0) =#=#=#=
* Passed: crm_mon - Brief text output of active unmanaged resource on offline node, grouped by node
=#=#=#= Begin test: Output of all resources with maintenance-mode enabled =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
*** Resource management is DISABLED ***
The cluster will not attempt to start, stop or recover services
Node List:
* GuestNode httpd-bundle-0: maintenance
* GuestNode httpd-bundle-1: maintenance
* Online: [ cluster01 cluster02 ]
Full List of Resources:
* Clone Set: ping-clone [ping] (maintenance):
* ping (ocf:pacemaker:ping): Started cluster02 (maintenance)
* ping (ocf:pacemaker:ping): Started cluster01 (maintenance)
* Fencing (stonith:fence_xvm): Started cluster01 (maintenance)
* dummy (ocf:pacemaker:Dummy): Started cluster02 (maintenance)
* Clone Set: inactive-clone [inactive-dhcpd] (disabled, maintenance):
* Stopped (disabled): [ cluster01 cluster02 ]
* Resource Group: inactive-group (disabled, maintenance):
* inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled, maintenance)
* inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled, maintenance)
* Container bundle set: httpd-bundle [pcmk:http] (maintenance):
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 (maintenance)
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 (maintenance)
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped (maintenance)
* Resource Group: exim-group (maintenance):
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02 (maintenance)
* Email (lsb:exim): Started cluster02 (maintenance)
* Clone Set: mysql-clone-group [mysql-group] (maintenance):
* Resource Group: mysql-group:0 (maintenance):
* mysql-proxy (lsb:mysql-proxy): Started cluster02 (maintenance)
* Resource Group: mysql-group:1 (maintenance):
* mysql-proxy (lsb:mysql-proxy): Started cluster01 (maintenance)
* Clone Set: promotable-clone [promotable-rsc] (promotable, maintenance):
* promotable-rsc (ocf:pacemaker:Stateful): Promoted cluster02 (maintenance)
* promotable-rsc (ocf:pacemaker:Stateful): Unpromoted cluster01 (maintenance)
=#=#=#= End test: Output of all resources with maintenance-mode enabled - OK (0) =#=#=#=
* Passed: crm_mon - Output of all resources with maintenance-mode enabled
=#=#=#= Begin test: Output of all resources with maintenance-mode enabled (XML) =#=#=#=
-
-
+
+
-
-
+
+
-
+
-
+
-
-
+
+
-
-
-
-
+
+
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
+
-
+
=#=#=#= End test: Output of all resources with maintenance-mode enabled (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output of all resources with maintenance-mode enabled (XML)
=#=#=#= Begin test: Output of all resources with maintenance enabled for a node =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* Node cluster02: maintenance
* GuestNode httpd-bundle-1: maintenance
* Online: [ cluster01 ]
* GuestOnline: [ httpd-bundle-0 ]
Full List of Resources:
* Clone Set: ping-clone [ping]:
* ping (ocf:pacemaker:ping): Started cluster02 (maintenance)
* Started: [ cluster01 ]
* Fencing (stonith:fence_xvm): Started cluster01
* dummy (ocf:pacemaker:Dummy): Started cluster02 (maintenance)
* Clone Set: inactive-clone [inactive-dhcpd] (disabled):
* Stopped (disabled): [ cluster01 cluster02 ]
* Resource Group: inactive-group (disabled):
* inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled)
* inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled)
* Container bundle set: httpd-bundle [pcmk:http]:
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 (maintenance)
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped
* Resource Group: exim-group:
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02 (maintenance)
* Email (lsb:exim): Started cluster02 (maintenance)
* Clone Set: mysql-clone-group [mysql-group]:
* Resource Group: mysql-group:0:
* mysql-proxy (lsb:mysql-proxy): Started cluster02 (maintenance)
* Started: [ cluster01 ]
* Clone Set: promotable-clone [promotable-rsc] (promotable):
* promotable-rsc (ocf:pacemaker:Stateful): Promoted cluster02 (maintenance)
* Unpromoted: [ cluster01 ]
=#=#=#= End test: Output of all resources with maintenance enabled for a node - OK (0) =#=#=#=
* Passed: crm_mon - Output of all resources with maintenance enabled for a node
=#=#=#= Begin test: Output of all resources with maintenance enabled for a node (XML) =#=#=#=
-
-
+
+
-
-
+
+
-
+
-
+
-
-
+
+
-
-
-
-
+
+
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
+
-
+
=#=#=#= End test: Output of all resources with maintenance enabled for a node (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output of all resources with maintenance enabled for a node (XML)
=#=#=#= Begin test: Output of all resources with maintenance meta attribute true =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cluster02 (version) - partition with quorum
* Last updated:
* Last change:
* 5 nodes configured
* 32 resource instances configured (4 DISABLED)
Node List:
* GuestNode httpd-bundle-0: maintenance
* GuestNode httpd-bundle-1: maintenance
* Online: [ cluster01 cluster02 ]
Full List of Resources:
* Clone Set: ping-clone [ping] (maintenance):
* ping (ocf:pacemaker:ping): Started cluster02 (maintenance)
* ping (ocf:pacemaker:ping): Started cluster01 (maintenance)
* Fencing (stonith:fence_xvm): Started cluster01
* dummy (ocf:pacemaker:Dummy): Started cluster02 (maintenance)
* Clone Set: inactive-clone [inactive-dhcpd] (disabled, maintenance):
* Stopped (disabled): [ cluster01 cluster02 ]
* Resource Group: inactive-group (disabled, maintenance):
* inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled, maintenance)
* inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled, maintenance)
* Container bundle set: httpd-bundle [pcmk:http] (maintenance):
* httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 (maintenance)
* httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 (maintenance)
* httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped (maintenance)
* Resource Group: exim-group (maintenance):
* Public-IP (ocf:heartbeat:IPaddr): Started cluster02 (maintenance)
* Email (lsb:exim): Started cluster02 (maintenance)
* Clone Set: mysql-clone-group [mysql-group] (maintenance):
* Resource Group: mysql-group:0 (maintenance):
* mysql-proxy (lsb:mysql-proxy): Started cluster02 (maintenance)
* Resource Group: mysql-group:1 (maintenance):
* mysql-proxy (lsb:mysql-proxy): Started cluster01 (maintenance)
* Clone Set: promotable-clone [promotable-rsc] (promotable, maintenance):
* promotable-rsc (ocf:pacemaker:Stateful): Promoted cluster02 (maintenance)
* promotable-rsc (ocf:pacemaker:Stateful): Unpromoted cluster01 (maintenance)
=#=#=#= End test: Output of all resources with maintenance meta attribute true - OK (0) =#=#=#=
* Passed: crm_mon - Output of all resources with maintenance meta attribute true
=#=#=#= Begin test: Output of all resources with maintenance meta attribute true (XML) =#=#=#=
-
-
+
+
-
-
+
+
-
+
-
+
-
-
+
+
-
-
-
-
+
+
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
+
-
+
=#=#=#= End test: Output of all resources with maintenance meta attribute true (XML) - OK (0) =#=#=#=
* Passed: crm_mon - Output of all resources with maintenance meta attribute true (XML)
=#=#=#= Begin test: Text output of guest node's container on different node from its remote resource =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cent7-host2 (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 10 resource instances configured
Node List:
* Online: [ cent7-host1 cent7-host2 ]
* GuestOnline: [ httpd-bundle1-0 httpd-bundle2-0 ]
Active Resources:
* Resource Group: group1:
* dummy1 (ocf:pacemaker:Dummy): Started cent7-host1
* Resource Group: group2:
* dummy2 (ocf:pacemaker:Dummy): Started cent7-host2
* Container bundle: httpd-bundle1 [pcmktest:http]:
* httpd-bundle1-0 (192.168.20.188) (ocf:heartbeat:apache): Started cent7-host1
* Container bundle: httpd-bundle2 [pcmktest:http]:
* httpd-bundle2-0 (192.168.20.190) (ocf:heartbeat:apache): Started cent7-host2
=#=#=#= End test: Text output of guest node's container on different node from its remote resource - OK (0) =#=#=#=
* Passed: crm_mon - Text output of guest node's container on different node from its remote resource
=#=#=#= Begin test: Complete text output of guest node's container on different node from its remote resource =#=#=#=
Cluster Summary:
* Stack: corosync
* Current DC: cent7-host2 (3232262829) (version) - partition with quorum
* Last updated:
* Last change:
* 4 nodes configured
* 10 resource instances configured
Node List:
* Node cent7-host1 (3232262828): online, feature set <3.15.1
* Node cent7-host2 (3232262829): online, feature set <3.15.1
* GuestNode httpd-bundle1-0@cent7-host1: online
* GuestNode httpd-bundle2-0@cent7-host2: online
Active Resources:
* Resource Group: group1:
* dummy1 (ocf:pacemaker:Dummy): Started cent7-host1
* Resource Group: group2:
* dummy2 (ocf:pacemaker:Dummy): Started cent7-host2
* Container bundle: httpd-bundle1 [pcmktest:http]:
* httpd-bundle1-ip-192.168.20.188 (ocf:heartbeat:IPaddr2): Started cent7-host1
* httpd1 (ocf:heartbeat:apache): Started httpd-bundle1-0
* httpd-bundle1-docker-0 (ocf:heartbeat:docker): Started cent7-host1
* httpd-bundle1-0 (ocf:pacemaker:remote): Started cent7-host2
* Container bundle: httpd-bundle2 [pcmktest:http]:
* httpd-bundle2-ip-192.168.20.190 (ocf:heartbeat:IPaddr2): Started cent7-host2
* httpd2 (ocf:heartbeat:apache): Started httpd-bundle2-0
* httpd-bundle2-docker-0 (ocf:heartbeat:docker): Started cent7-host2
* httpd-bundle2-0 (ocf:pacemaker:remote): Started cent7-host2
=#=#=#= End test: Complete text output of guest node's container on different node from its remote resource - OK (0) =#=#=#=
* Passed: crm_mon - Complete text output of guest node's container on different node from its remote resource
diff --git a/cts/scheduler/summary/failed-probe-clone.summary b/cts/scheduler/summary/failed-probe-clone.summary
index febee14400..90d4b790ff 100644
--- a/cts/scheduler/summary/failed-probe-clone.summary
+++ b/cts/scheduler/summary/failed-probe-clone.summary
@@ -1,48 +1,48 @@
Current cluster status:
* Node List:
* Online: [ cluster01 cluster02 ]
* Full List of Resources:
* Fencing (stonith:fence_xvm): Started cluster01
* Clone Set: ping-1-clone [ping-1]:
- * Stopped (not installed): [ cluster01 cluster02 ]
+ * Stopped (Not installed): [ cluster01 cluster02 ]
* Clone Set: ping-2-clone [ping-2]:
* Stopped: [ cluster02 ]
- * Stopped (not installed): [ cluster01 ]
+ * Stopped (Not installed): [ cluster01 ]
* Clone Set: ping-3-clone [ping-3]:
* ping-3 (ocf:pacemaker:ping): FAILED cluster01
- * Stopped (not installed): [ cluster02 ]
+ * Stopped (Not installed): [ cluster02 ]
Transition Summary:
* Start ping-2:0 ( cluster02 )
* Stop ping-3:0 ( cluster01 ) due to node availability
Executing Cluster Transition:
* Cluster action: clear_failcount for ping-1 on cluster02
* Cluster action: clear_failcount for ping-1 on cluster01
* Cluster action: clear_failcount for ping-2 on cluster02
* Cluster action: clear_failcount for ping-2 on cluster01
* Pseudo action: ping-2-clone_start_0
* Cluster action: clear_failcount for ping-3 on cluster01
* Cluster action: clear_failcount for ping-3 on cluster02
* Pseudo action: ping-3-clone_stop_0
* Resource action: ping-2 start on cluster02
* Pseudo action: ping-2-clone_running_0
* Resource action: ping-3 stop on cluster01
* Pseudo action: ping-3-clone_stopped_0
* Resource action: ping-2 monitor=10000 on cluster02
Revised Cluster Status:
* Node List:
* Online: [ cluster01 cluster02 ]
* Full List of Resources:
* Fencing (stonith:fence_xvm): Started cluster01
* Clone Set: ping-1-clone [ping-1]:
- * Stopped (not installed): [ cluster01 cluster02 ]
+ * Stopped (Not installed): [ cluster01 cluster02 ]
* Clone Set: ping-2-clone [ping-2]:
* Started: [ cluster02 ]
- * Stopped (not installed): [ cluster01 ]
+ * Stopped (Not installed): [ cluster01 ]
* Clone Set: ping-3-clone [ping-3]:
* Stopped: [ cluster01 ]
- * Stopped (not installed): [ cluster02 ]
+ * Stopped (Not installed): [ cluster02 ]
diff --git a/cts/scheduler/summary/failed-probe-primitive.summary b/cts/scheduler/summary/failed-probe-primitive.summary
index ea8edae494..ead43e7591 100644
--- a/cts/scheduler/summary/failed-probe-primitive.summary
+++ b/cts/scheduler/summary/failed-probe-primitive.summary
@@ -1,27 +1,27 @@
Current cluster status:
* Node List:
* Online: [ cluster01 cluster02 ]
* Full List of Resources:
* Fencing (stonith:fence_xvm): Started cluster01
- * dummy-1 (ocf:pacemaker:Dummy): Stopped (not installed)
- * dummy-2 (ocf:pacemaker:Dummy): Stopped (not installed)
+ * dummy-1 (ocf:pacemaker:Dummy): Stopped (Not installed)
+ * dummy-2 (ocf:pacemaker:Dummy): Stopped (Not installed)
* dummy-3 (ocf:pacemaker:Dummy): FAILED cluster01
Transition Summary:
* Start dummy-2 ( cluster02 )
* Stop dummy-3 ( cluster01 ) due to node availability
Executing Cluster Transition:
* Resource action: dummy-2 start on cluster02
* Resource action: dummy-3 stop on cluster01
Revised Cluster Status:
* Node List:
* Online: [ cluster01 cluster02 ]
* Full List of Resources:
* Fencing (stonith:fence_xvm): Started cluster01
- * dummy-1 (ocf:pacemaker:Dummy): Stopped (not installed)
+ * dummy-1 (ocf:pacemaker:Dummy): Stopped (Not installed)
* dummy-2 (ocf:pacemaker:Dummy): Started cluster02
- * dummy-3 (ocf:pacemaker:Dummy): Stopped (not installed)
+ * dummy-3 (ocf:pacemaker:Dummy): Stopped (Not installed)
diff --git a/cts/scheduler/summary/multiply-active-stonith.summary b/cts/scheduler/summary/multiply-active-stonith.summary
index ec37de03b0..f86750fbc6 100644
--- a/cts/scheduler/summary/multiply-active-stonith.summary
+++ b/cts/scheduler/summary/multiply-active-stonith.summary
@@ -1,28 +1,28 @@
Using the original execution date of: 2018-05-09 09:54:39Z
Current cluster status:
* Node List:
* Node node2: UNCLEAN (online)
* Online: [ node1 node3 ]
* Full List of Resources:
* fencer (stonith:fence_ipmilan): Started [ node2 node3 ]
* rsc1 (lsb:rsc1): FAILED node2
Transition Summary:
* Fence (reboot) node2 'rsc1 failed there'
* Stop rsc1 ( node2 ) due to node availability
Executing Cluster Transition:
* Resource action: fencer monitor=60000 on node3
* Fencing node2 (reboot)
* Pseudo action: rsc1_stop_0
Using the original execution date of: 2018-05-09 09:54:39Z
Revised Cluster Status:
* Node List:
* Online: [ node1 node3 ]
* OFFLINE: [ node2 ]
* Full List of Resources:
* fencer (stonith:fence_ipmilan): Started node3
- * rsc1 (lsb:rsc1): Stopped (not installed)
+ * rsc1 (lsb:rsc1): Stopped (Not installed)
diff --git a/cts/scheduler/summary/promoted-demote.summary b/cts/scheduler/summary/promoted-demote.summary
index c0406425bd..d5955c22e5 100644
--- a/cts/scheduler/summary/promoted-demote.summary
+++ b/cts/scheduler/summary/promoted-demote.summary
@@ -1,70 +1,70 @@
Current cluster status:
* Node List:
* Online: [ cxa1 cxb1 ]
* Full List of Resources:
* cyrus_address (ocf:heartbeat:IPaddr2): Started cxa1
* cyrus_master (ocf:heartbeat:cyrus-imap): Stopped
* cyrus_syslogd (ocf:heartbeat:syslogd): Stopped
* cyrus_filesys (ocf:heartbeat:Filesystem): Stopped
- * cyrus_volgroup (ocf:heartbeat:VolGroup): Stopped (not installed)
+ * cyrus_volgroup (ocf:heartbeat:VolGroup): Stopped (Not installed)
* Clone Set: cyrus_drbd [cyrus_drbd_node] (promotable):
* Promoted: [ cxa1 ]
* Unpromoted: [ cxb1 ]
* named_address (ocf:heartbeat:IPaddr2): Started cxa1
* named_filesys (ocf:heartbeat:Filesystem): Stopped
- * named_volgroup (ocf:heartbeat:VolGroup): Stopped (not installed)
+ * named_volgroup (ocf:heartbeat:VolGroup): Stopped (Not installed)
* named_daemon (ocf:heartbeat:recursor): Stopped
* named_syslogd (ocf:heartbeat:syslogd): Stopped
* Clone Set: named_drbd [named_drbd_node] (promotable):
* Unpromoted: [ cxa1 cxb1 ]
* Clone Set: pingd_clone [pingd_node]:
* Started: [ cxa1 cxb1 ]
* Clone Set: fence_clone [fence_node]:
* Started: [ cxa1 cxb1 ]
Transition Summary:
* Move named_address ( cxa1 -> cxb1 )
* Promote named_drbd_node:1 ( Unpromoted -> Promoted cxb1 )
Executing Cluster Transition:
* Resource action: named_address stop on cxa1
* Pseudo action: named_drbd_pre_notify_promote_0
* Resource action: named_address start on cxb1
* Resource action: named_drbd_node:1 notify on cxa1
* Resource action: named_drbd_node:0 notify on cxb1
* Pseudo action: named_drbd_confirmed-pre_notify_promote_0
* Pseudo action: named_drbd_promote_0
* Resource action: named_drbd_node:0 promote on cxb1
* Pseudo action: named_drbd_promoted_0
* Pseudo action: named_drbd_post_notify_promoted_0
* Resource action: named_drbd_node:1 notify on cxa1
* Resource action: named_drbd_node:0 notify on cxb1
* Pseudo action: named_drbd_confirmed-post_notify_promoted_0
* Resource action: named_drbd_node:0 monitor=10000 on cxb1
Revised Cluster Status:
* Node List:
* Online: [ cxa1 cxb1 ]
* Full List of Resources:
* cyrus_address (ocf:heartbeat:IPaddr2): Started cxa1
* cyrus_master (ocf:heartbeat:cyrus-imap): Stopped
* cyrus_syslogd (ocf:heartbeat:syslogd): Stopped
* cyrus_filesys (ocf:heartbeat:Filesystem): Stopped
- * cyrus_volgroup (ocf:heartbeat:VolGroup): Stopped (not installed)
+ * cyrus_volgroup (ocf:heartbeat:VolGroup): Stopped (Not installed)
* Clone Set: cyrus_drbd [cyrus_drbd_node] (promotable):
* Promoted: [ cxa1 ]
* Unpromoted: [ cxb1 ]
* named_address (ocf:heartbeat:IPaddr2): Started cxb1
* named_filesys (ocf:heartbeat:Filesystem): Stopped
- * named_volgroup (ocf:heartbeat:VolGroup): Stopped (not installed)
+ * named_volgroup (ocf:heartbeat:VolGroup): Stopped (Not installed)
* named_daemon (ocf:heartbeat:recursor): Stopped
* named_syslogd (ocf:heartbeat:syslogd): Stopped
* Clone Set: named_drbd [named_drbd_node] (promotable):
* Promoted: [ cxb1 ]
* Unpromoted: [ cxa1 ]
* Clone Set: pingd_clone [pingd_node]:
* Started: [ cxa1 cxb1 ]
* Clone Set: fence_clone [fence_node]:
* Started: [ cxa1 cxb1 ]
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 988a4e32d4..9f808972ca 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1,2426 +1,2426 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include // lrmd_event_data_t, lrmd_rsc_info_t, etc.
#include
#include
#include
#include
#include
#include
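/* START_DELAY_THRESHOLD is five minutes in milliseconds; MAX_LRM_REG_FAILS
 * caps how many times we retry connecting to the local executor before
 * giving up (see try_local_executor_connect() below)
 */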
#define START_DELAY_THRESHOLD 5 * 60 * 1000
#define MAX_LRM_REG_FAILS 30
struct delete_event_s {
int rc;
const char *rsc;
lrm_state_t *lrm_state;
};
static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state,
const xmlNode *rsc_op,
const char *rsc_id,
const char *operation);
static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
xmlNode *msg, struct ra_metadata_s *md);
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
int log_level);
static void
lrm_connection_destroy(void)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) {
crm_crit("Lost connection to local executor");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
}
}
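/* Build the key used to track a pending operation in the active-ops table
 * ("<rsc_id>:<call_id>")
 */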
static char *
make_stop_id(const char *rsc, int call_id)
{
return crm_strdup_printf("%s:%d", rsc, call_id);
}
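// GHashTable foreach callback: copy an entry unless it is a CRM meta-attribute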
static void
copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") == NULL) {
pcmk__insert_dup(user_data, (const char *) key, (const char *) value);
}
}
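// GHashTable foreach callback: copy an entry only if it is a CRM meta-attribute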
static void
copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") != NULL) {
pcmk__insert_dup(user_data, (const char *) key, (const char *) value);
}
}
/*!
* \internal
* \brief Remove a recurring operation from a resource's history
*
* \param[in,out] history Resource history to modify
* \param[in] op Operation to remove
*
* \return TRUE if the operation was found and removed, FALSE otherwise
*/
static gboolean
history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_event_data_t *existing = iter->data;
if ((op->interval_ms == existing->interval_ms)
&& pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none)
&& pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) {
history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
lrmd_free_event(existing);
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Free all recurring operations in resource history
*
* \param[in,out] history Resource history to modify
*/
static void
history_free_recurring_ops(rsc_history_t *history)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_free_event(iter->data);
}
g_list_free(history->recurring_op_list);
history->recurring_op_list = NULL;
}
/*!
* \internal
* \brief Free resource history
*
* \param[in,out] data Resource history entry to free
*/
void
history_free(gpointer data)
{
rsc_history_t *history = (rsc_history_t*)data;
if (history->stop_params) {
g_hash_table_destroy(history->stop_params);
}
/* Don't need to free history->rsc.id because it's set to history->id */
free(history->rsc.type);
free(history->rsc.standard);
free(history->rsc.provider);
lrmd_free_event(history->failed);
lrmd_free_event(history->last);
free(history->id);
history_free_recurring_ops(history);
free(history);
}
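/*!
 * \internal
 * \brief Update the cached resource history to reflect an executor event
 *
 * \param[in,out] lrm_state Executor state containing the history cache
 * \param[in] rsc Resource the event is for (if known)
 * \param[in] op Event to cache
 */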
static void
update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
int target_rc = 0;
rsc_history_t *entry = NULL;
if (op->rsc_deleted) {
crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
NULL, crmd_cib_smart_opt());
return;
}
if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
return;
}
crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
if (entry == NULL && rsc) {
entry = pcmk__assert_alloc(1, sizeof(rsc_history_t));
entry->id = pcmk__str_copy(op->rsc_id);
g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
entry->rsc.id = entry->id;
entry->rsc.type = pcmk__str_copy(rsc->type);
entry->rsc.standard = pcmk__str_copy(rsc->standard);
entry->rsc.provider = pcmk__str_copy(rsc->provider);
} else if (entry == NULL) {
crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
return;
}
entry->last_callid = op->call_id;
target_rc = rsc_op_expected_rc(op);
if (op->op_status == PCMK_EXEC_CANCELLED) {
if (op->interval_ms > 0) {
crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
history_remove_recurring_op(entry, op);
return;
} else {
crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
op->rsc_id, op->op_type, op->interval_ms, op->rc,
op->op_status);
}
} else if (did_rsc_op_fail(op, target_rc)) {
/* Store failed monitors here, otherwise the block below will cause them
* to be forgotten when a stop happens.
*/
if (entry->failed) {
lrmd_free_event(entry->failed);
}
entry->failed = lrmd_copy_event(op);
} else if (op->interval_ms == 0) {
if (entry->last) {
lrmd_free_event(entry->last);
}
entry->last = lrmd_copy_event(op);
if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START,
PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT,
PCMK_ACTION_MONITOR, NULL)) {
if (entry->stop_params) {
g_hash_table_destroy(entry->stop_params);
}
entry->stop_params = pcmk__strkey_table(free, free);
g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
}
}
if (op->interval_ms > 0) {
/* Ensure there are no duplicates */
history_remove_recurring_op(entry, op);
crm_trace("Adding recurring op: " PCMK__OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
} else if ((entry->recurring_op_list != NULL)
&& !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
g_list_length(entry->recurring_op_list), op->rsc_id,
op->op_type, op->interval_ms);
history_free_recurring_ops(entry);
}
}
/*!
* \internal
* \brief Send a direct OK ack for a resource task
*
* \param[in] lrm_state LRM connection
* \param[in] input Input message being ack'ed
* \param[in] rsc_id ID of affected resource
* \param[in] rsc Affected resource (if available)
* \param[in] task Operation task being ack'ed
* \param[in] ack_host Name of host to send ack to
* \param[in] ack_sys IPC system name to ack
*/
static void
send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input,
const char *rsc_id, const lrmd_rsc_info_t *rsc,
const char *task, const char *ack_host, const char *ack_sys)
{
lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id);
lrmd_free_event(op);
}
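// Return the name of the node that an executor event occurred on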
static inline const char *
op_node_name(lrmd_event_data_t *op)
{
return pcmk__s(op->remote_nodename,
controld_globals.cluster->priv->node_name);
}
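/*!
 * \brief Process an event from an executor connection
 *
 * \param[in,out] op Event to process
 */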
void
lrm_op_callback(lrmd_event_data_t * op)
{
CRM_CHECK(op != NULL, return);
switch (op->type) {
case lrmd_event_disconnect:
if (op->remote_nodename == NULL) {
/* If this is the local executor IPC connection, set the right
* bits in the controller when the connection goes down.
*/
lrm_connection_destroy();
}
break;
case lrmd_event_exec_complete:
{
lrm_state_t *lrm_state =
controld_get_executor_state(op_node_name(op), false);
pcmk__assert(lrm_state != NULL);
process_lrm_event(lrm_state, op, NULL, NULL);
}
break;
default:
break;
}
}
static void
try_local_executor_connect(long long action, fsa_data_t *msg_data,
lrm_state_t *lrm_state)
{
int rc = pcmk_rc_ok;
crm_debug("Connecting to the local executor");
// If we can connect, great
rc = controld_connect_local_executor(lrm_state);
if (rc == pcmk_rc_ok) {
controld_set_fsa_input_flags(R_LRM_CONNECTED);
crm_info("Connection to the local executor established");
return;
}
// Otherwise, if we can try again, set a timer to do so
if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
crm_warn("Failed to connect to the local executor %d time%s "
"(%d max): %s", lrm_state->num_lrm_register_fails,
pcmk__plural_s(lrm_state->num_lrm_register_fails),
MAX_LRM_REG_FAILS, pcmk_rc_str(rc));
controld_start_wait_timer();
crmd_fsa_stall(FALSE);
return;
}
// Otherwise give up
crm_err("Failed to connect to the executor the max allowed "
"%d time%s: %s", lrm_state->num_lrm_register_fails,
pcmk__plural_s(lrm_state->num_lrm_register_fails),
pcmk_rc_str(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
/* A_LRM_CONNECT */
void
do_lrm_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* This only pertains to local executor connections. Remote connections are
* handled as resources within the scheduler. Connecting and disconnecting
* from remote executor instances is handled differently.
*/
lrm_state_t *lrm_state = NULL;
if (controld_globals.cluster->priv->node_name == NULL) {
return; // Shouldn't be possible
}
lrm_state = controld_get_executor_state(NULL, true);
if (lrm_state == NULL) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
if (action & A_LRM_DISCONNECT) {
if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
if (action == A_LRM_DISCONNECT) {
crmd_fsa_stall(FALSE);
return;
}
}
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
lrm_state_disconnect(lrm_state);
lrm_state_reset_tables(lrm_state, FALSE);
}
if (action & A_LRM_CONNECT) {
try_local_executor_connect(action, msg_data, lrm_state);
}
if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action),
__func__);
}
}
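/*!
 * \internal
 * \brief Check for active resources and pending operations before disconnect
 *
 * \return TRUE if disconnecting is safe, or FALSE if the caller should wait
 */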
static gboolean
lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
{
int counter = 0;
gboolean rc = TRUE;
const char *when = "lrm disconnect";
GHashTableIter gIter;
const char *key = NULL;
rsc_history_t *entry = NULL;
active_op_t *pending = NULL;
crm_debug("Checking for active resources before exit");
if (cur_state == S_TERMINATE) {
log_level = LOG_ERR;
when = "shutdown";
} else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
when = "shutdown... waiting";
}
if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) {
guint removed = g_hash_table_foreach_remove(lrm_state->active_ops,
stop_recurring_actions,
lrm_state);
guint nremaining = g_hash_table_size(lrm_state->active_ops);
if (removed || nremaining) {
crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
removed, pcmk__plural_s(removed), when, nremaining);
}
}
if (lrm_state->active_ops != NULL) {
g_hash_table_iter_init(&gIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
/* Ignore recurring actions in the shutdown calculations */
if (pending->interval_ms == 0) {
counter++;
}
}
}
if (counter > 0) {
do_crm_log(log_level, "%d pending executor operation%s at %s",
counter, pcmk__plural_s(counter), when);
if ((cur_state == S_TERMINATE)
|| !pcmk_is_set(controld_globals.fsa_input_register,
R_SENT_RSC_STOP)) {
g_hash_table_iter_init(&gIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
}
} else {
rc = FALSE;
}
return rc;
}
if (lrm_state->resource_history == NULL) {
return rc;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
/* At this point we're not waiting, we're just shutting down */
when = "shutdown";
}
counter = 0;
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
if (is_rsc_active(lrm_state, entry->id) == FALSE) {
continue;
}
counter++;
if (log_level == LOG_ERR) {
crm_info("Found %s active at %s", entry->id, when);
} else {
crm_trace("Found %s active at %s", entry->id, when);
}
if (lrm_state->active_ops != NULL) {
GHashTableIter hIter;
g_hash_table_iter_init(&hIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) {
crm_notice("%sction %s (%s) incomplete at %s",
pending->interval_ms == 0 ? "A" : "Recurring a",
key, pending->op_key, when);
}
}
}
}
if (counter) {
crm_err("%d resource%s active at %s",
counter, (counter == 1)? " was" : "s were", when);
}
return rc;
}
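// Check whether a resource is active, according to its cached history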
static gboolean
is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
{
rsc_history_t *entry = NULL;
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->last == NULL) {
return FALSE;
}
crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
entry->last->interval_ms, entry->last->rc);
if ((entry->last->rc == PCMK_OCF_OK)
&& pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP,
pcmk__str_casei)) {
return FALSE;
} else if (entry->last->rc == PCMK_OCF_OK
&& pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO,
pcmk__str_casei)) {
// A stricter check is too complex ... leave that to the scheduler
return FALSE;
} else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
return FALSE;
} else if ((entry->last->interval_ms == 0)
&& (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
/* Badly configured resources can't be reliably stopped */
return FALSE;
}
return TRUE;
}
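// Add an XML history entry for each cached resource to the given XML list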
static gboolean
build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
{
GHashTableIter iter;
rsc_history_t *entry = NULL;
g_hash_table_iter_init(&iter, lrm_state->resource_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
GList *gIter = NULL;
xmlNode *xml_rsc = pcmk__xe_create(rsc_list, PCMK__XE_LRM_RESOURCE);
crm_xml_add(xml_rsc, PCMK_XA_ID, entry->id);
crm_xml_add(xml_rsc, PCMK_XA_TYPE, entry->rsc.type);
crm_xml_add(xml_rsc, PCMK_XA_CLASS, entry->rsc.standard);
crm_xml_add(xml_rsc, PCMK_XA_PROVIDER, entry->rsc.provider);
if (entry->last && entry->last->params) {
static const char *name = CRM_META "_" PCMK__META_CONTAINER;
const char *container = g_hash_table_lookup(entry->last->params,
name);
if (container) {
crm_trace("Resource %s is a part of container resource %s", entry->id, container);
crm_xml_add(xml_rsc, PCMK__META_CONTAINER, container);
}
}
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed,
lrm_state->node_name);
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last,
lrm_state->node_name);
for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data,
lrm_state->node_name);
}
}
return FALSE;
}
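/*!
 * \brief Build a node state update (including resource history) for the CIB
 *
 * \return Newly created node state XML, or NULL on error
 */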
xmlNode *
controld_query_executor_state(void)
{
xmlNode *xml_state = NULL;
xmlNode *xml_data = NULL;
xmlNode *rsc_list = NULL;
pcmk__node_status_t *peer = NULL;
lrm_state_t *lrm_state = controld_get_executor_state(NULL, false);
if (!lrm_state) {
crm_err("Could not get executor state for local node");
return NULL;
}
peer = pcmk__get_node(0, lrm_state->node_name, NULL, pcmk__node_search_any);
CRM_CHECK(peer != NULL, return NULL);
xml_state = create_node_state_update(peer,
node_update_cluster|node_update_peer,
NULL, __func__);
if (xml_state == NULL) {
return NULL;
}
xml_data = pcmk__xe_create(xml_state, PCMK__XE_LRM);
crm_xml_add(xml_data, PCMK_XA_ID, peer->xml_id);
rsc_list = pcmk__xe_create(xml_data, PCMK__XE_LRM_RESOURCES);
/* Build a list of active (not always running) resources */
build_active_RAs(lrm_state, rsc_list);
crm_log_xml_trace(xml_state, "Current executor state");
return xml_state;
}
/*!
* \internal
* \brief Map standard Pacemaker return code to operation status and OCF code
*
* \param[out] event Executor event whose status and return code should be set
* \param[in] rc Standard Pacemaker return code
*/
void
controld_rc2event(lrmd_event_data_t *event, int rc)
{
/* This is called for cleanup requests from controller peers/clients, not
* for resource actions, so no exit reason is needed.
*/
switch (rc) {
case pcmk_rc_ok:
lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
break;
case EACCES:
lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV,
PCMK_EXEC_ERROR, NULL);
break;
default:
lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
NULL);
break;
}
}
/*!
* \internal
* \brief Trigger a new transition after CIB status was deleted
*
* If a CIB status delete was not expected (as part of the transition graph),
* trigger a new transition by updating the (arbitrary) "last-lrm-refresh"
* cluster property.
*
* \param[in] from_sys IPC name that requested the delete
* \param[in] rsc_id Resource whose status was deleted (for logging only)
*/
void
controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id)
{
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
char *now_s = crm_strdup_printf("%lld", (long long) time(NULL));
crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id);
cib__update_node_attr(controld_globals.logger_out,
controld_globals.cib_conn, cib_none,
PCMK_XE_CRM_CONFIG, NULL, NULL, NULL, NULL,
"last-lrm-refresh", now_s, NULL, NULL);
free(now_s);
}
}
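// Notify the requester of a resource deletion's result with a direct ack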
static void
notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
{
lrmd_event_data_t *op = NULL;
const char *from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM);
const char *from_host = crm_element_value(input->msg, PCMK__XA_SRC);
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "localhost"), rsc_id,
((rc == pcmk_ok)? "" : " not"));
op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE);
controld_rc2event(op, pcmk_legacy2rc(rc));
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
controld_trigger_delete_refresh(from_sys, rsc_id);
}
static gboolean
lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
{
struct delete_event_s *event = user_data;
struct pending_deletion_op_s *op = value;
if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) {
notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
return TRUE;
}
return FALSE;
}
static gboolean
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
{
const char *rsc = user_data;
active_op_t *pending = value;
if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) {
crm_info("Removing op %s:%d for deleted resource %s",
pending->op_key, pending->call_id, rsc);
return TRUE;
}
return FALSE;
}
static void
delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input,
const char *rsc_id, GHashTableIter *rsc_iter, int rc,
const char *user_name, bool from_cib)
{
struct delete_event_s event;
CRM_CHECK(rsc_id != NULL, return);
if (rc == pcmk_ok) {
char *rsc_id_copy = pcmk__str_copy(rsc_id);
if (rsc_iter) {
g_hash_table_iter_remove(rsc_iter);
} else {
g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
}
if (from_cib) {
controld_delete_resource_history(rsc_id_copy, lrm_state->node_name,
user_name, crmd_cib_smart_opt());
}
g_hash_table_foreach_remove(lrm_state->active_ops,
lrm_remove_deleted_op, rsc_id_copy);
free(rsc_id_copy);
}
if (input) {
notify_deleted(lrm_state, input, rsc_id, rc);
}
event.rc = rc;
event.rsc = rsc_id;
event.lrm_state = lrm_state;
g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
}
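/* Check whether a history entry's last failure matches the given operation
 * name and interval (a NULL operation matches any failure)
 */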
static inline gboolean
last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
{
if (entry == NULL) {
return FALSE;
}
if (op == NULL) {
return TRUE;
}
return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei)
&& (interval_ms == entry->failed->interval_ms));
}
/*!
* \internal
* \brief Clear a resource's last failure
*
* Erase a resource's last failure on a particular node from both the
* LRM resource history in the CIB, and the resource history remembered
* for the LRM state.
*
* \param[in] rsc_id Resource name
* \param[in] node_name Node name
* \param[in] operation If specified, only clear if matching this operation
* \param[in] interval_ms If operation is specified, it has this interval
*/
void
lrm_clear_last_failure(const char *rsc_id, const char *node_name,
const char *operation, guint interval_ms)
{
lrm_state_t *lrm_state = controld_get_executor_state(node_name, false);
if (lrm_state == NULL) {
return;
}
if (lrm_state->resource_history != NULL) {
rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
rsc_id);
if (last_failed_matches_op(entry, operation, interval_ms)) {
lrmd_free_event(entry->failed);
entry->failed = NULL;
}
}
}
/* Returns TRUE if cancellation of the operation is in progress */
static gboolean
cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
{
int rc = pcmk_ok;
char *local_key = NULL;
active_op_t *pending = NULL;
CRM_CHECK(op != 0, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
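    /* If the caller did not supply a hash key, derive the standard stop ID
     * from the resource ID and call ID
     */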
if (key == NULL) {
local_key = make_stop_id(rsc_id, op);
key = local_key;
}
pending = g_hash_table_lookup(lrm_state->active_ops, key);
if (pending) {
if (remove && !pcmk_is_set(pending->flags, active_op_remove)) {
controld_set_active_op_flags(pending, active_op_remove);
crm_debug("Scheduling %s for removal", key);
}
if (pcmk_is_set(pending->flags, active_op_cancelled)) {
crm_debug("Operation %s already cancelled", key);
free(local_key);
return FALSE;
}
controld_set_active_op_flags(pending, active_op_cancelled);
} else {
crm_info("No pending op found for %s", key);
free(local_key);
return FALSE;
}
crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
pending->interval_ms);
if (rc == pcmk_ok) {
crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
free(local_key);
return TRUE;
}
crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
    /* The caller needs to make sure the entry is removed from the active
     * operations list, usually by returning TRUE from the worker function
     * supplied to g_hash_table_foreach_remove().
     *
     * Leaving the entry in the active operations list will block the node
     * from shutting down.
     */
free(local_key);
return FALSE;
}
struct cancel_data {
gboolean done;
gboolean remove;
const char *key;
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
static gboolean
cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct cancel_data *data = user_data;
active_op_t *op = value;
if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) {
data->done = TRUE;
remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
}
return remove;
}
static gboolean
cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
{
guint removed = 0;
struct cancel_data data;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(key != NULL, return FALSE);
data.key = key;
data.rsc = rsc;
data.done = FALSE;
data.remove = remove;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->active_ops,
cancel_action_by_key, &data);
crm_trace("Removed %u op cache entries, new size: %u",
removed, g_hash_table_size(lrm_state->active_ops));
return data.done;
}
/*!
* \internal
* \brief Retrieve resource information from LRM
*
* \param[in,out] lrm_state Executor connection state to use
* \param[in] rsc_xml XML containing resource configuration
* \param[in] do_create If true, register resource if not already
* \param[out] rsc_info Where to store information obtained from executor
*
* \retval pcmk_ok Success (and rsc_info holds newly allocated result)
* \retval -EINVAL Required information is missing from arguments
* \retval -ENOTCONN No active connection to LRM
* \retval -ENODEV Resource not found
* \retval -errno Error communicating with executor when registering resource
*
* \note Caller is responsible for freeing result on success.
*/
static int
get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml,
gboolean do_create, lrmd_rsc_info_t **rsc_info)
{
const char *id = pcmk__xe_id(rsc_xml);
CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
CRM_CHECK(id, return -EINVAL);
if (lrm_state_is_connected(lrm_state) == FALSE) {
return -ENOTCONN;
}
crm_trace("Retrieving resource information for %s from the executor", id);
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
// If resource isn't known by ID, try clone name, if provided
if (!*rsc_info) {
const char *long_id = crm_element_value(rsc_xml, PCMK__XA_LONG_ID);
if (long_id) {
*rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
}
}
if ((*rsc_info == NULL) && do_create) {
const char *class = crm_element_value(rsc_xml, PCMK_XA_CLASS);
const char *provider = crm_element_value(rsc_xml, PCMK_XA_PROVIDER);
const char *type = crm_element_value(rsc_xml, PCMK_XA_TYPE);
int rc;
crm_trace("Registering resource %s with the executor", id);
rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
lrmd_opt_drop_recurring);
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Could not register resource %s with the executor on %s: %s "
QB_XS " rc=%d",
id, lrm_state->node_name, pcmk_strerror(rc), rc);
/* Register this as an internal error if this involves the local
* executor. Otherwise, we're likely dealing with an unresponsive
* remote node, which is not an FSA failure.
*/
if (lrm_state_is_local(lrm_state) == TRUE) {
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
return rc;
}
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
}
return *rsc_info? pcmk_ok : -ENODEV;
}
static void
delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc,
GHashTableIter *iter, const char *sys, const char *user,
ha_msg_input_t *request, bool unregister, bool from_cib)
{
int rc = pcmk_ok;
crm_info("Removing resource %s from executor for %s%s%s",
id, sys, (user? " as " : ""), (user? user : ""));
if (rsc && unregister) {
rc = lrm_state_unregister_rsc(lrm_state, id, 0);
}
if (rc == pcmk_ok) {
crm_trace("Resource %s deleted from executor", id);
} else if (rc == -EINPROGRESS) {
crm_info("Deletion of resource '%s' from executor is pending", id);
if (request) {
struct pending_deletion_op_s *op = NULL;
char *ref = crm_element_value_copy(request->msg, PCMK_XA_REFERENCE);
op = pcmk__assert_alloc(1, sizeof(struct pending_deletion_op_s));
op->rsc = pcmk__str_copy(rsc->id);
op->input = copy_ha_msg_input(request);
g_hash_table_insert(lrm_state->deletion_ops, ref, op);
}
return;
} else {
crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
QB_XS " rc=%d", id, sys, (user? " as " : ""),
(user? user : ""), pcmk_strerror(rc), rc);
}
delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib);
}
static int
get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
{
int call_id = 999999999;
rsc_history_t *entry = NULL;
if(lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
    /* Make sure the call ID is greater than that of the last successful
     * operation; otherwise the failure cannot trigger recovery of the
     * resource, because it could appear to have occurred before the
     * successful start */
if (entry) {
call_id = entry->last_callid + 1;
}
if (call_id < 0) {
call_id = 1;
}
return call_id;
}
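/* Populate a synthesized operation result with a plausible call ID, the
 * current time, and the given execution status, exit code, and exit reason
 */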
static void
fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
enum ocf_exitcode op_exitcode, const char *exit_reason)
{
op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
lrmd__set_result(op, op_exitcode, op_status, exit_reason);
}
static void
force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node, bool reprobe_all_nodes)
{
GHashTableIter gIter;
rsc_history_t *entry = NULL;
crm_info("Clearing resource history on node %s", lrm_state->node_name);
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
        /* Only unregister the resource during a reprobe if it is not a remote
         * connection resource; otherwise, unregistering the connection would
         * terminate the remote node's membership */
bool unregister = true;
if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
unregister = false;
if (reprobe_all_nodes) {
lrm_state_t *remote_lrm_state =
controld_get_executor_state(entry->id, false);
if (remote_lrm_state != NULL) {
/* If reprobing all nodes, be sure to reprobe the remote
* node before clearing its connection resource
*/
force_reprobe(remote_lrm_state, from_sys, from_host,
user_name, TRUE, reprobe_all_nodes);
}
}
}
/* Don't delete from the CIB, since we'll delete the whole node's LRM
* state from the CIB soon
*/
delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys,
user_name, NULL, unregister, false);
}
/* Now delete the copy in the CIB */
controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
cib_none);
}
/*!
* \internal
* \brief Fail a requested action without actually executing it
*
* For an action that can't be executed, process it similarly to an actual
* execution result, with specified error status (except for notify actions,
* which will always be treated as successful).
*
* \param[in,out] lrm_state Executor connection that action is for
* \param[in] action Action XML from request
 * \param[in] op_status Desired operation status to use
 * \param[in] rc Desired return code to use
* \param[in] exit_reason Human-friendly detail, if error
*/
static void
synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action,
int op_status, enum ocf_exitcode rc,
const char *exit_reason)
{
lrmd_event_data_t *op = NULL;
const char *operation = crm_element_value(action, PCMK_XA_OPERATION);
const char *target_node = crm_element_value(action, PCMK__META_ON_NODE);
xmlNode *xml_rsc = pcmk__xe_first_child(action, PCMK_XE_PRIMITIVE, NULL,
NULL);
if ((xml_rsc == NULL) || (pcmk__xe_id(xml_rsc) == NULL)) {
/* @TODO Should we do something else, like direct ack? */
crm_info("Can't fake %s failure (%d) on %s without resource configuration",
crm_element_value(action, PCMK__XA_OPERATION_KEY), rc,
target_node);
return;
} else if(operation == NULL) {
/* This probably came from crm_resource -C, nothing to do */
crm_info("Can't fake %s failure (%d) on %s without operation",
pcmk__xe_id(xml_rsc), rc, target_node);
return;
}
op = construct_op(lrm_state, action, pcmk__xe_id(xml_rsc), operation);
if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
// Notifications can't fail
fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL);
} else {
fake_op_status(lrm_state, op, op_status, rc, exit_reason);
}
crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
// Process the result as if it came from the LRM
process_lrm_event(lrm_state, op, NULL, action);
lrmd_free_event(op);
}
/*!
* \internal
* \brief Get target of an LRM operation (replacing \p NULL with local node
* name)
*
* \param[in] xml LRM operation data XML
*
* \return LRM operation target node name (local node or Pacemaker Remote node)
*/
static const char *
lrm_op_target(const xmlNode *xml)
{
const char *target = NULL;
if (xml) {
target = crm_element_value(xml, PCMK__META_ON_NODE);
}
if (target == NULL) {
target = controld_globals.cluster->priv->node_name;
}
return target;
}
static void
fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
const char *from_host, const char *from_sys)
{
lrmd_event_data_t *op = NULL;
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = pcmk__xe_first_child(xml, PCMK_XE_PRIMITIVE, NULL, NULL);
CRM_CHECK(xml_rsc != NULL, return);
/* The executor simply executes operations and reports the results, without
* any concept of success or failure, so to fail a resource, we must fake
* what a failure looks like.
*
* To do this, we create a fake executor operation event for the resource,
* and pass that event to the executor client callback so it will be
* processed as if it came from the executor.
*/
op = construct_op(lrm_state, xml, pcmk__xe_id(xml_rsc), "asyncmon");
free((char*) op->user_data);
op->user_data = NULL;
op->interval_ms = 0;
if (user_name && !pcmk__is_privileged(user_name)) {
crm_err("%s does not have permission to fail %s",
user_name, pcmk__xe_id(xml_rsc));
fake_op_status(lrm_state, op, PCMK_EXEC_ERROR,
PCMK_OCF_INSUFFICIENT_PRIV,
"Unprivileged user cannot fail resources");
controld_ack_event_directly(from_host, from_sys, NULL, op,
pcmk__xe_id(xml_rsc));
lrmd_free_event(op);
return;
}
if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
crm_info("Failing resource %s...", rsc->id);
fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR,
"Simulated failure");
process_lrm_event(lrm_state, op, NULL, xml);
op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded
lrmd_free_rsc_info(rsc);
} else {
crm_info("Cannot find/create resource in order to fail it...");
crm_log_xml_warn(xml, "bad input");
fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR,
"Cannot fail unknown resource");
}
controld_ack_event_directly(from_host, from_sys, NULL, op,
pcmk__xe_id(xml_rsc));
lrmd_free_event(op);
}
static void
handle_reprobe_op(lrm_state_t *lrm_state, xmlNode *msg, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node, bool reprobe_all_nodes)
{
crm_notice("Forcing the status of all resources to be redetected");
force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node,
reprobe_all_nodes);
if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
xmlNode *reply = pcmk__new_reply(msg, NULL);
crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
pcmk__xml_free(reply);
}
}
static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
{
char *op_key = NULL;
char *meta_key = NULL;
int call = 0;
const char *call_id = NULL;
const char *op_task = NULL;
guint interval_ms = 0;
gboolean in_progress = FALSE;
xmlNode *params = pcmk__xe_first_child(input->xml, PCMK__XE_ATTRIBUTES,
NULL, NULL);
CRM_CHECK(params != NULL, return FALSE);
meta_key = crm_meta_name(PCMK_XA_OPERATION);
op_task = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(op_task != NULL, return FALSE);
meta_key = crm_meta_name(PCMK_META_INTERVAL);
if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) {
free(meta_key);
return FALSE;
}
free(meta_key);
op_key = pcmk__op_key(rsc->id, op_task, interval_ms);
meta_key = crm_meta_name(PCMK__XA_CALL_ID);
call_id = crm_element_value(params, meta_key);
free(meta_key);
crm_debug("Scheduler requested op %s (call=%s) be cancelled",
op_key, (call_id? call_id : "NA"));
pcmk__scan_min_int(call_id, &call, 0);
if (call == 0) {
// Normal case when the scheduler cancels a recurring op
in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
} else {
// Normal case when the scheduler cancels an orphan op
in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
}
// Acknowledge cancellation operation if for a remote connection resource
if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
char *op_id = make_stop_id(rsc->id, call);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
crm_info("Nothing known about operation %d for %s", call, op_key);
}
controld_delete_action_history_by_key(rsc->id, lrm_state->node_name,
op_key, call);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
/* needed at least for cancellation of a remote operation */
if (lrm_state->active_ops != NULL) {
g_hash_table_remove(lrm_state->active_ops, op_id);
}
free(op_id);
}
free(op_key);
return TRUE;
}
static void
do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
bool crm_rsc_delete, const char *user_name)
{
bool unregister = true;
int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name,
user_name,
cib_dryrun|cib_sync_call);
if (cib_rc != pcmk_rc_ok) {
lrmd_event_data_t *op = NULL;
op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE);
/* These are resource clean-ups, not actions, so no exit reason is
* needed.
*/
lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL);
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id);
lrmd_free_event(op);
return;
}
if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
unregister = false;
}
delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys,
user_name, input, unregister, true);
}
// User data for asynchronous metadata execution
struct metadata_cb_data {
lrmd_rsc_info_t *rsc; // Copy of resource information
xmlNode *input_xml; // Copy of FSA input XML
};
static struct metadata_cb_data *
new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
{
struct metadata_cb_data *data = NULL;
data = pcmk__assert_alloc(1, sizeof(struct metadata_cb_data));
data->input_xml = pcmk__xml_copy(NULL, input_xml);
data->rsc = lrmd_copy_rsc_info(rsc);
return data;
}
static void
free_metadata_cb_data(struct metadata_cb_data *data)
{
lrmd_free_rsc_info(data->rsc);
pcmk__xml_free(data->input_xml);
free(data);
}
/*!
* \internal
* \brief Execute an action after metadata has been retrieved
*
* \param[in] pid Ignored
* \param[in] result Result of metadata action
* \param[in] user_data Metadata callback data
*/
static void
metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
{
struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
struct ra_metadata_s *md = NULL;
lrm_state_t *lrm_state =
controld_get_executor_state(lrm_op_target(data->input_xml), false);
if ((lrm_state != NULL) && pcmk__result_ok(result)) {
md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
result->action_stdout);
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
}
free_metadata_cb_data(data);
}
/* A_LRM_INVOKE */
void
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
lrm_state_t *lrm_state = NULL;
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *user_name = NULL;
const char *target_node = lrm_op_target(input->xml);
gboolean is_remote_node = FALSE;
bool crm_rsc_delete = FALSE;
    // If the target is not the local node, the message is for a Pacemaker Remote node
is_remote_node = !controld_is_local_node(target_node);
lrm_state = controld_get_executor_state(target_node, false);
if ((lrm_state == NULL) && is_remote_node) {
crm_err("Failing action because local node has never had connection to remote node %s",
target_node);
synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR,
"Local node has no connection to remote");
return;
}
pcmk__assert(lrm_state != NULL);
user_name = pcmk__update_acl_user(input->msg, PCMK__XA_CRM_USER, NULL);
crm_op = crm_element_value(input->msg, PCMK__XA_CRM_TASK);
from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM);
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
from_host = crm_element_value(input->msg, PCMK__XA_SRC);
}
if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
crm_rsc_delete = TRUE; // from crm_resource
}
operation = PCMK_ACTION_DELETE;
} else if (input->xml != NULL) {
operation = crm_element_value(input->xml, PCMK_XA_OPERATION);
}
CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return);
crm_trace("'%s' execution request from %s as %s user",
pcmk__s(crm_op, operation),
pcmk__s(from_sys, "unknown subsystem"),
pcmk__s(user_name, "current"));
if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) {
fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
from_sys);
} else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none)
|| pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
const char *raw_target = NULL;
if (input->xml != NULL) {
// For CRM_OP_REPROBE, a NULL target means we're targeting all nodes
raw_target = crm_element_value(input->xml, PCMK__META_ON_NODE);
}
handle_reprobe_op(lrm_state, input->msg, from_sys, from_host, user_name,
is_remote_node, (raw_target == NULL));
} else if (operation != NULL) {
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = pcmk__xe_first_child(input->xml, PCMK_XE_PRIMITIVE,
NULL, NULL);
gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE,
pcmk__str_none);
int rc;
// We can't return anything meaningful without a resource ID
CRM_CHECK((xml_rsc != NULL) && (pcmk__xe_id(xml_rsc) != NULL), return);
rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
if (rc == -ENOTCONN) {
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR,
"Not connected to remote executor");
return;
} else if ((rc < 0) && !create_rsc) {
            /* Deletion of a malformed or nonexistent resource (deleting
             * something that does not exist counts as success)
             */
crm_notice("Not registering resource '%s' for a %s event "
QB_XS " get-rc=%d (%s) transition-key=%s",
pcmk__xe_id(xml_rsc), operation,
rc, pcmk_strerror(rc), pcmk__xe_id(input->xml));
delete_rsc_entry(lrm_state, input, pcmk__xe_id(xml_rsc), NULL,
pcmk_ok, user_name, true);
return;
} else if (rc == -EINVAL) {
// Resource operation on malformed resource
crm_err("Invalid resource definition for %s", pcmk__xe_id(xml_rsc));
crm_log_xml_warn(input->msg, "invalid resource");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
PCMK_OCF_NOT_CONFIGURED, // fatal error
"Invalid resource definition");
return;
} else if (rc < 0) {
// Error communicating with the executor
crm_err("Could not register resource '%s' with executor: %s "
QB_XS " rc=%d",
pcmk__xe_id(xml_rsc), pcmk_strerror(rc), rc);
crm_log_xml_warn(input->msg, "failed registration");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
PCMK_OCF_INVALID_PARAM, // hard error
"Could not register resource with executor");
return;
}
if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) {
if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
crm_log_xml_warn(input->xml, "Bad command");
}
} else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE,
pcmk__str_none)) {
do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
crm_rsc_delete, user_name);
} else {
struct ra_metadata_s *md = NULL;
/* Getting metadata from cache is OK except for start actions --
* always refresh from the agent for those, in case the resource
* agent was updated.
*
* @TODO Only refresh metadata for starts if the agent actually
* changed (using something like inotify, or a hash or modification
* time of the agent executable).
*/
if (strcmp(operation, PCMK_ACTION_START) != 0) {
md = controld_get_rsc_metadata(lrm_state, rsc,
controld_metadata_from_cache);
}
if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
operation)) {
/* Most likely, we'll need the agent metadata to record the
* pending operation and the operation result. Get it now rather
* than wait until then, so the metadata action doesn't eat into
* the real action's timeout.
*
* @TODO Metadata is retrieved via direct execution of the
* agent, which has a couple of related issues: the executor
* should execute agents, not the controller; and metadata for
* Pacemaker Remote nodes should be collected on those nodes,
* not locally.
*/
struct metadata_cb_data *data = NULL;
data = new_metadata_cb_data(rsc, input->xml);
crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
rsc->id, rsc->standard,
((rsc->provider == NULL)? "" : ":"),
((rsc->provider == NULL)? "" : rsc->provider),
rsc->type);
(void) lrmd__metadata_async(rsc, metadata_complete,
(void *) data);
} else {
do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
}
}
lrmd_free_rsc_info(rsc);
} else {
crm_err("Invalid execution request: unknown command '%s' (bug?)",
crm_op);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
static lrmd_event_data_t *
construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op,
const char *rsc_id, const char *operation)
{
lrmd_event_data_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
GHashTable *params = NULL;
xmlNode *primitive = NULL;
const char *class = NULL;
const char *transition = NULL;
pcmk__assert((rsc_id != NULL) && (operation != NULL));
op = lrmd_new_event(rsc_id, operation, 0);
op->type = lrmd_event_exec_complete;
op->timeout = 0;
op->start_delay = 0;
lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
if (rsc_op == NULL) {
CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP,
pcmk__str_casei));
op->user_data = NULL;
        /* This is the stop_all_resources() case: by definition there is no DC
         * (or it would be shutting us down), so advertise our own feature set
         * version here.
         */
op->params = pcmk__strkey_table(free, free);
pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_trace("Constructed %s op for %s", operation, rsc_id);
return op;
}
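    // Extract the action's parameters (including meta-attributes) from the XML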
params = xml2list(rsc_op);
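    // The expected result is relevant only to the controller, not the agent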
g_hash_table_remove(params, CRM_META "_" PCMK__META_OP_TARGET_RC);
op_delay = crm_meta_value(params, PCMK_META_START_DELAY);
pcmk__scan_min_int(op_delay, &op->start_delay, 0);
op_timeout = crm_meta_value(params, PCMK_META_TIMEOUT);
pcmk__scan_min_int(op_timeout, &op->timeout, 0);
if (pcmk__guint_from_hash(params, CRM_META "_" PCMK_META_INTERVAL, 0,
&(op->interval_ms)) != pcmk_rc_ok) {
op->interval_ms = 0;
}
    /* For a fencing resource's recurring monitor, use the pcmk_monitor_timeout
     * parameter (if set) instead of the meta-attribute timeout */
primitive = pcmk__xe_first_child(rsc_op, PCMK_XE_PRIMITIVE, NULL, NULL);
class = crm_element_value(primitive, PCMK_XA_CLASS);
if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params)
&& pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei)
&& (op->interval_ms > 0)) {
op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout");
if (op_timeout != NULL) {
long long timeout_ms = crm_get_msec(op_timeout);
op->timeout = (int) QB_MIN(timeout_ms, INT_MAX);
}
}
if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) {
op->params = params;
} else {
rsc_history_t *entry = NULL;
if (lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* If we do not have stop parameters cached, use
* whatever we are given */
if (!entry || !entry->stop_params) {
op->params = params;
} else {
/* Copy the cached parameter list so that we stop the resource
* with the old attributes, not the new ones */
op->params = pcmk__strkey_table(free, free);
g_hash_table_foreach(params, copy_meta_keys, op->params);
g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
g_hash_table_destroy(params);
params = NULL;
}
}
    /* Sanity-check the timeout and start delay */
if (op->timeout <= 0) {
op->timeout = op->interval_ms;
}
if (op->start_delay < 0) {
op->start_delay = 0;
}
transition = crm_element_value(rsc_op, PCMK__XA_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = pcmk__str_copy(transition);
if (op->interval_ms != 0) {
if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP,
NULL)) {
crm_err("Start and Stop actions cannot have an interval: %u",
op->interval_ms);
op->interval_ms = 0;
}
}
crm_trace("Constructed %s op for %s: interval=%u",
operation, rsc_id, op->interval_ms);
return op;
}
/*!
* \internal
* \brief Send a (synthesized) event result
*
* Reply with a synthesized event result directly, as opposed to going through
* the executor.
*
* \param[in] to_host Host to send result to
* \param[in] to_sys IPC name to send result (NULL for transition engine)
* \param[in] rsc Type information about resource the result is for
* \param[in,out] op Event with result to send
* \param[in] rsc_id ID of resource the result is for
*/
void
controld_ack_event_directly(const char *to_host, const char *to_sys,
const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op,
const char *rsc_id)
{
xmlNode *reply = NULL;
xmlNode *update, *iter;
pcmk__node_status_t *peer = NULL;
CRM_CHECK(op != NULL, return);
if (op->rsc_id == NULL) {
// op->rsc_id is a (const char *) but lrmd_free_event() frees it
pcmk__assert(rsc_id != NULL);
op->rsc_id = pcmk__str_copy(rsc_id);
}
if (to_sys == NULL) {
to_sys = CRM_SYSTEM_TENGINE;
}
peer = controld_get_local_node_status();
update = create_node_state_update(peer, node_update_none, NULL,
__func__);
iter = pcmk__xe_create(update, PCMK__XE_LRM);
crm_xml_add(iter, PCMK_XA_ID, controld_globals.our_uuid);
iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCES);
iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCE);
crm_xml_add(iter, PCMK_XA_ID, op->rsc_id);
controld_add_resource_history_xml(iter, rsc, op,
controld_globals.cluster->priv->node_name);
/* We don't have the original message ID, so use "direct-ack" (we just need
* something non-NULL for this to create a reply)
*
* @TODO It would be better to use the server, message ID, and task from the
* original request when callers have it available
*/
reply = pcmk__new_message(pcmk_ipc_controld, "direct-ack", CRM_SYSTEM_LRMD,
to_host, to_sys, CRM_OP_INVOKE_LRM, update);
crm_log_xml_trace(update, "[direct ACK]");
crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s",
op->rsc_id, op->op_type, op->interval_ms, op->user_data,
crm_element_value(reply, PCMK_XA_REFERENCE));
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
pcmk__xml_free(update);
pcmk__xml_free(reply);
}
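/* Check whether every executor connection has verified that its resources are
 * stopped, iterating through all connections even after a failure so that
 * each one gets checked
 */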
gboolean
verify_stopped(enum crmd_fsa_state cur_state, int log_level)
{
gboolean res = TRUE;
GList *lrm_state_list = lrm_state_get_list();
GList *state_entry;
for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
lrm_state_t *lrm_state = state_entry->data;
if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
/* keep iterating through all even when false is returned */
res = FALSE;
}
}
controld_set_fsa_input_flags(R_SENT_RSC_STOP);
g_list_free(lrm_state_list); lrm_state_list = NULL;
return res;
}
struct stop_recurring_action_s {
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
static gboolean
stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct stop_recurring_action_s *event = user_data;
active_op_t *op = value;
if ((op->interval_ms != 0)
&& pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) {
crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
}
return remove;
}
static gboolean
stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
lrm_state_t *lrm_state = user_data;
active_op_t *op = value;
if (op->interval_ms != 0) {
crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
(const char *) key);
remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
}
return remove;
}
/*!
* \internal
* \brief Check whether recurring actions should be cancelled before an action
*
* \param[in] rsc_id Resource that action is for
* \param[in] action Action being performed
* \param[in] interval_ms Operation interval of \p action (in milliseconds)
*
* \return true if recurring actions should be cancelled, otherwise false
*/
static bool
should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms)
{
if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0)
&& (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) {
/* Don't stop monitoring a migrating Pacemaker Remote connection
* resource until the entire migration has completed. We must detect if
* the connection is unexpectedly severed, even during a migration.
*/
return false;
}
// Cancel recurring actions before changing resource state
return (interval_ms == 0)
&& !pcmk__str_any_of(action, PCMK_ACTION_MONITOR,
PCMK_ACTION_NOTIFY, NULL);
}
/*!
* \internal
* \brief Check whether an action should not be performed at this time
*
 * \param[in] action Action to be performed
*
* \return Readable description of why action should not be performed,
* or NULL if it should be performed
*/
static const char *
should_nack_action(const char *action)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)
&& pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) {
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
return "Not attempting start due to shutdown in progress";
}
switch (controld_globals.fsa_state) {
case S_NOT_DC:
case S_POLICY_ENGINE: // Recalculating
case S_TRANSITION_ENGINE:
break;
default:
if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) {
return "Controller cannot attempt actions at this time";
}
break;
}
return NULL;
}
static void
do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
struct ra_metadata_s *md)
{
int rc;
int call_id = 0;
char *op_id = NULL;
lrmd_event_data_t *op = NULL;
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
const char *operation = NULL;
const char *nack_reason = NULL;
CRM_CHECK((rsc != NULL) && (msg != NULL), return);
operation = crm_element_value(msg, PCMK_XA_OPERATION);
CRM_CHECK(!pcmk__str_empty(operation), return);
transition = crm_element_value(msg, PCMK__XA_TRANSITION_KEY);
if (pcmk__str_empty(transition)) {
crm_log_xml_err(msg, "Missing transition number");
}
if (lrm_state == NULL) {
// This shouldn't be possible, but provide a failsafe just in case
crm_err("Cannot execute %s of %s: No executor connection "
QB_XS " transition_key=%s",
operation, rsc->id, pcmk__s(transition, ""));
synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
PCMK_OCF_UNKNOWN_ERROR,
"No executor connection");
return;
}
if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
* will schedule reload-agent actions only. In either case, we need
* to map that to whatever the resource agent actually supports.
* Default to the OCF 1.1 name.
*/
if ((md != NULL)
&& pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
operation = PCMK_ACTION_RELOAD;
} else {
operation = PCMK_ACTION_RELOAD_AGENT;
}
}
op = construct_op(lrm_state, msg, rsc->id, operation);
CRM_CHECK(op != NULL, return);
if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) {
guint removed = 0;
struct stop_recurring_action_s data;
data.rsc = rsc;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->active_ops,
stop_recurring_action_by_rsc,
&data);
if (removed) {
crm_debug("Stopped %u recurring operation%s in preparation for "
PCMK__OP_FMT, removed, pcmk__plural_s(removed),
rsc->id, operation, op->interval_ms);
}
}
    /* Now execute the operation */
crm_notice("Requesting local execution of %s operation for %s on %s "
QB_XS " transition_key=%s op_key=" PCMK__OP_FMT,
pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
lrm_state->node_name, pcmk__s(transition, ""), rsc->id,
operation, op->interval_ms);
nack_reason = should_nack_action(operation);
if (nack_reason != NULL) {
crm_notice("Discarding attempt to perform action %s on %s in state %s "
"(shutdown=%s)", operation, rsc->id,
fsa_state2string(controld_globals.fsa_state),
pcmk__flag_text(controld_globals.fsa_input_register,
R_SHUTDOWN));
lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID,
nack_reason);
controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
lrmd_free_event(op);
free(op_id);
return;
}
controld_record_pending_op(lrm_state->node_name, rsc, op);
op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
if (op->interval_ms > 0) {
/* cancel it so we can then restart it without conflict */
cancel_op_key(lrm_state, rsc, op_id, FALSE);
}
rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type,
op->user_data, op->interval_ms,
op->timeout, op->start_delay,
op->params, &call_id);
if (rc == pcmk_rc_ok) {
/* record all operations so we can wait
* for them to complete during shutdown
*/
char *call_id_s = make_stop_id(rsc->id, call_id);
active_op_t *pending = NULL;
pending = pcmk__assert_alloc(1, sizeof(active_op_t));
crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
pending->call_id = call_id;
pending->interval_ms = op->interval_ms;
pending->op_type = pcmk__str_copy(operation);
pending->op_key = pcmk__str_copy(op_id);
pending->rsc_id = pcmk__str_copy(rsc->id);
pending->start_time = time(NULL);
pending->user_data = pcmk__str_copy(op->user_data);
if (crm_element_value_epoch(msg, PCMK_OPT_SHUTDOWN_LOCK,
&(pending->lock_time)) != pcmk_ok) {
pending->lock_time = 0;
}
g_hash_table_replace(lrm_state->active_ops, call_id_s, pending);
if ((op->interval_ms > 0)
&& (op->start_delay > START_DELAY_THRESHOLD)) {
int target_rc = PCMK_OCF_OK;
crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL);
controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
}
pending->params = op->params;
op->params = NULL;
} else if (lrm_state_is_local(lrm_state)) {
crm_err("Could not initiate %s action for resource %s locally: %s "
QB_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc);
fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
process_lrm_event(lrm_state, op, NULL, NULL);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
} else {
crm_err("Could not initiate %s action for resource %s remotely on %s: "
"%s " QB_XS " rc=%d",
operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc);
fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
process_lrm_event(lrm_state, op, NULL, NULL);
}
free(op_id);
lrmd_free_event(op);
}
void
do_lrm_event(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
{
CRM_CHECK(FALSE, return);
}
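/* Return a copy of a string with each literal "\n" escape sequence replaced
 * in place by a real newline plus a space (keeping the length unchanged)
 */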
static char *
unescape_newlines(const char *string)
{
char *pch = NULL;
char *ret = NULL;
static const char *escaped_newline = "\\n";
if (!string) {
return NULL;
}
ret = pcmk__str_copy(string);
pch = strstr(ret, escaped_newline);
while (pch != NULL) {
/* Replace newline escape pattern with actual newline (and a space so we
* don't have to shuffle the rest of the buffer)
*/
pch[0] = '\n';
pch[1] = ' ';
pch = strstr(pch, escaped_newline);
}
return ret;
}
static bool
did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
const char * op_type, guint interval_ms)
{
rsc_history_t *entry = NULL;
CRM_CHECK(lrm_state != NULL, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
CRM_CHECK(op_type != NULL, return FALSE);
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->failed == NULL) {
return FALSE;
}
if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none)
&& pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei)
&& entry->failed->interval_ms == interval_ms) {
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Log the result of an executor action (actual or synthesized)
*
* \param[in] op Executor action to log result for
* \param[in] op_key Operation key for action
* \param[in] node_name Name of node action was performed on, if known
* \param[in] confirmed Whether to log that graph action was confirmed
*/
static void
log_executor_event(const lrmd_event_data_t *op, const char *op_key,
const char *node_name, gboolean confirmed)
{
int log_level = LOG_ERR;
GString *str = g_string_sized_new(100); // reasonable starting size
pcmk__g_strcat(str,
"Result of ",
pcmk__readable_action(op->op_type, op->interval_ms),
" operation for ", op->rsc_id, NULL);
if (node_name != NULL) {
pcmk__g_strcat(str, " on ", node_name, NULL);
}
switch (op->op_status) {
case PCMK_EXEC_DONE:
log_level = LOG_NOTICE;
- pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL);
+ pcmk__g_strcat(str, ": ", crm_exit_str((crm_exit_t) op->rc), NULL);
break;
case PCMK_EXEC_TIMEOUT:
pcmk__g_strcat(str,
": ", pcmk_exec_status_str(op->op_status), " after ",
pcmk__readable_interval(op->timeout), NULL);
break;
case PCMK_EXEC_CANCELLED:
log_level = LOG_INFO;
            /* The order of the __attribute__ and the "Fall through" comment is
             * IMPORTANT! Do not change it without proper testing with multiple
             * versions of both clang and gcc. The __clang__ check allows
             * building with all versions of clang. The __has_c_attribute check
             * works around a bug in the clang version shipped in RHEL 7, whose
             * __has_attribute would happily return "YES SIR WE GOT IT" and
             * then fail the build on the next line.
             */
#ifdef __clang__
#ifdef __has_c_attribute
#if __has_attribute(fallthrough)
__attribute__((fallthrough));
#endif
#endif
#endif
// Fall through
default:
pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
NULL);
}
if ((op->exit_reason != NULL)
&& ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) {
pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL);
}
g_string_append(str, " " QB_XS);
g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s",
(confirmed? "" : "un"), op->call_id, op_key);
if (op->op_status == PCMK_EXEC_DONE) {
g_string_append_printf(str, " rc=%d", op->rc);
}
do_crm_log(log_level, "%s", str->str);
g_string_free(str, TRUE);
/* The services library has already logged the output at info or debug
* level, so just raise to notice if it looks like a failure.
*/
if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
op->rsc_id, op->op_type,
op->interval_ms, node_name);
crm_log_output(LOG_NOTICE, prefix, op->output);
free(prefix);
}
}
void
process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
active_op_t *pending, const xmlNode *action_xml)
{
char *op_id = NULL;
char *op_key = NULL;
gboolean remove = FALSE;
gboolean removed = FALSE;
bool need_direct_ack = FALSE;
lrmd_rsc_info_t *rsc = NULL;
const char *node_name = NULL;
CRM_CHECK(op != NULL, return);
CRM_CHECK(op->rsc_id != NULL, return);
// Remap new status codes for older DCs
if (compare_version(controld_globals.dc_version, "3.2.0") < 0) {
switch (op->op_status) {
case PCMK_EXEC_NOT_CONNECTED:
lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED,
PCMK_EXEC_ERROR, op->exit_reason);
break;
case PCMK_EXEC_INVALID:
lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR,
op->exit_reason);
break;
default:
break;
}
}
op_id = make_stop_id(op->rsc_id, op->call_id);
op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
// Get resource info if available (from executor state or action XML)
if (lrm_state) {
rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
}
if ((rsc == NULL) && action_xml) {
xmlNode *xml = pcmk__xe_first_child(action_xml, PCMK_XE_PRIMITIVE, NULL,
NULL);
const char *standard = crm_element_value(xml, PCMK_XA_CLASS);
const char *provider = crm_element_value(xml, PCMK_XA_PROVIDER);
const char *type = crm_element_value(xml, PCMK_XA_TYPE);
if (standard && type) {
crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
op->rsc_id, standard,
(provider? ":" : ""), (provider? provider : ""), type);
rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
} else {
crm_err("Can't process %s result because %s agent information not cached or in XML",
op_key, op->rsc_id);
}
}
// Get node name if available (from executor state or action XML)
if (lrm_state) {
node_name = lrm_state->node_name;
} else if (action_xml) {
node_name = crm_element_value(action_xml, PCMK__META_ON_NODE);
}
if(pending == NULL) {
remove = TRUE;
if (lrm_state) {
pending = g_hash_table_lookup(lrm_state->active_ops, op_id);
}
}
if (op->op_status == PCMK_EXEC_ERROR) {
switch(op->rc) {
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_PROMOTED:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_PROMOTED:
// Leave it to the TE/scheduler to decide if this is an error
op->op_status = PCMK_EXEC_DONE;
break;
default:
/* Nothing to do */
break;
}
}
if (op->op_status != PCMK_EXEC_CANCELLED) {
/* We might not record the result, so directly acknowledge it to the
* originator instead, so it doesn't time out waiting for the result
* (especially important if part of a transition).
*/
need_direct_ack = TRUE;
if (controld_action_is_recordable(op->op_type)) {
if (node_name && rsc) {
// We should record the result, and happily, we can
time_t lock_time = (pending == NULL)? 0 : pending->lock_time;
controld_update_resource_history(node_name, rsc, op, lock_time);
need_direct_ack = FALSE;
} else if (op->rsc_deleted) {
/* We shouldn't record the result (likely the resource was
* refreshed, cleaned, or removed while this operation was
* in flight).
*/
crm_notice("Not recording %s result in CIB because "
"resource information was removed since it was initiated",
op_key);
} else {
/* This shouldn't be possible; the executor didn't consider the
* resource deleted, but we couldn't find resource or node
* information.
*/
crm_err("Unable to record %s result in CIB: %s", op_key,
(node_name? "No resource information" : "No node name"));
}
}
} else if (op->interval_ms == 0) {
/* A non-recurring operation was cancelled. Most likely, the
* never-initiated action was removed from the executor's pending
* operations list upon resource removal.
*/
need_direct_ack = TRUE;
} else if (pending == NULL) {
/* This recurring operation was cancelled, but was not pending. No
* transition actions are waiting on it, nothing needs to be done.
*/
} else if (op->user_data == NULL) {
/* This recurring operation was cancelled and pending, but we don't
* have a transition key. This should never happen.
*/
crm_err("Recurring operation %s was cancelled without transition information",
op_key);
} else if (pcmk_is_set(pending->flags, active_op_remove)) {
/* This recurring operation was cancelled (by us) and pending, and we
* have been waiting for it to finish.
*/
if (lrm_state) {
controld_delete_action_history(op);
}
/* Directly acknowledge failed recurring actions here. The above call to
* controld_delete_action_history() will not erase any corresponding
* last_failure entry, which means that the DC won't confirm the
* cancellation via process_op_deletion(), and the transition would
* otherwise wait for the action timer to pop.
*/
if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
pending->op_type, pending->interval_ms)) {
need_direct_ack = TRUE;
}
} else if (op->rsc_deleted) {
/* This recurring operation was cancelled (but not by us, and the
* executor does not have resource information, likely due to resource
* cleanup, refresh, or removal) and pending.
*/
crm_debug("Recurring op %s was cancelled due to resource deletion",
op_key);
need_direct_ack = TRUE;
} else {
/* This recurring operation was cancelled (but not by us, likely by the
* executor before stopping the resource) and pending. We don't need to
* do anything special.
*/
}
if (need_direct_ack) {
controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id);
}
if(remove == FALSE) {
/* The caller will do this afterwards, but keep the logging consistent */
removed = TRUE;
} else if (lrm_state && ((op->interval_ms == 0)
|| (op->op_status == PCMK_EXEC_CANCELLED))) {
gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id);
if (op->interval_ms != 0) {
removed = TRUE;
} else if (found) {
removed = TRUE;
crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
op_key, op->call_id, op_id,
g_hash_table_size(lrm_state->active_ops));
}
}
log_executor_event(op, op_key, node_name, removed);
if (lrm_state) {
if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA,
pcmk__str_casei)) {
crmd_alert_resource_op(lrm_state->node_name, op);
} else if (rsc && (op->rc == PCMK_OCF_OK)) {
char *metadata = unescape_newlines(op->output);
controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
free(metadata);
}
}
if (op->rsc_deleted) {
crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
if (lrm_state) {
delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL,
true);
}
}
/* If a shutdown was escalated while operations were pending,
* then the FSA will be stalled right now... allow it to continue
*/
controld_trigger_fsa();
if (lrm_state && rsc) {
update_history_cache(lrm_state, rsc, op);
}
lrmd_free_rsc_info(rsc);
free(op_key);
free(op_id);
}
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 76c2af48f9..39354b77d8 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -1,1479 +1,1479 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#define REMOTE_LRMD_RA "remote"
/* The maximum start timeout before a command is retried */
#define MAX_START_TIMEOUT_MS 10000
#define cmd_set_flags(cmd, flags_to_set) do { \
(cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_set), #flags_to_set); \
} while (0)
#define cmd_clear_flags(cmd, flags_to_clear) do { \
(cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
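/* Illustrative (hypothetical) usage: cmd_set_flags(cmd, cmd_cancel) would mark
 * a command as cancelled and log the flag change at trace level
 */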
enum remote_cmd_status {
cmd_reported_success = (1 << 0),
cmd_cancel = (1 << 1),
};
typedef struct remote_ra_cmd_s {
/*! the local node the cmd is issued from */
char *owner;
/*! the remote node the cmd is executed on */
char *rsc_id;
/*! the action to execute */
char *action;
/*! some string the client wants us to give it back */
char *userdata;
/*! start delay in ms */
int start_delay;
/*! timer id used for start delay. */
int delay_id;
/*! timeout in ms for cmd */
int timeout;
int remaining_timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
int interval_id;
int monitor_timeout_id;
int takeover_timeout_id;
/*! action parameters */
lrmd_key_value_t *params;
pcmk__action_result_t result;
int call_id;
time_t start_time;
uint32_t status;
} remote_ra_cmd_t;
#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_set), #flags_to_set); \
} while (0)
#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
enum remote_status {
expect_takeover = (1 << 0),
takeover_complete = (1 << 1),
remote_active = (1 << 2),
/* Maintenance mode is difficult to determine from the controller's context,
* so we have it signalled back with the transition from the scheduler.
*/
remote_in_maint = (1 << 3),
    /* Similarly for whether we are controlling a guest node or a remote node:
     * fortunately the transition already carries a meta-attribute with this
     * information, and since it doesn't change over time, we can record it at
     * resource start for later use when the attributes aren't at hand.
     */
controlling_guest = (1 << 4),
};
typedef struct remote_ra_data_s {
crm_trigger_t *work;
remote_ra_cmd_t *cur_cmd;
GList *cmds;
GList *recurring_cmds;
uint32_t status;
} remote_ra_data_t;
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
static void
free_cmd(gpointer user_data)
{
remote_ra_cmd_t *cmd = user_data;
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
}
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
}
if (cmd->takeover_timeout_id) {
g_source_remove(cmd->takeover_timeout_id);
}
free(cmd->owner);
free(cmd->rsc_id);
free(cmd->action);
free(cmd->userdata);
pcmk__reset_result(&(cmd->result));
lrmd_key_value_freeall(cmd->params);
free(cmd);
}
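/* Return the next fake call ID for remote connection actions, wrapping back
 * to 1 on integer overflow
 */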
static int
generate_callid(void)
{
static int remote_ra_callid = 0;
remote_ra_callid++;
if (remote_ra_callid <= 0) {
remote_ra_callid = 1;
}
return remote_ra_callid;
}
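/* Timer callback: move a recurring command from the recurring list back onto
 * the work queue and trigger its execution
 */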
static gboolean
recurring_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->interval_id = 0;
connection_rsc = controld_get_executor_state(cmd->rsc_id, false);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
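/* Timer callback: a command's start delay has expired, so kick the work queue
 * for its connection
 */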
static gboolean
start_delay_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->delay_id = 0;
connection_rsc = controld_get_executor_state(cmd->rsc_id, false);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
static bool
should_purge_attributes(pcmk__node_status_t *node)
{
pcmk__node_status_t *conn_node = NULL;
lrm_state_t *connection_rsc = NULL;
if ((node->conn_host == NULL) || (node->name == NULL)) {
return true;
}
/* Get the node that was hosting the remote connection resource from the
* peer cache. That's the one we really care about here.
*/
conn_node = pcmk__get_node(0, node->conn_host, NULL,
pcmk__node_search_cluster_member);
if (conn_node == NULL) {
return true;
}
/* Check the uptime of connection_rsc. If it hasn't been running long
* enough, set purge=true. "Long enough" means it started running earlier
* than the timestamp when we noticed it went away in the first place.
*/
connection_rsc = controld_get_executor_state(node->name, false);
if (connection_rsc != NULL) {
lrmd_t *lrm = connection_rsc->conn;
time_t uptime = lrmd__uptime(lrm);
time_t now = time(NULL);
/* Add 20s of fuzziness to give corosync a while to notice the remote
* host is gone. On various error conditions (failure to get uptime,
* peer_lost isn't set) we default to purging.
*/
if (uptime > 0 &&
conn_node->peer_lost > 0 &&
uptime + 20 >= now - conn_node->peer_lost) {
return false;
}
}
return true;
}
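/* A worked example with illustrative numbers: if we noticed the peer was
* lost 60s ago (now - peer_lost == 60) and the connection has an uptime of
* 50s, then 50 + 20 >= 60 holds, so the connection (re)started around when
* the peer went away and the attributes are kept (no purge). With an
* uptime of only 30s, 30 + 20 >= 60 fails, and we purge.
*/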
static enum controld_section_e
section_to_delete(bool purge)
{
if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
if (purge) {
return controld_section_all_unlocked;
} else {
return controld_section_lrm_unlocked;
}
} else {
if (purge) {
return controld_section_all;
} else {
return controld_section_lrm;
}
}
}
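/* The mapping above, as a table:
*
*   shutdown locks | purge | section deleted
*   ---------------+-------+------------------------------
*   enabled        | yes   | controld_section_all_unlocked
*   enabled        | no    | controld_section_lrm_unlocked
*   disabled       | yes   | controld_section_all
*   disabled       | no    | controld_section_lrm
*
* That is, purging additionally removes transient node attributes, while
* the "_unlocked" variants preserve history for resources with shutdown
* locks.
*/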
static void
purge_remote_node_attrs(int call_opt, pcmk__node_status_t *node)
{
bool purge = should_purge_attributes(node);
enum controld_section_e section = section_to_delete(purge);
/* Purge node from attrd's memory */
if (purge) {
update_attrd_remote_node_removed(node->name, NULL);
}
controld_delete_node_state(node->name, section, call_opt);
}
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node joining
*
* \param[in] node_name Name of newly integrated pacemaker_remote node
*/
static void
remote_node_up(const char *node_name)
{
int call_opt;
xmlNode *update, *state;
pcmk__node_status_t *node = NULL;
lrm_state_t *connection_rsc = NULL;
CRM_CHECK(node_name != NULL, return);
crm_info("Announcing Pacemaker Remote node %s", node_name);
call_opt = crmd_cib_smart_opt();
/* Delete node's CRM_OP_PROBED attribute. Deleting any attribute ensures
* that the attribute manager learns the node is remote. Deletion of this
* specific attribute is a holdover from when it had special meaning.
*
* @COMPAT Find another way to tell attrd that the node is remote, without
* risking deletion or overwrite of an arbitrary attribute. Then work on
* deprecating CRM_OP_PROBED.
*/
update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
/* Ensure node is in the remote peer cache with member status */
node = pcmk__cluster_lookup_remote_node(node_name);
CRM_CHECK((node != NULL) && (node->name != NULL), return);
purge_remote_node_attrs(call_opt, node);
pcmk__update_peer_state(__func__, node, PCMK_VALUE_MEMBER, 0);
/* Apply any start state that we were given from the environment on the
* remote node.
*/
connection_rsc = controld_get_executor_state(node->name, false);
if (connection_rsc != NULL) {
lrmd_t *lrm = connection_rsc->conn;
const char *start_state = lrmd__node_start_state(lrm);
if (start_state) {
set_join_state(start_state, node->name, node->xml_id, true);
}
}
/* pacemaker_remote nodes don't participate in the membership layer,
* so cluster nodes don't automatically get notified when they come and go.
* We send a cluster message to the DC, and update the CIB node state entry,
* so the DC will get it sooner (via message) or later (via CIB refresh),
* and any other interested parties can query the CIB.
*/
broadcast_remote_state_message(node_name, true);
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
state = create_node_state_update(node, node_update_cluster, update,
__func__);
/* Clear the PCMK__XA_NODE_FENCED flag in the node state. If the node ever
* needs to be fenced, this flag will allow various actions to determine
* whether the fencing has happened yet.
*/
crm_xml_add(state, PCMK__XA_NODE_FENCED, "0");
/* TODO: If the remote connection drops, and this (async) CIB update either
* failed or has not yet completed, later actions could mistakenly think the
* node has already been fenced (if the PCMK__XA_NODE_FENCED attribute was
* previously set, because it won't have been cleared). This could prevent
* actual fencing or allow recurring monitor failures to be cleared too
* soon. Ideally, we wouldn't rely on the CIB for the fenced status.
*/
controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
pcmk__xml_free(update);
}
enum down_opts {
DOWN_KEEP_LRM,
DOWN_ERASE_LRM
};
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node leaving
*
* \param[in] node_name Name of lost node
* \param[in] opts Whether to keep or erase LRM history
*/
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
xmlNode *update;
int call_opt = crmd_cib_smart_opt();
pcmk__node_status_t *node = NULL;
/* Purge node from attrd's memory */
update_attrd_remote_node_removed(node_name, NULL);
/* Normally, only node attributes should be erased, and the resource history
* should be kept until the node comes back up. However, after a successful
* fence, we want to clear the history as well, so we don't think resources
* are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
controld_delete_node_state(node_name, controld_section_all, call_opt);
} else {
controld_delete_node_state(node_name, controld_section_attrs, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
node = pcmk__cluster_lookup_remote_node(node_name);
CRM_CHECK(node != NULL, return);
pcmk__update_peer_state(__func__, node, PCMK__VALUE_LOST, 0);
/* Notify DC */
broadcast_remote_state_message(node_name, false);
/* Update CIB node state */
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
create_node_state_update(node, node_update_cluster, update, __func__);
controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
pcmk__xml_free(update);
}
/*!
* \internal
* \brief Handle effects of a remote RA command on node state
*
* \param[in] cmd Completed remote RA command
*/
static void
check_remote_node_state(const remote_ra_cmd_t *cmd)
{
/* Only successful actions can change node state */
if (!pcmk__result_ok(&(cmd->result))) {
return;
}
if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
remote_node_up(cmd->rsc_id);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_casei)) {
/* After a successful migration, we don't need to do remote_node_up()
* because the DC already knows the node is up, and we don't want to
* clear LRM history etc. We do need to add the remote node to this
* host's remote peer cache, because (unless it happens to be DC)
* it hasn't been tracking the remote node, and other code relies on
* the cache to distinguish remote nodes from unseen cluster nodes.
*/
pcmk__node_status_t *node =
pcmk__cluster_lookup_remote_node(cmd->rsc_id);
CRM_CHECK(node != NULL, return);
pcmk__update_peer_state(__func__, node, PCMK_VALUE_MEMBER, 0);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
lrm_state_t *lrm_state = controld_get_executor_state(cmd->rsc_id,
false);
remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
if (ra_data) {
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* Stop means down if we didn't successfully migrate elsewhere */
remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
} else if (AM_I_DC == FALSE) {
/* Only the connection host and DC track node state,
* so if the connection migrated elsewhere and we aren't DC,
* un-cache the node, so we don't have stale info
*/
pcmk__cluster_forget_remote_node(cmd->rsc_id);
}
}
}
/* We don't do anything for successful monitors, which is correct for
* routine recurring monitors, and for monitors on nodes where the
* connection isn't supposed to be (the cluster will stop the connection in
* that case). However, if the initial probe finds the connection already
* active on the node where we want it, we probably should do
* remote_node_up(). Unfortunately, we can't distinguish that case here.
* Given that connections have to be initiated by the cluster, the chance of
* that should be close to zero.
*/
}
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
lrmd_event_data_t op = { 0, };
check_remote_node_state(cmd);
op.type = lrmd_event_exec_complete;
op.rsc_id = cmd->rsc_id;
op.op_type = cmd->action;
op.user_data = cmd->userdata;
op.timeout = cmd->timeout;
op.interval_ms = cmd->interval_ms;
op.t_run = cmd->start_time;
op.t_rcchange = cmd->start_time;
lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
cmd->result.exit_reason);
if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
op.t_rcchange = time(NULL);
/* This edge case should be practically impossible, but if it does occur,
* a failure would not be processed correctly. It is conceivable only
* because we can detect the loss of a connection resource's TCP connection
* at any moment after start completes; the recurring operation itself is
* just a connectivity ping.
*
* In short, the first successful monitor and a subsequent failed monitor
* are not guaranteed to have different timestamps, but we must make the
* operations appear to have occurred at separate times. */
if (op.t_rcchange == op.t_run) {
op.t_rcchange++;
}
}
if (cmd->params) {
lrmd_key_value_t *tmp;
op.params = pcmk__strkey_table(free, free);
for (tmp = cmd->params; tmp; tmp = tmp->next) {
pcmk__insert_dup(op.params, tmp->key, tmp->value);
}
}
op.call_id = cmd->call_id;
op.remote_nodename = cmd->owner;
lrm_op_callback(&op);
if (op.params) {
g_hash_table_destroy(op.params);
}
lrmd__reset_result(&op);
}
static void
update_remaining_timeout(remote_ra_cmd_t * cmd)
{
cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
}
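/* For example, given a 20000ms timeout with a start_time 5 seconds in the
* past, this yields ((20000 / 1000) - 5) * 1000 = 15000ms remaining. The
* arithmetic is in whole seconds, so sub-second elapsed time is ignored.
*/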
static gboolean
retry_start_cmd_cb(gpointer data)
{
lrm_state_t *lrm_state = data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd = NULL;
int rc = ETIME;
if (!ra_data || !ra_data->cur_cmd) {
return FALSE;
}
cmd = ra_data->cur_cmd;
if (!pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
return FALSE;
}
update_remaining_timeout(cmd);
if (cmd->remaining_timeout > 0) {
rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
} else {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Not enough time remains to retry remote connection");
}
if (rc != pcmk_rc_ok) {
report_remote_ra_result(cmd);
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
ra_data->cur_cmd = NULL;
free_cmd(cmd);
} else {
/* wait for connection event */
}
return FALSE;
}
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
crm_info("takeover event timed out for node %s", cmd->rsc_id);
cmd->takeover_timeout_id = 0;
lrm_state = controld_get_executor_state(cmd->rsc_id, false);
handle_remote_ra_stop(lrm_state, cmd);
free_cmd(cmd);
return FALSE;
}
static gboolean
monitor_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
lrm_state = controld_get_executor_state(cmd->rsc_id, false);
crm_info("Timed out waiting for remote poke response from %s%s",
cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
cmd->monitor_timeout_id = 0;
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
"Remote executor did not respond");
if (lrm_state && lrm_state->remote_ra_data) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (ra_data->cur_cmd == cmd) {
ra_data->cur_cmd = NULL;
}
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
}
report_remote_ra_result(cmd);
free_cmd(cmd);
if(lrm_state) {
lrm_state_disconnect(lrm_state);
}
return FALSE;
}
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
lrmd_event_data_t op = { 0, };
if (lrm_state == NULL) {
/* If no lrm_state was given, assume the local executor */
lrm_state = controld_get_executor_state(NULL, false);
}
pcmk__assert(lrm_state != NULL);
op.type = lrmd_event_exec_complete;
op.rsc_id = rsc_id;
op.op_type = op_type;
op.t_run = time(NULL);
op.t_rcchange = op.t_run;
op.call_id = generate_callid();
lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
process_lrm_event(lrm_state, &op, NULL, NULL);
}
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
gboolean cmd_handled = FALSE;
lrm_state_t *lrm_state = NULL;
remote_ra_data_t *ra_data = NULL;
remote_ra_cmd_t *cmd = NULL;
CRM_CHECK((op != NULL) && (op->remote_nodename != NULL), return);
crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
"(%d) status=%s (%d)",
(op->op_type? op->op_type : ""), (op->op_type? " " : ""),
lrmd_event_type2str(op->type), op->remote_nodename,
- services_ocf_exitcode_str(op->rc), op->rc,
+ crm_exit_str((crm_exit_t) op->rc), op->rc,
pcmk_exec_status_str(op->op_status), op->op_status);
lrm_state = controld_get_executor_state(op->remote_nodename, false);
if (!lrm_state || !lrm_state->remote_ra_data) {
crm_debug("No state information found for remote connection event");
return;
}
ra_data = lrm_state->remote_ra_data;
if (op->type == lrmd_event_new_client) {
// Another client has connected to the remote daemon
if (pcmk_is_set(ra_data->status, expect_takeover)) {
// Great, we knew this was coming
lrm_remote_clear_flags(lrm_state, expect_takeover);
lrm_remote_set_flags(lrm_state, takeover_complete);
} else {
crm_err("Disconnecting from Pacemaker Remote node %s due to "
"unexpected client takeover", op->remote_nodename);
/* In this case, lrmd_tls_connection_destroy() will be called under
* mainloop's control. Do not free lrm_state->conn yet; it will be
* freed by the subsequent stop action.
*/
lrm_state_disconnect_only(lrm_state);
}
return;
}
/* Relay all exec-complete events up to the normal operation callback */
if (op->type == lrmd_event_exec_complete) {
if (pcmk_is_set(ra_data->status, takeover_complete)) {
crm_debug("ignoring event, this connection is taken over by another node");
} else {
lrm_op_callback(op);
}
return;
}
if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
if (!pcmk_is_set(ra_data->status, remote_active)) {
crm_debug("Disconnection from Pacemaker Remote node %s complete",
lrm_state->node_name);
} else if (!remote_ra_is_in_maintenance(lrm_state)) {
crm_err("Lost connection to Pacemaker Remote node %s",
lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name,
PCMK_ACTION_STOP);
}
return;
}
if (!ra_data->cur_cmd) {
crm_debug("no event to match");
return;
}
cmd = ra_data->cur_cmd;
/* Start and migrate_from actions complete only after the connection
* comes back to us. */
if ((op->type == lrmd_event_connect)
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
if (op->connection_rc < 0) {
update_remaining_timeout(cmd);
if ((op->connection_rc == -ENOKEY)
|| (op->connection_rc == -EKEYREJECTED)) {
// Hard error, don't retry
pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
PCMK_EXEC_ERROR,
pcmk_strerror(op->connection_rc));
} else if (cmd->remaining_timeout > 3000) {
crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
crm_trace("can't reschedule start, remaining timeout too small %d",
cmd->remaining_timeout);
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"%s without enough time to retry",
pcmk_strerror(op->connection_rc));
}
} else {
lrm_state_reset_tables(lrm_state, TRUE);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
lrm_remote_set_flags(lrm_state, remote_active);
}
crm_debug("Remote connection event matched %s action", cmd->action);
report_remote_ra_result(cmd);
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_poke)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
cmd->monitor_timeout_id = 0;
}
/* Only report success the first time; after that, only worry about
* failures. For this function, getting the poke response back always
* means success. Pokes fail only if the send fails or the response
* times out. */
if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
cmd_set_flags(cmd, cmd_reported_success);
}
crm_debug("Remote poke event matched %s action", cmd->action);
/* success, keep rescheduling if interval is present. */
if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
cmd->interval_id = g_timeout_add(cmd->interval_ms,
recurring_helper, cmd);
cmd = NULL; /* prevent free */
}
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_disconnect)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (pcmk_is_set(ra_data->status, remote_active) &&
!pcmk_is_set(cmd->status, cmd_cancel)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Remote connection unexpectedly dropped "
"during monitor");
report_remote_ra_result(cmd);
crm_err("Remote connection to %s unexpectedly dropped during monitor",
lrm_state->node_name);
}
cmd_handled = TRUE;
} else {
crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
}
if (cmd_handled) {
ra_data->cur_cmd = NULL;
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
free_cmd(cmd);
}
}
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
remote_ra_data_t *ra_data = NULL;
pcmk__assert(lrm_state != NULL);
ra_data = lrm_state->remote_ra_data;
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* Delete pending ops whenever the remote connection is intentionally stopped */
g_hash_table_remove_all(lrm_state->active_ops);
} else {
/* We no longer hold the history if this connection has been migrated;
* however, we keep the metadata cache for future use. */
lrm_state_reset_tables(lrm_state, FALSE);
}
lrm_remote_clear_flags(lrm_state, remote_active);
lrm_state_disconnect(lrm_state);
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
ra_data->cmds = NULL;
ra_data->recurring_cmds = NULL;
ra_data->cur_cmd = NULL;
if (cmd) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
}
// \return Standard Pacemaker return code
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
const char *server = NULL;
lrmd_key_value_t *tmp = NULL;
int port = 0;
int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
int rc = pcmk_rc_ok;
for (tmp = cmd->params; tmp; tmp = tmp->next) {
if (pcmk__strcase_any_of(tmp->key,
PCMK_REMOTE_RA_ADDR, PCMK_REMOTE_RA_SERVER,
NULL)) {
server = tmp->value;
} else if (pcmk__str_eq(tmp->key, PCMK_REMOTE_RA_PORT,
pcmk__str_none)) {
port = atoi(tmp->value);
} else if (pcmk__str_eq(tmp->key, CRM_META "_" PCMK__META_CONTAINER,
pcmk__str_none)) {
lrm_remote_set_flags(lrm_state, controlling_guest);
}
}
rc = controld_connect_remote_executor(lrm_state, server, port,
timeout_used);
if (rc != pcmk_rc_ok) {
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Could not connect to Pacemaker Remote node %s: %s",
lrm_state->node_name, pcmk_rc_str(rc));
}
return rc;
}
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
int rc = 0;
lrm_state_t *lrm_state = user_data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd;
GList *first = NULL;
if (ra_data->cur_cmd) {
/* still waiting on previous cmd */
return TRUE;
}
while (ra_data->cmds) {
first = ra_data->cmds;
cmd = first->data;
if (cmd->delay_id) {
/* still waiting for start delay timer to trip */
return TRUE;
}
ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
g_list_free_1(first);
if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
if (handle_remote_ra_start(lrm_state, cmd,
cmd->timeout) == pcmk_rc_ok) {
/* take care of this later when we get async connection result */
crm_debug("Initiated async remote connection, %s action will complete after connect event",
cmd->action);
ra_data->cur_cmd = cmd;
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) {
if (lrm_state_is_connected(lrm_state) == TRUE) {
rc = lrm_state_poke_connection(lrm_state);
if (rc < 0) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, pcmk_strerror(rc));
}
} else {
rc = -1;
pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
PCMK_EXEC_DONE, "Remote connection inactive");
}
if (rc == 0) {
crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
cmd->rsc_id);
ra_data->cur_cmd = cmd;
cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) {
if (pcmk_is_set(ra_data->status, expect_takeover)) {
/* Briefly wait on stop for the takeover event to occur. If the takeover
* event does not occur during the wait period, that's fine: it just means
* the remote node's lrm_status section will be cleared, requiring all
* resources running on the remote node to be explicitly re-detected via
* probe actions. If the takeover does occur successfully, we can leave
* the status section intact. */
cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
ra_data->cur_cmd = cmd;
return TRUE;
}
handle_remote_ra_stop(lrm_state, cmd);
} else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) {
lrm_remote_clear_flags(lrm_state, takeover_complete);
lrm_remote_set_flags(lrm_state, expect_takeover);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
} else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Currently the only reloadable parameter is
* PCMK_REMOTE_RA_RECONNECT_INTERVAL, which is only used by the
* scheduler via the CIB, so reloads are a no-op.
*
* @COMPAT DC <2.1.0: We only need to check for "reload" in case
* we're in a rolling upgrade with a DC scheduling "reload" instead
* of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway,
* so this would work for that purpose as well.
*/
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
free_cmd(cmd);
}
return TRUE;
}
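/* To summarize the dispatch above: start and migrate_from initiate an
* async connection and complete later in remote_lrm_op_callback(); monitor
* pokes the connection and waits for the async poke response (or a
* timeout); stop may first wait briefly for a takeover event; migrate_to
* merely records that a takeover is expected; and reload/reload-agent are
* no-ops.
*/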
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = NULL;
if (lrm_state->remote_ra_data) {
return;
}
ra_data = pcmk__assert_alloc(1, sizeof(remote_ra_data_t));
ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
lrm_state->remote_ra_data = ra_data;
}
void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (!ra_data) {
return;
}
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
mainloop_destroy_trigger(ra_data->work);
free(ra_data);
lrm_state->remote_ra_data = NULL;
}
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
return TRUE;
}
return (id != NULL) && (controld_get_executor_state(id, false) != NULL)
&& !controld_is_local_node(id);
}
lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
lrmd_rsc_info_t *info = NULL;
CRM_CHECK(rsc_id != NULL, return NULL);
if (controld_get_executor_state(rsc_id, false) != NULL) {
info = pcmk__assert_alloc(1, sizeof(lrmd_rsc_info_t));
info->id = pcmk__str_copy(rsc_id);
info->type = pcmk__str_copy(REMOTE_LRMD_RA);
info->standard = pcmk__str_copy(PCMK_RESOURCE_CLASS_OCF);
info->provider = pcmk__str_copy("pacemaker");
}
return info;
}
static gboolean
is_remote_ra_supported_action(const char *action)
{
return pcmk__str_any_of(action,
PCMK_ACTION_START,
PCMK_ACTION_STOP,
PCMK_ACTION_MONITOR,
PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_RELOAD_AGENT,
PCMK_ACTION_RELOAD,
NULL);
}
static GList *
fail_all_monitor_cmds(GList * list)
{
GList *rm_list = NULL;
remote_ra_cmd_t *cmd = NULL;
GList *gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms > 0)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
rm_list = g_list_append(rm_list, cmd);
}
}
for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, "Lost connection to remote executor");
crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
report_remote_ra_result(cmd);
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
/* frees only the list data, not the cmds */
g_list_free(rm_list);
return list;
}
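/* Note the two-pass structure above: matching commands are first collected
* into rm_list, and only then failed and removed, so the input list is
* never modified while it is being iterated.
*/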
static GList *
remove_cmd(GList * list, const char *action, guint interval_ms)
{
remote_ra_cmd_t *cmd = NULL;
GList *gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
break;
}
cmd = NULL;
}
if (cmd) {
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
return list;
}
int
remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, guint interval_ms)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_data_t *ra_data = NULL;
CRM_CHECK(rsc_id != NULL, return -EINVAL);
connection_rsc = controld_get_executor_state(rsc_id, false);
if (!connection_rsc || !connection_rsc->remote_ra_data) {
return -EINVAL;
}
ra_data = connection_rsc->remote_ra_data;
ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
interval_ms);
if (ra_data->cur_cmd &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
(pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
cmd_set_flags(ra_data->cur_cmd, cmd_cancel);
}
return 0;
}
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
const char *userdata)
{
GList *gIter = NULL;
remote_ra_cmd_t *cmd = NULL;
/* There are three places a potential duplicate monitor operation
* could exist:
* 1. the recurring_cmds list, where the op is waiting for its next interval
* 2. the cmds list, where the op is queued for immediate execution
* 3. cur_cmd, meaning the monitor op is in flight right now
*/
if (interval_ms == 0) {
return NULL;
}
if (ra_data->cur_cmd &&
!pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) &&
(ra_data->cur_cmd->interval_ms == interval_ms)
&& pcmk__str_eq(ra_data->cur_cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
cmd = ra_data->cur_cmd;
goto handle_dup;
}
for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
goto handle_dup;
}
}
for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
goto handle_dup;
}
}
return NULL;
handle_dup:
crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
cmd->rsc_id, PCMK_ACTION_MONITOR, interval_ms);
/* update the userdata */
if (userdata) {
free(cmd->userdata);
cmd->userdata = pcmk__str_copy(userdata);
}
/* if we've already reported success, generate a new call id */
if (pcmk_is_set(cmd->status, cmd_reported_success)) {
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
cmd_clear_flags(cmd, cmd_reported_success);
}
/* If interval_id is set, we are waiting for this cmd's next interval.
* Instead of waiting, cancel the timer and execute the action
* immediately. */
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
cmd->interval_id = 0;
recurring_helper(cmd);
}
return cmd;
}
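/* Only recurring monitors (interval_ms > 0) are ever merged; one-shot
* probes always get a fresh command. A merged duplicate reuses the
* existing call ID unless success was already reported, in which case a
* new call ID is generated so the result is reported as a distinct
* operation.
*/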
/*!
* \internal
* \brief Execute an action using the (internal) ocf:pacemaker:remote agent
*
* \param[in] lrm_state Executor state object for remote connection
* \param[in] rsc_id Connection resource ID
* \param[in] action Action to execute
* \param[in] userdata String to copy and pass to execution callback
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] timeout_ms Action timeout (in milliseconds)
* \param[in] start_delay_ms Delay (in milliseconds) before executing action
* \param[in,out] params Connection resource parameters
* \param[out] call_id Where to store call ID on success
*
* \return Standard Pacemaker return code
* \note This takes ownership of \p params, which should not be used or freed
* after calling this function.
*/
int
controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
const char *action, const char *userdata,
guint interval_ms, int timeout_ms,
int start_delay_ms, lrmd_key_value_t *params,
int *call_id)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_cmd_t *cmd = NULL;
remote_ra_data_t *ra_data = NULL;
*call_id = 0;
CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
&& (userdata != NULL) && (call_id != NULL),
lrmd_key_value_freeall(params); return EINVAL);
if (!is_remote_ra_supported_action(action)) {
lrmd_key_value_freeall(params);
return EOPNOTSUPP;
}
connection_rsc = controld_get_executor_state(rsc_id, false);
if (connection_rsc == NULL) {
lrmd_key_value_freeall(params);
return ENOTCONN;
}
remote_ra_data_init(connection_rsc);
ra_data = connection_rsc->remote_ra_data;
cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
if (cmd) {
*call_id = cmd->call_id;
lrmd_key_value_freeall(params);
return pcmk_rc_ok;
}
cmd = pcmk__assert_alloc(1, sizeof(remote_ra_cmd_t));
cmd->owner = pcmk__str_copy(lrm_state->node_name);
cmd->rsc_id = pcmk__str_copy(rsc_id);
cmd->action = pcmk__str_copy(action);
cmd->userdata = pcmk__str_copy(userdata);
cmd->interval_ms = interval_ms;
cmd->timeout = timeout_ms;
cmd->start_delay = start_delay_ms;
cmd->params = params;
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
*call_id = cmd->call_id;
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Immediately fail all monitors of a remote node, if proxied here
*
* \param[in] node_name Name of pacemaker_remote node
*/
void
remote_ra_fail(const char *node_name)
{
lrm_state_t *lrm_state = NULL;
CRM_CHECK(node_name != NULL, return);
lrm_state = controld_get_executor_state(node_name, false);
if (lrm_state && lrm_state_is_connected(lrm_state)) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
}
}
/* A guest node fencing implied by host fencing looks like a pseudo-event
* of roughly this shape (attributes abbreviated):
*
*  <pseudo_event id="103" operation="stonith"
*                operation_key="stonith-lxc1-off" on_node="lxc1">
*    <downed>
*      <node id="lxc1"/>
*    </downed>
*  </pseudo_event>
*/
#define XPATH_PSEUDO_FENCE "/" PCMK__XE_PSEUDO_EVENT \
"[@" PCMK_XA_OPERATION "='stonith']/" PCMK__XE_DOWNED "/" PCMK_XE_NODE
/*!
* \internal
* \brief Check a pseudo-action for Pacemaker Remote node side effects
*
* \param[in,out] xml XML of pseudo-action to check
*/
void
remote_ra_process_pseudo(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
if (numXpathResults(search) == 1) {
xmlNode *result = getXpathResult(search, 0);
/* Normally, we handle the necessary side effects of a guest node stop
* action when reporting the remote agent's result. However, if the stop
* is implied due to fencing, it will be a fencing pseudo-event, and
* there won't be a result to report. Handle that case here.
*
* This will result in a duplicate call to remote_node_down() if the
* guest stop was real instead of implied, but that shouldn't hurt.
*
* There is still one corner case that isn't handled: if a guest node
* isn't running any resources when its host is fenced, it will appear
* to be cleanly stopped, so there will be no pseudo-fence, and our
* peer cache state will be incorrect unless and until the guest is
* recovered.
*/
if (result) {
const char *remote = pcmk__xe_id(result);
if (remote) {
remote_node_down(remote, DOWN_ERASE_LRM);
}
}
}
freeXpathObject(search);
}
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
xmlNode *update, *state;
int call_opt;
pcmk__node_status_t *node = NULL;
call_opt = crmd_cib_smart_opt();
node = pcmk__cluster_lookup_remote_node(lrm_state->node_name);
CRM_CHECK(node != NULL, return);
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
state = create_node_state_update(node, node_update_none, update,
__func__);
crm_xml_add(state, PCMK__XA_NODE_IN_MAINTENANCE, (maintenance? "1" : "0"));
if (controld_update_cib(PCMK_XE_STATUS, update, call_opt,
NULL) == pcmk_rc_ok) {
/* TODO: still not 100% sure that async update will succeed ... */
if (maintenance) {
lrm_remote_set_flags(lrm_state, remote_in_maint);
} else {
lrm_remote_clear_flags(lrm_state, remote_in_maint);
}
}
pcmk__xml_free(update);
}
#define XPATH_PSEUDO_MAINTENANCE "//" PCMK__XE_PSEUDO_EVENT \
"[@" PCMK_XA_OPERATION "='" PCMK_ACTION_MAINTENANCE_NODES "']/" \
PCMK__XE_MAINTENANCE
/*!
* \internal
* \brief Check a pseudo-action holding updates for maintenance state
*
* \param[in,out] xml XML of pseudo-action to check
*/
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
if (numXpathResults(search) == 1) {
xmlNode *node;
int cnt = 0, cnt_remote = 0;
for (node = pcmk__xe_first_child(getXpathResult(search, 0),
PCMK_XE_NODE, NULL, NULL);
node != NULL; node = pcmk__xe_next_same(node)) {
lrm_state_t *lrm_state = NULL;
const char *id = pcmk__xe_id(node);
cnt++;
if (id == NULL) {
continue; // Shouldn't be possible
}
lrm_state = controld_get_executor_state(id, false);
if (lrm_state && lrm_state->remote_ra_data &&
pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) {
const char *in_maint_s = NULL;
int in_maint;
cnt_remote++;
in_maint_s = crm_element_value(node,
PCMK__XA_NODE_IN_MAINTENANCE);
pcmk__scan_min_int(in_maint_s, &in_maint, 0);
remote_ra_maintenance(lrm_state, in_maint);
}
}
crm_trace("Action holds %d nodes (%d remotes found) adjusting "
PCMK_OPT_MAINTENANCE_MODE,
cnt, cnt_remote);
}
freeXpathObject(search);
}
gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return pcmk_is_set(ra_data->status, remote_in_maint);
}
gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return pcmk_is_set(ra_data->status, controlling_guest);
}
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
index bb80a161e8..9ac0a2c4a2 100644
--- a/daemons/controld/controld_te_events.c
+++ b/daemons/controld/controld_te_events.c
@@ -1,613 +1,611 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
/*!
* \internal
* \brief Action numbers of outside events processed in current update diff
*
* This table is to be used as a set. It should be empty when the transitioner
* begins processing a CIB update diff. It ensures that if there are multiple
* events (for example, "_last_0" and "_last_failure_0") for the same action,
* only one of them updates the failcount. Events that originate outside the
* cluster can't be confirmed, since they're not in the transition graph.
*/
static GHashTable *outside_events = NULL;
/*!
* \internal
* \brief Empty the hash table containing action numbers of outside events
*/
void
controld_remove_all_outside_events(void)
{
if (outside_events != NULL) {
g_hash_table_remove_all(outside_events);
}
}
/*!
* \internal
* \brief Destroy the hash table containing action numbers of outside events
*/
void
controld_destroy_outside_events_table(void)
{
if (outside_events != NULL) {
g_hash_table_destroy(outside_events);
outside_events = NULL;
}
}
/*!
* \internal
* \brief Add an outside event's action number to a set
*
* \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
* event was not already in the set, or \p pcmk_rc_already otherwise.
*/
static int
record_outside_event(gint action_num)
{
if (outside_events == NULL) {
outside_events = g_hash_table_new(NULL, NULL);
}
if (g_hash_table_add(outside_events, GINT_TO_POINTER(action_num))) {
return pcmk_rc_ok;
}
return pcmk_rc_already;
}
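/* Usage sketch (mirroring the caller in process_graph_event()):
*
*   if (record_outside_event(action_num) != pcmk_rc_ok) {
*       // Duplicate history entry for the same outside event; skip it
*   }
*/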
gboolean
fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node)
{
const char *target_uuid = NULL;
const char *router = NULL;
const char *router_uuid = NULL;
xmlNode *last_action = NULL;
GList *gIter = NULL;
GList *gIter2 = NULL;
if (graph == NULL || graph->complete) {
return FALSE;
}
gIter = graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
/* We've already been here */
continue;
}
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
if ((action->type == pcmk__pseudo_graph_action)
|| pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
} else if (action->type == pcmk__cluster_graph_action) {
const char *task = crm_element_value(action->xml,
PCMK_XA_OPERATION);
if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
continue;
}
}
target_uuid = crm_element_value(action->xml,
PCMK__META_ON_NODE_UUID);
router = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if (router) {
const pcmk__node_status_t *node =
pcmk__get_node(0, router, NULL,
pcmk__node_search_cluster_member);
if (node != NULL) {
router_uuid = node->xml_id;
}
}
if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
last_action = action->xml;
stop_te_timer(action);
pcmk__update_graph(graph, action);
if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
crm_notice("Action %d (%s) was pending on %s (offline)",
action->id,
crm_element_value(action->xml,
PCMK__XA_OPERATION_KEY),
down_node);
} else {
crm_info("Action %d (%s) is scheduled for %s (offline)",
action->id,
crm_element_value(action->xml, PCMK__XA_OPERATION_KEY),
down_node);
}
}
}
}
if (last_action != NULL) {
crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Node failure", last_action);
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Update failure-related node attributes if warranted
*
* \param[in] event XML describing operation that (maybe) failed
* \param[in] event_node_uuid Node that event occurred on
* \param[in] rc Actual operation return code
* \param[in] target_rc Expected operation return code
* \param[in] do_update If TRUE, do update regardless of operation type
* \param[in] ignore_failures If TRUE, update last failure but not fail count
*
* \return TRUE if this was not a direct nack, a success, or an LRM status refresh
*/
static gboolean
update_failcount(const xmlNode *event, const char *event_node_uuid, int rc,
int target_rc, gboolean do_update, gboolean ignore_failures)
{
guint interval_ms = 0;
char *task = NULL;
char *rsc_id = NULL;
const char *value = NULL;
const char *id = crm_element_value(event, PCMK__XA_OPERATION_KEY);
const char *on_uname = pcmk__node_name_from_uuid(event_node_uuid);
const char *origin = crm_element_value(event, PCMK_XA_CRM_DEBUG_ORIGIN);
// Nothing needs to be done for success or status refresh
if (rc == target_rc) {
return FALSE;
} else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
id, rc, on_uname);
return FALSE;
}
/* Sanity check */
CRM_CHECK(on_uname != NULL, return TRUE);
CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
crm_err("Couldn't parse: %s", pcmk__xe_id(event)); goto bail);
/* Decide whether update is necessary and what value to use */
if ((interval_ms > 0)
|| pcmk__str_eq(task, PCMK_ACTION_PROMOTE, pcmk__str_none)
|| pcmk__str_eq(task, PCMK_ACTION_DEMOTE, pcmk__str_none)) {
do_update = TRUE;
} else if (pcmk__str_eq(task, PCMK_ACTION_START, pcmk__str_none)) {
do_update = TRUE;
value = pcmk__s(controld_globals.transition_graph->failed_start_offset,
PCMK_VALUE_INFINITY);
} else if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_none)) {
do_update = TRUE;
value = pcmk__s(controld_globals.transition_graph->failed_stop_offset,
PCMK_VALUE_INFINITY);
}
if (do_update) {
pcmk__attrd_query_pair_t *fail_pair = NULL;
pcmk__attrd_query_pair_t *last_pair = NULL;
char *fail_name = NULL;
char *last_name = NULL;
GList *attrs = NULL;
uint32_t opts = pcmk__node_attr_none;
char *now = pcmk__ttoa(time(NULL));
// Fail count will be either incremented or set to infinity
if (!pcmk_str_is_infinity(value)) {
value = PCMK_XA_VALUE "++";
}
if (g_hash_table_lookup(pcmk__remote_peer_cache, event_node_uuid)) {
opts |= pcmk__node_attr_remote;
}
crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
(ignore_failures? "last failure" : "failcount"),
rsc_id, on_uname, task, rc, value, now);
/* Update the fail count, if we're not ignoring failures */
if (!ignore_failures) {
fail_pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));
fail_name = pcmk__failcount_name(rsc_id, task, interval_ms);
fail_pair->name = fail_name;
fail_pair->value = value;
fail_pair->node = on_uname;
attrs = g_list_prepend(attrs, fail_pair);
}
/* Update the last failure time (even if we're ignoring failures,
* so that failure can still be detected and shown, e.g. by crm_mon)
*/
last_pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));
last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
last_pair->name = last_name;
last_pair->value = now;
last_pair->node = on_uname;
attrs = g_list_prepend(attrs, last_pair);
update_attrd_list(attrs, opts);
free(fail_name);
free(fail_pair);
free(last_name);
free(last_pair);
g_list_free(attrs);
free(now);
}
bail:
free(rsc_id);
free(task);
return TRUE;
}
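/* As a hypothetical example: if a recurring 10s monitor for resource rsc1
* fails on node1, this queues two attribute updates for node1: one
* incrementing the fail count for rsc1's 10s monitor (or setting it to
* infinity for failed starts/stops, per the offsets above), and one
* setting the corresponding last-failure attribute to the current time.
*/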
pcmk__graph_action_t *
controld_get_action(int id)
{
for (GList *item = controld_globals.transition_graph->synapses;
item != NULL; item = item->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data;
for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data;
if (action->id == id) {
return action;
}
}
}
return NULL;
}
pcmk__graph_action_t *
get_cancel_action(const char *id, const char *node)
{
GList *gIter = NULL;
GList *gIter2 = NULL;
gIter = controld_globals.transition_graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
const char *task = NULL;
const char *target = NULL;
pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
task = crm_element_value(action->xml, PCMK_XA_OPERATION);
if (!pcmk__str_eq(PCMK_ACTION_CANCEL, task, pcmk__str_casei)) {
continue;
}
task = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
crm_trace("Wrong key %s for %s on %s", task, id, node);
continue;
}
target = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
crm_trace("Wrong node %s for %s on %s", target, id, node);
continue;
}
crm_trace("Found %s on %s", id, node);
return action;
}
}
return NULL;
}
bool
confirm_cancel_action(const char *id, const char *node_id)
{
const char *op_key = NULL;
const char *node_name = NULL;
pcmk__graph_action_t *cancel = get_cancel_action(id, node_id);
if (cancel == NULL) {
return FALSE;
}
op_key = crm_element_value(cancel->xml, PCMK__XA_OPERATION_KEY);
node_name = crm_element_value(cancel->xml, PCMK__META_ON_NODE);
stop_te_timer(cancel);
te_action_confirmed(cancel, controld_globals.transition_graph);
crm_info("Cancellation of %s on %s confirmed (action %d)",
op_key, node_name, cancel->id);
return TRUE;
}
/* Downed nodes are listed like: <downed><node id="UUID1" .../>...</downed> */
#define XPATH_DOWNED "//" PCMK__XE_DOWNED \
"/" PCMK_XE_NODE "[@" PCMK_XA_ID "='%s']"
/*!
* \brief Find a transition event that would have made a specified node down
*
* \param[in] target UUID of node to match
*
* \return Matching event if found, NULL otherwise
*/
pcmk__graph_action_t *
match_down_event(const char *target)
{
pcmk__graph_action_t *match = NULL;
xmlXPathObjectPtr xpath_ret = NULL;
GList *gIter, *gIter2;
char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
for (gIter = controld_globals.transition_graph->synapses;
gIter != NULL && match == NULL;
gIter = gIter->next) {
for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions;
gIter2 != NULL && match == NULL;
gIter2 = gIter2->next) {
match = (pcmk__graph_action_t *) gIter2->data;
if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
xpath_ret = xpath_search(match->xml, xpath);
if (numXpathResults(xpath_ret) < 1) {
match = NULL;
}
freeXpathObject(xpath_ret);
} else {
// Only actions that were actually started can match
match = NULL;
}
}
}
free(xpath);
if (match != NULL) {
crm_debug("Shutdown action %d (%s) found for node %s", match->id,
crm_element_value(match->xml, PCMK__XA_OPERATION_KEY),
target);
} else {
crm_debug("No reason to expect node %s to be down", target);
}
return match;
}
void
process_graph_event(xmlNode *event, const char *event_node)
{
int rc = -1; // Actual result
int target_rc = -1; // Expected result
int status = -1; // Executor status
int callid = -1; // Executor call ID
int transition_num = -1; // Transition number
int action_num = -1; // Action number within transition
char *update_te_uuid = NULL;
bool ignore_failures = FALSE;
const char *id = NULL;
const char *desc = NULL;
const char *magic = NULL;
const char *uname = NULL;
pcmk__assert(event != NULL);
/* The event XML is an <lrm_rsc_op> entry; its transition-key attribute
* encodes the transition UUID and number, the action number within the
* transition, and the expected result, all of which are decoded below.
*/
magic = crm_element_value(event, PCMK__XA_TRANSITION_KEY);
if (magic == NULL) {
/* non-change */
return;
}
crm_element_value_int(event, PCMK__XA_OP_STATUS, &status);
if (status == PCMK_EXEC_PENDING) {
return;
}
id = crm_element_value(event, PCMK__XA_OPERATION_KEY);
crm_element_value_int(event, PCMK__XA_RC_CODE, &rc);
crm_element_value_int(event, PCMK__XA_CALL_ID, &callid);
rc = pcmk__effective_rc(rc);
if (decode_transition_key(magic, &update_te_uuid, &transition_num,
&action_num, &target_rc) == FALSE) {
// decode_transition_key() already logged the bad key
crm_err("Can't process action %s result: Incompatible versions? "
QB_XS " call-id=%d", id, callid);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Bad event", event);
return;
}
if (transition_num == -1) {
// E.g. crm_resource --fail
if (record_outside_event(action_num) != pcmk_rc_ok) {
crm_debug("Outside event with transition key '%s' has already been "
"processed", magic);
goto bail;
}
desc = "initiated outside of the cluster";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Unexpected event", event);
} else if ((action_num < 0)
|| !pcmk__str_eq(update_te_uuid, controld_globals.te_uuid,
pcmk__str_none)) {
desc = "initiated by a different DC";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Foreign event", event);
} else if ((controld_globals.transition_graph->id != transition_num)
|| controld_globals.transition_graph->complete) {
// Action is not from currently active transition
guint interval_ms = 0;
if (parse_op_key(id, NULL, NULL, &interval_ms)
&& (interval_ms != 0)) {
/* Recurring actions have the transition number they were first
* scheduled in.
*/
if (status == PCMK_EXEC_CANCELLED) {
confirm_cancel_action(id, get_node_id(event));
goto bail;
}
desc = "arrived after initial scheduling";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Change in recurring result", event);
} else if (controld_globals.transition_graph->id != transition_num) {
desc = "arrived really late";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Old event", event);
} else {
desc = "arrived late";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Inactive graph", event);
}
} else {
// Event is result of an action from currently active transition
pcmk__graph_action_t *action = controld_get_action(action_num);
if (action == NULL) {
// Should never happen
desc = "unknown";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Unknown event", event);
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
/* Nothing further needs to be done if the action has already been
* confirmed. This can happen, e.g., when processing an "xxx_last_0" or
* "xxx_last_failure_0" record in addition to the main history record,
* which would otherwise result in incorrectly bumping the fail count
* twice.
*/
crm_log_xml_debug(event, "Event already confirmed:");
goto bail;
} else {
/* An action result needs to be confirmed.
* (This is the only case where desc == NULL.)
*/
if (pcmk__str_eq(crm_meta_value(action->params, PCMK_META_ON_FAIL),
PCMK_VALUE_IGNORE, pcmk__str_casei)) {
ignore_failures = TRUE;
} else if (rc != target_rc) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
}
stop_te_timer(action);
te_action_confirmed(action, controld_globals.transition_graph);
if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
abort_transition(action->synapse->priority + 1,
pcmk__graph_restart, "Event failed", event);
}
}
}
if (id == NULL) {
id = "unknown action";
}
uname = crm_element_value(event, PCMK__META_ON_NODE);
if (uname == NULL) {
uname = "unknown node";
}
if (status == PCMK_EXEC_INVALID) {
// We couldn't attempt the action
crm_info("Transition %d action %d (%s on %s): %s",
transition_num, action_num, id, uname,
pcmk_exec_status_str(status));
} else if (desc && update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), FALSE)) {
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
QB_XS " target-rc=%d rc=%d call-id=%d event='%s'",
transition_num, action_num, id, uname,
- services_ocf_exitcode_str(target_rc),
- services_ocf_exitcode_str(rc),
+ crm_exit_str(target_rc), crm_exit_str(rc),
target_rc, rc, callid, desc);
} else if (desc) {
crm_info("Transition %d action %d (%s on %s): %s "
QB_XS " rc=%d target-rc=%d call-id=%d",
transition_num, action_num, id, uname,
desc, rc, target_rc, callid);
} else if (rc == target_rc) {
crm_info("Transition %d action %d (%s on %s) confirmed: %s "
QB_XS " rc=%d call-id=%d",
transition_num, action_num, id, uname,
- services_ocf_exitcode_str(rc), rc, callid);
+ crm_exit_str(rc), rc, callid);
} else {
update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), ignore_failures);
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
QB_XS " target-rc=%d rc=%d call-id=%d",
transition_num, action_num, id, uname,
- services_ocf_exitcode_str(target_rc),
- services_ocf_exitcode_str(rc),
+ crm_exit_str(target_rc), crm_exit_str(rc),
target_rc, rc, callid);
}
bail:
free(update_te_uuid);
}
diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c
index 974794b1be..cad5f813ec 100644
--- a/daemons/execd/cts-exec-helper.c
+++ b/daemons/execd/cts-exec-helper.c
@@ -1,626 +1,626 @@
/*
* Copyright 2012-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define SUMMARY "cts-exec-helper - inject commands into the Pacemaker executor and watch for events"
static int exec_call_id = 0;
static gboolean start_test(gpointer user_data);
static void try_connect(void);
static char *key = NULL;
static char *val = NULL;
static struct {
int verbose;
int quiet;
guint interval_ms;
int timeout;
int start_delay;
int cancel_call_id;
gboolean no_wait;
gboolean is_running;
gboolean no_connect;
int exec_call_opts;
const char *api_call;
const char *rsc_id;
const char *provider;
const char *class;
const char *type;
const char *action;
const char *listen;
gboolean use_tls;
lrmd_key_value_t *params;
} options;
static gboolean
interval_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
return pcmk_parse_interval_spec(optarg,
&options.interval_ms) == pcmk_rc_ok;
}
static gboolean
notify_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (pcmk__str_any_of(option_name, "--notify-orig", "-n", NULL)) {
options.exec_call_opts = lrmd_opt_notify_orig_only;
} else if (pcmk__str_any_of(option_name, "--notify-changes", "-o", NULL)) {
options.exec_call_opts = lrmd_opt_notify_changes_only;
}
return TRUE;
}
static gboolean
param_key_val_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (pcmk__str_any_of(option_name, "--param-key", "-k", NULL)) {
pcmk__str_update(&key, optarg);
} else if (pcmk__str_any_of(option_name, "--param-val", "-v", NULL)) {
pcmk__str_update(&val, optarg);
}
if (key != NULL && val != NULL) {
options.params = lrmd_key_value_add(options.params, key, val);
pcmk__str_update(&key, NULL);
pcmk__str_update(&val, NULL);
}
return TRUE;
}
static GOptionEntry basic_entries[] = {
{ "api-call", 'c', 0, G_OPTION_ARG_STRING, &options.api_call,
"Directly relates to executor API functions",
NULL },
{ "is-running", 'R', 0, G_OPTION_ARG_NONE, &options.is_running,
"Determine if a resource is registered and running",
NULL },
{ "listen", 'l', 0, G_OPTION_ARG_STRING, &options.listen,
"Listen for a specific event string",
NULL },
{ "no-wait", 'w', 0, G_OPTION_ARG_NONE, &options.no_wait,
"Make api call and do not wait for result",
NULL },
{ "notify-changes", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb,
"Only notify client changes to recurring operations",
NULL },
{ "notify-orig", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb,
"Only notify this client of the results of an API action",
NULL },
{ "tls", 'S', 0, G_OPTION_ARG_NONE, &options.use_tls,
"Use TLS backend for local connection",
NULL },
{ NULL }
};
static GOptionEntry api_call_entries[] = {
{ "action", 'a', 0, G_OPTION_ARG_STRING, &options.action,
NULL, NULL },
{ "cancel-call-id", 'x', 0, G_OPTION_ARG_INT, &options.cancel_call_id,
NULL, NULL },
{ "class", 'C', 0, G_OPTION_ARG_STRING, &options.class,
NULL, NULL },
{ "interval", 'i', 0, G_OPTION_ARG_CALLBACK, interval_cb,
NULL, NULL },
{ "param-key", 'k', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb,
NULL, NULL },
{ "param-val", 'v', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb,
NULL, NULL },
{ "provider", 'P', 0, G_OPTION_ARG_STRING, &options.provider,
NULL, NULL },
{ "rsc-id", 'r', 0, G_OPTION_ARG_STRING, &options.rsc_id,
NULL, NULL },
{ "start-delay", 's', 0, G_OPTION_ARG_INT, &options.start_delay,
NULL, NULL },
{ "timeout", 't', 0, G_OPTION_ARG_INT, &options.timeout,
NULL, NULL },
{ "type", 'T', 0, G_OPTION_ARG_STRING, &options.type,
NULL, NULL },
{ NULL }
};
static GMainLoop *mainloop = NULL;
static lrmd_t *lrmd_conn = NULL;
static char event_buf_v0[1024];
static crm_exit_t
test_exit(crm_exit_t exit_code)
{
lrmd_api_delete(lrmd_conn);
return crm_exit(exit_code);
}
#define print_result(fmt, args...) \
if (!options.quiet) { \
printf(fmt "\n" , ##args); \
}
#define report_event(event) \
snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \
lrmd_event_type2str(event->type), \
event->rsc_id, \
event->op_type ? event->op_type : "none", \
- services_ocf_exitcode_str(event->rc), \
+ crm_exit_str((crm_exit_t) event->rc), \
pcmk_exec_status_str(event->op_status)); \
crm_info("%s", event_buf_v0);
static void
test_shutdown(int nsig)
{
lrmd_api_delete(lrmd_conn);
lrmd_conn = NULL;
}
static void
read_events(lrmd_event_data_t * event)
{
report_event(event);
if (options.listen) {
if (pcmk__str_eq(options.listen, event_buf_v0, pcmk__str_casei)) {
print_result("LISTEN EVENT SUCCESSFUL");
test_exit(CRM_EX_OK);
}
}
if (exec_call_id && (event->call_id == exec_call_id)) {
if (event->op_status == 0 && event->rc == 0) {
print_result("API-CALL SUCCESSFUL for 'exec'");
} else {
print_result("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s",
event->rc, pcmk_exec_status_str(event->op_status));
test_exit(CRM_EX_ERROR);
}
if (!options.listen) {
test_exit(CRM_EX_OK);
}
}
}
static gboolean
timeout_err(gpointer data)
{
print_result("LISTEN EVENT FAILURE - timeout occurred, never found");
test_exit(CRM_EX_TIMEOUT);
return FALSE;
}
static void
connection_events(lrmd_event_data_t * event)
{
int rc = event->connection_rc;
if (event->type != lrmd_event_connect) {
/* ignore */
return;
}
if (!rc) {
crm_info("Executor client connection established");
start_test(NULL);
return;
} else {
crm_notice("Executor client connection failed, retrying");
sleep(1);
try_connect();
}
}
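/*!
* \internal
* \brief Try to establish an executor API connection asynchronously
*
* \note num_tries is static, so the budget of 10 attempts is shared across
* all invocations, including retries initiated from connection_events().
*/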
static void
try_connect(void)
{
int tries = 10;
static int num_tries = 0;
int rc = 0;
lrmd_conn->cmds->set_callback(lrmd_conn, connection_events);
for (; num_tries < tries; num_tries++) {
rc = lrmd_conn->cmds->connect_async(lrmd_conn, crm_system_name, 3000);
if (!rc) {
return; /* we'll hear back in async callback */
}
sleep(1);
}
print_result("API CONNECTION FAILURE");
test_exit(CRM_EX_ERROR);
}
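/*!
* \internal
* \brief Run the requested API call (mainloop trigger and connect callback)
*
* If a connection is required but not yet established, initiate an async
* connection and return; connection_events() calls back into this function
* once the connection succeeds.
*/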
static gboolean
start_test(gpointer user_data)
{
int rc = 0;
if (!options.no_connect) {
if (!lrmd_conn->cmds->is_connected(lrmd_conn)) {
try_connect();
/* async connect -- this function will get called back into */
return 0;
}
}
lrmd_conn->cmds->set_callback(lrmd_conn, read_events);
if (options.timeout) {
g_timeout_add(options.timeout, timeout_err, NULL);
}
if (!options.api_call) {
return 0;
}
if (pcmk__str_eq(options.api_call, "exec", pcmk__str_casei)) {
rc = lrmd_conn->cmds->exec(lrmd_conn,
options.rsc_id,
options.action,
NULL,
options.interval_ms,
options.timeout,
options.start_delay,
options.exec_call_opts,
options.params);
if (rc > 0) {
exec_call_id = rc;
print_result("API-CALL 'exec' action pending, waiting on response");
}
} else if (pcmk__str_eq(options.api_call, "register_rsc", pcmk__str_casei)) {
rc = lrmd_conn->cmds->register_rsc(lrmd_conn,
options.rsc_id,
options.class, options.provider, options.type, 0);
} else if (pcmk__str_eq(options.api_call, "get_rsc_info", pcmk__str_casei)) {
lrmd_rsc_info_t *rsc_info;
rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0);
if (rsc_info) {
print_result("RSC_INFO: id:%s class:%s provider:%s type:%s",
rsc_info->id, rsc_info->standard,
(rsc_info->provider? rsc_info->provider : ""),
rsc_info->type);
lrmd_free_rsc_info(rsc_info);
rc = pcmk_ok;
} else {
rc = -1;
}
} else if (pcmk__str_eq(options.api_call, "unregister_rsc", pcmk__str_casei)) {
rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0);
} else if (pcmk__str_eq(options.api_call, "cancel", pcmk__str_casei)) {
rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action,
options.interval_ms);
} else if (pcmk__str_eq(options.api_call, "metadata", pcmk__str_casei)) {
char *output = NULL;
rc = lrmd_conn->cmds->get_metadata(lrmd_conn,
options.class,
options.provider, options.type, &output, 0);
if (rc == pcmk_ok) {
print_result("%s", output);
free(output);
}
} else if (pcmk__str_eq(options.api_call, "list_agents", pcmk__str_casei)) {
lrmd_list_t *list = NULL;
lrmd_list_t *iter = NULL;
rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider);
if (rc > 0) {
print_result("%d agents found", rc);
for (iter = list; iter != NULL; iter = iter->next) {
print_result("%s", iter->val);
}
lrmd_list_freeall(list);
rc = 0;
} else {
print_result("API_CALL FAILURE - no agents found");
rc = -1;
}
} else if (pcmk__str_eq(options.api_call, "list_ocf_providers", pcmk__str_casei)) {
lrmd_list_t *list = NULL;
lrmd_list_t *iter = NULL;
rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list);
if (rc > 0) {
print_result("%d providers found", rc);
for (iter = list; iter != NULL; iter = iter->next) {
print_result("%s", iter->val);
}
lrmd_list_freeall(list);
rc = 0;
} else {
print_result("API_CALL FAILURE - no providers found");
rc = -1;
}
} else if (pcmk__str_eq(options.api_call, "list_standards", pcmk__str_casei)) {
lrmd_list_t *list = NULL;
lrmd_list_t *iter = NULL;
rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list);
if (rc > 0) {
print_result("%d standards found", rc);
for (iter = list; iter != NULL; iter = iter->next) {
print_result("%s", iter->val);
}
lrmd_list_freeall(list);
rc = 0;
} else {
print_result("API_CALL FAILURE - no providers found");
rc = -1;
}
} else if (pcmk__str_eq(options.api_call, "get_recurring_ops", pcmk__str_casei)) {
GList *op_list = NULL;
GList *op_item = NULL;
rc = lrmd_conn->cmds->get_recurring_ops(lrmd_conn, options.rsc_id, 0, 0,
&op_list);
for (op_item = op_list; op_item != NULL; op_item = op_item->next) {
lrmd_op_info_t *op_info = op_item->data;
print_result("RECURRING_OP: %s_%s_%s timeout=%sms",
op_info->rsc_id, op_info->action,
op_info->interval_ms_s, op_info->timeout_ms_s);
lrmd_free_op_info(op_info);
}
g_list_free(op_list);
} else if (options.api_call) {
print_result("API-CALL FAILURE unknown action '%s'", options.action);
test_exit(CRM_EX_ERROR);
}
if (rc < 0) {
print_result("API-CALL FAILURE for '%s' api_rc:%d",
options.api_call, rc);
test_exit(CRM_EX_ERROR);
}
if (options.api_call && rc == pcmk_ok) {
print_result("API-CALL SUCCESSFUL for '%s'", options.api_call);
if (!options.listen) {
test_exit(CRM_EX_OK);
}
}
if (options.no_wait) {
/* just make the call and exit regardless of anything else. */
test_exit(CRM_EX_OK);
}
return 0;
}
/*!
* \internal
* \brief Generate resource parameters from CIB if none explicitly given
*
* \return Standard Pacemaker return code
*/
static int
generate_params(void)
{
int rc = pcmk_rc_ok;
pcmk_scheduler_t *scheduler = NULL;
xmlNode *cib_xml_copy = NULL;
pcmk_resource_t *rsc = NULL;
GHashTable *params = NULL;
GHashTable *meta = NULL;
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
if (options.params != NULL) {
return pcmk_rc_ok; // User specified parameters explicitly
}
// Retrieve and update CIB
rc = cib__signon_query(NULL, NULL, &cib_xml_copy);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = pcmk__update_configured_schema(&cib_xml_copy, false);
if (rc != pcmk_rc_ok) {
return rc;
}
// Calculate cluster status
scheduler = pe_new_working_set();
if (scheduler == NULL) {
crm_crit("Could not allocate scheduler data");
return ENOMEM;
}
pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_counts);
scheduler->input = cib_xml_copy;
scheduler->priv->now = crm_time_new(NULL);
cluster_status(scheduler);
// Find resource in CIB
rsc = pe_find_resource_with_flags(scheduler->priv->resources,
options.rsc_id,
pcmk_rsc_match_history
|pcmk_rsc_match_basename);
if (rsc == NULL) {
crm_err("Resource does not exist in config");
pe_free_working_set(scheduler);
return EINVAL;
}
// Add resource instance parameters to options.params
params = pe_rsc_params(rsc, NULL, scheduler);
if (params != NULL) {
g_hash_table_iter_init(&iter, params);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
options.params = lrmd_key_value_add(options.params, key, value);
}
}
// Add resource meta-attributes to options.params
meta = pcmk__strkey_table(free, free);
get_meta_attributes(meta, rsc, NULL, scheduler);
g_hash_table_iter_init(&iter, meta);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
char *crm_name = crm_meta_name(key);
options.params = lrmd_key_value_add(options.params, crm_name, value);
free(crm_name);
}
g_hash_table_destroy(meta);
pe_free_working_set(scheduler);
return rc;
}
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
GOptionContext *context = NULL;
context = pcmk__build_arg_context(args, NULL, group, NULL);
pcmk__add_main_args(context, basic_entries);
pcmk__add_arg_group(context, "api-call", "API Call Options:",
"Parameters for api-call option", api_call_entries);
return context;
}
int
main(int argc, char **argv)
{
GError *error = NULL;
crm_exit_t exit_code = CRM_EX_OK;
crm_trigger_t *trig = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
/* Typically we'd pass all the single character options that take an argument
* as the second parameter here (and there's a bunch of those in this tool).
* However, we control how this program is called so we can just not call it
* in a way where the preprocessing ever matters.
*/
gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
GOptionContext *context = build_arg_context(args, NULL);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
/* We have to use crm_log_init here to set up the logging because there's
* different handling for daemons vs. command line programs, and
* pcmk__cli_init_logging is set up to only handle the latter.
*/
crm_log_init(NULL, LOG_INFO, TRUE, (args->verbosity? TRUE : FALSE), argc,
argv, FALSE);
for (int i = 0; i < args->verbosity; i++) {
crm_bump_log_level(argc, argv);
}
if (!options.listen && pcmk__strcase_any_of(options.api_call, "metadata", "list_agents",
"list_standards", "list_ocf_providers", NULL)) {
options.no_connect = TRUE;
}
if (options.is_running) {
int rc = pcmk_rc_ok;
if (options.rsc_id == NULL) {
exit_code = CRM_EX_USAGE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"--is-running requires --rsc-id");
goto done;
}
options.interval_ms = 0;
if (options.timeout == 0) {
options.timeout = 30000;
}
rc = generate_params();
if (rc != pcmk_rc_ok) {
exit_code = pcmk_rc2exitc(rc);
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Can not determine resource status: "
"unable to get parameters from CIB");
goto done;
}
options.api_call = "exec";
options.action = PCMK_ACTION_MONITOR;
options.exec_call_opts = lrmd_opt_notify_orig_only;
}
if (!options.api_call && !options.listen) {
exit_code = CRM_EX_USAGE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Must specify at least one of --api-call, --listen, "
"or --is-running");
goto done;
}
if (options.use_tls) {
lrmd_conn = lrmd_remote_api_new(NULL, "localhost", 0);
} else {
lrmd_conn = lrmd_api_new();
}
trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL);
mainloop_set_trigger(trig);
mainloop_add_signal(SIGTERM, test_shutdown);
crm_info("Starting");
mainloop = g_main_loop_new(NULL, FALSE);
g_main_loop_run(mainloop);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
free(key);
free(val);
pcmk__output_and_clear_error(&error, NULL);
return test_exit(exit_code);
}
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
index 88259a7c29..482707455a 100644
--- a/daemons/execd/execd_commands.c
+++ b/daemons/execd/execd_commands.c
@@ -1,1942 +1,1944 @@
/*
* Copyright 2012-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
// Check whether we have a high-resolution monotonic clock
#undef PCMK__TIME_USE_CGT
#if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
# define PCMK__TIME_USE_CGT
# include <time.h> /* clock_gettime */
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "pacemaker-execd.h"
GHashTable *rsc_list = NULL;
typedef struct lrmd_cmd_s {
int timeout;
guint interval_ms;
int start_delay;
int timeout_orig;
int call_id;
int call_opts;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
int service_flags;
char *client_id;
char *origin;
char *rsc_id;
char *action;
char *real_action;
char *userdata_str;
pcmk__action_result_t result;
/* We can track operation queue time and run time, to be saved with the CIB
* resource history (and displayed in cluster status). We need
* high-resolution monotonic time for this purpose, so we use
* clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
* is disabled).
*
* However, we also need epoch timestamps for recording the time the command
* last ran and the time its return value last changed, for use in time
* displays (as opposed to interval calculations). We keep time_t values for
* this purpose.
*
* The last run time is used for both purposes, so we keep redundant
* monotonic and epoch values for this. Technically the two could represent
* different times, but since time_t has only second resolution and the
* values are used for distinct purposes, that is not significant.
*/
#ifdef PCMK__TIME_USE_CGT
/* Recurring and systemd operations may involve more than one executor
* command per operation, so they need info about the original and the most
* recent.
*/
struct timespec t_first_run; // When op first ran
struct timespec t_run; // When op most recently ran
struct timespec t_first_queue; // When op was first queued
struct timespec t_queue; // When op was most recently queued
#endif
time_t epoch_last_run; // Epoch timestamp of when op last ran
time_t epoch_rcchange; // Epoch timestamp of when rc last changed
bool first_notify_sent;
int last_notify_rc;
int last_notify_op_status;
int last_pid;
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean execute_resource_action(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
#ifdef PCMK__TIME_USE_CGT
/*!
* \internal
* \brief Check whether a struct timespec has been set
*
* \param[in] timespec Time to check
*
* \return true if timespec has been set (i.e. is nonzero), false otherwise
*/
static inline bool
time_is_set(const struct timespec *timespec)
{
return (timespec != NULL) &&
((timespec->tv_sec != 0) || (timespec->tv_nsec != 0));
}
/*!
* \internal
* \brief Set a timespec (and its original if unset) to the current time
*
* \param[out] t_current Where to store current time
* \param[out] t_orig Where to copy t_current if unset
*/
static void
get_current_time(struct timespec *t_current, struct timespec *t_orig)
{
clock_gettime(CLOCK_MONOTONIC, t_current);
if ((t_orig != NULL) && !time_is_set(t_orig)) {
*t_orig = *t_current;
}
}
/*!
* \internal
* \brief Return difference between two times in milliseconds
*
* \param[in] now More recent time (or NULL to use current time)
* \param[in] old Earlier time
*
* \return milliseconds difference (or 0 if old is NULL or unset)
*
* \note Can overflow on 32-bit machines when the difference is around
* 24 days or more.
*/
static int
time_diff_ms(const struct timespec *now, const struct timespec *old)
{
int diff_ms = 0;
if (time_is_set(old)) {
struct timespec local_now = { 0, };
if (now == NULL) {
clock_gettime(CLOCK_MONOTONIC, &local_now);
now = &local_now;
}
diff_ms = (now->tv_sec - old->tv_sec) * 1000
+ (now->tv_nsec - old->tv_nsec) / 1000000;
}
return diff_ms;
}
/*!
* \internal
* \brief Reset a command's operation times to their original values.
*
* Reset a command's run and queued timestamps to the timestamps of the original
* command, so we report the entire time since then and not just the time since
* the most recent command (for recurring and systemd operations).
*
* \param[in,out] cmd Executor command object to reset
*
* \note It's not obvious what the queued time should be for a systemd
* start/stop operation, which might go like this:
* initial command queued 5ms, runs 3s
* monitor command queued 10ms, runs 10s
* monitor command queued 10ms, runs 10s
* Is the queued time for that operation 5ms, 10ms or 25ms? The current
* implementation will report 5ms. If it's 25ms, then we need to
* subtract 20ms from the total exec time so as not to count it twice.
* We can implement that later if it matters to anyone ...
*/
static void
cmd_original_times(lrmd_cmd_t * cmd)
{
cmd->t_run = cmd->t_first_run;
cmd->t_queue = cmd->t_first_queue;
}
#endif
static inline bool
action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms)
{
return (cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, action, pcmk__str_casei);
}
/*!
* \internal
* \brief Log the result of an asynchronous command
*
* \param[in] cmd Command to log result for
* \param[in] exec_time_ms Execution time in milliseconds, if known
* \param[in] queue_time_ms Queue time in milliseconds, if known
*/
static void
log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
{
int log_level = LOG_INFO;
GString *str = g_string_sized_new(100); // reasonable starting size
if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
log_level = LOG_DEBUG;
}
g_string_append_printf(str, "%s %s (call %d",
cmd->rsc_id, cmd->action, cmd->call_id);
if (cmd->last_pid != 0) {
g_string_append_printf(str, ", PID %d", cmd->last_pid);
}
if (cmd->result.execution_status == PCMK_EXEC_DONE) {
g_string_append_printf(str, ") exited with status %d",
cmd->result.exit_status);
} else {
pcmk__g_strcat(str, ") could not be executed: ",
pcmk_exec_status_str(cmd->result.execution_status),
NULL);
}
if (cmd->result.exit_reason != NULL) {
pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL);
}
#ifdef PCMK__TIME_USE_CGT
pcmk__g_strcat(str, " (execution time ",
pcmk__readable_interval(exec_time_ms), NULL);
if (queue_time_ms > 0) {
pcmk__g_strcat(str, " after being queued ",
pcmk__readable_interval(queue_time_ms), NULL);
}
g_string_append_c(str, ')');
#endif
do_crm_log(log_level, "%s", str->str);
g_string_free(str, TRUE);
}
static void
log_execute(lrmd_cmd_t * cmd)
{
int log_level = LOG_INFO;
if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
cmd->rsc_id, cmd->action, cmd->call_id);
}
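/* Map "monitor" to "status" for resource classes that advertise
* pcmk_ra_cap_status (e.g. LSB-style agents), which implement a "status"
* command rather than the OCF "monitor" action.
*/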
static const char *
normalize_action_name(lrmd_rsc_t * rsc, const char *action)
{
if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_casei) &&
pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
return PCMK_ACTION_STATUS;
}
return action;
}
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode * msg)
{
xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, msg, LOG_ERR);
lrmd_rsc_t *rsc = NULL;
rsc = pcmk__assert_alloc(1, sizeof(lrmd_rsc_t));
crm_element_value_int(msg, PCMK__XA_LRMD_CALLOPT, &rsc->call_opts);
rsc->rsc_id = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID);
rsc->class = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_CLASS);
rsc->provider = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_PROVIDER);
rsc->type = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_TYPE);
rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action,
rsc);
// Initialize fence device probes (to return "not running")
pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
PCMK_EXEC_NO_FENCE_DEVICE, NULL);
return rsc;
}
static lrmd_cmd_t *
create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
{
int call_options = 0;
xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, msg, LOG_ERR);
lrmd_cmd_t *cmd = NULL;
cmd = pcmk__assert_alloc(1, sizeof(lrmd_cmd_t));
crm_element_value_int(msg, PCMK__XA_LRMD_CALLOPT, &call_options);
cmd->call_opts = call_options;
cmd->client_id = pcmk__str_copy(client->id);
crm_element_value_int(msg, PCMK__XA_LRMD_CALLID, &cmd->call_id);
crm_element_value_ms(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL,
&cmd->interval_ms);
crm_element_value_int(rsc_xml, PCMK__XA_LRMD_TIMEOUT, &cmd->timeout);
crm_element_value_int(rsc_xml, PCMK__XA_LRMD_RSC_START_DELAY,
&cmd->start_delay);
cmd->timeout_orig = cmd->timeout;
cmd->origin = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_ORIGIN);
cmd->action = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
cmd->userdata_str = crm_element_value_copy(rsc_xml,
PCMK__XA_LRMD_RSC_USERDATA_STR);
cmd->rsc_id = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID);
cmd->params = xml2list(rsc_xml);
if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"),
PCMK_VALUE_BLOCK, pcmk__str_casei)) {
crm_debug("Setting flag to leave pid group on timeout and "
"only kill action pid for " PCMK__OP_FMT,
cmd->rsc_id, cmd->action, cmd->interval_ms);
cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
LOG_TRACE, "Action",
cmd->action, 0,
SVC_ACTION_LEAVE_GROUP,
"SVC_ACTION_LEAVE_GROUP");
}
return cmd;
}
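// Cancel the recurring-operation timer of a (stonith) command, if running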
static void
stop_recurring_timer(lrmd_cmd_t *cmd)
{
if (cmd) {
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
cmd->stonith_recurring_id = 0;
}
}
static void
free_lrmd_cmd(lrmd_cmd_t * cmd)
{
stop_recurring_timer(cmd);
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->params) {
g_hash_table_destroy(cmd->params);
}
pcmk__reset_result(&(cmd->result));
free(cmd->origin);
free(cmd->action);
free(cmd->real_action);
free(cmd->userdata_str);
free(cmd->rsc_id);
free(cmd->client_id);
free(cmd);
}
static gboolean
stonith_recurring_op_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc;
cmd->stonith_recurring_id = 0;
if (!cmd->rsc_id) {
return FALSE;
}
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
pcmk__assert(rsc != NULL);
/* take it out of recurring_ops list, and put it in the pending ops
* to be executed */
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef PCMK__TIME_USE_CGT
get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
#endif
mainloop_set_trigger(rsc->work);
return FALSE;
}
static inline void
start_recurring_timer(lrmd_cmd_t *cmd)
{
if (cmd && (cmd->interval_ms > 0)) {
cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
stonith_recurring_op_helper,
cmd);
}
}
static gboolean
start_delay_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->delay_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc) {
mainloop_set_trigger(rsc->work);
}
return FALSE;
}
/*!
* \internal
* \brief Check whether a list already contains the equivalent of a given action
*
* \param[in] action_list List to search
* \param[in] cmd Action to search for
*/
static lrmd_cmd_t *
find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd)
{
for (const GList *item = action_list; item != NULL; item = item->next) {
lrmd_cmd_t *dup = item->data;
if (action_matches(cmd, dup->action, dup->interval_ms)) {
return dup;
}
}
return NULL;
}
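/*!
* \internal
* \brief Merge a new recurring command into an existing duplicate, if any
*
* \return true if cmd was merged into an existing entry, false otherwise
*
* \note When this returns true, cmd has been freed and must not be used.
*/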
static bool
merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
lrmd_cmd_t * dup = NULL;
bool dup_pending = true;
if (cmd->interval_ms == 0) {
return false;
}
// Search for a duplicate of this action (in-flight or not)
dup = find_duplicate_action(rsc->pending_ops, cmd);
if (dup == NULL) {
dup_pending = false;
dup = find_duplicate_action(rsc->recurring_ops, cmd);
if (dup == NULL) {
return false;
}
}
/* Do not merge fencing monitors marked for cancellation, so we can reply to
* the cancellation separately.
*/
if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
pcmk__str_casei)
&& (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
return false;
}
/* This should not occur. If it does, we need to investigate how something
* like this is possible in the controller.
*/
crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
"), merging with previous op entry",
rsc->rsc_id, normalize_action_name(rsc, dup->action),
dup->interval_ms);
// Merge new action's call ID and user data into existing action
dup->first_notify_sent = false;
free(dup->userdata_str);
dup->userdata_str = cmd->userdata_str;
cmd->userdata_str = NULL;
dup->call_id = cmd->call_id;
free_lrmd_cmd(cmd);
cmd = NULL;
/* If dup is not pending, that means it has already executed at least once
* and is waiting in the interval. In that case, stop waiting and initiate
* a new instance now.
*/
if (!dup_pending) {
if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
pcmk__str_casei)) {
stop_recurring_timer(dup);
stonith_recurring_op_helper(dup);
} else {
services_action_kick(rsc->rsc_id,
normalize_action_name(rsc, dup->action),
dup->interval_ms);
}
}
return true;
}
static void
schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(rsc != NULL, return);
crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
if (merge_recurring_duplicate(rsc, cmd)) {
// Equivalent of cmd has already been scheduled
return;
}
/* The controller expects the executor to automatically cancel
* recurring operations before a resource stops.
*/
if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
cancel_all_recurring(rsc, NULL);
}
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef PCMK__TIME_USE_CGT
get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
#endif
mainloop_set_trigger(rsc->work);
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
static xmlNode *
create_lrmd_reply(const char *origin, int rc, int call_id)
{
xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_LRMD_REPLY);
crm_xml_add(reply, PCMK__XA_LRMD_ORIGIN, origin);
crm_xml_add_int(reply, PCMK__XA_LRMD_RC, rc);
crm_xml_add_int(reply, PCMK__XA_LRMD_CALLID, call_id);
return reply;
}
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
pcmk__client_t *client = value;
int rc;
int log_level = LOG_WARNING;
const char *msg = NULL;
CRM_CHECK(client != NULL, return);
if (client->name == NULL) {
crm_trace("Skipping notification to client without name");
return;
}
if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
/* We only want to notify clients of the executor IPC API. If we are
* running as Pacemaker Remote, we may have clients proxied to other
* IPC services in the cluster, so skip those.
*/
crm_trace("Skipping executor API notification to client %s",
pcmk__client_name(client));
return;
}
rc = lrmd_server_send_notify(client, update_msg);
if (rc == pcmk_rc_ok) {
return;
}
switch (rc) {
case ENOTCONN:
case EPIPE: // Client exited without waiting for notification
log_level = LOG_INFO;
msg = "Disconnected";
break;
default:
msg = pcmk_rc_str(rc);
break;
}
do_crm_log(log_level, "Could not notify client %s: %s " QB_XS " rc=%d",
pcmk__client_name(client), msg, rc);
}
static void
send_cmd_complete_notify(lrmd_cmd_t * cmd)
{
xmlNode *notify = NULL;
int exec_time = 0;
int queue_time = 0;
#ifdef PCMK__TIME_USE_CGT
exec_time = time_diff_ms(NULL, &(cmd->t_run));
queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
#endif
log_finished(cmd, exec_time, queue_time);
/* If the originator requested to be notified only for changes in recurring
* operation results, skip the notification if the result hasn't changed.
*/
if (cmd->first_notify_sent
&& pcmk_is_set(cmd->call_opts, lrmd_opt_notify_changes_only)
&& (cmd->last_notify_rc == cmd->result.exit_status)
&& (cmd->last_notify_op_status == cmd->result.execution_status)) {
return;
}
cmd->first_notify_sent = true;
cmd->last_notify_rc = cmd->result.exit_status;
cmd->last_notify_op_status = cmd->result.execution_status;
notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
crm_xml_add(notify, PCMK__XA_LRMD_ORIGIN, __func__);
crm_xml_add_int(notify, PCMK__XA_LRMD_TIMEOUT, cmd->timeout);
crm_xml_add_ms(notify, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms);
crm_xml_add_int(notify, PCMK__XA_LRMD_RSC_START_DELAY, cmd->start_delay);
crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_RC, cmd->result.exit_status);
crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_OP_STATUS,
cmd->result.execution_status);
crm_xml_add_int(notify, PCMK__XA_LRMD_CALLID, cmd->call_id);
crm_xml_add_int(notify, PCMK__XA_LRMD_RSC_DELETED, cmd->rsc_deleted);
crm_xml_add_ll(notify, PCMK__XA_LRMD_RUN_TIME,
(long long) cmd->epoch_last_run);
crm_xml_add_ll(notify, PCMK__XA_LRMD_RCCHANGE_TIME,
(long long) cmd->epoch_rcchange);
#ifdef PCMK__TIME_USE_CGT
crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_TIME, exec_time);
crm_xml_add_int(notify, PCMK__XA_LRMD_QUEUE_TIME, queue_time);
#endif
crm_xml_add(notify, PCMK__XA_LRMD_OP, LRMD_OP_RSC_EXEC);
crm_xml_add(notify, PCMK__XA_LRMD_RSC_ID, cmd->rsc_id);
if(cmd->real_action) {
crm_xml_add(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->real_action);
} else {
crm_xml_add(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->action);
}
crm_xml_add(notify, PCMK__XA_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
crm_xml_add(notify, PCMK__XA_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
if (cmd->result.action_stderr != NULL) {
crm_xml_add(notify, PCMK__XA_LRMD_RSC_OUTPUT,
cmd->result.action_stderr);
} else if (cmd->result.action_stdout != NULL) {
crm_xml_add(notify, PCMK__XA_LRMD_RSC_OUTPUT,
cmd->result.action_stdout);
}
if (cmd->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
xmlNode *args = pcmk__xe_create(notify, PCMK__XE_ATTRIBUTES);
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
hash2smartfield((gpointer) key, (gpointer) value, args);
}
}
if ((cmd->client_id != NULL)
&& pcmk_is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) {
pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
if (client != NULL) {
send_client_notify(client->id, client, notify);
}
} else {
pcmk__foreach_ipc_client(send_client_notify, notify);
}
pcmk__xml_free(notify);
}
static void
send_generic_notify(int rc, xmlNode * request)
{
if (pcmk__ipc_client_count() != 0) {
int call_id = 0;
xmlNode *notify = NULL;
xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, request,
LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);
crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &call_id);
notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
crm_xml_add(notify, PCMK__XA_LRMD_ORIGIN, __func__);
crm_xml_add_int(notify, PCMK__XA_LRMD_RC, rc);
crm_xml_add_int(notify, PCMK__XA_LRMD_CALLID, call_id);
crm_xml_add(notify, PCMK__XA_LRMD_OP, op);
crm_xml_add(notify, PCMK__XA_LRMD_RSC_ID, rsc_id);
pcmk__foreach_ipc_client(send_client_notify, notify);
pcmk__xml_free(notify);
}
}
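// Clear state that pertains only to the most recent run of a recurring command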
static void
cmd_reset(lrmd_cmd_t * cmd)
{
cmd->last_pid = 0;
#ifdef PCMK__TIME_USE_CGT
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
#endif
cmd->epoch_last_run = 0;
pcmk__reset_result(&(cmd->result));
cmd->result.execution_status = PCMK_EXEC_DONE;
}
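/*!
* \internal
* \brief Finish a command: notify clients, then free it or reset it
*
* One-shot and cancelled commands are freed; a live recurring command is
* reset so its next iteration starts from a clean state.
*/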
static void
cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
{
crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
rsc ? rsc->active : NULL, cmd);
if (rsc && (rsc->active == cmd)) {
rsc->active = NULL;
mainloop_set_trigger(rsc->work);
}
if (!rsc) {
cmd->rsc_deleted = 1;
}
/* reset original timeout so client notification has correct information */
cmd->timeout = cmd->timeout_orig;
send_cmd_complete_notify(cmd);
if ((cmd->interval_ms != 0)
&& (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
if (rsc) {
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else if (cmd->interval_ms == 0) {
if (rsc) {
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else {
/* Clear all the values pertaining just to the last iteration of a recurring op. */
cmd_reset(cmd);
}
}
struct notify_new_client_data {
xmlNode *notify;
pcmk__client_t *new_client;
};
static void
notify_one_client(gpointer key, gpointer value, gpointer user_data)
{
pcmk__client_t *client = value;
struct notify_new_client_data *data = user_data;
if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
send_client_notify(key, (gpointer) client, (gpointer) data->notify);
}
}
void
notify_of_new_client(pcmk__client_t *new_client)
{
struct notify_new_client_data data;
data.new_client = new_client;
data.notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
crm_xml_add(data.notify, PCMK__XA_LRMD_ORIGIN, __func__);
crm_xml_add(data.notify, PCMK__XA_LRMD_OP, LRMD_OP_NEW_CLIENT);
pcmk__foreach_ipc_client(notify_one_client, &data);
pcmk__xml_free(data.notify);
}
void
client_disconnect_cleanup(const char *client_id)
{
GHashTableIter iter;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (pcmk_all_flags_set(rsc->call_opts, lrmd_opt_drop_recurring)) {
/* This client is disconnecting, drop any recurring operations
* it may have initiated on the resource */
cancel_all_recurring(rsc, client_id);
}
}
}
static void
action_complete(svc_action_t * action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
enum ocf_exitcode code;
#ifdef PCMK__TIME_USE_CGT
const char *rclass = NULL;
bool goagain = false;
#endif
if (!cmd) {
crm_err("Completed executor action (%s) does not match any known operations",
action->id);
return;
}
#ifdef PCMK__TIME_USE_CGT
if (cmd->result.exit_status != action->rc) {
cmd->epoch_rcchange = time(NULL);
}
#endif
cmd->last_pid = action->pid;
// Cast variable instead of function return to keep compilers happy
code = services_result2ocf(action->standard, cmd->action, action->rc);
pcmk__set_result(&(cmd->result), (int) code,
action->status, services__exit_reason(action));
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
#ifdef PCMK__TIME_USE_CGT
if (rsc != NULL) {
rclass = rsc->class;
#if PCMK__ENABLE_SERVICE
if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SERVICE,
pcmk__str_casei)) {
rclass = resources_find_service_class(rsc->type);
}
#endif
}
if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
if (pcmk__result_ok(&(cmd->result))
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_STOP, NULL)) {
/* systemd returns from start and stop actions after the action
* begins, not after it completes. We have to jump through a few
* hoops so that we don't report 'complete' to the rest of pacemaker
* until it's actually done.
*/
goagain = true;
cmd->real_action = cmd->action;
cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);
} else if (cmd->real_action != NULL) {
// This is follow-up monitor to check whether start/stop completed
if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
goagain = true;
} else if (pcmk__result_ok(&(cmd->result))
&& pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
goagain = true;
} else {
int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
int timeout_left = cmd->timeout_orig - time_sum;
crm_debug("%s systemd %s is now complete (elapsed=%dms, "
"remaining=%dms): %s (%d)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
- services_ocf_exitcode_str(cmd->result.exit_status),
+ crm_exit_str(cmd->result.exit_status),
cmd->result.exit_status);
cmd_original_times(cmd);
// Monitors may return "not running", but start/stop shouldn't
if ((cmd->result.execution_status == PCMK_EXEC_DONE)
&& (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START,
pcmk__str_casei)) {
cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
} else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
cmd->result.exit_status = PCMK_OCF_OK;
}
}
}
}
}
#endif
#ifdef PCMK__TIME_USE_CGT
if (goagain) {
int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
int timeout_left = cmd->timeout_orig - time_sum;
int delay = cmd->timeout_orig / 10;
if(delay >= timeout_left && timeout_left > 20) {
delay = timeout_left/2;
}
delay = QB_MIN(2000, delay);
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;
if (pcmk__result_ok(&(cmd->result))) {
crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
} else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
} else {
- crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
+ crm_notice("%s %s failed: %s: Re-scheduling (remaining "
+ "timeout %s) " QB_XS
+ " exitstatus=%d elapsed=%dms start_delay=%dms)",
cmd->rsc_id, cmd->action,
- services_ocf_exitcode_str(cmd->result.exit_status),
- cmd->result.exit_status, time_sum, timeout_left,
- delay);
+ crm_exit_str(cmd->result.exit_status),
+ pcmk__readable_interval(timeout_left),
+ cmd->result.exit_status, time_sum, delay);
}
cmd_reset(cmd);
if(rsc) {
rsc->active = NULL;
}
schedule_lrmd_cmd(rsc, cmd);
/* Don't finalize cmd, we're not done with it yet */
return;
} else {
crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
cmd->rsc_id,
(cmd->real_action? cmd->real_action : cmd->action),
cmd->result.exit_status, time_sum, timeout_left);
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Investigate reason for timeout, and adjust "
"configured operation timeout if necessary");
cmd_original_times(cmd);
}
}
#endif
pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
services__grab_stderr(action));
cmd_finalize(cmd, rsc);
}
/*!
* \internal
* \brief Process the result of a fence device action (start, stop, or monitor)
*
* \param[in,out] cmd Fence device action that completed
* \param[in] exit_status Fencer API exit status for action
* \param[in] execution_status Fencer API execution status for action
* \param[in] exit_reason Human-friendly detail, if action failed
*/
static void
stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
enum pcmk_exec_status execution_status,
const char *exit_reason)
{
// This can be NULL if resource was removed before command completed
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
// Simplify fencer exit status to uniform exit status
if (exit_status != CRM_EX_OK) {
exit_status = PCMK_OCF_UNKNOWN_ERROR;
}
if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
/* An in-flight fence action was cancelled. The execution status is
* already correct, so don't overwrite it.
*/
execution_status = PCMK_EXEC_CANCELLED;
} else {
/* Some execution status codes have specific meanings for the fencer
* that executor clients may not expect, so map them to a simple error
* status.
*/
switch (execution_status) {
case PCMK_EXEC_NOT_CONNECTED:
case PCMK_EXEC_INVALID:
execution_status = PCMK_EXEC_ERROR;
break;
case PCMK_EXEC_NO_FENCE_DEVICE:
/* This should be possible only for probes in practice, but
* interpret for all actions to be safe.
*/
if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_none)) {
exit_status = PCMK_OCF_NOT_RUNNING;
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
pcmk__str_none)) {
exit_status = PCMK_OCF_OK;
} else {
exit_status = PCMK_OCF_NOT_INSTALLED;
}
execution_status = PCMK_EXEC_ERROR;
break;
case PCMK_EXEC_NOT_SUPPORTED:
exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
break;
default:
break;
}
}
pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
// Certain successful actions change the known state of the resource
if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK,
PCMK_EXEC_DONE, NULL); // "running"
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running"
}
}
/* The recurring timer should not be running at this point in any case, but
* as a failsafe, stop it if it is.
*/
stop_recurring_timer(cmd);
/* Reschedule this command if appropriate. If a recurring command is *not*
* rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
* not be removed from recurring_ops by cmd_finalize().
*/
if (rsc && (cmd->interval_ms > 0)
&& (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
start_recurring_timer(cmd);
}
cmd_finalize(cmd, rsc);
}
static void
lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
if ((data == NULL) || (data->userdata == NULL)) {
crm_err("Ignoring fence action result: "
"Invalid callback arguments (bug?)");
} else {
stonith_action_complete((lrmd_cmd_t *) data->userdata,
stonith__exit_status(data),
stonith__execution_status(data),
stonith__exit_reason(data));
}
}
void
stonith_connection_failed(void)
{
GHashTableIter iter;
lrmd_rsc_t *rsc = NULL;
crm_warn("Connection to fencer lost (any pending operations for "
"fence devices will be considered failed)");
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) {
if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
pcmk__str_none)) {
continue;
}
/* If we registered this fence device, we don't know whether the
* fencer still has the registration or not. Cause future probes to
* return an error until the resource is stopped or started
* successfully. This is especially important if the controller also
* went away (possibly due to a cluster layer restart) and won't
* receive our client notification of any monitors finalized below.
*/
if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) {
pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
PCMK_EXEC_NOT_CONNECTED,
"Lost connection to fencer");
}
// Consider any active, pending, or recurring operations as failed
for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) {
lrmd_cmd_t *cmd = op->data;
/* This won't free a recurring op but instead restart its timer.
* If cmd is rsc->active, this will set rsc->active to NULL, so we
* don't have to worry about finalizing it a second time below.
*/
stonith_action_complete(cmd,
CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
"Lost connection to fencer");
}
if (rsc->active != NULL) {
rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active);
}
while (rsc->pending_ops != NULL) {
// This will free the op and remove it from rsc->pending_ops
stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data,
CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
"Lost connection to fencer");
}
}
}
/*!
* \internal
* \brief Execute a stonith resource "start" action
*
* Start a stonith resource by registering it with the fencer.
* (Stonith agents don't have a start command.)
*
* \param[in,out] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to start
* \param[in] cmd Start command to execute
*
* \return pcmk_ok on success, -errno otherwise
*/
static int
execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc,
const lrmd_cmd_t *cmd)
{
char *key = NULL;
char *value = NULL;
stonith_key_value_t *device_params = NULL;
int rc = pcmk_ok;
// Convert command parameters to stonith API key/values
if (cmd->params) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
device_params = stonith_key_value_add(device_params, key, value);
}
}
/* The fencer will automatically register devices via CIB notifications
* when the CIB changes, but to avoid a possible race condition between
* the fencer receiving the notification and the executor requesting that
* resource, the executor registers the device as well. The fencer knows how
* to handle duplicate registrations.
*/
rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
cmd->rsc_id, rsc->provider,
rsc->type, device_params);
stonith_key_value_freeall(device_params, 1, 1);
return rc;
}
/*!
* \internal
* \brief Execute a stonith resource "stop" action
*
* Stop a stonith resource by unregistering it with the fencer.
* (Stonith agents don't have a stop command.)
*
* \param[in,out] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to stop
*
* \return pcmk_ok on success, -errno otherwise
*/
static inline int
execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
{
/* @TODO Failure would indicate a problem communicating with fencer;
* perhaps we should try reconnecting and retrying a few times?
*/
return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
rsc->rsc_id);
}
/*!
* \internal
* \brief Initiate a stonith resource agent recurring "monitor" action
*
* \param[in,out] stonith_api Connection to fencer
* \param[in,out] rsc Stonith resource to monitor
* \param[in] cmd Monitor command being executed
*
* \return pcmk_ok if monitor was successfully initiated, -errno otherwise
*/
static inline int
execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
cmd->timeout / 1000);
rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
"lrmd_stonith_callback",
lrmd_stonith_callback);
if (rc == TRUE) {
rsc->active = cmd;
rc = pcmk_ok;
} else {
rc = -pcmk_err_generic;
}
return rc;
}
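// Route a fence device action to the appropriate fencer API request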
static void
execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
int rc = 0;
bool do_monitor = FALSE;
stonith_t *stonith_api = get_stonith_connection();
if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)
&& (cmd->interval_ms == 0)) {
// Probes don't require a fencer connection
stonith_action_complete(cmd, rsc->fence_probe_result.exit_status,
rsc->fence_probe_result.execution_status,
rsc->fence_probe_result.exit_reason);
return;
} else if (stonith_api == NULL) {
stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_NOT_CONNECTED,
"No connection to fencer");
return;
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
rc = execd_stonith_start(stonith_api, rsc, cmd);
if (rc == pcmk_ok) {
do_monitor = TRUE;
}
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
rc = execd_stonith_stop(stonith_api, rsc);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
do_monitor = TRUE;
} else {
stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
PCMK_EXEC_ERROR,
"Invalid fence device action (bug?)");
return;
}
if (do_monitor) {
rc = execd_stonith_monitor(stonith_api, rsc, cmd);
if (rc == pcmk_ok) {
// Don't clean up yet, we will find out result of the monitor later
return;
}
}
stonith_action_complete(cmd,
((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
stonith__legacy2status(rc),
((rc == -pcmk_err_generic)? NULL : pcmk_strerror(rc)));
}
static void
execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
pcmk__assert((rsc != NULL) && (cmd != NULL));
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
params_copy = pcmk__str_table_dup(cmd->params);
action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval_ms, cmd->timeout,
params_copy, cmd->service_flags);
if (action == NULL) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, strerror(ENOMEM));
cmd_finalize(cmd, rsc);
return;
}
if (action->rc != PCMK_OCF_UNKNOWN) {
pcmk__set_result(&(cmd->result), action->rc, action->status,
services__exit_reason(action));
services_action_free(action);
cmd_finalize(cmd, rsc);
return;
}
action->cb_data = cmd;
if (services_action_async(action, action_complete)) {
/* The services library has taken responsibility for the action. It
* could be pending, blocked, or merged into a duplicate recurring
* action, in which case the action callback (action_complete())
* will be called when the action completes, otherwise the callback has
* already been called.
*
* action_complete() calls cmd_finalize() which can free cmd, so cmd
* cannot be used here.
*/
} else {
/* This is a recurring action that is not being cancelled and could not
* be initiated. It has been rescheduled, and the action callback
* (action_complete()) has been called, which in this case has already
* called cmd_finalize(), which in this case should only reset (not
* free) cmd.
*/
pcmk__set_result(&(cmd->result), action->rc, action->status,
services__exit_reason(action));
services_action_free(action);
}
}
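/*!
* \internal
* \brief Mainloop trigger handler: start a resource's next pending command
*
* Only one command may be active per resource at a time; if one is active,
* do nothing now (the trigger fires again when the resource goes idle).
*/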
static gboolean
execute_resource_action(gpointer user_data)
{
lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data;
lrmd_cmd_t *cmd = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
if (rsc->active) {
crm_trace("%s is still active", rsc->rsc_id);
return TRUE;
}
if (rsc->pending_ops) {
GList *first = rsc->pending_ops;
cmd = first->data;
if (cmd->delay_id) {
crm_trace("Command %s %s was asked to run too early, waiting for "
"start_delay timeout of %dms",
cmd->rsc_id, cmd->action, cmd->start_delay);
return TRUE;
}
rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
g_list_free_1(first);
#ifdef PCMK__TIME_USE_CGT
get_current_time(&(cmd->t_run), &(cmd->t_first_run));
#endif
cmd->epoch_last_run = time(NULL);
}
if (!cmd) {
crm_trace("Nothing further to do for %s", rsc->rsc_id);
return TRUE;
}
rsc->active = cmd; /* only one op at a time for a rsc */
if (cmd->interval_ms) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
}
log_execute(cmd);
if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
execute_stonith_action(rsc, cmd);
} else {
execute_nonstonith_action(rsc, cmd);
}
return TRUE;
}
void
free_rsc(gpointer data)
{
GList *gIter = NULL;
lrmd_rsc_t *rsc = data;
int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
pcmk__str_casei);
gIter = rsc->pending_ops;
while (gIter != NULL) {
GList *next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
/* command was never executed */
cmd->result.execution_status = PCMK_EXEC_CANCELLED;
cmd_finalize(cmd, NULL);
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->pending_ops);
gIter = rsc->recurring_ops;
while (gIter != NULL) {
GList *next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
if (is_stonith) {
cmd->result.execution_status = PCMK_EXEC_CANCELLED;
/* If a stonith command is in-flight, just mark it as cancelled;
* it is not safe to finalize/free the cmd until the stonith api
* says it has either completed or timed out.
*/
if (rsc->active != cmd) {
cmd_finalize(cmd, NULL);
}
} else {
/* This command is already handed off to service library,
* let service library cancel it and tell us via the callback
* when it is cancelled. The rsc can be safely destroyed
* even if we are waiting for the cancel result */
services_action_cancel(rsc->rsc_id,
normalize_action_name(rsc, cmd->action),
cmd->interval_ms);
}
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->recurring_ops);
free(rsc->rsc_id);
free(rsc->class);
free(rsc->provider);
free(rsc->type);
mainloop_destroy_trigger(rsc->work);
free(rsc);
}
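// Handle an executor API client registration ("sign-on") request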
static int
process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
xmlNode **reply)
{
int rc = pcmk_ok;
time_t now = time(NULL);
const char *protocol_version =
crm_element_value(request, PCMK__XA_LRMD_PROTOCOL_VERSION);
const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);
if (compare_version(protocol_version, LRMD_COMPATIBLE_PROTOCOL) < 0) {
crm_err("Cluster API version must be greater than or equal to %s, not %s",
LRMD_COMPATIBLE_PROTOCOL, protocol_version);
rc = -EPROTO;
}
if (pcmk__xe_attr_is_true(request, PCMK__XA_LRMD_IS_IPC_PROVIDER)) {
#ifdef PCMK__COMPILE_REMOTE
if ((client->remote != NULL)
&& pcmk_is_set(client->flags,
pcmk__client_tls_handshake_complete)) {
const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);
// This is a remote connection from a cluster node's controller
ipc_proxy_add_provider(client);
/* If this was a register operation, also ask for new schema files but
* only if it's supported by the protocol version.
*/
if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none) &&
LRMD_SUPPORTS_SCHEMA_XFER(protocol_version)) {
remoted_request_cib_schema_files();
}
} else {
rc = -EACCES;
}
#else
rc = -EPROTONOSUPPORT;
#endif
}
*reply = create_lrmd_reply(__func__, rc, call_id);
crm_xml_add(*reply, PCMK__XA_LRMD_OP, CRM_OP_REGISTER);
crm_xml_add(*reply, PCMK__XA_LRMD_CLIENTID, client->id);
crm_xml_add(*reply, PCMK__XA_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time);
if (start_state) {
crm_xml_add(*reply, PCMK__XA_NODE_START_STATE, start_state);
}
return rc;
}
static int
process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = build_rsc_from_xml(request);
lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
if (dup &&
pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
free_rsc(rsc);
return rc;
}
g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
crm_info("Cached agent information for '%s'", rsc->rsc_id);
return rc;
}
static xmlNode *
process_lrmd_get_rsc_info(xmlNode *request, int call_id)
{
int rc = pcmk_ok;
xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, request,
LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
xmlNode *reply = NULL;
lrmd_rsc_t *rsc = NULL;
if (rsc_id == NULL) {
rc = -ENODEV;
} else {
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Agent information for '%s' not in cache", rsc_id);
rc = -ENODEV;
}
}
reply = create_lrmd_reply(__func__, rc, call_id);
if (rsc) {
crm_xml_add(reply, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
crm_xml_add(reply, PCMK__XA_LRMD_CLASS, rsc->class);
crm_xml_add(reply, PCMK__XA_LRMD_PROVIDER, rsc->provider);
crm_xml_add(reply, PCMK__XA_LRMD_TYPE, rsc->type);
}
return reply;
}
static int
process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
xmlNode *request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = NULL;
xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, request,
LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
if (!rsc_id) {
return -ENODEV;
}
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Ignoring unregistration of resource '%s', which is not registered",
rsc_id);
return pcmk_ok;
}
if (rsc->active) {
/* let the caller know there are still active ops on this rsc to watch for */
crm_trace("Operation (%p) still in progress for unregistered resource %s",
rsc->active, rsc_id);
rc = -EINPROGRESS;
}
g_hash_table_remove(rsc_list, rsc_id);
return rc;
}
static int
process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
lrmd_rsc_t *rsc = NULL;
lrmd_cmd_t *cmd = NULL;
xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, request,
LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
int call_id;
if (!rsc_id) {
return -EINVAL;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return -ENODEV;
}
cmd = create_lrmd_cmd(request, client);
call_id = cmd->call_id;
/* Don't reference cmd after handing it off to be scheduled.
* The cmd could get merged and freed. */
schedule_lrmd_cmd(rsc, cmd);
return call_id;
}
static int
cancel_op(const char *rsc_id, const char *action, guint interval_ms)
{
GList *gIter = NULL;
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
/* How to cancel an action.
* 1. Check pending ops list, if it hasn't been handed off
* to the service library or stonith recurring list remove
* it there and that will stop it.
* 2. If it isn't in the pending ops list, then it's either a
* recurring op in the stonith recurring list, or the service
* library's recurring list. Stop it there
* 3. If not found in any lists, then this operation has either
* been executed already and is not a recurring operation, or
* never existed.
*/
if (!rsc) {
return -ENODEV;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (action_matches(cmd, action, interval_ms)) {
cmd->result.execution_status = PCMK_EXEC_CANCELLED;
cmd_finalize(cmd, rsc);
return pcmk_ok;
}
}
if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
/* The service library does not handle stonith operations.
* We have to handle recurring stonith operations ourselves. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (action_matches(cmd, action, interval_ms)) {
cmd->result.execution_status = PCMK_EXEC_CANCELLED;
if (rsc->active != cmd) {
cmd_finalize(cmd, rsc);
}
return pcmk_ok;
}
}
} else if (services_action_cancel(rsc_id,
normalize_action_name(rsc, action),
interval_ms) == TRUE) {
/* The service library will tell the action_complete callback function
* this action was cancelled, which will destroy the cmd and remove
* it from the recurring_op list. Do not do that in this function
* if the service library says it cancelled it. */
return pcmk_ok;
}
return -EOPNOTSUPP;
}
static void
cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
{
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
/* Note that a copy of each list is created by the g_list_concat() calls
 * below. This matters because cancel_op() may modify the recurring_ops
 * and pending_ops lists while cmd_list is being iterated; without the
 * copies, that could invalidate our iteration. */
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
}
if (!cmd_list) {
return;
}
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
lrmd_cmd_t *cmd = cmd_iter->data;
if (cmd->interval_ms == 0) {
continue;
}
if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
continue;
}
cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
}
/* Free only the copied list links, not the commands themselves */
g_list_free(cmd_list);
}
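/* Illustrative sketch (not part of the original code): the copy-before-
 * iterate pattern used above is the standard GLib guard when a callee may
 * mutate the list being walked. Without the copy, the equivalent loop
 *
 *     for (GList *iter = rsc->recurring_ops; iter; iter = iter->next) {
 *         cancel_op(rsc->rsc_id, ...);  // may unlink iter's element
 *     }
 *
 * could step through a freed link; g_list_copy() gives the loop its own
 * links while sharing the underlying cmd data.
 */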
static int
process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, request,
LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
const char *action = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
guint interval_ms = 0;
crm_element_value_ms(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &interval_ms);
if (!rsc_id || !action) {
return -EINVAL;
}
return cancel_op(rsc_id, action, interval_ms);
}
static void
add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
{
xmlNode *rsc_xml = pcmk__xe_create(reply, PCMK__XE_LRMD_RSC);
crm_xml_add(rsc_xml, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
lrmd_cmd_t *cmd = item->data;
xmlNode *op_xml = pcmk__xe_create(rsc_xml, PCMK__XE_LRMD_RSC_OP);
crm_xml_add(op_xml, PCMK__XA_LRMD_RSC_ACTION,
pcmk__s(cmd->real_action, cmd->action));
crm_xml_add_ms(op_xml, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms);
crm_xml_add_int(op_xml, PCMK__XA_LRMD_TIMEOUT, cmd->timeout_orig);
}
}
static xmlNode *
process_lrmd_get_recurring(xmlNode *request, int call_id)
{
int rc = pcmk_ok;
const char *rsc_id = NULL;
lrmd_rsc_t *rsc = NULL;
xmlNode *reply = NULL;
xmlNode *rsc_xml = NULL;
// Resource ID is optional
rsc_xml = pcmk__xe_first_child(request, PCMK__XE_LRMD_CALLDATA, NULL, NULL);
if (rsc_xml) {
rsc_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC, NULL, NULL);
}
if (rsc_xml) {
rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
}
// If resource ID is specified, resource must exist
if (rsc_id != NULL) {
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = -ENODEV;
}
}
reply = create_lrmd_reply(__func__, rc, call_id);
// If resource ID is not specified, check all resources
if (rsc_id == NULL) {
GHashTableIter iter;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rsc)) {
add_recurring_op_xml(reply, rsc);
}
} else if (rsc) {
add_recurring_op_xml(reply, rsc);
}
return reply;
}
void
process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
int rc = pcmk_ok;
int call_id = 0;
const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);
int do_reply = 0;
int do_notify = 0;
xmlNode *reply = NULL;
/* Certain IPC commands may be done only by privileged users (i.e. root or
* hacluster), because they would otherwise provide a means of bypassing
* ACLs.
*/
bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
crm_trace("Processing %s operation from %s", op, client->id);
crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &call_id);
if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
#ifdef PCMK__COMPILE_REMOTE
if (allowed) {
ipc_proxy_forward_client(client, request);
} else {
rc = -EACCES;
}
#else
rc = -EPROTONOSUPPORT;
#endif
do_reply = 1;
} else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
rc = process_lrmd_signon(client, request, call_id, &reply);
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_register(client, id, request);
do_notify = 1;
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
if (allowed) {
reply = process_lrmd_get_rsc_info(request, call_id);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_unregister(client, id, request);
/* Don't notify anyone about failed unregistrations */
if (rc == pcmk_ok || rc == -EINPROGRESS) {
do_notify = 1;
}
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_exec(client, id, request);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_cancel(client, id, request);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
do_notify = 1;
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
if (allowed) {
xmlNode *wrapper = pcmk__xe_first_child(request,
PCMK__XE_LRMD_CALLDATA,
NULL, NULL);
xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
const char *timeout = NULL;
CRM_LOG_ASSERT(data != NULL);
timeout = crm_element_value(data, PCMK__XA_LRMD_WATCHDOG);
pcmk__valid_stonith_watchdog_timeout(timeout);
} else {
rc = -EACCES;
}
} else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_alert_exec(client, id, request);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
if (allowed) {
reply = process_lrmd_get_recurring(request, call_id);
} else {
rc = -EACCES;
}
do_reply = 1;
} else {
rc = -EOPNOTSUPP;
do_reply = 1;
crm_err("Unknown IPC request '%s' from client %s",
op, pcmk__client_name(client));
}
if (rc == -EACCES) {
crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
op, pcmk__client_name(client));
}
crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
op, client->id, rc, do_reply, do_notify);
if (do_reply) {
int send_rc = pcmk_rc_ok;
if (reply == NULL) {
reply = create_lrmd_reply(__func__, rc, call_id);
}
send_rc = lrmd_server_send_reply(client, id, reply);
pcmk__xml_free(reply);
if (send_rc != pcmk_rc_ok) {
crm_warn("Reply to client %s failed: %s " QB_XS " rc=%d",
pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
}
}
if (do_notify) {
send_generic_notify(rc, request);
}
}
diff --git a/include/crm/Makefile.am b/include/crm/Makefile.am
index 47fd80966a..dc5fa71219 100644
--- a/include/crm/Makefile.am
+++ b/include/crm/Makefile.am
@@ -1,32 +1,33 @@
#
# Copyright 2004-2024 the Pacemaker project contributors
#
# The version control history for this file may have further details.
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
MAINTAINERCLEANFILES = Makefile.in
headerdir=$(pkgincludedir)/crm
header_HEADERS = cib.h \
cib_compat.h \
cluster.h \
crm.h \
crm_compat.h \
lrmd.h \
lrmd_events.h \
msg_xml.h \
msg_xml_compat.h \
services.h \
+ services_compat.h \
stonith-ng.h
noinst_HEADERS = $(wildcard *_internal.h)
SUBDIRS = common \
pengine \
cib \
fencing \
cluster
diff --git a/include/crm/services.h b/include/crm/services.h
index 6849d203ad..64f3d3eeae 100644
--- a/include/crm/services.h
+++ b/include/crm/services.h
@@ -1,395 +1,368 @@
/*
* Copyright 2010-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_SERVICES__H
# define PCMK__CRM_SERVICES__H
# include
# include
# include
# include
# include
# include
# include <crm/common/agents.h> // OCF_ROOT_DIR
# include
# include
#ifdef __cplusplus
extern "C" {
#endif
// NOTE: booth (as of at least 1.1) checks for the existence of this header
/*!
* \file
* \brief Services API
* \ingroup core
*/
/* TODO: Autodetect this? */
# ifndef SYSTEMCTL
# define SYSTEMCTL "/bin/systemctl"
# endif
/* This is the string passed in the OCF_EXIT_REASON_PREFIX environment variable.
* The stderr output that occurs after this prefix is encountered is considered
* the exit reason for a completed operation.
*/
#define PCMK_OCF_REASON_PREFIX "ocf-exit-reason:"
// Agent version to use if agent doesn't specify one
#define PCMK_DEFAULT_AGENT_VERSION "0.1"
enum lsb_exitcode {
PCMK_LSB_OK = 0,
// NOTE: booth (as of at least 1.1) uses this value
PCMK_LSB_UNKNOWN_ERROR = 1,
PCMK_LSB_INVALID_PARAM = 2,
PCMK_LSB_UNIMPLEMENT_FEATURE = 3,
PCMK_LSB_INSUFFICIENT_PRIV = 4,
PCMK_LSB_NOT_INSTALLED = 5,
PCMK_LSB_NOT_CONFIGURED = 6,
PCMK_LSB_NOT_RUNNING = 7,
};
// LSB uses different return codes for status actions
enum lsb_status_exitcode {
PCMK_LSB_STATUS_OK = 0,
PCMK_LSB_STATUS_VAR_PID = 1,
PCMK_LSB_STATUS_VAR_LOCK = 2,
PCMK_LSB_STATUS_NOT_RUNNING = 3,
PCMK_LSB_STATUS_UNKNOWN = 4,
/* custom codes should be in the 150-199 range reserved for application use */
PCMK_LSB_STATUS_NOT_INSTALLED = 150,
PCMK_LSB_STATUS_INSUFFICIENT_PRIV = 151,
};
enum svc_action_flags {
/* On timeout, only kill pid, do not kill entire pid group */
SVC_ACTION_LEAVE_GROUP = 0x01,
SVC_ACTION_NON_BLOCKED = 0x02,
};
typedef struct svc_action_private_s svc_action_private_t;
/*!
* \brief Object for executing external actions
*
* \note This object should never be instantiated directly, but instead created
* using one of the constructor functions (resources_action_create() for
* resource agents, services_alert_create() for alert agents, or
* services_action_create_generic() for generic executables). Similarly,
* do not use sizeof() on this struct.
*/
/*
* NOTE: Internally, services__create_resource_action() is preferable to
* resources_action_create().
*/
typedef struct svc_action_s {
/*! Operation key (<rsc>_<action>_<interval>) for resource actions,
* XML ID for alert actions, or NULL for generic actions
*/
char *id;
//! XML ID of resource being executed for resource actions, otherwise NULL
char *rsc;
//! Name of action being executed for resource actions, otherwise NULL
char *action;
//! Action interval for recurring resource actions, otherwise 0
guint interval_ms;
//! Resource standard for resource actions, otherwise NULL
char *standard;
//! Resource provider for resource actions that require it, otherwise NULL
char *provider;
//! Resource agent name for resource actions, otherwise NULL
char *agent;
int timeout; //!< Action timeout (in milliseconds)
/*! A hash table of name/value pairs to use as parameters for resource and
* alert actions, otherwise NULL. These will be used to set environment
* variables for non-fencing resource agents and alert agents, and to send
* stdin to fence agents.
*/
GHashTable *params;
int rc; //!< Exit status of action (set by library upon completion)
//!@{
//! This field should be treated as internal to Pacemaker
int pid; // Process ID of child
int cancel; // Whether this is a cancellation of a recurring action
//!@}
int status; //!< Execution status (enum pcmk_exec_status set by library)
/*! Action counter (set by library for resource actions, or by caller
* otherwise)
*/
int sequence;
//!@{
//! This field should be treated as internal to Pacemaker
int expected_rc; // Unused
int synchronous; // Whether execution should be synchronous (blocking)
//!@}
enum svc_action_flags flags; //!< Flag group of enum svc_action_flags
char *stderr_data; //!< Action stderr (set by library)
char *stdout_data; //!< Action stdout (set by library)
void *cb_data; //!< For caller's use (not used by library)
//! This field should be treated as internal to Pacemaker
svc_action_private_t *opaque;
} svc_action_t;
/*!
* \brief Get a list of files or directories in a given path
*
* \param[in] root Full path to a directory to read
* \param[in] files Return list of files if TRUE or directories if FALSE
* \param[in] executable If TRUE and files is TRUE, only return executable files
*
* \return List of what was found as char * items.
* \note The caller is responsible for freeing the result using
* g_list_free_full(list, free).
*/
GList *get_directory_list(const char *root, gboolean files,
gboolean executable);
/*!
* \brief Get a list of providers
*
* \param[in] standard List providers of this resource agent standard
*
* \return List of providers as char * list items (or NULL if standard does not
* support providers)
* \note The caller is responsible for freeing the result using
* g_list_free_full(list, free).
*/
GList *resources_list_providers(const char *standard);
/*!
* \brief Get a list of resource agents
*
* \param[in] standard List agents of this standard (or NULL for all)
* \param[in] provider List agents of this provider (or NULL for all)
*
* \return List of resource agents as char * items.
* \note The caller is responsible for freeing the result using
* g_list_free_full(list, free).
*/
GList *resources_list_agents(const char *standard, const char *provider);
/*!
* Get list of available standards
*
* \return List of resource standards as char * items.
* \note The caller is responsible for freeing the result using
* g_list_free_full(list, free).
*/
GList *resources_list_standards(void);
/*!
* \brief Check whether a resource agent exists on the local host
*
* \param[in] standard Resource agent standard of agent to check
* \param[in] provider Provider of agent to check (or NULL)
* \param[in] agent Name of agent to check
*
* \return TRUE if agent exists locally, otherwise FALSE
*/
gboolean resources_agent_exists(const char *standard, const char *provider,
const char *agent);
/*!
* \brief Create a new resource action
*
* \param[in] name Name of resource that action is for
* \param[in] standard Resource agent standard
* \param[in] provider Resource agent provider
* \param[in] agent Resource agent name
* \param[in] action Name of action to create
* \param[in] interval_ms How often to repeat action (if 0, execute once)
* \param[in] timeout Error if not complete within this time (ms)
* \param[in,out] params Action parameters
* \param[in] flags Group of enum svc_action_flags
*
* \return Newly allocated action
* \note This function assumes ownership of (and may free) \p params.
* \note The caller is responsible for freeing the return value using
* services_action_free().
*/
svc_action_t *resources_action_create(const char *name, const char *standard,
const char *provider, const char *agent,
const char *action, guint interval_ms,
int timeout, GHashTable *params,
enum svc_action_flags flags);
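/* Example (a minimal sketch, not part of this header; the resource name and
 * agent are hypothetical): create and synchronously run a one-shot OCF
 * "monitor" action with a 20-second timeout, then inspect the result.
 *
 *     GHashTable *params = g_hash_table_new_full(g_str_hash, g_str_equal,
 *                                                free, free);
 *     svc_action_t *op = resources_action_create("web-ip", "ocf",
 *                                                "heartbeat", "IPaddr2",
 *                                                "monitor", 0, 20000,
 *                                                params, 0);
 *     // params is now owned by op and must not be freed separately
 *     if ((op != NULL) && services_action_sync(op)) {
 *         crm_info("monitor returned %d (status %d)", op->rc, op->status);
 *     }
 *     services_action_free(op);
 */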
/*!
* \brief Reschedule a recurring action for immediate execution
*
* \param[in] name Name of resource that action is for
* \param[in] action Action's name
* \param[in] interval_ms Action's interval (in milliseconds)
*
* \return TRUE on success, otherwise FALSE
*/
gboolean services_action_kick(const char *name, const char *action,
guint interval_ms);
const char *resources_find_service_class(const char *agent);
/*!
* \brief Request execution of an arbitrary command
*
* This API can run a command in the background and notify the caller via a
* callback when the command finishes.
*
* \param[in] exec Full path to command executable
* \param[in] args NULL-terminated list of arguments to pass to command
*
* \return Newly allocated action object
*/
svc_action_t *services_action_create_generic(const char *exec,
const char *args[]);
void services_action_cleanup(svc_action_t *op);
void services_action_free(svc_action_t *op);
int services_action_user(svc_action_t *op, const char *user);
gboolean services_action_sync(svc_action_t *op);
/*!
* \brief Run an action asynchronously, with callback after process is forked
*
* \param[in,out] op Action to run
* \param[in] action_callback Function to call when action completes
* (if NULL, any previously set callback will
* continue to be used)
* \param[in] action_fork_callback Function to call after child process is
* forked for action (if NULL, any
* previously set callback will continue to
* be used)
*
* \retval TRUE if the caller should not free or otherwise use \p op again,
* because one of these conditions is true:
*
* * \p op is NULL.
* * The action was successfully initiated, in which case
* \p action_fork_callback has been called, but \p action_callback has
* not (it will be called when the action completes).
* * The action's ID matched an existing recurring action. The existing
* action has taken over the callback and callback data from \p op
* and has been re-initiated asynchronously, and \p op has been freed.
* * Another action for the same resource is in flight, and \p op will
* be blocked until it completes.
* * The action could not be initiated, and is either non-recurring or
* being cancelled. \p action_fork_callback has not been called, but
* \p action_callback has, and \p op has been freed.
*
* \retval FALSE if \p op is still valid, because the action cannot be initiated,
* and is a recurring action that is not being cancelled.
* \p action_fork_callback has not been called, but \p action_callback
* has, and a timer has been set for the next invocation of \p op.
*/
gboolean services_action_async_fork_notify(svc_action_t *op,
void (*action_callback) (svc_action_t *),
void (*action_fork_callback) (svc_action_t *));
/*!
* \brief Request asynchronous execution of an action
*
* \param[in,out] op Action to execute
* \param[in] action_callback Function to call when the action completes
* (if NULL, any previously set callback will
* continue to be used)
*
* \retval TRUE if the caller should not free or otherwise use \p op again,
* because one of these conditions is true:
*
* * \p op is NULL.
* * The action was successfully initiated, in which case
* \p action_callback has not been called (it will be called when the
* action completes).
* * The action's ID matched an existing recurring action. The existing
* action has taken over the callback and callback data from \p op
* and has been re-initiated asynchronously, and \p op has been freed.
* * Another action for the same resource is in flight, and \p op will
* be blocked until it completes.
* * The action could not be initiated, and is either non-recurring or
* being cancelled. \p action_callback has been called, and \p op has
* been freed.
*
* \retval FALSE if \p op is still valid, because the action cannot be initiated,
* and is a recurring action that is not being cancelled.
* \p action_callback has been called, and a timer has been set for the
* next invocation of \p op.
*/
gboolean services_action_async(svc_action_t *op,
void (*action_callback) (svc_action_t *));
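/* Example (sketch only; the callback name is hypothetical): request
 * asynchronous execution and examine the result once the action completes.
 *
 *     static void my_done(svc_action_t *op) {
 *         crm_info("%s finished: rc=%d status=%d",
 *                  op->id, op->rc, op->status);
 *     }
 *     ...
 *     if (services_action_async(op, my_done)) {
 *         // op now belongs to the library; do not free or reuse it here
 *     }
 */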
gboolean services_action_cancel(const char *name, const char *action,
guint interval_ms);
/* functions for alert agents */
svc_action_t *services_alert_create(const char *id, const char *exec,
int timeout, GHashTable *params,
int sequence, void *cb_data);
gboolean services_alert_async(svc_action_t *action,
void (*cb)(svc_action_t *op));
enum ocf_exitcode services_result2ocf(const char *standard, const char *action,
int exit_status);
- static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) {
- switch (code) {
- case PCMK_OCF_OK:
- return "ok";
- case PCMK_OCF_UNKNOWN_ERROR:
- return "error";
- case PCMK_OCF_INVALID_PARAM:
- return "invalid parameter";
- case PCMK_OCF_UNIMPLEMENT_FEATURE:
- return "unimplemented feature";
- case PCMK_OCF_INSUFFICIENT_PRIV:
- return "insufficient privileges";
- case PCMK_OCF_NOT_INSTALLED:
- return "not installed";
- case PCMK_OCF_NOT_CONFIGURED:
- return "not configured";
- case PCMK_OCF_NOT_RUNNING:
- return "not running";
- case PCMK_OCF_RUNNING_PROMOTED:
- return "promoted";
- case PCMK_OCF_FAILED_PROMOTED:
- return "promoted (failed)";
- case PCMK_OCF_DEGRADED:
- return "OCF_DEGRADED";
- case PCMK_OCF_DEGRADED_PROMOTED:
- return "promoted (degraded)";
- default:
- return "unknown";
- }
- }
-
# ifdef __cplusplus
}
# endif
+#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
+#include <crm/services_compat.h>
+#endif
+
#endif /* __PCMK_SERVICES__ */
diff --git a/include/crm/services_compat.h b/include/crm/services_compat.h
new file mode 100644
index 0000000000..62c503abdc
--- /dev/null
+++ b/include/crm/services_compat.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2024 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PCMK__CRM_SERVICES_COMPAT__H
+#define PCMK__CRM_SERVICES_COMPAT__H
+
+#include <crm/common/results.h> // enum ocf_exitcode, PCMK_OCF_OK, etc.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//! \deprecated Use crm_exit_str() instead
+static inline const char *
+services_ocf_exitcode_str(enum ocf_exitcode code)
+{
+ switch (code) {
+ case PCMK_OCF_OK:
+ return "ok";
+ case PCMK_OCF_UNKNOWN_ERROR:
+ return "error";
+ case PCMK_OCF_INVALID_PARAM:
+ return "invalid parameter";
+ case PCMK_OCF_UNIMPLEMENT_FEATURE:
+ return "unimplemented feature";
+ case PCMK_OCF_INSUFFICIENT_PRIV:
+ return "insufficient privileges";
+ case PCMK_OCF_NOT_INSTALLED:
+ return "not installed";
+ case PCMK_OCF_NOT_CONFIGURED:
+ return "not configured";
+ case PCMK_OCF_NOT_RUNNING:
+ return "not running";
+ case PCMK_OCF_RUNNING_PROMOTED:
+ return "promoted";
+ case PCMK_OCF_FAILED_PROMOTED:
+ return "promoted (failed)";
+ case PCMK_OCF_DEGRADED:
+ return "OCF_DEGRADED";
+ case PCMK_OCF_DEGRADED_PROMOTED:
+ return "promoted (degraded)";
+ default:
+ return "unknown";
+ }
+}
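+/* Migration sketch (illustrative, not part of this header): callers that
+ * previously wrote
+ *
+ *     desc = services_ocf_exitcode_str(op->rc);
+ *
+ * can switch to the supported equivalent
+ *
+ *     desc = crm_exit_str((crm_exit_t) op->rc);
+ *
+ * as the lrmd_alerts.c hunk later in this patch does.
+ */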
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // PCMK__CRM_SERVICES_COMPAT__H
diff --git a/lib/lrmd/lrmd_alerts.c b/lib/lrmd/lrmd_alerts.c
index f04fc958c0..59e8ed8cd9 100644
--- a/lib/lrmd/lrmd_alerts.c
+++ b/lib/lrmd/lrmd_alerts.c
@@ -1,394 +1,394 @@
/*
* Copyright 2015-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
static lrmd_key_value_t *
alert_key2param(lrmd_key_value_t *head, enum pcmk__alert_keys_e name,
const char *value)
{
if (value == NULL) {
value = "";
}
crm_trace("Setting alert key %s = '%s'", pcmk__alert_keys[name], value);
return lrmd_key_value_add(head, pcmk__alert_keys[name], value);
}
static lrmd_key_value_t *
alert_key2param_int(lrmd_key_value_t *head, enum pcmk__alert_keys_e name,
int value)
{
char *value_s = pcmk__itoa(value);
head = alert_key2param(head, name, value_s);
free(value_s);
return head;
}
static lrmd_key_value_t *
alert_key2param_ms(lrmd_key_value_t *head, enum pcmk__alert_keys_e name,
guint value)
{
char *value_s = crm_strdup_printf("%u", value);
head = alert_key2param(head, name, value_s);
free(value_s);
return head;
}
static void
set_ev_kv(gpointer key, gpointer value, gpointer user_data)
{
lrmd_key_value_t **head = (lrmd_key_value_t **) user_data;
if (value) {
crm_trace("Setting environment variable %s='%s'",
(char*)key, (char*)value);
*head = lrmd_key_value_add(*head, key, value);
}
}
static lrmd_key_value_t *
alert_envvar2params(lrmd_key_value_t *head, const pcmk__alert_t *entry)
{
if (entry->envvars) {
g_hash_table_foreach(entry->envvars, set_ev_kv, &head);
}
return head;
}
/*
* We could use g_strv_contains() instead of this function,
* but that has only been available since glib 2.43.2.
*/
static gboolean
is_target_alert(char **list, const char *value)
{
int target_list_num = 0;
gboolean rc = FALSE;
CRM_CHECK(value != NULL, return FALSE);
if (list == NULL) {
return TRUE;
}
target_list_num = g_strv_length(list);
for (int cnt = 0; cnt < target_list_num; cnt++) {
if (strcmp(list[cnt], value) == 0) {
rc = TRUE;
break;
}
}
return rc;
}
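/* For reference (sketch only, not used here so the minimum glib version
 * stays low), the g_strv_contains() equivalent on glib >= 2.43.2 would be:
 *
 *     if ((list == NULL)
 *         || g_strv_contains((const gchar *const *) list, value)) {
 *         return TRUE;
 *     }
 */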
/*!
* \internal
* \brief Execute alert agents for an event
*
* \param[in,out] lrmd Executor connection to use
* \param[in] alert_list Alerts to execute
* \param[in] kind Type of event that is being alerted for
* \param[in] attr_name If pcmk__alert_attribute, the attribute name
* \param[in,out] params Environment variables to pass to agents
*
* \retval pcmk_ok on success
* \retval -1 if some alerts failed
* \retval -2 if all alerts failed
*/
static int
exec_alert_list(lrmd_t *lrmd, const GList *alert_list,
enum pcmk__alert_flags kind, const char *attr_name,
lrmd_key_value_t *params)
{
bool any_success = FALSE, any_failure = FALSE;
const char *kind_s = pcmk__alert_flag2text(kind);
pcmk__time_hr_t *now = NULL;
char timestamp_epoch[20];
char timestamp_usec[7];
time_t epoch = 0;
params = alert_key2param(params, PCMK__alert_key_kind, kind_s);
params = alert_key2param(params, PCMK__alert_key_version,
PACEMAKER_VERSION);
for (const GList *iter = alert_list;
iter != NULL; iter = g_list_next(iter)) {
const pcmk__alert_t *entry = (pcmk__alert_t *) (iter->data);
lrmd_key_value_t *copy_params = NULL;
lrmd_key_value_t *head = NULL;
int rc;
if (!pcmk_is_set(entry->flags, kind)) {
crm_trace("Filtering unwanted %s alert to %s via %s",
kind_s, entry->recipient, entry->id);
continue;
}
if ((kind == pcmk__alert_attribute)
&& !is_target_alert(entry->select_attribute_name, attr_name)) {
crm_trace("Filtering unwanted attribute '%s' alert to %s via %s",
attr_name, entry->recipient, entry->id);
continue;
}
if (now == NULL) {
now = pcmk__time_hr_now(&epoch);
}
crm_info("Sending %s alert via %s to %s",
kind_s, entry->id, entry->recipient);
/* Copy the parameters, because each alert gets its own recipient and timestamp values */
for (head = params; head != NULL; head = head->next) {
copy_params = lrmd_key_value_add(copy_params, head->key, head->value);
}
copy_params = alert_key2param(copy_params, PCMK__alert_key_recipient,
entry->recipient);
if (now) {
char *timestamp = pcmk__time_format_hr(entry->tstamp_format, now);
if (timestamp) {
copy_params = alert_key2param(copy_params,
PCMK__alert_key_timestamp,
timestamp);
free(timestamp);
}
snprintf(timestamp_epoch, sizeof(timestamp_epoch), "%lld",
(long long) epoch);
copy_params = alert_key2param(copy_params,
PCMK__alert_key_timestamp_epoch,
timestamp_epoch);
snprintf(timestamp_usec, sizeof(timestamp_usec), "%06d", now->useconds);
copy_params = alert_key2param(copy_params,
PCMK__alert_key_timestamp_usec,
timestamp_usec);
}
copy_params = alert_envvar2params(copy_params, entry);
rc = lrmd->cmds->exec_alert(lrmd, entry->id, entry->path,
entry->timeout, copy_params);
if (rc < 0) {
crm_err("Could not execute alert %s: %s " QB_XS " rc=%d",
entry->id, pcmk_strerror(rc), rc);
any_failure = TRUE;
} else {
any_success = TRUE;
}
}
if (now) {
free(now);
}
if (any_failure) {
return (any_success? -1 : -2);
}
return pcmk_ok;
}
/*!
* \internal
* \brief Send an alert for a node attribute change
*
* \param[in,out] lrmd Executor connection to use
* \param[in] alert_list List of alert agents to execute
* \param[in] node Name of node with attribute change
* \param[in] nodeid Node ID of node with attribute change
* \param[in] attr_name Name of attribute that changed
* \param[in] attr_value New value of attribute that changed
*
* \retval pcmk_ok on success
* \retval -1 if some alert agents failed
* \retval -2 if all alert agents failed
*/
int
lrmd_send_attribute_alert(lrmd_t *lrmd, const GList *alert_list,
const char *node, uint32_t nodeid,
const char *attr_name, const char *attr_value)
{
int rc = pcmk_ok;
lrmd_key_value_t *params = NULL;
if (lrmd == NULL) {
return -2;
}
params = alert_key2param(params, PCMK__alert_key_node, node);
params = alert_key2param_int(params, PCMK__alert_key_nodeid, nodeid);
params = alert_key2param(params, PCMK__alert_key_attribute_name, attr_name);
params = alert_key2param(params, PCMK__alert_key_attribute_value,
attr_value);
rc = exec_alert_list(lrmd, alert_list, pcmk__alert_attribute, attr_name,
params);
lrmd_key_value_freeall(params);
return rc;
}
/*!
* \internal
* \brief Send an alert for a node membership event
*
* \param[in,out] lrmd Executor connection to use
* \param[in] alert_list List of alert agents to execute
* \param[in] node Name of node with change
* \param[in] nodeid Node ID of node with change
* \param[in] state New state of node with change
*
* \retval pcmk_ok on success
* \retval -1 if some alert agents failed
* \retval -2 if all alert agents failed
*/
int
lrmd_send_node_alert(lrmd_t *lrmd, const GList *alert_list,
const char *node, uint32_t nodeid, const char *state)
{
int rc = pcmk_ok;
lrmd_key_value_t *params = NULL;
if (lrmd == NULL) {
return -2;
}
params = alert_key2param(params, PCMK__alert_key_node, node);
params = alert_key2param(params, PCMK__alert_key_desc, state);
params = alert_key2param_int(params, PCMK__alert_key_nodeid, nodeid);
rc = exec_alert_list(lrmd, alert_list, pcmk__alert_node, NULL, params);
lrmd_key_value_freeall(params);
return rc;
}
/*!
* \internal
* \brief Send an alert for a fencing event
*
* \param[in,out] lrmd Executor connection to use
* \param[in] alert_list List of alert agents to execute
* \param[in] target Name of fence target node
* \param[in] task Type of fencing event that occurred
* \param[in] desc Readable description of event
* \param[in] op_rc Result of fence action
*
* \retval pcmk_ok on success
* \retval -1 if some alert agents failed
* \retval -2 if all alert agents failed
*/
int
lrmd_send_fencing_alert(lrmd_t *lrmd, const GList *alert_list,
const char *target, const char *task, const char *desc,
int op_rc)
{
int rc = pcmk_ok;
lrmd_key_value_t *params = NULL;
if (lrmd == NULL) {
return -2;
}
params = alert_key2param(params, PCMK__alert_key_node, target);
params = alert_key2param(params, PCMK__alert_key_task, task);
params = alert_key2param(params, PCMK__alert_key_desc, desc);
params = alert_key2param_int(params, PCMK__alert_key_rc, op_rc);
rc = exec_alert_list(lrmd, alert_list, pcmk__alert_fencing, NULL, params);
lrmd_key_value_freeall(params);
return rc;
}
/*!
* \internal
* \brief Send an alert for a resource operation
*
* \param[in,out] lrmd Executor connection to use
* \param[in] alert_list List of alert agents to execute
* \param[in] node Name of node that executed operation
* \param[in] op Resource operation
*
* \retval pcmk_ok on success
* \retval -1 if some alert agents failed
* \retval -2 if all alert agents failed
*/
int
lrmd_send_resource_alert(lrmd_t *lrmd, const GList *alert_list,
const char *node, const lrmd_event_data_t *op)
{
int rc = pcmk_ok;
int target_rc = pcmk_ok;
lrmd_key_value_t *params = NULL;
if (lrmd == NULL) {
return -2;
}
target_rc = rsc_op_expected_rc(op);
if ((op->interval_ms == 0) && (target_rc == op->rc)
&& pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
/* Don't send alerts for probes with the expected result. Leave it up to
* the agent whether to alert for 'failed' probes. (Even if we find a
* resource running, it was probably because someone did a clean-up of
* the status section.)
*/
return pcmk_ok;
}
params = alert_key2param(params, PCMK__alert_key_node, node);
params = alert_key2param(params, PCMK__alert_key_rsc, op->rsc_id);
params = alert_key2param(params, PCMK__alert_key_task, op->op_type);
params = alert_key2param_ms(params, PCMK__alert_key_interval,
op->interval_ms);
params = alert_key2param_int(params, PCMK__alert_key_target_rc, target_rc);
params = alert_key2param_int(params, PCMK__alert_key_status, op->op_status);
params = alert_key2param_int(params, PCMK__alert_key_rc, op->rc);
/* Recurring operations do not set exec_time, so if such an operation timed
 * out, use the operation timeout instead, since that's closer to the
 * actual value.
 */
if ((op->op_status == PCMK_EXEC_TIMEOUT) && (op->exec_time == 0)) {
params = alert_key2param_int(params, PCMK__alert_key_exec_time,
op->timeout);
} else {
params = alert_key2param_int(params, PCMK__alert_key_exec_time,
op->exec_time);
}
if (op->op_status == PCMK_EXEC_DONE) {
params = alert_key2param(params, PCMK__alert_key_desc,
- services_ocf_exitcode_str(op->rc));
+ crm_exit_str((crm_exit_t) op->rc));
} else {
params = alert_key2param(params, PCMK__alert_key_desc,
pcmk_exec_status_str(op->op_status));
}
rc = exec_alert_list(lrmd, alert_list, pcmk__alert_resource, NULL, params);
lrmd_key_value_freeall(params);
return rc;
}
diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c
index 0fb9bb9c83..5f63c4208e 100644
--- a/lib/pengine/clone.c
+++ b/lib/pengine/clone.c
@@ -1,1256 +1,1257 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
typedef struct clone_variant_data_s {
int clone_max;
int clone_node_max;
int promoted_max;
int promoted_node_max;
int total_clones;
uint32_t flags; // Group of enum pcmk__clone_flags
notify_data_t *stop_notify;
notify_data_t *start_notify;
notify_data_t *demote_notify;
notify_data_t *promote_notify;
xmlNode *xml_obj_child;
} clone_variant_data_t;
#define get_clone_variant_data(data, rsc) do { \
pcmk__assert(pcmk__is_clone(rsc)); \
data = rsc->priv->variant_opaque; \
} while (0)
/*!
* \internal
* \brief Return the maximum number of clone instances allowed to be run
*
* \param[in] clone Clone or clone instance to check
*
* \return Maximum instances for \p clone
*/
int
pe__clone_max(const pcmk_resource_t *clone)
{
const clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, pe__const_top_resource(clone, false));
return clone_data->clone_max;
}
/*!
* \internal
* \brief Return the maximum number of clone instances allowed per node
*
* \param[in] clone Clone or clone instance to check
*
* \return Maximum allowed instances per node for \p clone
*/
int
pe__clone_node_max(const pcmk_resource_t *clone)
{
const clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, pe__const_top_resource(clone, false));
return clone_data->clone_node_max;
}
/*!
* \internal
* \brief Return the maximum number of clone instances allowed to be promoted
*
* \param[in] clone Promotable clone or clone instance to check
*
* \return Maximum promoted instances for \p clone
*/
int
pe__clone_promoted_max(const pcmk_resource_t *clone)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, pe__const_top_resource(clone, false));
return clone_data->promoted_max;
}
/*!
* \internal
* \brief Return the maximum number of promoted instances allowed per node
*
* \param[in] clone Promotable clone or clone instance to check
*
* \return Maximum promoted instances per node for \p clone
*/
int
pe__clone_promoted_node_max(const pcmk_resource_t *clone)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, pe__const_top_resource(clone, false));
return clone_data->promoted_node_max;
}
static GList *
sorted_hash_table_values(GHashTable *table)
{
GList *retval = NULL;
GHashTableIter iter;
gpointer key, value;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
if (!g_list_find_custom(retval, value, (GCompareFunc) strcmp)) {
retval = g_list_prepend(retval, (char *) value);
}
}
retval = g_list_sort(retval, (GCompareFunc) strcmp);
return retval;
}
static GList *
nodes_with_status(GHashTable *table, const char *status)
{
GList *retval = NULL;
GHashTableIter iter;
gpointer key, value;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
if (!strcmp((char *) value, status)) {
retval = g_list_prepend(retval, key);
}
}
retval = g_list_sort(retval, (GCompareFunc) pcmk__numeric_strcasecmp);
return retval;
}
static GString *
node_list_to_str(const GList *list)
{
GString *retval = NULL;
for (const GList *iter = list; iter != NULL; iter = iter->next) {
pcmk__add_word(&retval, 1024, (const char *) iter->data);
}
return retval;
}
static void
clone_header(pcmk__output_t *out, int *rc, const pcmk_resource_t *rsc,
clone_variant_data_t *clone_data, const char *desc)
{
GString *attrs = NULL;
if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) {
pcmk__add_separated_word(&attrs, 64, "promotable", ", ");
}
if (pcmk_is_set(rsc->flags, pcmk__rsc_unique)) {
pcmk__add_separated_word(&attrs, 64, "unique", ", ");
}
if (pe__resource_is_disabled(rsc)) {
pcmk__add_separated_word(&attrs, 64, "disabled", ", ");
}
if (pcmk_is_set(rsc->flags, pcmk__rsc_maintenance)) {
pcmk__add_separated_word(&attrs, 64, "maintenance", ", ");
} else if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
pcmk__add_separated_word(&attrs, 64, "unmanaged", ", ");
}
if (attrs != NULL) {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, *rc, "Clone Set: %s [%s] (%s)%s%s%s",
rsc->id,
pcmk__xe_id(clone_data->xml_obj_child),
(const char *) attrs->str, desc ? " (" : "",
desc ? desc : "", desc ? ")" : "");
g_string_free(attrs, TRUE);
} else {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, *rc, "Clone Set: %s [%s]%s%s%s",
rsc->id,
pcmk__xe_id(clone_data->xml_obj_child),
desc ? " (" : "", desc ? desc : "",
desc ? ")" : "");
}
}
void
pe__force_anon(const char *standard, pcmk_resource_t *rsc, const char *rid,
pcmk_scheduler_t *scheduler)
{
if (pcmk__is_clone(rsc)) {
clone_variant_data_t *clone_data = rsc->priv->variant_opaque;
pcmk__config_warn("Ignoring " PCMK_META_GLOBALLY_UNIQUE " for %s "
"because %s resources such as %s can be used only as "
"anonymous clones", rsc->id, standard, rid);
clone_data->clone_node_max = 1;
clone_data->clone_max = QB_MIN(clone_data->clone_max,
g_list_length(scheduler->nodes));
}
}
pcmk_resource_t *
pe__create_clone_child(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
{
gboolean as_orphan = FALSE;
char *inc_num = NULL;
char *inc_max = NULL;
pcmk_resource_t *child_rsc = NULL;
xmlNode *child_copy = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
CRM_CHECK(clone_data->xml_obj_child != NULL, return NULL);
if (clone_data->total_clones >= clone_data->clone_max) {
// If we've already used all available instances, this is an orphan
as_orphan = TRUE;
}
// Allocate instance numbers in numerical order (starting at 0)
inc_num = pcmk__itoa(clone_data->total_clones);
inc_max = pcmk__itoa(clone_data->clone_max);
child_copy = pcmk__xml_copy(NULL, clone_data->xml_obj_child);
crm_xml_add(child_copy, PCMK__META_CLONE, inc_num);
if (pe__unpack_resource(child_copy, &child_rsc, rsc,
scheduler) != pcmk_rc_ok) {
goto bail;
}
/* child_rsc->globally_unique = rsc->globally_unique; */
pcmk__assert(child_rsc != NULL);
clone_data->total_clones += 1;
pcmk__rsc_trace(child_rsc, "Setting clone attributes for: %s",
child_rsc->id);
rsc->priv->children = g_list_append(rsc->priv->children, child_rsc);
if (as_orphan) {
pe__set_resource_flags_recursive(child_rsc, pcmk__rsc_removed);
}
pcmk__insert_meta(child_rsc->priv, PCMK_META_CLONE_MAX, inc_max);
pcmk__rsc_trace(rsc, "Added %s instance %s", rsc->id, child_rsc->id);
bail:
free(inc_num);
free(inc_max);
return child_rsc;
}
/*!
* \internal
* \brief Unpack a nonnegative integer value from a resource meta-attribute
*
* \param[in] rsc Resource with meta-attribute
* \param[in] meta_name Name of meta-attribute to unpack
* \param[in] deprecated_name If not NULL, try unpacking this
* if \p meta_name is unset
* \param[in] default_value Value to use if unset
*
* \return Integer parsed from resource's specified meta-attribute if a valid
* nonnegative integer, \p default_value if unset, or 0 if invalid
*/
static int
unpack_meta_int(const pcmk_resource_t *rsc, const char *meta_name,
const char *deprecated_name, int default_value)
{
int integer = default_value;
const char *value = g_hash_table_lookup(rsc->priv->meta, meta_name);
if ((value == NULL) && (deprecated_name != NULL)) {
value = g_hash_table_lookup(rsc->priv->meta, deprecated_name);
if (value != NULL) {
if (pcmk__str_eq(deprecated_name, PCMK__META_PROMOTED_MAX_LEGACY,
pcmk__str_none)) {
pcmk__warn_once(pcmk__wo_clone_master_max,
"Support for the " PCMK__META_PROMOTED_MAX_LEGACY
" meta-attribute (such as in %s) is deprecated "
"and will be removed in a future release. Use the "
PCMK_META_PROMOTED_MAX " meta-attribute instead.",
rsc->id);
} else if (pcmk__str_eq(deprecated_name, PCMK__META_PROMOTED_NODE_MAX_LEGACY,
pcmk__str_none)) {
pcmk__warn_once(pcmk__wo_clone_master_node_max,
"Support for the " PCMK__META_PROMOTED_NODE_MAX_LEGACY
" meta-attribute (such as in %s) is deprecated "
"and will be removed in a future release. Use the "
PCMK_META_PROMOTED_NODE_MAX " meta-attribute instead.",
rsc->id);
}
}
}
if (value != NULL) {
pcmk__scan_min_int(value, &integer, 0);
}
return integer;
}
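/* For example (hypothetical attribute values): with PCMK_META_PROMOTED_MAX
 * unset and the deprecated master-max="2" set, the promotable-clone
 * unpacking below calls
 *
 *     unpack_meta_int(rsc, PCMK_META_PROMOTED_MAX,
 *                     PCMK__META_PROMOTED_MAX_LEGACY, 1);
 *
 * and gets 2, logging the deprecation warning once; with neither set, it
 * returns the default of 1.
 */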
gboolean
clone_unpack(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
{
int lpc = 0;
xmlNode *a_child = NULL;
xmlNode *xml_obj = rsc->priv->xml;
clone_variant_data_t *clone_data = NULL;
pcmk__rsc_trace(rsc, "Processing resource %s...", rsc->id);
clone_data = pcmk__assert_alloc(1, sizeof(clone_variant_data_t));
rsc->priv->variant_opaque = clone_data;
if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) {
// Use 1 as default but 0 for minimum and invalid
// @COMPAT PCMK__META_PROMOTED_MAX_LEGACY deprecated since 2.0.0
clone_data->promoted_max =
unpack_meta_int(rsc, PCMK_META_PROMOTED_MAX,
PCMK__META_PROMOTED_MAX_LEGACY, 1);
// Use 1 as default but 0 for minimum and invalid
// @COMPAT PCMK__META_PROMOTED_NODE_MAX_LEGACY deprecated since 2.0.0
clone_data->promoted_node_max =
unpack_meta_int(rsc, PCMK_META_PROMOTED_NODE_MAX,
PCMK__META_PROMOTED_NODE_MAX_LEGACY, 1);
}
// Use 1 as default but 0 for minimum and invalid
clone_data->clone_node_max = unpack_meta_int(rsc, PCMK_META_CLONE_NODE_MAX,
NULL, 1);
/* Use number of nodes (but always at least 1, which is handy for crm_verify
* for a CIB without nodes) as default, but 0 for minimum and invalid
*/
clone_data->clone_max = unpack_meta_int(rsc, PCMK_META_CLONE_MAX, NULL,
QB_MAX(1, g_list_length(scheduler->nodes)));
if (crm_is_true(g_hash_table_lookup(rsc->priv->meta,
PCMK_META_ORDERED))) {
clone_data->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,
"Clone", rsc->id,
clone_data->flags,
pcmk__clone_ordered,
"pcmk__clone_ordered");
}
if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)
&& (clone_data->clone_node_max > 1)) {
pcmk__config_err("Ignoring " PCMK_META_CLONE_NODE_MAX " of %d for %s "
"because anonymous clones support only one instance "
"per node", clone_data->clone_node_max, rsc->id);
clone_data->clone_node_max = 1;
}
pcmk__rsc_trace(rsc, "Options for %s", rsc->id);
pcmk__rsc_trace(rsc, "\tClone max: %d", clone_data->clone_max);
pcmk__rsc_trace(rsc, "\tClone node max: %d", clone_data->clone_node_max);
pcmk__rsc_trace(rsc, "\tClone is unique: %s",
pcmk__flag_text(rsc->flags, pcmk__rsc_unique));
pcmk__rsc_trace(rsc, "\tClone is promotable: %s",
pcmk__flag_text(rsc->flags, pcmk__rsc_promotable));
// Clones may contain a single group or primitive
for (a_child = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
a_child != NULL; a_child = pcmk__xe_next(a_child)) {
if (pcmk__str_any_of((const char *) a_child->name,
PCMK_XE_PRIMITIVE, PCMK_XE_GROUP, NULL)) {
clone_data->xml_obj_child = a_child;
break;
}
}
if (clone_data->xml_obj_child == NULL) {
pcmk__config_err("%s has nothing to clone", rsc->id);
return FALSE;
}
/*
* Make clones ever so slightly sticky by default
*
* This helps ensure clone instances are not shuffled around the cluster
* for no benefit when pre-allocation is not appropriate.
*/
if (g_hash_table_lookup(rsc->priv->meta,
PCMK_META_RESOURCE_STICKINESS) == NULL) {
pcmk__insert_meta(rsc->priv, PCMK_META_RESOURCE_STICKINESS, "1");
}
/* This ensures that the PCMK_META_GLOBALLY_UNIQUE value always exists for
* children to inherit when being unpacked, as well as in resource agents'
* environment.
*/
pcmk__insert_meta(rsc->priv, PCMK_META_GLOBALLY_UNIQUE,
pcmk__flag_text(rsc->flags, pcmk__rsc_unique));
if (clone_data->clone_max <= 0) {
/* Create one child instance so that unpack_find_resource() will hook
* any orphans up to the parent correctly.
*/
if (pe__create_clone_child(rsc, scheduler) == NULL) {
return FALSE;
}
} else {
// Create a child instance for each available instance number
for (lpc = 0; lpc < clone_data->clone_max; lpc++) {
if (pe__create_clone_child(rsc, scheduler) == NULL) {
return FALSE;
}
}
}
pcmk__rsc_trace(rsc, "Added %d children to resource %s...",
clone_data->clone_max, rsc->id);
return TRUE;
}
gboolean
clone_active(pcmk_resource_t * rsc, gboolean all)
{
for (GList *gIter = rsc->priv->children;
gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *child_rsc = (pcmk_resource_t *) gIter->data;
gboolean child_active = child_rsc->priv->fns->active(child_rsc, all);
if (all == FALSE && child_active) {
return TRUE;
} else if (all && child_active == FALSE) {
return FALSE;
}
}
if (all) {
return TRUE;
} else {
return FALSE;
}
}
static const char *
configured_role_str(pcmk_resource_t * rsc)
{
const char *target_role = g_hash_table_lookup(rsc->priv->meta,
PCMK_META_TARGET_ROLE);
if ((target_role == NULL) && (rsc->priv->children != NULL)) {
// Any instance will do
pcmk_resource_t *instance = rsc->priv->children->data;
target_role = g_hash_table_lookup(instance->priv->meta,
PCMK_META_TARGET_ROLE);
}
return target_role;
}
static enum rsc_role_e
configured_role(pcmk_resource_t *rsc)
{
enum rsc_role_e role = pcmk_role_unknown;
const char *target_role = configured_role_str(rsc);
if (target_role != NULL) {
role = pcmk_parse_role(target_role);
if (role == pcmk_role_unknown) {
pcmk__config_err("Invalid " PCMK_META_TARGET_ROLE
" for resource %s", rsc->id);
}
}
return role;
}
bool
is_set_recursive(const pcmk_resource_t *rsc, long long flag, bool any)
{
bool all = !any;
if (pcmk_is_set(rsc->flags, flag)) {
if(any) {
return TRUE;
}
} else if(all) {
return FALSE;
}
for (GList *gIter = rsc->priv->children;
gIter != NULL; gIter = gIter->next) {
if(is_set_recursive(gIter->data, flag, any)) {
if(any) {
return TRUE;
}
} else if(all) {
return FALSE;
}
}
if(all) {
return TRUE;
}
return FALSE;
}
PCMK__OUTPUT_ARGS("clone", "uint32_t", "pcmk_resource_t *", "GList *",
"GList *")
int
pe__clone_xml(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
GList *all = NULL;
int rc = pcmk_rc_no_output;
gboolean printed_header = FALSE;
gboolean print_everything = TRUE;
if (rsc->priv->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) ||
(strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches));
all = g_list_prepend(all, (gpointer) "*");
for (GList *gIter = rsc->priv->children;
gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *child_rsc = (pcmk_resource_t *) gIter->data;
if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) {
continue;
}
if (child_rsc->priv->fns->is_filtered(child_rsc, only_rsc,
print_everything)) {
continue;
}
if (!printed_header) {
const char *multi_state = pcmk__flag_text(rsc->flags,
pcmk__rsc_promotable);
const char *unique = pcmk__flag_text(rsc->flags, pcmk__rsc_unique);
const char *maintenance = pcmk__flag_text(rsc->flags,
pcmk__rsc_maintenance);
const char *managed = pcmk__flag_text(rsc->flags,
pcmk__rsc_managed);
const char *disabled = pcmk__btoa(pe__resource_is_disabled(rsc));
const char *failed = pcmk__flag_text(rsc->flags, pcmk__rsc_failed);
const char *ignored = pcmk__flag_text(rsc->flags,
pcmk__rsc_ignore_failure);
const char *target_role = configured_role_str(rsc);
const char *desc = pe__resource_description(rsc, show_opts);
printed_header = TRUE;
rc = pe__name_and_nvpairs_xml(out, true, PCMK_XE_CLONE,
PCMK_XA_ID, rsc->id,
PCMK_XA_MULTI_STATE, multi_state,
PCMK_XA_UNIQUE, unique,
PCMK_XA_MAINTENANCE, maintenance,
PCMK_XA_MANAGED, managed,
PCMK_XA_DISABLED, disabled,
PCMK_XA_FAILED, failed,
PCMK_XA_FAILURE_IGNORED, ignored,
PCMK_XA_TARGET_ROLE, target_role,
PCMK_XA_DESCRIPTION, desc,
NULL);
pcmk__assert(rc == pcmk_rc_ok);
}
out->message(out, (const char *) child_rsc->priv->xml->name,
show_opts, child_rsc, only_node, all);
}
if (printed_header) {
pcmk__output_xml_pop_parent(out);
}
g_list_free(all);
return rc;
}
PCMK__OUTPUT_ARGS("clone", "uint32_t", "pcmk_resource_t *", "GList *",
"GList *")
int
pe__clone_default(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
GHashTable *stopped = NULL;
GString *list_text = NULL;
GList *promoted_list = NULL;
GList *started_list = NULL;
GList *gIter = NULL;
const char *desc = NULL;
clone_variant_data_t *clone_data = NULL;
int active_instances = 0;
int rc = pcmk_rc_no_output;
gboolean print_everything = TRUE;
desc = pe__resource_description(rsc, show_opts);
get_clone_variant_data(clone_data, rsc);
if (rsc->priv->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) ||
(strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches));
for (gIter = rsc->priv->children; gIter != NULL; gIter = gIter->next) {
gboolean print_full = FALSE;
pcmk_resource_t *child_rsc = (pcmk_resource_t *) gIter->data;
gboolean partially_active = child_rsc->priv->fns->active(child_rsc,
FALSE);
if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) {
continue;
}
if (child_rsc->priv->fns->is_filtered(child_rsc, only_rsc,
print_everything)) {
continue;
}
if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
print_full = TRUE;
}
if (pcmk_is_set(rsc->flags, pcmk__rsc_unique)) {
// Print individual instance when unique (except stopped orphans)
if (partially_active
|| !pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
print_full = TRUE;
}
// Everything else in this block is for anonymous clones
} else if (pcmk_is_set(show_opts, pcmk_show_pending)
&& (child_rsc->priv->pending_action != NULL)
&& (strcmp(child_rsc->priv->pending_action,
"probe") != 0)) {
// Print individual instance when non-probe action is pending
print_full = TRUE;
} else if (partially_active == FALSE) {
// List stopped instances when requested (except orphans)
if (!pcmk_is_set(child_rsc->flags, pcmk__rsc_removed)
&& !pcmk_is_set(show_opts, pcmk_show_clone_detail)
&& pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
if (stopped == NULL) {
stopped = pcmk__strkey_table(free, free);
}
pcmk__insert_dup(stopped, child_rsc->id, "Stopped");
}
} else if (is_set_recursive(child_rsc, pcmk__rsc_removed, TRUE)
|| !is_set_recursive(child_rsc, pcmk__rsc_managed, FALSE)
|| is_set_recursive(child_rsc, pcmk__rsc_failed, TRUE)) {
// Print individual instance when active orphaned/unmanaged/failed
print_full = TRUE;
} else if (child_rsc->priv->fns->active(child_rsc, TRUE)) {
// Instance of fully active anonymous clone
pcmk_node_t *location = NULL;
location = child_rsc->priv->fns->location(child_rsc, NULL,
pcmk__rsc_node_current);
if (location) {
// Instance is active on a single node
enum rsc_role_e a_role;
a_role = child_rsc->priv->fns->state(child_rsc, TRUE);
if (location->details->online == FALSE && location->details->unclean) {
print_full = TRUE;
} else if (a_role > pcmk_role_unpromoted) {
promoted_list = g_list_append(promoted_list, location);
} else {
started_list = g_list_append(started_list, location);
}
} else {
/* Group instance whose members are active on different nodes */
print_full = TRUE;
}
} else {
// Instance of partially active anonymous clone
print_full = TRUE;
}
if (print_full) {
GList *all = NULL;
clone_header(out, &rc, rsc, clone_data, desc);
/* Print every resource that's a child of this clone. */
all = g_list_prepend(all, (gpointer) "*");
out->message(out, (const char *) child_rsc->priv->xml->name,
show_opts, child_rsc, only_node, all);
g_list_free(all);
}
}
if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return pcmk_rc_ok;
}
/* Promoted */
promoted_list = g_list_sort(promoted_list, pe__cmp_node_name);
for (gIter = promoted_list; gIter; gIter = gIter->next) {
pcmk_node_t *host = gIter->data;
if (!pcmk__str_in_list(host->priv->name, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
continue;
}
pcmk__add_word(&list_text, 1024, host->priv->name);
active_instances++;
}
g_list_free(promoted_list);
if ((list_text != NULL) && (list_text->len > 0)) {
clone_header(out, &rc, rsc, clone_data, desc);
out->list_item(out, NULL, PCMK_ROLE_PROMOTED ": [ %s ]",
(const char *) list_text->str);
g_string_truncate(list_text, 0);
}
/* Started/Unpromoted */
started_list = g_list_sort(started_list, pe__cmp_node_name);
for (gIter = started_list; gIter; gIter = gIter->next) {
pcmk_node_t *host = gIter->data;
if (!pcmk__str_in_list(host->priv->name, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
continue;
}
pcmk__add_word(&list_text, 1024, host->priv->name);
active_instances++;
}
g_list_free(started_list);
if ((list_text != NULL) && (list_text->len > 0)) {
clone_header(out, &rc, rsc, clone_data, desc);
if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) {
enum rsc_role_e role = configured_role(rsc);
if (role == pcmk_role_unpromoted) {
out->list_item(out, NULL,
PCMK_ROLE_UNPROMOTED
" (" PCMK_META_TARGET_ROLE "): [ %s ]",
(const char *) list_text->str);
} else {
out->list_item(out, NULL, PCMK_ROLE_UNPROMOTED ": [ %s ]",
(const char *) list_text->str);
}
} else {
out->list_item(out, NULL, "Started: [ %s ]",
(const char *) list_text->str);
}
}
if (list_text != NULL) {
g_string_free(list_text, TRUE);
}
if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)
&& (clone_data->clone_max > active_instances)) {
GList *nIter;
GList *list = g_hash_table_get_values(rsc->priv->allowed_nodes);
/* Custom stopped table for non-unique clones */
if (stopped != NULL) {
g_hash_table_destroy(stopped);
stopped = NULL;
}
if (list == NULL) {
/* Clusters with PCMK_OPT_SYMMETRIC_CLUSTER=false haven't
* calculated allowed nodes yet. If we've not probed for them
* yet, the Stopped list will be empty.
*/
list = g_hash_table_get_values(rsc->priv->probed_nodes);
}
list = g_list_sort(list, pe__cmp_node_name);
for (nIter = list; nIter != NULL; nIter = nIter->next) {
pcmk_node_t *node = (pcmk_node_t *) nIter->data;
if ((pcmk__find_node_in_list(rsc->priv->active_nodes,
node->priv->name) == NULL)
&& pcmk__str_in_list(node->priv->name, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
xmlNode *probe_op = NULL;
const char *state = "Stopped";
if (configured_role(rsc) == pcmk_role_stopped) {
state = "Stopped (disabled)";
}
if (stopped == NULL) {
stopped = pcmk__strkey_table(free, free);
}
probe_op = pe__failed_probe_for_rsc(rsc,
node->priv->name);
if (probe_op != NULL) {
int rc;
pcmk__scan_min_int(crm_element_value(probe_op,
PCMK__XA_RC_CODE),
&rc, 0);
g_hash_table_insert(stopped, strdup(node->priv->name),
- crm_strdup_printf("Stopped (%s)", services_ocf_exitcode_str(rc)));
+ crm_strdup_printf("Stopped (%s)",
+ crm_exit_str(rc)));
} else {
pcmk__insert_dup(stopped, node->priv->name, state);
}
}
}
g_list_free(list);
}
if (stopped != NULL) {
GList *list = sorted_hash_table_values(stopped);
clone_header(out, &rc, rsc, clone_data, desc);
for (GList *status_iter = list; status_iter != NULL; status_iter = status_iter->next) {
const char *status = status_iter->data;
GList *nodes = nodes_with_status(stopped, status);
GString *nodes_str = node_list_to_str(nodes);
if (nodes_str != NULL) {
if (nodes_str->len > 0) {
out->list_item(out, NULL, "%s: [ %s ]", status,
(const char *) nodes_str->str);
}
g_string_free(nodes_str, TRUE);
}
g_list_free(nodes);
}
g_list_free(list);
g_hash_table_destroy(stopped);
/* If there are no instances of this clone (perhaps because there are no
* nodes configured), simply output the clone header by itself. This can
* come up in PCS testing.
*/
} else if (active_instances == 0) {
clone_header(out, &rc, rsc, clone_data, desc);
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
void
clone_free(pcmk_resource_t * rsc)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pcmk__rsc_trace(rsc, "Freeing %s", rsc->id);
for (GList *gIter = rsc->priv->children;
gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *child_rsc = (pcmk_resource_t *) gIter->data;
pcmk__assert(child_rsc != NULL);
pcmk__rsc_trace(child_rsc, "Freeing child %s", child_rsc->id);
pcmk__xml_free(child_rsc->priv->xml);
child_rsc->priv->xml = NULL;
/* There could be a saved unexpanded xml */
pcmk__xml_free(child_rsc->priv->orig_xml);
child_rsc->priv->orig_xml = NULL;
child_rsc->priv->fns->free(child_rsc);
}
g_list_free(rsc->priv->children);
if (clone_data) {
pcmk__assert((clone_data->demote_notify == NULL)
&& (clone_data->stop_notify == NULL)
&& (clone_data->start_notify == NULL)
&& (clone_data->promote_notify == NULL));
}
common_free(rsc);
}
enum rsc_role_e
clone_resource_state(const pcmk_resource_t * rsc, gboolean current)
{
enum rsc_role_e clone_role = pcmk_role_unknown;
for (GList *gIter = rsc->priv->children;
gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *child_rsc = (pcmk_resource_t *) gIter->data;
enum rsc_role_e a_role = child_rsc->priv->fns->state(child_rsc,
current);
if (a_role > clone_role) {
clone_role = a_role;
}
}
pcmk__rsc_trace(rsc, "%s role: %s", rsc->id, pcmk_role_text(clone_role));
return clone_role;
}
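/* Note: the loop above relies on the numeric ordering of enum rsc_role_e
 * (unknown < stopped < started < unpromoted < promoted), so, for example, a
 * clone with one promoted instance and several started instances reports
 * promoted overall.
 */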
/*!
* \internal
* \brief Check whether a clone has an instance for every node
*
* \param[in] rsc Clone to check
* \param[in] scheduler Scheduler data
*/
bool
pe__is_universal_clone(const pcmk_resource_t *rsc,
const pcmk_scheduler_t *scheduler)
{
if (pcmk__is_clone(rsc)) {
clone_variant_data_t *clone_data = rsc->priv->variant_opaque;
if (clone_data->clone_max == g_list_length(scheduler->nodes)) {
return TRUE;
}
}
return FALSE;
}
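/* Note: "universal" here means clone-max equals the number of cluster nodes,
 * i.e. the clone is configured with an instance for every node.
 */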
gboolean
pe__clone_is_filtered(const pcmk_resource_t *rsc, GList *only_rsc,
gboolean check_parent)
{
gboolean passes = FALSE;
clone_variant_data_t *clone_data = NULL;
if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches)) {
passes = TRUE;
} else {
get_clone_variant_data(clone_data, rsc);
passes = pcmk__str_in_list(pcmk__xe_id(clone_data->xml_obj_child),
only_rsc, pcmk__str_star_matches);
if (!passes) {
for (const GList *iter = rsc->priv->children;
iter != NULL; iter = iter->next) {
const pcmk_resource_t *child_rsc = NULL;
child_rsc = (const pcmk_resource_t *) iter->data;
if (!child_rsc->priv->fns->is_filtered(child_rsc, only_rsc,
FALSE)) {
passes = TRUE;
break;
}
}
}
}
return !passes;
}
const char *
pe__clone_child_id(const pcmk_resource_t *rsc)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
return pcmk__xe_id(clone_data->xml_obj_child);
}
/*!
* \internal
* \brief Check whether a clone is ordered
*
* \param[in] clone Clone resource to check
*
* \return true if clone is ordered, otherwise false
*/
bool
pe__clone_is_ordered(const pcmk_resource_t *clone)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, clone);
return pcmk_is_set(clone_data->flags, pcmk__clone_ordered);
}
/*!
* \internal
* \brief Set a clone flag
*
* \param[in,out] clone Clone resource to set flag for
* \param[in] flag Clone flag to set
*
* \return Standard Pacemaker return code (either pcmk_rc_ok if flag was not
* already set or pcmk_rc_already if it was)
*/
int
pe__set_clone_flag(pcmk_resource_t *clone, enum pcmk__clone_flags flag)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, clone);
if (pcmk_is_set(clone_data->flags, flag)) {
return pcmk_rc_already;
}
clone_data->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,
"Clone", clone->id,
clone_data->flags, flag, "flag");
return pcmk_rc_ok;
}
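/* Illustrative usage (a sketch, not code from this file): the return code
 * lets a caller act only the first time a flag is set, e.g.
 *
 *     if (pe__set_clone_flag(clone, pcmk__clone_ordered) == pcmk_rc_ok) {
 *         // Flag was newly set; pcmk_rc_already would mean it was already set
 *     }
 */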
/*!
* \internal
* \brief Check whether a clone flag is set
*
 * \param[in] clone Clone resource to check
* \param[in] flags Flag or flags to check
*
* \return \c true if all \p flags are set for \p clone, otherwise \c false
*/
bool
pe__clone_flag_is_set(const pcmk_resource_t *clone, uint32_t flags)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, clone);
pcmk__assert(clone_data != NULL);
return pcmk_all_flags_set(clone_data->flags, flags);
}
/*!
* \internal
* \brief Create pseudo-actions needed for promotable clones
*
* \param[in,out] clone Promotable clone to create actions for
 * \param[in] any_promoting Whether any instances will be promoted
 * \param[in] any_demoting Whether any instances will be demoted
*/
void
pe__create_promotable_pseudo_ops(pcmk_resource_t *clone, bool any_promoting,
bool any_demoting)
{
pcmk_action_t *action = NULL;
pcmk_action_t *action_complete = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, clone);
// Create a "promote" action for the clone itself
action = pe__new_rsc_pseudo_action(clone, PCMK_ACTION_PROMOTE,
!any_promoting, true);
// Create a "promoted" action for when all promotions are done
action_complete = pe__new_rsc_pseudo_action(clone, PCMK_ACTION_PROMOTED,
!any_promoting, true);
action_complete->priority = PCMK_SCORE_INFINITY;
// Create notification pseudo-actions for promotion
if (clone_data->promote_notify == NULL) {
clone_data->promote_notify = pe__action_notif_pseudo_ops(clone,
PCMK_ACTION_PROMOTE,
action,
action_complete);
}
// Create a "demote" action for the clone itself
action = pe__new_rsc_pseudo_action(clone, PCMK_ACTION_DEMOTE,
!any_demoting, true);
// Create a "demoted" action for when all demotions are done
action_complete = pe__new_rsc_pseudo_action(clone, PCMK_ACTION_DEMOTED,
!any_demoting, true);
action_complete->priority = PCMK_SCORE_INFINITY;
// Create notification pseudo-actions for demotion
if (clone_data->demote_notify == NULL) {
clone_data->demote_notify = pe__action_notif_pseudo_ops(clone,
PCMK_ACTION_DEMOTE,
action,
action_complete);
if (clone_data->promote_notify != NULL) {
order_actions(clone_data->stop_notify->post_done,
clone_data->promote_notify->pre, pcmk__ar_ordered);
order_actions(clone_data->start_notify->post_done,
clone_data->promote_notify->pre, pcmk__ar_ordered);
order_actions(clone_data->demote_notify->post_done,
clone_data->promote_notify->pre, pcmk__ar_ordered);
order_actions(clone_data->demote_notify->post_done,
clone_data->start_notify->pre, pcmk__ar_ordered);
order_actions(clone_data->demote_notify->post_done,
clone_data->stop_notify->pre, pcmk__ar_ordered);
}
}
}
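/* Note: together with the stop -> start ordering created in
 * pe__create_clone_notif_pseudo_ops(), the orderings above serialize the
 * notification phases as demote, then stop, then start, then promote.
 */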
/*!
* \internal
* \brief Create all notification data and actions for a clone
*
* \param[in,out] clone Clone to create notifications for
*/
void
pe__create_clone_notifications(pcmk_resource_t *clone)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, clone);
pe__create_action_notifications(clone, clone_data->start_notify);
pe__create_action_notifications(clone, clone_data->stop_notify);
pe__create_action_notifications(clone, clone_data->promote_notify);
pe__create_action_notifications(clone, clone_data->demote_notify);
}
/*!
* \internal
* \brief Free all notification data for a clone
*
* \param[in,out] clone Clone to free notification data for
*/
void
pe__free_clone_notification_data(pcmk_resource_t *clone)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, clone);
pe__free_action_notification_data(clone_data->demote_notify);
clone_data->demote_notify = NULL;
pe__free_action_notification_data(clone_data->stop_notify);
clone_data->stop_notify = NULL;
pe__free_action_notification_data(clone_data->start_notify);
clone_data->start_notify = NULL;
pe__free_action_notification_data(clone_data->promote_notify);
clone_data->promote_notify = NULL;
}
/*!
* \internal
* \brief Create pseudo-actions for clone start/stop notifications
*
* \param[in,out] clone Clone to create pseudo-actions for
* \param[in,out] start Start action for \p clone
* \param[in,out] stop Stop action for \p clone
* \param[in,out] started Started action for \p clone
* \param[in,out] stopped Stopped action for \p clone
*/
void
pe__create_clone_notif_pseudo_ops(pcmk_resource_t *clone,
pcmk_action_t *start, pcmk_action_t *started,
pcmk_action_t *stop, pcmk_action_t *stopped)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, clone);
if (clone_data->start_notify == NULL) {
clone_data->start_notify = pe__action_notif_pseudo_ops(clone,
PCMK_ACTION_START,
start, started);
}
if (clone_data->stop_notify == NULL) {
clone_data->stop_notify = pe__action_notif_pseudo_ops(clone,
PCMK_ACTION_STOP,
stop, stopped);
if ((clone_data->start_notify != NULL)
&& (clone_data->stop_notify != NULL)) {
order_actions(clone_data->stop_notify->post_done,
clone_data->start_notify->pre, pcmk__ar_ordered);
}
}
}
/*!
* \internal
* \brief Get maximum clone resource instances per node
*
* \param[in] rsc Clone resource to check
*
* \return Maximum number of \p rsc instances that can be active on one node
*/
unsigned int
pe__clone_max_per_node(const pcmk_resource_t *rsc)
{
const clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
return clone_data->clone_node_max;
}
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
index 45eb62dfe2..e2b8b32a95 100644
--- a/lib/pengine/native.c
+++ b/lib/pengine/native.c
@@ -1,1179 +1,1178 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
/*!
* \internal
* \brief Check whether a resource is active on multiple nodes
*/
static bool
is_multiply_active(const pcmk_resource_t *rsc)
{
unsigned int count = 0;
if (pcmk__is_primitive(rsc)) {
pe__find_active_requires(rsc, &count);
}
return count > 1;
}
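/*!
 * \internal
 * \brief Add a resource's priority to a node it is active on
 *
 * A promoted instance contributes its base priority plus 1. If the node is a
 * guest node, the priority is also added to the nodes hosting its launcher.
 *
 * \param[in,out] rsc     Resource whose priority should be added
 * \param[in,out] node    Node that \p rsc is active on
 * \param[in]     failed  Whether \p rsc is failed (if so, contribute nothing)
 */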
static void
native_priority_to_node(pcmk_resource_t *rsc, pcmk_node_t *node,
gboolean failed)
{
int priority = 0;
const bool promoted = (rsc->priv->orig_role == pcmk_role_promoted);
if ((rsc->priv->priority == 0) || failed) {
return;
}
if (promoted) {
// Promoted instance takes base priority + 1
priority = rsc->priv->priority + 1;
} else {
priority = rsc->priv->priority;
}
node->priv->priority += priority;
pcmk__rsc_trace(rsc, "%s now has priority %d with %s'%s' (priority: %d%s)",
pcmk__node_name(node), node->priv->priority,
(promoted? "promoted " : ""),
rsc->id, rsc->priv->priority, (promoted? " + 1" : ""));
/* Priority of a resource running on a guest node is added to the cluster
* node as well. */
if ((node->priv->remote != NULL)
&& (node->priv->remote->priv->launcher != NULL)) {
const pcmk_resource_t *launcher = NULL;
launcher = node->priv->remote->priv->launcher;
for (GList *gIter = launcher->priv->active_nodes;
gIter != NULL; gIter = gIter->next) {
pcmk_node_t *a_node = gIter->data;
a_node->priv->priority += priority;
pcmk__rsc_trace(rsc,
"%s now has priority %d with %s'%s' "
"(priority: %d%s) from guest node %s",
pcmk__node_name(a_node), a_node->priv->priority,
(promoted? "promoted " : ""), rsc->id,
rsc->priv->priority, (promoted? " + 1" : ""),
pcmk__node_name(node));
}
}
}
void
native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node,
pcmk_scheduler_t *scheduler, gboolean failed)
{
pcmk_resource_t *parent = rsc->priv->parent;
CRM_CHECK(node != NULL, return);
for (GList *gIter = rsc->priv->active_nodes;
gIter != NULL; gIter = gIter->next) {
pcmk_node_t *a_node = (pcmk_node_t *) gIter->data;
if (pcmk__same_node(a_node, node)) {
return;
}
}
pcmk__rsc_trace(rsc, "Adding %s to %s %s", rsc->id, pcmk__node_name(node),
pcmk_is_set(rsc->flags, pcmk__rsc_managed)? "" : "(unmanaged)");
rsc->priv->active_nodes = g_list_append(rsc->priv->active_nodes, node);
if (pcmk__is_primitive(rsc)) {
node->details->running_rsc = g_list_append(node->details->running_rsc, rsc);
native_priority_to_node(rsc, node, failed);
if (node->details->maintenance) {
pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
pcmk__set_rsc_flags(rsc, pcmk__rsc_maintenance);
}
}
if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
pcmk_resource_t *p = parent;
pcmk__rsc_info(rsc, "resource %s isn't managed", rsc->id);
resource_location(rsc, node, PCMK_SCORE_INFINITY,
"not_managed_default", scheduler);
while(p && node->details->online) {
/* add without the additional location constraint */
p->priv->active_nodes = g_list_append(p->priv->active_nodes, node);
p = p->priv->parent;
}
return;
}
if (is_multiply_active(rsc)) {
switch (rsc->priv->multiply_active_policy) {
case pcmk__multiply_active_stop:
{
GHashTableIter gIter;
pcmk_node_t *local_node = NULL;
/* make sure it doesn't come up again */
if (rsc->priv->allowed_nodes != NULL) {
g_hash_table_destroy(rsc->priv->allowed_nodes);
}
rsc->priv->allowed_nodes =
pe__node_list2table(scheduler->nodes);
g_hash_table_iter_init(&gIter, rsc->priv->allowed_nodes);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&local_node)) {
local_node->assign->score = -PCMK_SCORE_INFINITY;
}
}
break;
case pcmk__multiply_active_block:
pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked);
/* If the resource belongs to a group or bundle configured with
* PCMK_META_MULTIPLE_ACTIVE=PCMK_VALUE_BLOCK, block the entire
* entity.
*/
if ((pcmk__is_group(parent) || pcmk__is_bundle(parent))
&& (parent->priv->multiply_active_policy
== pcmk__multiply_active_block)) {
for (GList *gIter = parent->priv->children;
gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *child = gIter->data;
pcmk__clear_rsc_flags(child, pcmk__rsc_managed);
pcmk__set_rsc_flags(child, pcmk__rsc_blocked);
}
}
break;
// pcmk__multiply_active_restart, pcmk__multiply_active_unexpected
default:
/* The scheduler will do the right thing because the relevant
* variables and flags are set when unpacking the history.
*/
break;
}
crm_debug("%s is active on multiple nodes including %s: %s",
rsc->id, pcmk__node_name(node),
pcmk__multiply_active_text(rsc));
} else {
pcmk__rsc_trace(rsc, "Resource %s is active on %s",
rsc->id, pcmk__node_name(node));
}
if (parent != NULL) {
native_add_running(parent, node, scheduler, FALSE);
}
}
static void
recursive_clear_unique(pcmk_resource_t *rsc, gpointer user_data)
{
pcmk__clear_rsc_flags(rsc, pcmk__rsc_unique);
pcmk__insert_meta(rsc->priv, PCMK_META_GLOBALLY_UNIQUE,
PCMK_VALUE_FALSE);
g_list_foreach(rsc->priv->children, (GFunc) recursive_clear_unique,
NULL);
}
gboolean
native_unpack(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
{
pcmk_resource_t *parent = uber_parent(rsc);
const char *standard = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS);
uint32_t ra_caps = pcmk_get_ra_caps(standard);
pcmk__rsc_trace(rsc, "Processing resource %s...", rsc->id);
// Only some agent standards support unique and promotable clones
if (!pcmk_is_set(ra_caps, pcmk_ra_cap_unique)
&& pcmk_is_set(rsc->flags, pcmk__rsc_unique)
&& pcmk__is_clone(parent)) {
/* @COMPAT We should probably reject this situation as an error (as we
* do for promotable below) rather than warn and convert, but that would
* be a backward-incompatible change that we should probably do with a
* transform at a schema major version bump.
*/
pe__force_anon(standard, parent, rsc->id, scheduler);
/* Clear PCMK_META_GLOBALLY_UNIQUE on the parent and all its descendants
* unpacked so far (clearing the parent should make any future children
* unpacking correct). We have to clear this resource explicitly because
* it isn't hooked into the parent's children yet.
*/
recursive_clear_unique(parent, NULL);
recursive_clear_unique(rsc, NULL);
}
if (!pcmk_is_set(ra_caps, pcmk_ra_cap_promotable)
&& pcmk_is_set(parent->flags, pcmk__rsc_promotable)) {
pcmk__config_err("Resource %s is of type %s and therefore "
"cannot be used as a promotable clone resource",
rsc->id, standard);
return FALSE;
}
return TRUE;
}
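/*!
 * \internal
 * \brief Check whether a resource is on a given node
 *
 * \param[in,out] rsc    Resource to check
 * \param[in]     node   Node to check for
 * \param[in]     flags  With pcmk_rsc_match_current_node, compare against the
 *                       resource's active nodes, otherwise against its
 *                       assigned node
 */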
static bool
rsc_is_on_node(pcmk_resource_t *rsc, const pcmk_node_t *node, int flags)
{
pcmk__rsc_trace(rsc, "Checking whether %s is on %s",
rsc->id, pcmk__node_name(node));
if (pcmk_is_set(flags, pcmk_rsc_match_current_node)
&& (rsc->priv->active_nodes != NULL)) {
for (GList *iter = rsc->priv->active_nodes;
iter != NULL; iter = iter->next) {
if (pcmk__same_node((pcmk_node_t *) iter->data, node)) {
return true;
}
}
} else if (!pcmk_is_set(flags, pcmk_rsc_match_current_node)
&& (rsc->priv->assigned_node != NULL)
&& pcmk__same_node(rsc->priv->assigned_node, node)) {
return true;
}
return false;
}
pcmk_resource_t *
native_find_rsc(pcmk_resource_t *rsc, const char *id,
const pcmk_node_t *on_node, int flags)
{
bool match = false;
pcmk_resource_t *result = NULL;
CRM_CHECK(id && rsc && rsc->id, return NULL);
if (pcmk_is_set(flags, pcmk_rsc_match_clone_only)) {
const char *rid = pcmk__xe_id(rsc->priv->xml);
if (!pcmk__is_clone(pe__const_top_resource(rsc, false))) {
match = false;
} else if (!strcmp(id, rsc->id) || pcmk__str_eq(id, rid, pcmk__str_none)) {
match = true;
}
} else if (!strcmp(id, rsc->id)) {
match = true;
} else if (pcmk_is_set(flags, pcmk_rsc_match_history)
&& pcmk__str_eq(rsc->priv->history_id, id, pcmk__str_none)) {
match = true;
} else if (pcmk_is_set(flags, pcmk_rsc_match_basename)
|| (pcmk_is_set(flags, pcmk_rsc_match_anon_basename)
&& !pcmk_is_set(rsc->flags, pcmk__rsc_unique))) {
match = pe_base_name_eq(rsc, id);
}
if (match && on_node) {
if (!rsc_is_on_node(rsc, on_node, flags)) {
match = false;
}
}
if (match) {
return rsc;
}
for (GList *gIter = rsc->priv->children;
gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *child = (pcmk_resource_t *) gIter->data;
result = rsc->priv->fns->find_rsc(child, id, on_node, flags);
if (result) {
return result;
}
}
return NULL;
}
// create is ignored
char *
native_parameter(pcmk_resource_t *rsc, pcmk_node_t *node, gboolean create,
const char *name, pcmk_scheduler_t *scheduler)
{
const char *value = NULL;
GHashTable *params = NULL;
CRM_CHECK(rsc != NULL, return NULL);
CRM_CHECK(name != NULL && strlen(name) != 0, return NULL);
pcmk__rsc_trace(rsc, "Looking up %s in %s", name, rsc->id);
params = pe_rsc_params(rsc, node, scheduler);
value = g_hash_table_lookup(params, name);
if (value == NULL) {
/* try meta attributes instead */
value = g_hash_table_lookup(rsc->priv->meta, name);
}
return pcmk__str_copy(value);
}
gboolean
native_active(pcmk_resource_t * rsc, gboolean all)
{
for (GList *gIter = rsc->priv->active_nodes;
gIter != NULL; gIter = gIter->next) {
pcmk_node_t *a_node = (pcmk_node_t *) gIter->data;
if (a_node->details->unclean) {
pcmk__rsc_trace(rsc, "Resource %s: %s is unclean",
rsc->id, pcmk__node_name(a_node));
return TRUE;
} else if (!a_node->details->online
&& pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
pcmk__rsc_trace(rsc, "Resource %s: %s is offline",
rsc->id, pcmk__node_name(a_node));
} else {
pcmk__rsc_trace(rsc, "Resource %s active on %s",
rsc->id, pcmk__node_name(a_node));
return TRUE;
}
}
return FALSE;
}
struct print_data_s {
long options;
void *print_data;
};
static const char *
native_pending_state(const pcmk_resource_t *rsc)
{
const char *pending_state = NULL;
if (pcmk__str_eq(rsc->priv->pending_action, PCMK_ACTION_START,
pcmk__str_none)) {
pending_state = "Starting";
} else if (pcmk__str_eq(rsc->priv->pending_action, PCMK_ACTION_STOP,
pcmk__str_none)) {
pending_state = "Stopping";
} else if (pcmk__str_eq(rsc->priv->pending_action, PCMK_ACTION_MIGRATE_TO,
pcmk__str_none)) {
pending_state = "Migrating";
} else if (pcmk__str_eq(rsc->priv->pending_action,
PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) {
        /* migrate_from is also displayed as "Migrating", since that is the
         * step where the actual work may be done.
         */
pending_state = "Migrating";
} else if (pcmk__str_eq(rsc->priv->pending_action, PCMK_ACTION_PROMOTE,
pcmk__str_none)) {
pending_state = "Promoting";
} else if (pcmk__str_eq(rsc->priv->pending_action, PCMK_ACTION_DEMOTE,
pcmk__str_none)) {
pending_state = "Demoting";
}
return pending_state;
}
static const char *
native_pending_action(const pcmk_resource_t *rsc)
{
const char *pending_action = NULL;
if (pcmk__str_eq(rsc->priv->pending_action, PCMK_ACTION_MONITOR,
pcmk__str_none)) {
pending_action = "Monitoring";
/* Pending probes are not printed, even if pending
* operations are requested. If someone ever requests that
* behavior, uncomment this and the corresponding part of
* unpack.c:unpack_rsc_op().
*/
#if 0
    } else if (pcmk__str_eq(rsc->priv->pending_action, "probe",
pcmk__str_none)) {
pending_action = "Checking";
#endif
}
return pending_action;
}
static enum rsc_role_e
native_displayable_role(const pcmk_resource_t *rsc)
{
enum rsc_role_e role = rsc->priv->orig_role;
if ((role == pcmk_role_started)
&& pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pcmk__rsc_promotable)) {
role = pcmk_role_unpromoted;
}
return role;
}
static const char *
native_displayable_state(const pcmk_resource_t *rsc, bool print_pending)
{
const char *rsc_state = NULL;
if (print_pending) {
rsc_state = native_pending_state(rsc);
}
if (rsc_state == NULL) {
rsc_state = pcmk_role_text(native_displayable_role(rsc));
}
return rsc_state;
}
// Append a flag to resource description string's flags list
static bool
add_output_flag(GString *s, const char *flag_desc, bool have_flags)
{
g_string_append(s, (have_flags? ", " : " ("));
g_string_append(s, flag_desc);
return true;
}
// Append a node name to resource description string's node list
static bool
add_output_node(GString *s, const char *node, bool have_nodes)
{
g_string_append(s, (have_nodes? " " : " [ "));
g_string_append(s, node);
return true;
}
/*!
* \internal
* \brief Create a string description of a resource
*
* \param[in] rsc Resource to describe
* \param[in] name Desired identifier for the resource
* \param[in] node If not NULL, node that resource is "on"
 * \param[in] show_opts Bitmask of pcmk_show_opt_e flags
* \param[in] target_role Resource's target role
* \param[in] show_nodes Whether to display nodes when multiply active
*
* \return Newly allocated string description of resource
* \note Caller must free the result with g_free().
*/
gchar *
pcmk__native_output_string(const pcmk_resource_t *rsc, const char *name,
const pcmk_node_t *node, uint32_t show_opts,
const char *target_role, bool show_nodes)
{
const char *class = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS);
const char *provider = NULL;
const char *kind = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE);
GString *outstr = NULL;
bool have_flags = false;
if (!pcmk__is_primitive(rsc)) {
return NULL;
}
CRM_CHECK(name != NULL, name = "unknown");
CRM_CHECK(kind != NULL, kind = "unknown");
CRM_CHECK(class != NULL, class = "unknown");
if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
provider = crm_element_value(rsc->priv->xml, PCMK_XA_PROVIDER);
}
if ((node == NULL) && (rsc->priv->lock_node != NULL)) {
node = rsc->priv->lock_node;
}
if (pcmk_any_flags_set(show_opts, pcmk_show_rsc_only)
|| pcmk__list_of_multiple(rsc->priv->active_nodes)) {
node = NULL;
}
outstr = g_string_sized_new(128);
// Resource name and agent
pcmk__g_strcat(outstr,
name, "\t(", class, ((provider == NULL)? "" : ":"),
pcmk__s(provider, ""), ":", kind, "):\t", NULL);
// State on node
if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
g_string_append(outstr, " ORPHANED");
}
if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
enum rsc_role_e role = native_displayable_role(rsc);
g_string_append(outstr, " FAILED");
if (role > pcmk_role_unpromoted) {
pcmk__add_word(&outstr, 0, pcmk_role_text(role));
}
} else {
bool show_pending = pcmk_is_set(show_opts, pcmk_show_pending);
pcmk__add_word(&outstr, 0, native_displayable_state(rsc, show_pending));
}
if (node) {
pcmk__add_word(&outstr, 0, pcmk__node_name(node));
}
// Failed probe operation
if (native_displayable_role(rsc) == pcmk_role_stopped) {
xmlNode *probe_op = pe__failed_probe_for_rsc(rsc,
node ? node->priv->name : NULL);
if (probe_op != NULL) {
int rc;
pcmk__scan_min_int(crm_element_value(probe_op, PCMK__XA_RC_CODE),
&rc, 0);
- pcmk__g_strcat(outstr, " (", services_ocf_exitcode_str(rc), ") ",
- NULL);
+ pcmk__g_strcat(outstr, " (", crm_exit_str(rc), ") ", NULL);
}
}
    // Flags, as: (<flag> [...])
if (node && !(node->details->online) && node->details->unclean) {
have_flags = add_output_flag(outstr, "UNCLEAN", have_flags);
}
if ((node != NULL) && pcmk__same_node(node, rsc->priv->lock_node)) {
have_flags = add_output_flag(outstr, "LOCKED", have_flags);
}
if (pcmk_is_set(show_opts, pcmk_show_pending)) {
const char *pending_action = native_pending_action(rsc);
if (pending_action != NULL) {
have_flags = add_output_flag(outstr, pending_action, have_flags);
}
}
if (target_role != NULL) {
switch (pcmk_parse_role(target_role)) {
case pcmk_role_unknown:
pcmk__config_err("Invalid " PCMK_META_TARGET_ROLE
" %s for resource %s", target_role, rsc->id);
break;
case pcmk_role_stopped:
have_flags = add_output_flag(outstr, "disabled", have_flags);
break;
case pcmk_role_unpromoted:
if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pcmk__rsc_promotable)) {
have_flags = add_output_flag(outstr,
PCMK_META_TARGET_ROLE ":",
have_flags);
g_string_append(outstr, target_role);
}
break;
default:
/* Only show target role if it limits our abilities (i.e. ignore
             * Started, as it is the default anyway, and doesn't prevent
* the resource from becoming promoted).
*/
break;
}
}
// Blocked or maintenance implies unmanaged
if (pcmk_any_flags_set(rsc->flags,
pcmk__rsc_blocked|pcmk__rsc_maintenance)) {
if (pcmk_is_set(rsc->flags, pcmk__rsc_blocked)) {
have_flags = add_output_flag(outstr, "blocked", have_flags);
} else if (pcmk_is_set(rsc->flags, pcmk__rsc_maintenance)) {
have_flags = add_output_flag(outstr, "maintenance", have_flags);
}
} else if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
have_flags = add_output_flag(outstr, "unmanaged", have_flags);
}
if (pcmk_is_set(rsc->flags, pcmk__rsc_ignore_failure)) {
have_flags = add_output_flag(outstr, "failure ignored", have_flags);
}
if (have_flags) {
g_string_append_c(outstr, ')');
}
// User-supplied description
if (pcmk_any_flags_set(show_opts, pcmk_show_rsc_only|pcmk_show_description)
|| pcmk__list_of_multiple(rsc->priv->active_nodes)) {
const char *desc = crm_element_value(rsc->priv->xml,
PCMK_XA_DESCRIPTION);
if (desc) {
g_string_append(outstr, " (");
g_string_append(outstr, desc);
g_string_append(outstr, ")");
}
}
if (show_nodes && !pcmk_is_set(show_opts, pcmk_show_rsc_only)
&& pcmk__list_of_multiple(rsc->priv->active_nodes)) {
bool have_nodes = false;
for (GList *iter = rsc->priv->active_nodes;
iter != NULL; iter = iter->next) {
pcmk_node_t *n = (pcmk_node_t *) iter->data;
have_nodes = add_output_node(outstr, n->priv->name, have_nodes);
}
if (have_nodes) {
g_string_append(outstr, " ]");
}
}
return g_string_free(outstr, FALSE);
}
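/* Example (hypothetical resource and node names): for a primitive active on a
 * single node, the string built above looks roughly like
 *
 *     myip\t(ocf:heartbeat:IPaddr2):\tStarted node1 (unmanaged)
 *
 * where the trailing flag list appears only when some flag applies.
 */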
int
pe__common_output_html(pcmk__output_t *out, const pcmk_resource_t *rsc,
const char *name, const pcmk_node_t *node,
uint32_t show_opts)
{
const char *kind = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE);
const char *target_role = NULL;
const char *cl = NULL;
xmlNode *child = NULL;
gchar *content = NULL;
pcmk__assert((kind != NULL) && pcmk__is_primitive(rsc));
if (crm_is_true(g_hash_table_lookup(rsc->priv->meta,
PCMK__META_INTERNAL_RSC))
&& !pcmk_is_set(show_opts, pcmk_show_implicit_rscs)) {
crm_trace("skipping print of internal resource %s", rsc->id);
return pcmk_rc_no_output;
}
target_role = g_hash_table_lookup(rsc->priv->meta,
PCMK_META_TARGET_ROLE);
if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
cl = PCMK__VALUE_RSC_MANAGED;
} else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
cl = PCMK__VALUE_RSC_FAILED;
} else if (pcmk__is_primitive(rsc)
&& (rsc->priv->active_nodes == NULL)) {
cl = PCMK__VALUE_RSC_FAILED;
} else if (pcmk__list_of_multiple(rsc->priv->active_nodes)) {
cl = PCMK__VALUE_RSC_MULTIPLE;
} else if (pcmk_is_set(rsc->flags, pcmk__rsc_ignore_failure)) {
cl = PCMK__VALUE_RSC_FAILURE_IGNORED;
} else {
cl = PCMK__VALUE_RSC_OK;
}
child = pcmk__output_create_html_node(out, "li", NULL, NULL, NULL);
child = pcmk__html_create(child, PCMK__XE_SPAN, NULL, cl);
content = pcmk__native_output_string(rsc, name, node, show_opts,
target_role, true);
pcmk__xe_set_content(child, "%s", content);
g_free(content);
return pcmk_rc_ok;
}
int
pe__common_output_text(pcmk__output_t *out, const pcmk_resource_t *rsc,
const char *name, const pcmk_node_t *node,
uint32_t show_opts)
{
const char *target_role = NULL;
pcmk__assert(pcmk__is_primitive(rsc));
if (crm_is_true(g_hash_table_lookup(rsc->priv->meta,
PCMK__META_INTERNAL_RSC))
&& !pcmk_is_set(show_opts, pcmk_show_implicit_rscs)) {
crm_trace("skipping print of internal resource %s", rsc->id);
return pcmk_rc_no_output;
}
target_role = g_hash_table_lookup(rsc->priv->meta,
PCMK_META_TARGET_ROLE);
{
gchar *s = pcmk__native_output_string(rsc, name, node, show_opts,
target_role, true);
out->list_item(out, NULL, "%s", s);
g_free(s);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("primitive", "uint32_t", "pcmk_resource_t *", "GList *",
"GList *")
int
pe__resource_xml(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
GList *only_node G_GNUC_UNUSED = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
int rc = pcmk_rc_no_output;
bool print_pending = pcmk_is_set(show_opts, pcmk_show_pending);
const char *class = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS);
const char *prov = crm_element_value(rsc->priv->xml, PCMK_XA_PROVIDER);
char ra_name[LINE_MAX];
const char *rsc_state = native_displayable_state(rsc, print_pending);
const char *target_role = NULL;
const char *active = pcmk__btoa(rsc->priv->fns->active(rsc, TRUE));
const char *orphaned = pcmk__flag_text(rsc->flags, pcmk__rsc_removed);
const char *blocked = pcmk__flag_text(rsc->flags, pcmk__rsc_blocked);
const char *maintenance = pcmk__flag_text(rsc->flags,
pcmk__rsc_maintenance);
const char *managed = pcmk__flag_text(rsc->flags, pcmk__rsc_managed);
const char *failed = pcmk__flag_text(rsc->flags, pcmk__rsc_failed);
const char *ignored = pcmk__flag_text(rsc->flags, pcmk__rsc_ignore_failure);
char *nodes_running_on = NULL;
const char *pending = print_pending? native_pending_action(rsc) : NULL;
const char *locked_to = NULL;
const char *desc = pe__resource_description(rsc, show_opts);
pcmk__assert(pcmk__is_primitive(rsc));
if (rsc->priv->fns->is_filtered(rsc, only_rsc, TRUE)) {
return pcmk_rc_no_output;
}
// Resource information
snprintf(ra_name, LINE_MAX, "%s%s%s:%s", class,
((prov == NULL)? "" : ":"), ((prov == NULL)? "" : prov),
crm_element_value(rsc->priv->xml, PCMK_XA_TYPE));
target_role = g_hash_table_lookup(rsc->priv->meta,
PCMK_META_TARGET_ROLE);
nodes_running_on = pcmk__itoa(g_list_length(rsc->priv->active_nodes));
if (rsc->priv->lock_node != NULL) {
locked_to = rsc->priv->lock_node->priv->name;
}
rc = pe__name_and_nvpairs_xml(out, true, PCMK_XE_RESOURCE,
PCMK_XA_ID, rsc_printable_id(rsc),
PCMK_XA_RESOURCE_AGENT, ra_name,
PCMK_XA_ROLE, rsc_state,
PCMK_XA_TARGET_ROLE, target_role,
PCMK_XA_ACTIVE, active,
PCMK_XA_ORPHANED, orphaned,
PCMK_XA_BLOCKED, blocked,
PCMK_XA_MAINTENANCE, maintenance,
PCMK_XA_MANAGED, managed,
PCMK_XA_FAILED, failed,
PCMK_XA_FAILURE_IGNORED, ignored,
PCMK_XA_NODES_RUNNING_ON, nodes_running_on,
PCMK_XA_PENDING, pending,
PCMK_XA_LOCKED_TO, locked_to,
PCMK_XA_DESCRIPTION, desc,
NULL);
free(nodes_running_on);
pcmk__assert(rc == pcmk_rc_ok);
for (GList *gIter = rsc->priv->active_nodes;
gIter != NULL; gIter = gIter->next) {
pcmk_node_t *node = (pcmk_node_t *) gIter->data;
const char *cached = pcmk__btoa(node->details->online);
rc = pe__name_and_nvpairs_xml(out, false, PCMK_XE_NODE,
PCMK_XA_NAME, node->priv->name,
PCMK_XA_ID, node->priv->id,
PCMK_XA_CACHED, cached,
NULL);
pcmk__assert(rc == pcmk_rc_ok);
}
pcmk__output_xml_pop_parent(out);
return rc;
}
PCMK__OUTPUT_ARGS("primitive", "uint32_t", "pcmk_resource_t *", "GList *",
"GList *")
int
pe__resource_html(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
GList *only_node G_GNUC_UNUSED = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
const pcmk_node_t *node = pcmk__current_node(rsc);
if (rsc->priv->fns->is_filtered(rsc, only_rsc, TRUE)) {
return pcmk_rc_no_output;
}
pcmk__assert(pcmk__is_primitive(rsc));
if (node == NULL) {
// This is set only if a non-probe action is pending on this node
node = rsc->priv->pending_node;
}
return pe__common_output_html(out, rsc, rsc_printable_id(rsc), node, show_opts);
}
PCMK__OUTPUT_ARGS("primitive", "uint32_t", "pcmk_resource_t *", "GList *",
"GList *")
int
pe__resource_text(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
GList *only_node G_GNUC_UNUSED = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
const pcmk_node_t *node = pcmk__current_node(rsc);
pcmk__assert(pcmk__is_primitive(rsc));
if (rsc->priv->fns->is_filtered(rsc, only_rsc, TRUE)) {
return pcmk_rc_no_output;
}
if (node == NULL) {
// This is set only if a non-probe action is pending on this node
node = rsc->priv->pending_node;
}
return pe__common_output_text(out, rsc, rsc_printable_id(rsc), node, show_opts);
}
void
native_free(pcmk_resource_t * rsc)
{
pcmk__rsc_trace(rsc, "Freeing resource action list (not the data)");
common_free(rsc);
}
enum rsc_role_e
native_resource_state(const pcmk_resource_t * rsc, gboolean current)
{
enum rsc_role_e role = rsc->priv->next_role;
if (current) {
role = rsc->priv->orig_role;
}
pcmk__rsc_trace(rsc, "%s state: %s", rsc->id, pcmk_role_text(role));
return role;
}
/*!
* \internal
 * \brief List nodes where a resource (or any of its children) is located
*
* \param[in] rsc Resource to check
* \param[out] list List to add result to
* \param[in] target Which resource conditions to target (group of
* enum pcmk__rsc_node flags)
*
* \return If list contains only one node, that node, or NULL otherwise
*/
pcmk_node_t *
native_location(const pcmk_resource_t *rsc, GList **list, uint32_t target)
{
pcmk_node_t *one = NULL;
GList *result = NULL;
if (rsc->priv->children != NULL) {
for (GList *gIter = rsc->priv->children;
gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *child = (pcmk_resource_t *) gIter->data;
child->priv->fns->location(child, &result, target);
}
} else {
if (pcmk_is_set(target, pcmk__rsc_node_current)) {
result = g_list_copy(rsc->priv->active_nodes);
}
if (pcmk_is_set(target, pcmk__rsc_node_pending)
&& (rsc->priv->pending_node != NULL)
&& !pe_find_node_id(result, rsc->priv->pending_node->priv->id)) {
result = g_list_append(result, (gpointer) rsc->priv->pending_node);
}
if (pcmk_is_set(target, pcmk__rsc_node_assigned)
&& (rsc->priv->assigned_node != NULL)) {
result = g_list_append(result, rsc->priv->assigned_node);
}
}
if (result && (result->next == NULL)) {
one = result->data;
}
if (list) {
GList *gIter = result;
for (; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *node = (pcmk_node_t *) gIter->data;
if ((*list == NULL)
|| (pe_find_node_id(*list, node->priv->id) == NULL)) {
*list = g_list_append(*list, node);
}
}
}
g_list_free(result);
return one;
}
static void
get_rscs_brief(GList *rsc_list, GHashTable * rsc_table, GHashTable * active_table)
{
GList *gIter = rsc_list;
for (; gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
const char *class = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS);
const char *kind = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE);
int offset = 0;
char buffer[LINE_MAX];
int *rsc_counter = NULL;
int *active_counter = NULL;
if (!pcmk__is_primitive(rsc)) {
continue;
}
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", class);
if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
const char *prov = crm_element_value(rsc->priv->xml,
PCMK_XA_PROVIDER);
if (prov != NULL) {
offset += snprintf(buffer + offset, LINE_MAX - offset,
":%s", prov);
}
}
offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s", kind);
CRM_LOG_ASSERT(offset > 0);
if (rsc_table) {
rsc_counter = g_hash_table_lookup(rsc_table, buffer);
if (rsc_counter == NULL) {
rsc_counter = pcmk__assert_alloc(1, sizeof(int));
*rsc_counter = 0;
g_hash_table_insert(rsc_table, strdup(buffer), rsc_counter);
}
(*rsc_counter)++;
}
if (active_table) {
for (GList *gIter2 = rsc->priv->active_nodes;
gIter2 != NULL; gIter2 = gIter2->next) {
pcmk_node_t *node = (pcmk_node_t *) gIter2->data;
GHashTable *node_table = NULL;
if (node->details->unclean == FALSE && node->details->online == FALSE &&
pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
continue;
}
node_table = g_hash_table_lookup(active_table,
node->priv->name);
if (node_table == NULL) {
node_table = pcmk__strkey_table(free, free);
g_hash_table_insert(active_table,
strdup(node->priv->name),
node_table);
}
active_counter = g_hash_table_lookup(node_table, buffer);
if (active_counter == NULL) {
active_counter = pcmk__assert_alloc(1, sizeof(int));
*active_counter = 0;
g_hash_table_insert(node_table, strdup(buffer), active_counter);
}
(*active_counter)++;
}
}
}
}
static void
destroy_node_table(gpointer data)
{
GHashTable *node_table = data;
if (node_table) {
g_hash_table_destroy(node_table);
}
}
int
pe__rscs_brief_output(pcmk__output_t *out, GList *rsc_list, uint32_t show_opts)
{
GHashTable *rsc_table = pcmk__strkey_table(free, free);
GHashTable *active_table = pcmk__strkey_table(free, destroy_node_table);
GList *sorted_rscs;
int rc = pcmk_rc_no_output;
get_rscs_brief(rsc_list, rsc_table, active_table);
    /* Make a list of the rsc_table keys so that they can be sorted, to keep
     * output order consistent between systems.
     */
sorted_rscs = g_hash_table_get_keys(rsc_table);
sorted_rscs = g_list_sort(sorted_rscs, (GCompareFunc) strcmp);
for (GList *gIter = sorted_rscs; gIter; gIter = gIter->next) {
char *type = (char *) gIter->data;
int *rsc_counter = g_hash_table_lookup(rsc_table, type);
GList *sorted_nodes = NULL;
int active_counter_all = 0;
        /* Also make a list of the active_table keys so they can be sorted. If
         * more than one instance of a resource type is running, the nodes must
         * be sorted to keep output order consistent between systems.
         */
sorted_nodes = g_hash_table_get_keys(active_table);
sorted_nodes = g_list_sort(sorted_nodes, (GCompareFunc) pcmk__numeric_strcasecmp);
for (GList *gIter2 = sorted_nodes; gIter2; gIter2 = gIter2->next) {
char *node_name = (char *) gIter2->data;
GHashTable *node_table = g_hash_table_lookup(active_table, node_name);
int *active_counter = NULL;
if (node_table == NULL) {
continue;
}
active_counter = g_hash_table_lookup(node_table, type);
if (active_counter == NULL || *active_counter == 0) {
continue;
} else {
active_counter_all += *active_counter;
}
if (pcmk_is_set(show_opts, pcmk_show_rsc_only)) {
node_name = NULL;
}
if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
out->list_item(out, NULL, "%d/%d\t(%s):\tActive %s",
*active_counter,
rsc_counter ? *rsc_counter : 0, type,
(*active_counter > 0) && node_name ? node_name : "");
} else {
out->list_item(out, NULL, "%d\t(%s):\tActive %s",
*active_counter, type,
(*active_counter > 0) && node_name ? node_name : "");
}
rc = pcmk_rc_ok;
}
if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs) && active_counter_all == 0) {
out->list_item(out, NULL, "%d/%d\t(%s):\tActive",
active_counter_all,
rsc_counter ? *rsc_counter : 0, type);
rc = pcmk_rc_ok;
}
if (sorted_nodes) {
g_list_free(sorted_nodes);
}
}
if (rsc_table) {
g_hash_table_destroy(rsc_table);
rsc_table = NULL;
}
if (active_table) {
g_hash_table_destroy(active_table);
active_table = NULL;
}
if (sorted_rscs) {
g_list_free(sorted_rscs);
}
return rc;
}
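/* Example (hypothetical values): with pcmk_show_inactive_rscs set, each line
 * of brief output has the form "active/configured (agent): Active node", e.g.
 *
 *     1/2  (ocf:heartbeat:IPaddr2):  Active node1
 */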
gboolean
pe__native_is_filtered(const pcmk_resource_t *rsc, GList *only_rsc,
gboolean check_parent)
{
if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) ||
pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches)) {
return FALSE;
} else if (check_parent && (rsc->priv->parent != NULL)) {
const pcmk_resource_t *up = pe__const_top_resource(rsc, true);
return up->priv->fns->is_filtered(up, only_rsc, FALSE);
}
return TRUE;
}
/*!
* \internal
* \brief Get maximum primitive resource instances per node
*
* \param[in] rsc Primitive resource to check
*
* \return Maximum number of \p rsc instances that can be active on one node
*/
unsigned int
pe__primitive_max_per_node(const pcmk_resource_t *rsc)
{
pcmk__assert(pcmk__is_primitive(rsc));
return 1U;
}
diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c
index dc3080c205..323952eac5 100644
--- a/lib/pengine/pe_output.c
+++ b/lib/pengine/pe_output.c
@@ -1,3476 +1,3475 @@
/*
* Copyright 2019-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
const char *
pe__resource_description(const pcmk_resource_t *rsc, uint32_t show_opts)
{
const char * desc = NULL;
// User-supplied description
if (pcmk_any_flags_set(show_opts, pcmk_show_rsc_only|pcmk_show_description)) {
desc = crm_element_value(rsc->priv->xml, PCMK_XA_DESCRIPTION);
}
return desc;
}
/* Never display node attributes whose name starts with one of these prefixes */
#define FILTER_STR { PCMK__FAIL_COUNT_PREFIX, PCMK__LAST_FAILURE_PREFIX, \
PCMK__NODE_ATTR_SHUTDOWN, PCMK_NODE_ATTR_TERMINATE, \
PCMK_NODE_ATTR_STANDBY, "#", NULL }
static int
compare_attribute(gconstpointer a, gconstpointer b)
{
int rc;
rc = strcmp((const char *)a, (const char *)b);
return rc;
}
/*!
* \internal
* \brief Determine whether extended information about an attribute should be added.
*
* \param[in] node Node that ran this resource
* \param[in,out] rsc_list List of resources for this node
* \param[in,out] scheduler Scheduler data
* \param[in] attrname Attribute to find
* \param[out] expected_score Expected value for this attribute
*
* \return true if extended information should be printed, false otherwise
* \note Currently, extended information is only supported for ping/pingd
* resources, for which a message will be printed if connectivity is lost
* or degraded.
*/
static bool
add_extra_info(const pcmk_node_t *node, GList *rsc_list,
pcmk_scheduler_t *scheduler, const char *attrname,
int *expected_score)
{
GList *gIter = NULL;
for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
const char *type = g_hash_table_lookup(rsc->priv->meta,
PCMK_XA_TYPE);
const char *name = NULL;
GHashTable *params = NULL;
if (rsc->priv->children != NULL) {
if (add_extra_info(node, rsc->priv->children, scheduler,
attrname, expected_score)) {
return true;
}
}
if (!pcmk__strcase_any_of(type, "ping", "pingd", NULL)) {
continue;
}
params = pe_rsc_params(rsc, node, scheduler);
name = g_hash_table_lookup(params, PCMK_XA_NAME);
if (name == NULL) {
name = "pingd";
}
        /* Identify the resource by the attribute name it is configured to set */
if (pcmk__str_eq(name, attrname, pcmk__str_casei)) {
int host_list_num = 0;
const char *hosts = g_hash_table_lookup(params, "host_list");
const char *multiplier = g_hash_table_lookup(params, "multiplier");
int multiplier_i;
if (hosts) {
char **host_list = g_strsplit(hosts, " ", 0);
host_list_num = g_strv_length(host_list);
g_strfreev(host_list);
}
if ((multiplier == NULL)
|| (pcmk__scan_min_int(multiplier, &multiplier_i,
INT_MIN) != pcmk_rc_ok)) {
/* The ocf:pacemaker:ping resource agent defaults multiplier to
* 1. The agent currently does not handle invalid text, but it
* should, and this would be a reasonable choice ...
*/
multiplier_i = 1;
}
*expected_score = host_list_num * multiplier_i;
return true;
}
}
return false;
}
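/* Worked example (hypothetical values): a ping resource configured with
 * host_list="10.0.0.1 10.0.0.2" and multiplier="1000" gives
 * *expected_score = 2 * 1000 = 2000; an attribute value below that suggests
 * lost or degraded connectivity.
 */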
static GList *
filter_attr_list(GList *attr_list, char *name)
{
int i;
const char *filt_str[] = FILTER_STR;
CRM_CHECK(name != NULL, return attr_list);
/* filtering automatic attributes */
for (i = 0; filt_str[i] != NULL; i++) {
if (g_str_has_prefix(name, filt_str[i])) {
return attr_list;
}
}
return g_list_insert_sorted(attr_list, name, compare_attribute);
}
static GList *
get_operation_list(xmlNode *rsc_entry) {
GList *op_list = NULL;
xmlNode *rsc_op = NULL;
for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL);
rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
const char *task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
const char *interval_ms_s = crm_element_value(rsc_op,
PCMK_META_INTERVAL);
const char *op_rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
int op_rc_i;
pcmk__scan_min_int(op_rc, &op_rc_i, 0);
/* Display 0-interval monitors as "probe" */
if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)
&& pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) {
task = "probe";
}
/* Ignore notifies and some probes */
if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)
|| (pcmk__str_eq(task, "probe", pcmk__str_none)
&& (op_rc_i == CRM_EX_NOT_RUNNING))) {
continue;
}
if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) {
op_list = g_list_append(op_list, rsc_op);
}
}
op_list = g_list_sort(op_list, sort_op_by_callid);
return op_list;
}
static void
add_dump_node(gpointer key, gpointer value, gpointer user_data)
{
xmlNodePtr node = user_data;
node = pcmk__xe_create(node, (const char *) key);
pcmk__xe_set_content(node, "%s", (const char *) value);
}
static void
append_dump_text(gpointer key, gpointer value, gpointer user_data)
{
char **dump_text = user_data;
char *new_text = crm_strdup_printf("%s %s=%s",
*dump_text, (char *)key, (char *)value);
free(*dump_text);
*dump_text = new_text;
}
#define XPATH_STACK "//" PCMK_XE_NVPAIR \
"[@" PCMK_XA_NAME "='" \
PCMK_OPT_CLUSTER_INFRASTRUCTURE "']"
static const char *
get_cluster_stack(pcmk_scheduler_t *scheduler)
{
xmlNode *stack = get_xpath_object(XPATH_STACK, scheduler->input, LOG_DEBUG);
if (stack != NULL) {
return crm_element_value(stack, PCMK_XA_VALUE);
}
return PCMK_VALUE_UNKNOWN;
}
static char *
last_changed_string(const char *last_written, const char *user,
const char *client, const char *origin) {
if (last_written != NULL || user != NULL || client != NULL || origin != NULL) {
return crm_strdup_printf("%s%s%s%s%s%s%s",
last_written ? last_written : "",
user ? " by " : "",
user ? user : "",
client ? " via " : "",
client ? client : "",
origin ? " on " : "",
origin ? origin : "");
} else {
return strdup("");
}
}
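/* Example (hypothetical values): with every field available this yields
 * something like "Wed Jan  1 12:00:00 2025 by hacluster via crmd on node1";
 * if no field is set, an empty string is returned instead.
 */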
static char *
op_history_string(xmlNode *xml_op, const char *task, const char *interval_ms_s,
int rc, bool print_timing) {
const char *call = crm_element_value(xml_op, PCMK__XA_CALL_ID);
char *interval_str = NULL;
char *buf = NULL;
if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) {
char *pair = pcmk__format_nvpair(PCMK_XA_INTERVAL, interval_ms_s, "ms");
interval_str = crm_strdup_printf(" %s", pair);
free(pair);
}
if (print_timing) {
char *last_change_str = NULL;
char *exec_str = NULL;
char *queue_str = NULL;
const char *value = NULL;
time_t epoch = 0;
if ((crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
&epoch) == pcmk_ok)
&& (epoch > 0)) {
char *epoch_str = pcmk__epoch2str(&epoch, 0);
last_change_str = crm_strdup_printf(" %s=\"%s\"",
PCMK_XA_LAST_RC_CHANGE,
pcmk__s(epoch_str, ""));
free(epoch_str);
}
value = crm_element_value(xml_op, PCMK_XA_EXEC_TIME);
if (value) {
char *pair = pcmk__format_nvpair(PCMK_XA_EXEC_TIME, value, "ms");
exec_str = crm_strdup_printf(" %s", pair);
free(pair);
}
value = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME);
if (value) {
char *pair = pcmk__format_nvpair(PCMK_XA_QUEUE_TIME, value, "ms");
queue_str = crm_strdup_printf(" %s", pair);
free(pair);
}
buf = crm_strdup_printf("(%s) %s:%s%s%s%s rc=%d (%s)", call, task,
interval_str ? interval_str : "",
last_change_str ? last_change_str : "",
exec_str ? exec_str : "",
queue_str ? queue_str : "",
- rc, services_ocf_exitcode_str(rc));
+ rc, crm_exit_str(rc));
if (last_change_str) {
free(last_change_str);
}
if (exec_str) {
free(exec_str);
}
if (queue_str) {
free(queue_str);
}
} else {
buf = crm_strdup_printf("(%s) %s%s%s", call, task,
interval_str ? ":" : "",
interval_str ? interval_str : "");
}
if (interval_str) {
free(interval_str);
}
return buf;
}
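/* Example (hypothetical values): with print_timing, a recurring monitor might
 * be rendered roughly as
 *
 *     (12) monitor: interval="10000ms" last-rc-change="..." exec-time="21ms" rc=0 (OK)
 */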
static char *
resource_history_string(pcmk_resource_t *rsc, const char *rsc_id, bool all,
int failcount, time_t last_failure) {
char *buf = NULL;
if (rsc == NULL) {
buf = crm_strdup_printf("%s: orphan", rsc_id);
} else if (all || failcount || last_failure > 0) {
char *failcount_s = NULL;
char *lastfail_s = NULL;
if (failcount > 0) {
failcount_s = crm_strdup_printf(" %s=%d",
PCMK_XA_FAIL_COUNT, failcount);
} else {
failcount_s = strdup("");
}
if (last_failure > 0) {
buf = pcmk__epoch2str(&last_failure, 0);
lastfail_s = crm_strdup_printf(" %s='%s'",
PCMK_XA_LAST_FAILURE, buf);
free(buf);
}
buf = crm_strdup_printf("%s: " PCMK_META_MIGRATION_THRESHOLD "=%d%s%s",
rsc_id, rsc->priv->ban_after_failures,
failcount_s, pcmk__s(lastfail_s, ""));
free(failcount_s);
free(lastfail_s);
} else {
buf = crm_strdup_printf("%s:", rsc_id);
}
return buf;
}
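/* Example (hypothetical values): a resource with recorded failures might be
 * rendered as "myrsc: migration-threshold=1000000 fail-count=2
 * last-failure='...'", while a resource with no history of interest gets just
 * "myrsc:".
 */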
/*!
* \internal
* \brief Get a node's feature set for status display purposes
*
* \param[in] node Node to check
*
* \return String representation of feature set if the node is fully up (using
* "<3.15.1" for older nodes that don't set the #feature-set attribute),
* otherwise NULL
*/
static const char *
get_node_feature_set(const pcmk_node_t *node)
{
if (node->details->online
&& pcmk_is_set(node->priv->flags, pcmk__node_expected_up)
&& !pcmk__is_pacemaker_remote_node(node)) {
const char *feature_set = g_hash_table_lookup(node->priv->attrs,
CRM_ATTR_FEATURE_SET);
        /* Nodes have set the #feature-set attribute since feature set 3.15.1.
         * If it is missing, the node must be running an earlier version.
*/
return pcmk__s(feature_set, "<3.15.1");
}
return NULL;
}
static bool
is_mixed_version(pcmk_scheduler_t *scheduler)
{
const char *feature_set = NULL;
for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *node = gIter->data;
const char *node_feature_set = get_node_feature_set(node);
if (node_feature_set != NULL) {
if (feature_set == NULL) {
feature_set = node_feature_set;
} else if (strcmp(feature_set, node_feature_set) != 0) {
return true;
}
}
}
return false;
}
static void
formatted_xml_buf(const pcmk_resource_t *rsc, GString *xml_buf, bool raw)
{
if (raw && (rsc->priv->orig_xml != NULL)) {
pcmk__xml_string(rsc->priv->orig_xml, pcmk__xml_fmt_pretty, xml_buf,
0);
} else {
pcmk__xml_string(rsc->priv->xml, pcmk__xml_fmt_pretty, xml_buf, 0);
}
}
#define XPATH_DC_VERSION "//" PCMK_XE_NVPAIR \
"[@" PCMK_XA_NAME "='" PCMK_OPT_DC_VERSION "']"
PCMK__OUTPUT_ARGS("cluster-summary", "pcmk_scheduler_t *",
"enum pcmk_pacemakerd_state", "uint32_t", "uint32_t")
static int
cluster_summary(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
enum pcmk_pacemakerd_state pcmkd_state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
uint32_t section_opts = va_arg(args, uint32_t);
uint32_t show_opts = va_arg(args, uint32_t);
int rc = pcmk_rc_no_output;
const char *stack_s = get_cluster_stack(scheduler);
if (pcmk_is_set(section_opts, pcmk_section_stack)) {
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary");
out->message(out, "cluster-stack", stack_s, pcmkd_state);
}
if (pcmk_is_set(section_opts, pcmk_section_dc)) {
xmlNode *dc_version = get_xpath_object(XPATH_DC_VERSION,
scheduler->input, LOG_DEBUG);
const char *dc_version_s = dc_version?
crm_element_value(dc_version, PCMK_XA_VALUE)
: NULL;
const char *quorum = crm_element_value(scheduler->input,
PCMK_XA_HAVE_QUORUM);
char *dc_name = scheduler->dc_node? pe__node_display_name(scheduler->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL;
bool mixed_version = is_mixed_version(scheduler);
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary");
out->message(out, "cluster-dc", scheduler->dc_node, quorum,
dc_version_s, dc_name, mixed_version);
free(dc_name);
}
if (pcmk_is_set(section_opts, pcmk_section_times)) {
const char *last_written = crm_element_value(scheduler->input,
PCMK_XA_CIB_LAST_WRITTEN);
const char *user = crm_element_value(scheduler->input,
PCMK_XA_UPDATE_USER);
const char *client = crm_element_value(scheduler->input,
PCMK_XA_UPDATE_CLIENT);
const char *origin = crm_element_value(scheduler->input,
PCMK_XA_UPDATE_ORIGIN);
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary");
out->message(out, "cluster-times", scheduler->priv->local_node_name,
last_written, user, client, origin);
}
if (pcmk_is_set(section_opts, pcmk_section_counts)) {
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary");
out->message(out, "cluster-counts", g_list_length(scheduler->nodes),
scheduler->priv->ninstances,
scheduler->priv->disabled_resources,
scheduler->priv->blocked_resources);
}
if (pcmk_is_set(section_opts, pcmk_section_options)) {
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary");
out->message(out, "cluster-options", scheduler);
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) {
if (out->message(out, "maint-mode", scheduler->flags) == pcmk_rc_ok) {
rc = pcmk_rc_ok;
}
}
return rc;
}
PCMK__OUTPUT_ARGS("cluster-summary", "pcmk_scheduler_t *",
"enum pcmk_pacemakerd_state", "uint32_t", "uint32_t")
static int
cluster_summary_html(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
enum pcmk_pacemakerd_state pcmkd_state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
uint32_t section_opts = va_arg(args, uint32_t);
uint32_t show_opts = va_arg(args, uint32_t);
int rc = pcmk_rc_no_output;
const char *stack_s = get_cluster_stack(scheduler);
if (pcmk_is_set(section_opts, pcmk_section_stack)) {
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary");
out->message(out, "cluster-stack", stack_s, pcmkd_state);
}
/* Always print DC if none, even if not requested */
if ((scheduler->dc_node == NULL)
|| pcmk_is_set(section_opts, pcmk_section_dc)) {
xmlNode *dc_version = get_xpath_object(XPATH_DC_VERSION,
scheduler->input, LOG_DEBUG);
const char *dc_version_s = dc_version?
crm_element_value(dc_version, PCMK_XA_VALUE)
: NULL;
const char *quorum = crm_element_value(scheduler->input,
PCMK_XA_HAVE_QUORUM);
char *dc_name = scheduler->dc_node? pe__node_display_name(scheduler->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL;
bool mixed_version = is_mixed_version(scheduler);
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary");
out->message(out, "cluster-dc", scheduler->dc_node, quorum,
dc_version_s, dc_name, mixed_version);
free(dc_name);
}
if (pcmk_is_set(section_opts, pcmk_section_times)) {
const char *last_written = crm_element_value(scheduler->input,
PCMK_XA_CIB_LAST_WRITTEN);
const char *user = crm_element_value(scheduler->input,
PCMK_XA_UPDATE_USER);
const char *client = crm_element_value(scheduler->input,
PCMK_XA_UPDATE_CLIENT);
const char *origin = crm_element_value(scheduler->input,
PCMK_XA_UPDATE_ORIGIN);
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary");
out->message(out, "cluster-times", scheduler->priv->local_node_name,
last_written, user, client, origin);
}
if (pcmk_is_set(section_opts, pcmk_section_counts)) {
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary");
out->message(out, "cluster-counts", g_list_length(scheduler->nodes),
scheduler->priv->ninstances,
scheduler->priv->disabled_resources,
scheduler->priv->blocked_resources);
}
if (pcmk_is_set(section_opts, pcmk_section_options)) {
/* Kind of a hack - close the list we may have opened earlier in this
* function so we can put all the options into their own list. We
* only want to do this on HTML output, though.
*/
PCMK__OUTPUT_LIST_FOOTER(out, rc);
out->begin_list(out, NULL, NULL, "Config Options");
out->message(out, "cluster-options", scheduler);
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) {
if (out->message(out, "maint-mode", scheduler->flags) == pcmk_rc_ok) {
rc = pcmk_rc_ok;
}
}
return rc;
}
char *
pe__node_display_name(pcmk_node_t *node, bool print_detail)
{
char *node_name;
const char *node_host = NULL;
const char *node_id = NULL;
int name_len;
pcmk__assert((node != NULL) && (node->priv->name != NULL));
/* Host is displayed only if this is a guest node and detail is requested */
if (print_detail && pcmk__is_guest_or_bundle_node(node)) {
const pcmk_resource_t *launcher = NULL;
const pcmk_node_t *host_node = NULL;
launcher = node->priv->remote->priv->launcher;
host_node = pcmk__current_node(launcher);
if (host_node && host_node->details) {
node_host = host_node->priv->name;
}
if (node_host == NULL) {
node_host = ""; /* so we at least get "uname@" to indicate guest */
}
}
/* Node ID is displayed if different from uname and detail is requested */
if (print_detail
&& !pcmk__str_eq(node->priv->name, node->priv->id,
pcmk__str_casei)) {
node_id = node->priv->id;
}
/* Determine name length */
name_len = strlen(node->priv->name) + 1;
if (node_host) {
name_len += strlen(node_host) + 1; /* "@node_host" */
}
if (node_id) {
name_len += strlen(node_id) + 3; /* + " (node_id)" */
}
/* Allocate and populate display name */
node_name = pcmk__assert_alloc(name_len, sizeof(char));
strcpy(node_name, node->priv->name);
if (node_host) {
strcat(node_name, "@");
strcat(node_name, node_host);
}
if (node_id) {
strcat(node_name, " (");
strcat(node_name, node_id);
strcat(node_name, ")");
}
return node_name;
}
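/* Example (hypothetical values): with print_detail, a guest node might be
 * shown as "guest1@host1 (42)": the node name, then "@" plus the name of the
 * node hosting its launcher, then the node ID when it differs from the name.
 */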
int
pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name,
...)
{
xmlNodePtr xml_node = NULL;
va_list pairs;
pcmk__assert(tag_name != NULL);
xml_node = pcmk__output_xml_peek_parent(out);
pcmk__assert(xml_node != NULL);
xml_node = pcmk__xe_create(xml_node, tag_name);
va_start(pairs, tag_name);
pcmk__xe_set_propv(xml_node, pairs);
va_end(pairs);
if (is_list) {
pcmk__output_xml_push_parent(out, xml_node);
}
return pcmk_rc_ok;
}
static const char *
role_desc(enum rsc_role_e role)
{
if (role == pcmk_role_promoted) {
return "in " PCMK_ROLE_PROMOTED " role ";
}
return "";
}
PCMK__OUTPUT_ARGS("ban", "pcmk_node_t *", "pcmk__location_t *", "uint32_t")
static int
ban_html(pcmk__output_t *out, va_list args) {
pcmk_node_t *pe_node = va_arg(args, pcmk_node_t *);
pcmk__location_t *location = va_arg(args, pcmk__location_t *);
uint32_t show_opts = va_arg(args, uint32_t);
char *node_name = pe__node_display_name(pe_node,
pcmk_is_set(show_opts, pcmk_show_node_id));
char *buf = crm_strdup_printf("%s\tprevents %s from running %son %s",
location->id, location->rsc->id,
role_desc(location->role_filter), node_name);
pcmk__output_create_html_node(out, "li", NULL, NULL, buf);
free(node_name);
free(buf);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("ban", "pcmk_node_t *", "pcmk__location_t *", "uint32_t")
static int
ban_text(pcmk__output_t *out, va_list args) {
pcmk_node_t *pe_node = va_arg(args, pcmk_node_t *);
pcmk__location_t *location = va_arg(args, pcmk__location_t *);
uint32_t show_opts = va_arg(args, uint32_t);
char *node_name = pe__node_display_name(pe_node,
pcmk_is_set(show_opts, pcmk_show_node_id));
out->list_item(out, NULL, "%s\tprevents %s from running %son %s",
location->id, location->rsc->id,
role_desc(location->role_filter), node_name);
free(node_name);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("ban", "pcmk_node_t *", "pcmk__location_t *", "uint32_t")
static int
ban_xml(pcmk__output_t *out, va_list args) {
pcmk_node_t *pe_node = va_arg(args, pcmk_node_t *);
pcmk__location_t *location = va_arg(args, pcmk__location_t *);
uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t);
const char *promoted_only = pcmk__btoa(location->role_filter == pcmk_role_promoted);
char *weight_s = pcmk__itoa(pe_node->assign->score);
pcmk__output_create_xml_node(out, PCMK_XE_BAN,
PCMK_XA_ID, location->id,
PCMK_XA_RESOURCE, location->rsc->id,
PCMK_XA_NODE, pe_node->priv->name,
PCMK_XA_WEIGHT, weight_s,
PCMK_XA_PROMOTED_ONLY, promoted_only,
/* This is a deprecated alias for
* promoted_only. Removing it will break
* backward compatibility of the API schema,
* which will require an API schema major
* version bump.
*/
PCMK__XA_PROMOTED_ONLY_LEGACY, promoted_only,
NULL);
free(weight_s);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("ban-list", "pcmk_scheduler_t *", "const char *", "GList *",
"uint32_t", "bool")
static int
ban_list(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
const char *prefix = va_arg(args, const char *);
GList *only_rsc = va_arg(args, GList *);
uint32_t show_opts = va_arg(args, uint32_t);
bool print_spacer = va_arg(args, int);
GList *gIter, *gIter2;
int rc = pcmk_rc_no_output;
/* Print each ban */
for (gIter = scheduler->priv->location_constraints;
gIter != NULL; gIter = gIter->next) {
pcmk__location_t *location = gIter->data;
const pcmk_resource_t *rsc = location->rsc;
if (prefix != NULL && !g_str_has_prefix(location->id, prefix)) {
continue;
}
if (!pcmk__str_in_list(rsc_printable_id(rsc), only_rsc,
pcmk__str_star_matches)
&& !pcmk__str_in_list(rsc_printable_id(pe__const_top_resource(rsc, false)),
only_rsc, pcmk__str_star_matches)) {
continue;
}
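/* A negative score on a node means the resource is banned from it */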
for (gIter2 = location->nodes; gIter2 != NULL; gIter2 = gIter2->next) {
pcmk_node_t *node = (pcmk_node_t *) gIter2->data;
if (node->assign->score < 0) {
PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Negative Location Constraints");
out->message(out, "ban", node, location, show_opts);
}
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int")
static int
cluster_counts_html(pcmk__output_t *out, va_list args) {
unsigned int nnodes = va_arg(args, unsigned int);
int nresources = va_arg(args, int);
int ndisabled = va_arg(args, int);
int nblocked = va_arg(args, int);
xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "li", NULL);
xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "li", NULL);
xmlNode *child = NULL;
child = pcmk__html_create(nodes_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "%d node%s configured",
nnodes, pcmk__plural_s(nnodes));
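/* Build the resource-count line, bolding any DISABLED or BLOCKED counts */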
if (ndisabled && nblocked) {
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "%d resource instance%s configured (%d ",
nresources, pcmk__plural_s(nresources), ndisabled);
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "DISABLED");
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, ", %d ", nblocked);
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "BLOCKED");
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, " from further action due to failure)");
} else if (ndisabled && !nblocked) {
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "%d resource instance%s configured (%d ",
nresources, pcmk__plural_s(nresources),
ndisabled);
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "DISABLED");
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, ")");
} else if (!ndisabled && nblocked) {
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "%d resource instance%s configured (%d ",
nresources, pcmk__plural_s(nresources),
nblocked);
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "BLOCKED");
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, " from further action due to failure)");
} else {
child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "%d resource instance%s configured",
nresources, pcmk__plural_s(nresources));
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int")
static int
cluster_counts_text(pcmk__output_t *out, va_list args) {
unsigned int nnodes = va_arg(args, unsigned int);
int nresources = va_arg(args, int);
int ndisabled = va_arg(args, int);
int nblocked = va_arg(args, int);
out->list_item(out, NULL, "%d node%s configured",
nnodes, pcmk__plural_s(nnodes));
if (ndisabled && nblocked) {
out->list_item(out, NULL, "%d resource instance%s configured "
"(%d DISABLED, %d BLOCKED from "
"further action due to failure)",
nresources, pcmk__plural_s(nresources), ndisabled,
nblocked);
} else if (ndisabled && !nblocked) {
out->list_item(out, NULL, "%d resource instance%s configured "
"(%d DISABLED)",
nresources, pcmk__plural_s(nresources), ndisabled);
} else if (!ndisabled && nblocked) {
out->list_item(out, NULL, "%d resource instance%s configured "
"(%d BLOCKED from further action "
"due to failure)",
nresources, pcmk__plural_s(nresources), nblocked);
} else {
out->list_item(out, NULL, "%d resource instance%s configured",
nresources, pcmk__plural_s(nresources));
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int")
static int
cluster_counts_xml(pcmk__output_t *out, va_list args) {
unsigned int nnodes = va_arg(args, unsigned int);
int nresources = va_arg(args, int);
int ndisabled = va_arg(args, int);
int nblocked = va_arg(args, int);
xmlNodePtr nodes_node = NULL;
xmlNodePtr resources_node = NULL;
char *s = NULL;
nodes_node = pcmk__output_create_xml_node(out, PCMK_XE_NODES_CONFIGURED,
NULL);
resources_node = pcmk__output_create_xml_node(out,
PCMK_XE_RESOURCES_CONFIGURED,
NULL);
s = pcmk__itoa(nnodes);
crm_xml_add(nodes_node, PCMK_XA_NUMBER, s);
free(s);
s = pcmk__itoa(nresources);
crm_xml_add(resources_node, PCMK_XA_NUMBER, s);
free(s);
s = pcmk__itoa(ndisabled);
crm_xml_add(resources_node, PCMK_XA_DISABLED, s);
free(s);
s = pcmk__itoa(nblocked);
crm_xml_add(resources_node, PCMK_XA_BLOCKED, s);
free(s);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-dc", "pcmk_node_t *", "const char *", "const char *",
"char *", "int")
static int
cluster_dc_html(pcmk__output_t *out, va_list args) {
pcmk_node_t *dc = va_arg(args, pcmk_node_t *);
const char *quorum = va_arg(args, const char *);
const char *dc_version_s = va_arg(args, const char *);
char *dc_name = va_arg(args, char *);
bool mixed_version = va_arg(args, int);
xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL);
xmlNode *child = NULL;
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "Current DC: ");
if (dc) {
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "%s (version %s) -",
dc_name, pcmk__s(dc_version_s, "unknown"));
if (mixed_version) {
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_WARNING);
pcmk__xe_set_content(child, " MIXED-VERSION");
}
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, " partition");
if (crm_is_true(quorum)) {
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, " with");
} else {
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_WARNING);
pcmk__xe_set_content(child, " WITHOUT");
}
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, " quorum");
} else {
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_WARNING);
pcmk__xe_set_content(child, "NONE");
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-dc", "pcmk_node_t *", "const char *", "const char *",
"char *", "int")
static int
cluster_dc_text(pcmk__output_t *out, va_list args) {
pcmk_node_t *dc = va_arg(args, pcmk_node_t *);
const char *quorum = va_arg(args, const char *);
const char *dc_version_s = va_arg(args, const char *);
char *dc_name = va_arg(args, char *);
bool mixed_version = va_arg(args, int);
if (dc) {
out->list_item(out, "Current DC",
"%s (version %s) - %spartition %s quorum",
dc_name, pcmk__s(dc_version_s, "unknown"),
mixed_version ? "MIXED-VERSION " : "",
crm_is_true(quorum) ? "with" : "WITHOUT");
} else {
out->list_item(out, "Current DC", "NONE");
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-dc", "pcmk_node_t *", "const char *", "const char *",
"char *", "int")
static int
cluster_dc_xml(pcmk__output_t *out, va_list args) {
pcmk_node_t *dc = va_arg(args, pcmk_node_t *);
const char *quorum = va_arg(args, const char *);
const char *dc_version_s = va_arg(args, const char *);
char *dc_name G_GNUC_UNUSED = va_arg(args, char *);
bool mixed_version = va_arg(args, int);
if (dc) {
const char *with_quorum = pcmk__btoa(crm_is_true(quorum));
const char *mixed_version_s = pcmk__btoa(mixed_version);
pcmk__output_create_xml_node(out, PCMK_XE_CURRENT_DC,
PCMK_XA_PRESENT, PCMK_VALUE_TRUE,
PCMK_XA_VERSION, pcmk__s(dc_version_s, ""),
PCMK_XA_NAME, dc->priv->name,
PCMK_XA_ID, dc->priv->id,
PCMK_XA_WITH_QUORUM, with_quorum,
PCMK_XA_MIXED_VERSION, mixed_version_s,
NULL);
} else {
pcmk__output_create_xml_node(out, PCMK_XE_CURRENT_DC,
PCMK_XA_PRESENT, PCMK_VALUE_FALSE,
NULL);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("maint-mode", "uint64_t")
static int
cluster_maint_mode_text(pcmk__output_t *out, va_list args) {
uint64_t flags = va_arg(args, uint64_t);
if (pcmk_is_set(flags, pcmk__sched_in_maintenance)) {
pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n");
pcmk__formatted_printf(out, " The cluster will not attempt to start, stop or recover services\n");
return pcmk_rc_ok;
} else if (pcmk_is_set(flags, pcmk__sched_stop_all)) {
pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n");
pcmk__formatted_printf(out, " The cluster will keep all resources stopped\n");
return pcmk_rc_ok;
} else {
return pcmk_rc_no_output;
}
}
PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *")
static int
cluster_options_html(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
out->list_item(out, NULL, "STONITH of failed nodes enabled");
} else {
out->list_item(out, NULL, "STONITH of failed nodes disabled");
}
if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) {
out->list_item(out, NULL, "Cluster is symmetric");
} else {
out->list_item(out, NULL, "Cluster is asymmetric");
}
switch (scheduler->no_quorum_policy) {
case pcmk_no_quorum_freeze:
out->list_item(out, NULL, "No quorum policy: Freeze resources");
break;
case pcmk_no_quorum_stop:
out->list_item(out, NULL, "No quorum policy: Stop ALL resources");
break;
case pcmk_no_quorum_demote:
out->list_item(out, NULL, "No quorum policy: Demote promotable "
"resources and stop all other resources");
break;
case pcmk_no_quorum_ignore:
out->list_item(out, NULL, "No quorum policy: Ignore");
break;
case pcmk_no_quorum_fence:
out->list_item(out, NULL,
"No quorum policy: Fence nodes in partition");
break;
}
if (pcmk_is_set(scheduler->flags, pcmk__sched_in_maintenance)) {
xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL);
xmlNode *child = NULL;
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "Resource management: ");
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "DISABLED");
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child,
" (the cluster will not attempt to start, stop,"
" or recover services)");
} else if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_all)) {
xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL);
xmlNode *child = NULL;
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "Resource management: ");
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "STOPPED");
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child,
" (the cluster will keep all resources stopped)");
} else {
out->list_item(out, NULL, "Resource management: enabled");
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *")
static int
cluster_options_log(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
if (pcmk_is_set(scheduler->flags, pcmk__sched_in_maintenance)) {
return out->info(out, "Resource management is DISABLED. The cluster will not attempt to start, stop or recover services.");
} else if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_all)) {
return out->info(out, "Resource management is DISABLED. The cluster has stopped all resources.");
} else {
return pcmk_rc_no_output;
}
}
PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *")
static int
cluster_options_text(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
out->list_item(out, NULL, "STONITH of failed nodes enabled");
} else {
out->list_item(out, NULL, "STONITH of failed nodes disabled");
}
if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) {
out->list_item(out, NULL, "Cluster is symmetric");
} else {
out->list_item(out, NULL, "Cluster is asymmetric");
}
switch (scheduler->no_quorum_policy) {
case pcmk_no_quorum_freeze:
out->list_item(out, NULL, "No quorum policy: Freeze resources");
break;
case pcmk_no_quorum_stop:
out->list_item(out, NULL, "No quorum policy: Stop ALL resources");
break;
case pcmk_no_quorum_demote:
out->list_item(out, NULL, "No quorum policy: Demote promotable "
"resources and stop all other resources");
break;
case pcmk_no_quorum_ignore:
out->list_item(out, NULL, "No quorum policy: Ignore");
break;
case pcmk_no_quorum_fence:
out->list_item(out, NULL,
"No quorum policy: Fence nodes in partition");
break;
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Get readable string representation of a no-quorum policy
*
* \param[in] policy No-quorum policy
*
* \return String representation of \p policy
*/
static const char *
no_quorum_policy_text(enum pe_quorum_policy policy)
{
switch (policy) {
case pcmk_no_quorum_freeze:
return PCMK_VALUE_FREEZE;
case pcmk_no_quorum_stop:
return PCMK_VALUE_STOP;
case pcmk_no_quorum_demote:
return PCMK_VALUE_DEMOTE;
case pcmk_no_quorum_ignore:
return PCMK_VALUE_IGNORE;
case pcmk_no_quorum_fence:
return PCMK_VALUE_FENCE;
default:
return PCMK_VALUE_UNKNOWN;
}
}
PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *")
static int
cluster_options_xml(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
const char *stonith_enabled = pcmk__flag_text(scheduler->flags,
pcmk__sched_fencing_enabled);
const char *symmetric_cluster =
pcmk__flag_text(scheduler->flags, pcmk__sched_symmetric_cluster);
const char *no_quorum_policy =
no_quorum_policy_text(scheduler->no_quorum_policy);
const char *maintenance_mode = pcmk__flag_text(scheduler->flags,
pcmk__sched_in_maintenance);
const char *stop_all_resources = pcmk__flag_text(scheduler->flags,
pcmk__sched_stop_all);
char *stonith_timeout_ms_s =
crm_strdup_printf("%u", scheduler->priv->fence_timeout_ms);
char *priority_fencing_delay_ms_s =
crm_strdup_printf("%u", scheduler->priv->priority_fencing_ms);
pcmk__output_create_xml_node(out, PCMK_XE_CLUSTER_OPTIONS,
PCMK_XA_STONITH_ENABLED, stonith_enabled,
PCMK_XA_SYMMETRIC_CLUSTER, symmetric_cluster,
PCMK_XA_NO_QUORUM_POLICY, no_quorum_policy,
PCMK_XA_MAINTENANCE_MODE, maintenance_mode,
PCMK_XA_STOP_ALL_RESOURCES, stop_all_resources,
PCMK_XA_STONITH_TIMEOUT_MS,
stonith_timeout_ms_s,
PCMK_XA_PRIORITY_FENCING_DELAY_MS,
priority_fencing_delay_ms_s,
NULL);
free(stonith_timeout_ms_s);
free(priority_fencing_delay_ms_s);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state")
static int
cluster_stack_html(pcmk__output_t *out, va_list args) {
const char *stack_s = va_arg(args, const char *);
enum pcmk_pacemakerd_state pcmkd_state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL);
xmlNode *child = NULL;
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "Stack: ");
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "%s", stack_s);
if (pcmkd_state != pcmk_pacemakerd_state_invalid) {
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, " (");
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "%s",
pcmk__pcmkd_state_enum2friendly(pcmkd_state));
child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, ")");
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state")
static int
cluster_stack_text(pcmk__output_t *out, va_list args) {
const char *stack_s = va_arg(args, const char *);
enum pcmk_pacemakerd_state pcmkd_state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
if (pcmkd_state != pcmk_pacemakerd_state_invalid) {
out->list_item(out, "Stack", "%s (%s)",
stack_s, pcmk__pcmkd_state_enum2friendly(pcmkd_state));
} else {
out->list_item(out, "Stack", "%s", stack_s);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state")
static int
cluster_stack_xml(pcmk__output_t *out, va_list args) {
const char *stack_s = va_arg(args, const char *);
enum pcmk_pacemakerd_state pcmkd_state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
const char *state_s = NULL;
if (pcmkd_state != pcmk_pacemakerd_state_invalid) {
state_s = pcmk_pacemakerd_api_daemon_state_enum2text(pcmkd_state);
}
pcmk__output_create_xml_node(out, PCMK_XE_STACK,
PCMK_XA_TYPE, stack_s,
PCMK_XA_PACEMAKERD_STATE, state_s,
NULL);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *",
"const char *", "const char *", "const char *")
static int
cluster_times_html(pcmk__output_t *out, va_list args) {
const char *our_nodename = va_arg(args, const char *);
const char *last_written = va_arg(args, const char *);
const char *user = va_arg(args, const char *);
const char *client = va_arg(args, const char *);
const char *origin = va_arg(args, const char *);
xmlNodePtr updated_node = pcmk__output_create_xml_node(out, "li", NULL);
xmlNodePtr changed_node = pcmk__output_create_xml_node(out, "li", NULL);
xmlNode *child = NULL;
char *time_s = NULL;
child = pcmk__html_create(updated_node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "Last updated: ");
child = pcmk__html_create(updated_node, PCMK__XE_SPAN, NULL, NULL);
time_s = pcmk__epoch2str(NULL, 0);
pcmk__xe_set_content(child, "%s", time_s);
free(time_s);
if (our_nodename != NULL) {
child = pcmk__html_create(updated_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, " on ");
child = pcmk__html_create(updated_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "%s", our_nodename);
}
child = pcmk__html_create(changed_node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "Last change: ");
child = pcmk__html_create(changed_node, PCMK__XE_SPAN, NULL, NULL);
time_s = last_changed_string(last_written, user, client, origin);
pcmk__xe_set_content(child, "%s", time_s);
free(time_s);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *",
"const char *", "const char *", "const char *")
static int
cluster_times_xml(pcmk__output_t *out, va_list args) {
const char *our_nodename = va_arg(args, const char *);
const char *last_written = va_arg(args, const char *);
const char *user = va_arg(args, const char *);
const char *client = va_arg(args, const char *);
const char *origin = va_arg(args, const char *);
char *time_s = pcmk__epoch2str(NULL, 0);
pcmk__output_create_xml_node(out, PCMK_XE_LAST_UPDATE,
PCMK_XA_TIME, time_s,
PCMK_XA_ORIGIN, our_nodename,
NULL);
pcmk__output_create_xml_node(out, PCMK_XE_LAST_CHANGE,
PCMK_XA_TIME, pcmk__s(last_written, ""),
PCMK_XA_USER, pcmk__s(user, ""),
PCMK_XA_CLIENT, pcmk__s(client, ""),
PCMK_XA_ORIGIN, pcmk__s(origin, ""),
NULL);
free(time_s);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *",
"const char *", "const char *", "const char *")
static int
cluster_times_text(pcmk__output_t *out, va_list args) {
const char *our_nodename = va_arg(args, const char *);
const char *last_written = va_arg(args, const char *);
const char *user = va_arg(args, const char *);
const char *client = va_arg(args, const char *);
const char *origin = va_arg(args, const char *);
char *time_s = pcmk__epoch2str(NULL, 0);
out->list_item(out, "Last updated", "%s%s%s",
time_s, (our_nodename != NULL)? " on " : "",
pcmk__s(our_nodename, ""));
free(time_s);
time_s = last_changed_string(last_written, user, client, origin);
out->list_item(out, "Last change", " %s", time_s);
free(time_s);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Display a failed action in less-technical natural language
*
* \param[in,out] out Output object to use for display
* \param[in] xml_op XML containing failed action
* \param[in] op_key Operation key of failed action
* \param[in] node_name Where failed action occurred
* \param[in] rc OCF exit code of failed action
* \param[in] status Execution status of failed action
* \param[in] exit_reason Exit reason given for failed action
* \param[in] exec_time String containing execution time in milliseconds
*/
static void
failed_action_friendly(pcmk__output_t *out, const xmlNode *xml_op,
const char *op_key, const char *node_name, int rc,
int status, const char *exit_reason,
const char *exec_time)
{
char *rsc_id = NULL;
char *task = NULL;
guint interval_ms = 0;
time_t last_change_epoch = 0;
GString *str = NULL;
if (pcmk__str_empty(op_key)
|| !parse_op_key(op_key, &rsc_id, &task, &interval_ms)) {
pcmk__str_update(&rsc_id, "unknown resource");
pcmk__str_update(&task, "unknown action");
interval_ms = 0;
}
pcmk__assert((rsc_id != NULL) && (task != NULL));
str = g_string_sized_new(256); // Should be sufficient for most messages
pcmk__g_strcat(str, rsc_id, " ", NULL);
if (interval_ms != 0) {
pcmk__g_strcat(str, pcmk__readable_interval(interval_ms), "-interval ",
NULL);
}
pcmk__g_strcat(str, pcmk__readable_action(task, interval_ms), " on ",
node_name, NULL);
if (status == PCMK_EXEC_DONE) {
- pcmk__g_strcat(str, " returned '", services_ocf_exitcode_str(rc), "'",
- NULL);
+ pcmk__g_strcat(str, " returned '", crm_exit_str(rc), "'", NULL);
if (!pcmk__str_empty(exit_reason)) {
pcmk__g_strcat(str, " (", exit_reason, ")", NULL);
}
} else {
pcmk__g_strcat(str, " could not be executed (",
pcmk_exec_status_str(status), NULL);
if (!pcmk__str_empty(exit_reason)) {
pcmk__g_strcat(str, ": ", exit_reason, NULL);
}
g_string_append_c(str, ')');
}
if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
&last_change_epoch) == pcmk_ok) {
char *s = pcmk__epoch2str(&last_change_epoch, 0);
pcmk__g_strcat(str, " at ", s, NULL);
free(s);
}
if (!pcmk__str_empty(exec_time)) {
int exec_time_ms = 0;
if ((pcmk__scan_min_int(exec_time, &exec_time_ms, 0) == pcmk_rc_ok)
&& (exec_time_ms > 0)) {
pcmk__g_strcat(str, " after ",
pcmk__readable_interval(exec_time_ms), NULL);
}
}
out->list_item(out, NULL, "%s", str->str);
g_string_free(str, TRUE);
free(rsc_id);
free(task);
}
/*!
* \internal
* \brief Display a failed action with technical details
*
* \param[in,out] out Output object to use for display
* \param[in] xml_op XML containing failed action
* \param[in] op_key Operation key of failed action
* \param[in] node_name Where failed action occurred
* \param[in] rc OCF exit code of failed action
* \param[in] status Execution status of failed action
* \param[in] exit_reason Exit reason given for failed action
* \param[in] exec_time String containing execution time in milliseconds
*/
static void
failed_action_technical(pcmk__output_t *out, const xmlNode *xml_op,
const char *op_key, const char *node_name, int rc,
int status, const char *exit_reason,
const char *exec_time)
{
const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID);
const char *queue_time = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME);
- const char *exit_status = services_ocf_exitcode_str(rc);
+ const char *exit_status = crm_exit_str(rc);
const char *lrm_status = pcmk_exec_status_str(status);
time_t last_change_epoch = 0;
GString *str = NULL;
if (pcmk__str_empty(op_key)) {
op_key = "unknown operation";
}
if (pcmk__str_empty(exit_status)) {
exit_status = "unknown exit status";
}
if (pcmk__str_empty(call_id)) {
call_id = "unknown";
}
str = g_string_sized_new(256);
g_string_append_printf(str, "%s on %s '%s' (%d): call=%s, status='%s'",
op_key, node_name, exit_status, rc, call_id,
lrm_status);
if (!pcmk__str_empty(exit_reason)) {
pcmk__g_strcat(str, ", exitreason='", exit_reason, "'", NULL);
}
if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
&last_change_epoch) == pcmk_ok) {
char *last_change_str = pcmk__epoch2str(&last_change_epoch, 0);
pcmk__g_strcat(str,
", " PCMK_XA_LAST_RC_CHANGE "="
"'", last_change_str, "'", NULL);
free(last_change_str);
}
if (!pcmk__str_empty(queue_time)) {
pcmk__g_strcat(str, ", queued=", queue_time, "ms", NULL);
}
if (!pcmk__str_empty(exec_time)) {
pcmk__g_strcat(str, ", exec=", exec_time, "ms", NULL);
}
out->list_item(out, NULL, "%s", str->str);
g_string_free(str, TRUE);
}
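/* Display a failed action with either the technical or the friendly helper
* above, depending on whether pcmk_show_failed_detail is set
*/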
PCMK__OUTPUT_ARGS("failed-action", "xmlNode *", "uint32_t")
static int
failed_action_default(pcmk__output_t *out, va_list args)
{
xmlNodePtr xml_op = va_arg(args, xmlNodePtr);
uint32_t show_opts = va_arg(args, uint32_t);
const char *op_key = pcmk__xe_history_key(xml_op);
const char *node_name = crm_element_value(xml_op, PCMK_XA_UNAME);
const char *exit_reason = crm_element_value(xml_op, PCMK_XA_EXIT_REASON);
const char *exec_time = crm_element_value(xml_op, PCMK_XA_EXEC_TIME);
int rc;
int status;
pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_RC_CODE), &rc, 0);
pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status,
0);
if (pcmk__str_empty(node_name)) {
node_name = "unknown node";
}
if (pcmk_is_set(show_opts, pcmk_show_failed_detail)) {
failed_action_technical(out, xml_op, op_key, node_name, rc, status,
exit_reason, exec_time);
} else {
failed_action_friendly(out, xml_op, op_key, node_name, rc, status,
exit_reason, exec_time);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("failed-action", "xmlNode *", "uint32_t")
static int
failed_action_xml(pcmk__output_t *out, va_list args) {
xmlNodePtr xml_op = va_arg(args, xmlNodePtr);
uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t);
const char *op_key = pcmk__xe_history_key(xml_op);
const char *op_key_name = PCMK_XA_OP_KEY;
int rc;
int status;
const char *uname = crm_element_value(xml_op, PCMK_XA_UNAME);
const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID);
const char *exitstatus = NULL;
const char *exit_reason = pcmk__s(crm_element_value(xml_op,
PCMK_XA_EXIT_REASON),
"none");
const char *status_s = NULL;
time_t epoch = 0;
gchar *exit_reason_esc = NULL;
char *rc_s = NULL;
xmlNodePtr node = NULL;
if (pcmk__xml_needs_escape(exit_reason, pcmk__xml_escape_attr)) {
exit_reason_esc = pcmk__xml_escape(exit_reason, pcmk__xml_escape_attr);
exit_reason = exit_reason_esc;
}
pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_RC_CODE), &rc, 0);
pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status,
0);
if (crm_element_value(xml_op, PCMK__XA_OPERATION_KEY) == NULL) {
op_key_name = PCMK_XA_ID;
}
- exitstatus = services_ocf_exitcode_str(rc);
+ exitstatus = crm_exit_str(rc);
rc_s = pcmk__itoa(rc);
status_s = pcmk_exec_status_str(status);
node = pcmk__output_create_xml_node(out, PCMK_XE_FAILURE,
op_key_name, op_key,
PCMK_XA_NODE, uname,
PCMK_XA_EXITSTATUS, exitstatus,
PCMK_XA_EXITREASON, exit_reason,
PCMK_XA_EXITCODE, rc_s,
PCMK_XA_CALL, call_id,
PCMK_XA_STATUS, status_s,
NULL);
free(rc_s);
if ((crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
&epoch) == pcmk_ok) && (epoch > 0)) {
const char *queue_time = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME);
const char *exec = crm_element_value(xml_op, PCMK_XA_EXEC_TIME);
const char *task = crm_element_value(xml_op, PCMK_XA_OPERATION);
guint interval_ms = 0;
char *interval_ms_s = NULL;
char *rc_change = pcmk__epoch2str(&epoch,
crm_time_log_date
|crm_time_log_timeofday
|crm_time_log_with_timezone);
crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &interval_ms);
interval_ms_s = crm_strdup_printf("%u", interval_ms);
pcmk__xe_set_props(node,
PCMK_XA_LAST_RC_CHANGE, rc_change,
PCMK_XA_QUEUED, queue_time,
PCMK_XA_EXEC, exec,
PCMK_XA_INTERVAL, interval_ms_s,
PCMK_XA_TASK, task,
NULL);
free(interval_ms_s);
free(rc_change);
}
g_free(exit_reason_esc);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("failed-action-list", "pcmk_scheduler_t *", "GList *",
"GList *", "uint32_t", "bool")
static int
failed_action_list(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
uint32_t show_opts = va_arg(args, uint32_t);
bool print_spacer = va_arg(args, int);
xmlNode *xml_op = NULL;
int rc = pcmk_rc_no_output;
if (xmlChildElementCount(scheduler->priv->failed) == 0) {
return rc;
}
for (xml_op = pcmk__xe_first_child(scheduler->priv->failed, NULL, NULL,
NULL);
xml_op != NULL; xml_op = pcmk__xe_next(xml_op)) {
char *rsc = NULL;
if (!pcmk__str_in_list(crm_element_value(xml_op, PCMK_XA_UNAME),
only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
continue;
}
if (pcmk_xe_mask_probe_failure(xml_op)) {
continue;
}
if (!parse_op_key(pcmk__xe_history_key(xml_op), &rsc, NULL, NULL)) {
continue;
}
if (!pcmk__str_in_list(rsc, only_rsc, pcmk__str_star_matches)) {
free(rsc);
continue;
}
free(rsc);
PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Failed Resource Actions");
out->message(out, "failed-action", xml_op, show_opts);
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
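/*!
* \internal
* \brief Append HTML spans describing a node's status to a parent element
*
* The spans cover cluster membership, standby and maintenance mode, node
* health, and (if requested) the node's feature set.
*
* \param[in] node Node whose status to describe
* \param[in,out] parent HTML element to append spans to
* \param[in] show_opts Group of \c pcmk_show_* flags
*/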
static void
status_node(pcmk_node_t *node, xmlNodePtr parent, uint32_t show_opts)
{
int health = pe__node_health(node);
xmlNode *child = NULL;
// Cluster membership
if (node->details->online) {
child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL,
PCMK_VALUE_ONLINE);
pcmk__xe_set_content(child, " online");
} else {
child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL,
PCMK_VALUE_OFFLINE);
pcmk__xe_set_content(child, " OFFLINE");
}
// Standby mode
if (pcmk_is_set(node->priv->flags, pcmk__node_fail_standby)) {
child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL,
PCMK_VALUE_STANDBY);
if (node->details->running_rsc == NULL) {
pcmk__xe_set_content(child,
" (in standby due to " PCMK_META_ON_FAIL ")");
} else {
pcmk__xe_set_content(child,
" (in standby due to " PCMK_META_ON_FAIL ","
" with active resources)");
}
} else if (pcmk_is_set(node->priv->flags, pcmk__node_standby)) {
child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL,
PCMK_VALUE_STANDBY);
if (node->details->running_rsc == NULL) {
pcmk__xe_set_content(child, " (in standby)");
} else {
pcmk__xe_set_content(child, " (in standby, with active resources)");
}
}
// Maintenance mode
if (node->details->maintenance) {
child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL,
PCMK__VALUE_MAINT);
pcmk__xe_set_content(child, " (in maintenance mode)");
}
// Node health
if (health < 0) {
child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL,
PCMK__VALUE_HEALTH_RED);
pcmk__xe_set_content(child, " (health is RED)");
} else if (health == 0) {
child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL,
PCMK__VALUE_HEALTH_YELLOW);
pcmk__xe_set_content(child, " (health is YELLOW)");
}
// Feature set
if (pcmk_is_set(show_opts, pcmk_show_feature_set)) {
const char *feature_set = get_node_feature_set(node);
if (feature_set != NULL) {
child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, ", feature set %s", feature_set);
}
}
}
PCMK__OUTPUT_ARGS("node", "pcmk_node_t *", "uint32_t", "bool",
"GList *", "GList *")
static int
node_html(pcmk__output_t *out, va_list args) {
pcmk_node_t *node = va_arg(args, pcmk_node_t *);
uint32_t show_opts = va_arg(args, uint32_t);
bool full = va_arg(args, int);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id));
if (full) {
xmlNode *item_node = NULL;
xmlNode *child = NULL;
if (pcmk_all_flags_set(show_opts, pcmk_show_brief | pcmk_show_rscs_by_node)) {
GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc);
out->begin_list(out, NULL, NULL, "%s:", node_name);
item_node = pcmk__output_xml_create_parent(out, "li", NULL);
child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "Status:");
status_node(node, item_node, show_opts);
if (rscs != NULL) {
uint32_t new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs;
out->begin_list(out, NULL, NULL, "Resources");
pe__rscs_brief_output(out, rscs, new_show_opts);
out->end_list(out);
}
g_list_free(rscs);
pcmk__output_xml_pop_parent(out);
out->end_list(out);
} else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) {
GList *lpc2 = NULL;
int rc = pcmk_rc_no_output;
out->begin_list(out, NULL, NULL, "%s:", node_name);
item_node = pcmk__output_xml_create_parent(out, "li", NULL);
child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "Status:");
status_node(node, item_node, show_opts);
for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) lpc2->data;
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Resources");
show_opts |= pcmk_show_rsc_only;
out->message(out, (const char *) rsc->priv->xml->name,
show_opts, rsc, only_node, only_rsc);
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
pcmk__output_xml_pop_parent(out);
out->end_list(out);
} else {
item_node = pcmk__output_create_xml_node(out, "li", NULL);
child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "%s:", node_name);
status_node(node, item_node, show_opts);
}
} else {
out->begin_list(out, NULL, NULL, "%s:", node_name);
}
free(node_name);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Get a human-friendly textual description of a node's status
*
* \param[in] node Node to check
*
* \return String representation of node's status
*/
static const char *
node_text_status(const pcmk_node_t *node)
{
if (node->details->unclean) {
if (node->details->online) {
return "UNCLEAN (online)";
} else if (node->details->pending) {
return "UNCLEAN (pending)";
} else {
return "UNCLEAN (offline)";
}
} else if (node->details->pending) {
return "pending";
} else if (pcmk_is_set(node->priv->flags, pcmk__node_fail_standby)
&& node->details->online) {
return "standby (" PCMK_META_ON_FAIL ")";
} else if (pcmk_is_set(node->priv->flags, pcmk__node_standby)) {
if (!node->details->online) {
return "OFFLINE (standby)";
} else if (node->details->running_rsc == NULL) {
return "standby";
} else {
return "standby (with active resources)";
}
} else if (node->details->maintenance) {
if (node->details->online) {
return "maintenance";
} else {
return "OFFLINE (maintenance)";
}
} else if (node->details->online) {
return "online";
}
return "OFFLINE";
}
PCMK__OUTPUT_ARGS("node", "pcmk_node_t *", "uint32_t", "bool", "GList *",
"GList *")
static int
node_text(pcmk__output_t *out, va_list args) {
pcmk_node_t *node = va_arg(args, pcmk_node_t *);
uint32_t show_opts = va_arg(args, uint32_t);
bool full = va_arg(args, int);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
if (full) {
char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id));
GString *str = g_string_sized_new(64);
int health = pe__node_health(node);
// Create a summary line with node type, name, and status
if (pcmk__is_guest_or_bundle_node(node)) {
g_string_append(str, "GuestNode");
} else if (pcmk__is_remote_node(node)) {
g_string_append(str, "RemoteNode");
} else {
g_string_append(str, "Node");
}
pcmk__g_strcat(str, " ", node_name, ": ", node_text_status(node), NULL);
if (health < 0) {
g_string_append(str, " (health is RED)");
} else if (health == 0) {
g_string_append(str, " (health is YELLOW)");
}
if (pcmk_is_set(show_opts, pcmk_show_feature_set)) {
const char *feature_set = get_node_feature_set(node);
if (feature_set != NULL) {
pcmk__g_strcat(str, ", feature set ", feature_set, NULL);
}
}
/* If we're grouping by node, print its resources */
if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) {
if (pcmk_is_set(show_opts, pcmk_show_brief)) {
GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc);
if (rscs != NULL) {
uint32_t new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs;
out->begin_list(out, NULL, NULL, "%s", str->str);
out->begin_list(out, NULL, NULL, "Resources");
pe__rscs_brief_output(out, rscs, new_show_opts);
out->end_list(out);
out->end_list(out);
g_list_free(rscs);
}
} else {
GList *gIter2 = NULL;
out->begin_list(out, NULL, NULL, "%s", str->str);
out->begin_list(out, NULL, NULL, "Resources");
for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) gIter2->data;
show_opts |= pcmk_show_rsc_only;
out->message(out, (const char *) rsc->priv->xml->name,
show_opts, rsc, only_node, only_rsc);
}
out->end_list(out);
out->end_list(out);
}
} else {
out->list_item(out, NULL, "%s", str->str);
}
g_string_free(str, TRUE);
free(node_name);
} else {
char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id));
out->begin_list(out, NULL, NULL, "Node: %s", node_name);
free(node_name);
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Convert an integer health value to a string representation
*
* \param[in] health Integer health value
*
* \retval \c PCMK_VALUE_RED if \p health is less than 0
* \retval \c PCMK_VALUE_YELLOW if \p health is equal to 0
* \retval \c PCMK_VALUE_GREEN if \p health is greater than 0
*/
static const char *
health_text(int health)
{
if (health < 0) {
return PCMK_VALUE_RED;
} else if (health == 0) {
return PCMK_VALUE_YELLOW;
} else {
return PCMK_VALUE_GREEN;
}
}
/*!
* \internal
* \brief Convert a node variant to a string representation
*
* \param[in] variant Node variant
*
* \retval \c PCMK_VALUE_MEMBER if \p variant is \c pcmk__node_variant_cluster
* \retval \c PCMK_VALUE_REMOTE if \p variant is \c pcmk__node_variant_remote
* \retval \c PCMK_VALUE_UNKNOWN otherwise
*/
static const char *
node_variant_text(enum pcmk__node_variant variant)
{
switch (variant) {
case pcmk__node_variant_cluster:
return PCMK_VALUE_MEMBER;
case pcmk__node_variant_remote:
return PCMK_VALUE_REMOTE;
default:
return PCMK_VALUE_UNKNOWN;
}
}
PCMK__OUTPUT_ARGS("node", "pcmk_node_t *", "uint32_t", "bool", "GList *",
"GList *")
static int
node_xml(pcmk__output_t *out, va_list args) {
pcmk_node_t *node = va_arg(args, pcmk_node_t *);
uint32_t show_opts = va_arg(args, uint32_t);
bool full = va_arg(args, int);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
if (full) {
const char *online = pcmk__btoa(node->details->online);
const char *standby = pcmk__flag_text(node->priv->flags,
pcmk__node_standby);
const char *standby_onfail = pcmk__flag_text(node->priv->flags,
pcmk__node_fail_standby);
const char *maintenance = pcmk__btoa(node->details->maintenance);
const char *pending = pcmk__btoa(node->details->pending);
const char *unclean = pcmk__btoa(node->details->unclean);
const char *health = health_text(pe__node_health(node));
const char *feature_set = get_node_feature_set(node);
const char *shutdown = pcmk__btoa(node->details->shutdown);
const char *expected_up = pcmk__flag_text(node->priv->flags,
pcmk__node_expected_up);
const bool is_dc = pcmk__same_node(node,
node->priv->scheduler->dc_node);
int length = g_list_length(node->details->running_rsc);
char *resources_running = pcmk__itoa(length);
const char *node_type = node_variant_text(node->priv->variant);
int rc = pcmk_rc_ok;
rc = pe__name_and_nvpairs_xml(out, true, PCMK_XE_NODE,
PCMK_XA_NAME, node->priv->name,
PCMK_XA_ID, node->priv->id,
PCMK_XA_ONLINE, online,
PCMK_XA_STANDBY, standby,
PCMK_XA_STANDBY_ONFAIL, standby_onfail,
PCMK_XA_MAINTENANCE, maintenance,
PCMK_XA_PENDING, pending,
PCMK_XA_UNCLEAN, unclean,
PCMK_XA_HEALTH, health,
PCMK_XA_FEATURE_SET, feature_set,
PCMK_XA_SHUTDOWN, shutdown,
PCMK_XA_EXPECTED_UP, expected_up,
PCMK_XA_IS_DC, pcmk__btoa(is_dc),
PCMK_XA_RESOURCES_RUNNING, resources_running,
PCMK_XA_TYPE, node_type,
NULL);
free(resources_running);
pcmk__assert(rc == pcmk_rc_ok);
if (pcmk__is_guest_or_bundle_node(node)) {
xmlNodePtr xml_node = pcmk__output_xml_peek_parent(out);
crm_xml_add(xml_node, PCMK_XA_ID_AS_RESOURCE,
node->priv->remote->priv->launcher->id);
}
if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) {
GList *lpc = NULL;
for (lpc = node->details->running_rsc; lpc != NULL; lpc = lpc->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) lpc->data;
show_opts |= pcmk_show_rsc_only;
out->message(out, (const char *) rsc->priv->xml->name,
show_opts, rsc, only_node, only_rsc);
}
}
out->end_list(out);
} else {
pcmk__output_xml_create_parent(out, PCMK_XE_NODE,
PCMK_XA_NAME, node->priv->name,
NULL);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int")
static int
node_attribute_text(pcmk__output_t *out, va_list args) {
const char *name = va_arg(args, const char *);
const char *value = va_arg(args, const char *);
bool add_extra = va_arg(args, int);
int expected_score = va_arg(args, int);
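/* If requested, interpret the value as a connectivity score and compare
* it against the expected score
*/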
if (add_extra) {
int v;
if (value == NULL) {
v = 0;
} else {
pcmk__scan_min_int(value, &v, INT_MIN);
}
if (v <= 0) {
out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is lost", name, value);
} else if (v < expected_score) {
out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is degraded (Expected=%d)", name, value, expected_score);
} else {
out->list_item(out, NULL, "%-32s\t: %-10s", name, value);
}
} else {
out->list_item(out, NULL, "%-32s\t: %-10s", name, value);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int")
static int
node_attribute_html(pcmk__output_t *out, va_list args) {
const char *name = va_arg(args, const char *);
const char *value = va_arg(args, const char *);
bool add_extra = va_arg(args, int);
int expected_score = va_arg(args, int);
if (add_extra) {
int v = 0;
xmlNodePtr item_node = pcmk__output_create_xml_node(out, "li", NULL);
xmlNode *child = NULL;
if (value != NULL) {
pcmk__scan_min_int(value, &v, INT_MIN);
}
child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL, NULL);
pcmk__xe_set_content(child, "%s: %s", name, value);
if (v <= 0) {
child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_BOLD);
pcmk__xe_set_content(child, "(connectivity is lost)");
} else if (v < expected_score) {
child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL,
PCMK__VALUE_BOLD);
pcmk__xe_set_content(child,
"(connectivity is degraded -- expected %d)",
expected_score);
}
} else {
out->list_item(out, NULL, "%s: %s", name, value);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-and-op", "pcmk_scheduler_t *", "xmlNode *")
static int
node_and_op(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
xmlNodePtr xml_op = va_arg(args, xmlNodePtr);
pcmk_resource_t *rsc = NULL;
gchar *node_str = NULL;
char *last_change_str = NULL;
const char *op_rsc = crm_element_value(xml_op, PCMK_XA_RESOURCE);
int status;
time_t last_change = 0;
pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status,
PCMK_EXEC_UNKNOWN);
rsc = pe_find_resource(scheduler->priv->resources, op_rsc);
if (rsc) {
const pcmk_node_t *node = pcmk__current_node(rsc);
const char *target_role = g_hash_table_lookup(rsc->priv->meta,
PCMK_META_TARGET_ROLE);
uint32_t show_opts = pcmk_show_rsc_only | pcmk_show_pending;
if (node == NULL) {
node = rsc->priv->pending_node;
}
node_str = pcmk__native_output_string(rsc, rsc_printable_id(rsc), node,
show_opts, target_role, false);
} else {
node_str = crm_strdup_printf("Unknown resource %s", op_rsc);
}
if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
&last_change) == pcmk_ok) {
const char *exec_time = crm_element_value(xml_op, PCMK_XA_EXEC_TIME);
last_change_str = crm_strdup_printf(", %s='%s', exec=%sms",
PCMK_XA_LAST_RC_CHANGE,
pcmk__trim(ctime(&last_change)),
exec_time);
}
out->list_item(out, NULL, "%s: %s (node=%s, call=%s, rc=%s%s): %s",
node_str, pcmk__xe_history_key(xml_op),
crm_element_value(xml_op, PCMK_XA_UNAME),
crm_element_value(xml_op, PCMK__XA_CALL_ID),
crm_element_value(xml_op, PCMK__XA_RC_CODE),
last_change_str ? last_change_str : "",
pcmk_exec_status_str(status));
g_free(node_str);
free(last_change_str);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-and-op", "pcmk_scheduler_t *", "xmlNode *")
static int
node_and_op_xml(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
xmlNodePtr xml_op = va_arg(args, xmlNodePtr);
pcmk_resource_t *rsc = NULL;
const char *uname = crm_element_value(xml_op, PCMK_XA_UNAME);
const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID);
const char *rc_s = crm_element_value(xml_op, PCMK__XA_RC_CODE);
const char *status_s = NULL;
const char *op_rsc = crm_element_value(xml_op, PCMK_XA_RESOURCE);
int status;
time_t last_change = 0;
xmlNode *node = NULL;
pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS),
&status, PCMK_EXEC_UNKNOWN);
status_s = pcmk_exec_status_str(status);
node = pcmk__output_create_xml_node(out, PCMK_XE_OPERATION,
PCMK_XA_OP, pcmk__xe_history_key(xml_op),
PCMK_XA_NODE, uname,
PCMK_XA_CALL, call_id,
PCMK_XA_RC, rc_s,
PCMK_XA_STATUS, status_s,
NULL);
rsc = pe_find_resource(scheduler->priv->resources, op_rsc);
if (rsc) {
const char *class = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS);
const char *provider = crm_element_value(rsc->priv->xml,
PCMK_XA_PROVIDER);
const char *kind = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE);
bool has_provider = pcmk_is_set(pcmk_get_ra_caps(class),
pcmk_ra_cap_provider);
char *agent_tuple = crm_strdup_printf("%s:%s:%s",
class,
(has_provider? provider : ""),
kind);
pcmk__xe_set_props(node,
PCMK_XA_RSC, rsc_printable_id(rsc),
PCMK_XA_AGENT, agent_tuple,
NULL);
free(agent_tuple);
}
if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
&last_change) == pcmk_ok) {
const char *last_rc_change = pcmk__trim(ctime(&last_change));
const char *exec_time = crm_element_value(xml_op, PCMK_XA_EXEC_TIME);
pcmk__xe_set_props(node,
PCMK_XA_LAST_RC_CHANGE, last_rc_change,
PCMK_XA_EXEC_TIME, exec_time,
NULL);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int")
static int
node_attribute_xml(pcmk__output_t *out, va_list args) {
const char *name = va_arg(args, const char *);
const char *value = va_arg(args, const char *);
bool add_extra = va_arg(args, int);
int expected_score = va_arg(args, int);
xmlNodePtr node = pcmk__output_create_xml_node(out, PCMK_XE_ATTRIBUTE,
PCMK_XA_NAME, name,
PCMK_XA_VALUE, value,
NULL);
if (add_extra) {
char *buf = pcmk__itoa(expected_score);
crm_xml_add(node, PCMK_XA_EXPECTED, buf);
free(buf);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-attribute-list", "pcmk_scheduler_t *", "uint32_t",
"bool", "GList *", "GList *")
static int
node_attribute_list(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
uint32_t show_opts = va_arg(args, uint32_t);
bool print_spacer = va_arg(args, int);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
int rc = pcmk_rc_no_output;
/* Display each node's attributes */
for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *node = gIter->data;
GList *attr_list = NULL;
GHashTableIter iter;
gpointer key;
if (!node || !node->details || !node->details->online) {
continue;
}
g_hash_table_iter_init(&iter, node->priv->attrs);
while (g_hash_table_iter_next(&iter, &key, NULL)) {
attr_list = filter_attr_list(attr_list, key);
}
if (attr_list == NULL) {
continue;
}
if (!pcmk__str_in_list(node->priv->name, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
g_list_free(attr_list);
continue;
}
PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node Attributes");
out->message(out, "node", node, show_opts, false, only_node, only_rsc);
for (GList *aIter = attr_list; aIter != NULL; aIter = aIter->next) {
const char *name = aIter->data;
const char *value = NULL;
int expected_score = 0;
bool add_extra = false;
value = pcmk__node_attr(node, name, NULL, pcmk__rsc_node_current);
add_extra = add_extra_info(node, node->details->running_rsc,
scheduler, name, &expected_score);
/* Print attribute name and value */
out->message(out, "node-attribute", name, value, add_extra,
expected_score);
}
g_list_free(attr_list);
out->end_list(out);
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
PCMK__OUTPUT_ARGS("node-capacity", "const pcmk_node_t *", "const char *")
static int
node_capacity(pcmk__output_t *out, va_list args)
{
const pcmk_node_t *node = va_arg(args, pcmk_node_t *);
const char *comment = va_arg(args, const char *);
char *dump_text = crm_strdup_printf("%s: %s capacity:",
comment, pcmk__node_name(node));
g_hash_table_foreach(node->priv->utilization, append_dump_text,
&dump_text);
out->list_item(out, NULL, "%s", dump_text);
free(dump_text);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-capacity", "const pcmk_node_t *", "const char *")
static int
node_capacity_xml(pcmk__output_t *out, va_list args)
{
const pcmk_node_t *node = va_arg(args, pcmk_node_t *);
const char *uname = node->priv->name;
const char *comment = va_arg(args, const char *);
xmlNodePtr xml_node = pcmk__output_create_xml_node(out, PCMK_XE_CAPACITY,
PCMK_XA_NODE, uname,
PCMK_XA_COMMENT, comment,
NULL);
g_hash_table_foreach(node->priv->utilization, add_dump_node, xml_node);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-history-list", "pcmk_scheduler_t *", "pcmk_node_t *",
"xmlNode *", "GList *", "GList *", "uint32_t", "uint32_t")
static int
node_history_list(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
pcmk_node_t *node = va_arg(args, pcmk_node_t *);
xmlNode *node_state = va_arg(args, xmlNode *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
uint32_t section_opts = va_arg(args, uint32_t);
uint32_t show_opts = va_arg(args, uint32_t);
xmlNode *lrm_rsc = NULL;
xmlNode *rsc_entry = NULL;
int rc = pcmk_rc_no_output;
lrm_rsc = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL, NULL);
lrm_rsc = pcmk__xe_first_child(lrm_rsc, PCMK__XE_LRM_RESOURCES, NULL, NULL);
/* Print history of each of the node's resources */
for (rsc_entry = pcmk__xe_first_child(lrm_rsc, PCMK__XE_LRM_RESOURCE, NULL,
NULL);
rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) {
const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
pcmk_resource_t *rsc = NULL;
const pcmk_resource_t *parent = NULL;
if (rsc_id == NULL) {
continue; // Malformed entry
}
rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
if (rsc == NULL) {
continue; // Resource was removed from configuration
}
/* We can't use is_filtered here to filter group resources. For is_filtered,
* we have to decide whether to check the parent or not. If we check the
* parent, all elements of a group will always be printed because that's how
* is_filtered works for groups. If we do not check the parent, sometimes
* this will filter everything out.
*
* For other resource types, is_filtered is okay.
*/
parent = pe__const_top_resource(rsc, false);
if (pcmk__is_group(parent)) {
if (!pcmk__str_in_list(rsc_printable_id(rsc), only_rsc,
pcmk__str_star_matches)
&& !pcmk__str_in_list(rsc_printable_id(parent), only_rsc,
pcmk__str_star_matches)) {
continue;
}
} else if (rsc->priv->fns->is_filtered(rsc, only_rsc, TRUE)) {
continue;
}
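/* Without the operations section, print only resources with a nonzero
* fail count
*/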
if (!pcmk_is_set(section_opts, pcmk_section_operations)) {
time_t last_failure = 0;
int failcount = pe_get_failcount(node, rsc, &last_failure,
pcmk__fc_default, NULL);
if (failcount <= 0) {
continue;
}
if (rc == pcmk_rc_no_output) {
rc = pcmk_rc_ok;
out->message(out, "node", node, show_opts, false, only_node,
only_rsc);
}
out->message(out, "resource-history", rsc, rsc_id, false,
failcount, last_failure, false);
} else {
GList *op_list = get_operation_list(rsc_entry);
if (op_list == NULL) {
continue;
}
if (rc == pcmk_rc_no_output) {
rc = pcmk_rc_ok;
out->message(out, "node", node, show_opts, false, only_node,
only_rsc);
}
out->message(out, "resource-operation-list", scheduler, rsc, node,
op_list, show_opts);
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool")
static int
node_list_html(pcmk__output_t *out, va_list args) {
GList *nodes = va_arg(args, GList *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
uint32_t show_opts = va_arg(args, uint32_t);
bool print_spacer G_GNUC_UNUSED = va_arg(args, int);
int rc = pcmk_rc_no_output;
for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *node = (pcmk_node_t *) gIter->data;
if (!pcmk__str_in_list(node->priv->name, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
continue;
}
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Node List");
out->message(out, "node", node, show_opts, true, only_node, only_rsc);
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool")
static int
node_list_text(pcmk__output_t *out, va_list args) {
GList *nodes = va_arg(args, GList *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
uint32_t show_opts = va_arg(args, uint32_t);
bool print_spacer = va_arg(args, int);
/* space-separated lists of node names */
GString *online_nodes = NULL;
GString *online_remote_nodes = NULL;
GString *online_guest_nodes = NULL;
GString *offline_nodes = NULL;
GString *offline_remote_nodes = NULL;
int rc = pcmk_rc_no_output;
for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *node = (pcmk_node_t *) gIter->data;
char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id));
if (!pcmk__str_in_list(node->priv->name, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
free(node_name);
continue;
}
PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node List");
// Determine whether to display node individually or in a list
if (node->details->unclean || node->details->pending
|| (pcmk_is_set(node->priv->flags, pcmk__node_fail_standby)
&& node->details->online)
|| pcmk_is_set(node->priv->flags, pcmk__node_standby)
|| node->details->maintenance
|| pcmk_is_set(show_opts, pcmk_show_rscs_by_node)
|| pcmk_is_set(show_opts, pcmk_show_feature_set)
|| (pe__node_health(node) <= 0)) {
// Display node individually
} else if (node->details->online) {
// Display online node in a list
if (pcmk__is_guest_or_bundle_node(node)) {
pcmk__add_word(&online_guest_nodes, 1024, node_name);
} else if (pcmk__is_remote_node(node)) {
pcmk__add_word(&online_remote_nodes, 1024, node_name);
} else {
pcmk__add_word(&online_nodes, 1024, node_name);
}
free(node_name);
continue;
} else {
// Display offline node in a list
if (pcmk__is_remote_node(node)) {
pcmk__add_word(&offline_remote_nodes, 1024, node_name);
} else if (pcmk__is_guest_or_bundle_node(node)) {
/* ignore offline guest nodes */
} else {
pcmk__add_word(&offline_nodes, 1024, node_name);
}
free(node_name);
continue;
}
/* If we get here, node is in bad state, or we're grouping by node */
out->message(out, "node", node, show_opts, true, only_node, only_rsc);
free(node_name);
}
/* If we're not grouping by node, summarize nodes by status */
if (online_nodes != NULL) {
out->list_item(out, "Online", "[ %s ]",
(const char *) online_nodes->str);
g_string_free(online_nodes, TRUE);
}
if (offline_nodes != NULL) {
out->list_item(out, "OFFLINE", "[ %s ]",
(const char *) offline_nodes->str);
g_string_free(offline_nodes, TRUE);
}
if (online_remote_nodes) {
out->list_item(out, "RemoteOnline", "[ %s ]",
(const char *) online_remote_nodes->str);
g_string_free(online_remote_nodes, TRUE);
}
if (offline_remote_nodes) {
out->list_item(out, "RemoteOFFLINE", "[ %s ]",
(const char *) offline_remote_nodes->str);
g_string_free(offline_remote_nodes, TRUE);
}
if (online_guest_nodes != NULL) {
out->list_item(out, "GuestOnline", "[ %s ]",
(const char *) online_guest_nodes->str);
g_string_free(online_guest_nodes, TRUE);
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool")
static int
node_list_xml(pcmk__output_t *out, va_list args) {
GList *nodes = va_arg(args, GList *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
uint32_t show_opts = va_arg(args, uint32_t);
bool print_spacer G_GNUC_UNUSED = va_arg(args, int);
/* PCMK_XE_NODES acts as the list's element name for CLI tools that use
* pcmk__output_enable_list_element. Otherwise PCMK_XE_NODES is the
* value of the list's PCMK_XA_NAME attribute.
*/
out->begin_list(out, NULL, NULL, PCMK_XE_NODES);
for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *node = (pcmk_node_t *) gIter->data;
if (!pcmk__str_in_list(node->priv->name, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
continue;
}
out->message(out, "node", node, show_opts, true, only_node, only_rsc);
}
out->end_list(out);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-summary", "pcmk_scheduler_t *", "GList *", "GList *",
"uint32_t", "uint32_t", "bool")
static int
node_summary(pcmk__output_t *out, va_list args) {
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
uint32_t section_opts = va_arg(args, uint32_t);
uint32_t show_opts = va_arg(args, uint32_t);
bool print_spacer = va_arg(args, int);
xmlNode *node_state = NULL;
xmlNode *cib_status = pcmk_find_cib_element(scheduler->input,
PCMK_XE_STATUS);
int rc = pcmk_rc_no_output;
if (xmlChildElementCount(cib_status) == 0) {
return rc;
}
for (node_state = pcmk__xe_first_child(cib_status, PCMK__XE_NODE_STATE,
NULL, NULL);
node_state != NULL; node_state = pcmk__xe_next_same(node_state)) {
pcmk_node_t *node = pe_find_node_id(scheduler->nodes,
pcmk__xe_id(node_state));
if (!node || !node->details || !node->details->online) {
continue;
}
if (!pcmk__str_in_list(node->priv->name, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
continue;
}
PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc,
pcmk_is_set(section_opts, pcmk_section_operations) ? "Operations" : "Migration Summary");
out->message(out, "node-history-list", scheduler, node, node_state,
only_node, only_rsc, section_opts, show_opts);
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
PCMK__OUTPUT_ARGS("node-weight", "const pcmk_resource_t *", "const char *",
"const char *", "const char *")
static int
node_weight(pcmk__output_t *out, va_list args)
{
const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *);
const char *prefix = va_arg(args, const char *);
const char *uname = va_arg(args, const char *);
const char *score = va_arg(args, const char *);
if (rsc) {
out->list_item(out, NULL, "%s: %s allocation score on %s: %s",
prefix, rsc->id, uname, score);
} else {
out->list_item(out, NULL, "%s: %s = %s", prefix, uname, score);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-weight", "const pcmk_resource_t *", "const char *",
"const char *", "const char *")
static int
node_weight_xml(pcmk__output_t *out, va_list args)
{
const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *);
const char *prefix = va_arg(args, const char *);
const char *uname = va_arg(args, const char *);
const char *score = va_arg(args, const char *);
xmlNodePtr node = pcmk__output_create_xml_node(out, PCMK_XE_NODE_WEIGHT,
PCMK_XA_FUNCTION, prefix,
PCMK_XA_NODE, uname,
PCMK_XA_SCORE, score,
NULL);
if (rsc) {
crm_xml_add(node, PCMK_XA_ID, rsc->id);
}
return pcmk_rc_ok;
}
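Per the format string and XML attributes above, the same message renders in the two formats roughly as follows (a sketch; the prefix, IDs, and score are invented):

/* text: pcmk__primitive_assign: rsc1 allocation score on node1: 100
 * XML:  <node_weight function="pcmk__primitive_assign" node="node1"
 *                    score="100" id="rsc1"/>
 */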
PCMK__OUTPUT_ARGS("op-history", "xmlNode *", "const char *", "const char *", "int", "uint32_t")
static int
op_history_text(pcmk__output_t *out, va_list args) {
xmlNodePtr xml_op = va_arg(args, xmlNodePtr);
const char *task = va_arg(args, const char *);
const char *interval_ms_s = va_arg(args, const char *);
int rc = va_arg(args, int);
uint32_t show_opts = va_arg(args, uint32_t);
char *buf = op_history_string(xml_op, task, interval_ms_s, rc,
pcmk_is_set(show_opts, pcmk_show_timing));
out->list_item(out, NULL, "%s", buf);
free(buf);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("op-history", "xmlNode *", "const char *", "const char *", "int", "uint32_t")
static int
op_history_xml(pcmk__output_t *out, va_list args) {
xmlNodePtr xml_op = va_arg(args, xmlNodePtr);
const char *task = va_arg(args, const char *);
const char *interval_ms_s = va_arg(args, const char *);
int rc = va_arg(args, int);
uint32_t show_opts = va_arg(args, uint32_t);
const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID);
char *rc_s = pcmk__itoa(rc);
- const char *rc_text = services_ocf_exitcode_str(rc);
+ const char *rc_text = crm_exit_str(rc);
xmlNodePtr node = NULL;
node = pcmk__output_create_xml_node(out, PCMK_XE_OPERATION_HISTORY,
PCMK_XA_CALL, call_id,
PCMK_XA_TASK, task,
PCMK_XA_RC, rc_s,
PCMK_XA_RC_TEXT, rc_text,
NULL);
free(rc_s);
if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) {
char *s = crm_strdup_printf("%sms", interval_ms_s);
crm_xml_add(node, PCMK_XA_INTERVAL, s);
free(s);
}
if (pcmk_is_set(show_opts, pcmk_show_timing)) {
const char *value = NULL;
time_t epoch = 0;
if ((crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
&epoch) == pcmk_ok) && (epoch > 0)) {
char *s = pcmk__epoch2str(&epoch, 0);
crm_xml_add(node, PCMK_XA_LAST_RC_CHANGE, s);
free(s);
}
value = crm_element_value(xml_op, PCMK_XA_EXEC_TIME);
if (value) {
char *s = crm_strdup_printf("%sms", value);
crm_xml_add(node, PCMK_XA_EXEC_TIME, s);
free(s);
}
value = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME);
if (value) {
char *s = crm_strdup_printf("%sms", value);
crm_xml_add(node, PCMK_XA_QUEUE_TIME, s);
free(s);
}
}
return pcmk_rc_ok;
}
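The one functional change in this hunk is the source of rc_text: the numeric rc is now labeled with crm_exit_str() rather than services_ocf_exitcode_str(). A hedged note on what the swap means:

/* Sketch of the distinction (as assumed from the standard Pacemaker
 * headers): services_ocf_exitcode_str() maps OCF resource-agent exit
 * codes (enum ocf_exitcode, PCMK_OCF_*), while crm_exit_str() maps
 * Pacemaker-wide exit codes (crm_exit_t, CRM_EX_*), so the rc-text
 * attribute now reflects the crm_exit_t code space.
 */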
PCMK__OUTPUT_ARGS("promotion-score", "pcmk_resource_t *", "pcmk_node_t *",
"const char *")
static int
promotion_score(pcmk__output_t *out, va_list args)
{
pcmk_resource_t *child_rsc = va_arg(args, pcmk_resource_t *);
pcmk_node_t *chosen = va_arg(args, pcmk_node_t *);
const char *score = va_arg(args, const char *);
if (chosen == NULL) {
out->list_item(out, NULL, "%s promotion score (inactive): %s",
child_rsc->id, score);
} else {
out->list_item(out, NULL, "%s promotion score on %s: %s",
child_rsc->id, pcmk__node_name(chosen), score);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("promotion-score", "pcmk_resource_t *", "pcmk_node_t *",
"const char *")
static int
promotion_score_xml(pcmk__output_t *out, va_list args)
{
pcmk_resource_t *child_rsc = va_arg(args, pcmk_resource_t *);
pcmk_node_t *chosen = va_arg(args, pcmk_node_t *);
const char *score = va_arg(args, const char *);
xmlNodePtr node = pcmk__output_create_xml_node(out, PCMK_XE_PROMOTION_SCORE,
PCMK_XA_ID, child_rsc->id,
PCMK_XA_SCORE, score,
NULL);
if (chosen) {
crm_xml_add(node, PCMK_XA_NODE, chosen->priv->name);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("resource-config", "const pcmk_resource_t *", "bool")
static int
resource_config(pcmk__output_t *out, va_list args) {
const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *);
GString *xml_buf = g_string_sized_new(1024);
bool raw = va_arg(args, int);
formatted_xml_buf(rsc, xml_buf, raw);
out->output_xml(out, PCMK_XE_XML, xml_buf->str);
g_string_free(xml_buf, TRUE);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("resource-config", "const pcmk_resource_t *", "bool")
static int
resource_config_text(pcmk__output_t *out, va_list args) {
pcmk__formatted_printf(out, "Resource XML:\n");
return resource_config(out, args);
}
PCMK__OUTPUT_ARGS("resource-history", "pcmk_resource_t *", "const char *",
"bool", "int", "time_t", "bool")
static int
resource_history_text(pcmk__output_t *out, va_list args) {
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
const char *rsc_id = va_arg(args, const char *);
bool all = va_arg(args, int);
int failcount = va_arg(args, int);
time_t last_failure = va_arg(args, time_t);
bool as_header = va_arg(args, int);
char *buf = resource_history_string(rsc, rsc_id, all, failcount, last_failure);
if (as_header) {
out->begin_list(out, NULL, NULL, "%s", buf);
} else {
out->list_item(out, NULL, "%s", buf);
}
free(buf);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("resource-history", "pcmk_resource_t *", "const char *",
"bool", "int", "time_t", "bool")
static int
resource_history_xml(pcmk__output_t *out, va_list args) {
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
const char *rsc_id = va_arg(args, const char *);
bool all = va_arg(args, int);
int failcount = va_arg(args, int);
time_t last_failure = va_arg(args, time_t);
bool as_header = va_arg(args, int);
xmlNodePtr node = pcmk__output_xml_create_parent(out,
PCMK_XE_RESOURCE_HISTORY,
PCMK_XA_ID, rsc_id,
NULL);
if (rsc == NULL) {
pcmk__xe_set_bool_attr(node, PCMK_XA_ORPHAN, true);
} else if (all || failcount || last_failure > 0) {
char *migration_s = pcmk__itoa(rsc->priv->ban_after_failures);
pcmk__xe_set_props(node,
PCMK_XA_ORPHAN, PCMK_VALUE_FALSE,
PCMK_META_MIGRATION_THRESHOLD, migration_s,
NULL);
free(migration_s);
if (failcount > 0) {
char *s = pcmk__itoa(failcount);
crm_xml_add(node, PCMK_XA_FAIL_COUNT, s);
free(s);
}
if (last_failure > 0) {
char *s = pcmk__epoch2str(&last_failure, 0);
crm_xml_add(node, PCMK_XA_LAST_FAILURE, s);
free(s);
}
}
if (!as_header) {
pcmk__output_xml_pop_parent(out);
}
return pcmk_rc_ok;
}
static void
print_resource_header(pcmk__output_t *out, uint32_t show_opts)
{
if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) {
/* Active resources have already been printed by node */
out->begin_list(out, NULL, NULL, "Inactive Resources");
} else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
out->begin_list(out, NULL, NULL, "Full List of Resources");
} else {
out->begin_list(out, NULL, NULL, "Active Resources");
}
}
PCMK__OUTPUT_ARGS("resource-list", "pcmk_scheduler_t *", "uint32_t", "bool",
"GList *", "GList *", "bool")
static int
resource_list(pcmk__output_t *out, va_list args)
{
pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *);
uint32_t show_opts = va_arg(args, uint32_t);
bool print_summary = va_arg(args, int);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
bool print_spacer = va_arg(args, int);
GList *rsc_iter;
int rc = pcmk_rc_no_output;
bool printed_header = false;
/* If we already showed active resources by node, and
* we're not showing inactive resources, we have nothing to do
*/
if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node) &&
!pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
return rc;
}
/* If we haven't already printed resources grouped by node,
* and brief output was requested, print resource summary */
if (pcmk_is_set(show_opts, pcmk_show_brief)
&& !pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) {
GList *rscs = pe__filter_rsc_list(scheduler->priv->resources, only_rsc);
PCMK__OUTPUT_SPACER_IF(out, print_spacer);
print_resource_header(out, show_opts);
printed_header = true;
rc = pe__rscs_brief_output(out, rscs, show_opts);
g_list_free(rscs);
}
/* For each resource, display it if appropriate */
for (rsc_iter = scheduler->priv->resources;
rsc_iter != NULL; rsc_iter = rsc_iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) rsc_iter->data;
int x;
/* Complex resources may have some sub-resources active and some inactive */
gboolean is_active = rsc->priv->fns->active(rsc, TRUE);
gboolean partially_active = rsc->priv->fns->active(rsc, FALSE);
/* Skip inactive orphans (deleted but still in CIB) */
if (pcmk_is_set(rsc->flags, pcmk__rsc_removed) && !is_active) {
continue;
/* Skip active resources if we already displayed them by node */
} else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) {
if (is_active) {
continue;
}
/* Skip primitives already counted in a brief summary */
} else if (pcmk_is_set(show_opts, pcmk_show_brief)
&& pcmk__is_primitive(rsc)) {
continue;
/* Skip resources that aren't at least partially active,
* unless we're displaying inactive resources
*/
} else if (!partially_active && !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
continue;
} else if (partially_active && !pe__rsc_running_on_any(rsc, only_node)) {
continue;
}
if (!printed_header) {
PCMK__OUTPUT_SPACER_IF(out, print_spacer);
print_resource_header(out, show_opts);
printed_header = true;
}
/* Print this resource */
x = out->message(out, (const char *) rsc->priv->xml->name,
show_opts, rsc, only_node, only_rsc);
if (x == pcmk_rc_ok) {
rc = pcmk_rc_ok;
}
}
if (print_summary && rc != pcmk_rc_ok) {
if (!printed_header) {
PCMK__OUTPUT_SPACER_IF(out, print_spacer);
print_resource_header(out, show_opts);
printed_header = true;
}
if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) {
out->list_item(out, NULL, "No inactive resources");
} else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
out->list_item(out, NULL, "No resources");
} else {
out->list_item(out, NULL, "No active resources");
}
}
if (printed_header) {
out->end_list(out);
}
return rc;
}
PCMK__OUTPUT_ARGS("resource-operation-list", "pcmk_scheduler_t *",
"pcmk_resource_t *", "pcmk_node_t *", "GList *", "uint32_t")
static int
resource_operation_list(pcmk__output_t *out, va_list args)
{
pcmk_scheduler_t *scheduler G_GNUC_UNUSED = va_arg(args,
pcmk_scheduler_t *);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
pcmk_node_t *node = va_arg(args, pcmk_node_t *);
GList *op_list = va_arg(args, GList *);
uint32_t show_opts = va_arg(args, uint32_t);
GList *gIter = NULL;
int rc = pcmk_rc_no_output;
/* Print each operation */
for (gIter = op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *xml_op = (xmlNode *) gIter->data;
const char *task = crm_element_value(xml_op, PCMK_XA_OPERATION);
const char *interval_ms_s = crm_element_value(xml_op,
PCMK_META_INTERVAL);
const char *op_rc = crm_element_value(xml_op, PCMK__XA_RC_CODE);
int op_rc_i;
pcmk__scan_min_int(op_rc, &op_rc_i, 0);
/* Display 0-interval monitors as "probe" */
if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)
&& pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) {
task = "probe";
}
/* If this is the first printed operation, print heading for resource */
if (rc == pcmk_rc_no_output) {
time_t last_failure = 0;
int failcount = pe_get_failcount(node, rsc, &last_failure,
pcmk__fc_default, NULL);
out->message(out, "resource-history", rsc, rsc_printable_id(rsc), true,
failcount, last_failure, true);
rc = pcmk_rc_ok;
}
/* Print the operation */
out->message(out, "op-history", xml_op, task, interval_ms_s,
op_rc_i, show_opts);
}
/* Free the list we created (no need to free the individual items) */
g_list_free(op_list);
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
PCMK__OUTPUT_ARGS("resource-util", "pcmk_resource_t *", "pcmk_node_t *",
"const char *")
static int
resource_util(pcmk__output_t *out, va_list args)
{
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
pcmk_node_t *node = va_arg(args, pcmk_node_t *);
const char *fn = va_arg(args, const char *);
char *dump_text = crm_strdup_printf("%s: %s utilization on %s:",
fn, rsc->id, pcmk__node_name(node));
g_hash_table_foreach(rsc->priv->utilization, append_dump_text,
&dump_text);
out->list_item(out, NULL, "%s", dump_text);
free(dump_text);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("resource-util", "pcmk_resource_t *", "pcmk_node_t *",
"const char *")
static int
resource_util_xml(pcmk__output_t *out, va_list args)
{
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
pcmk_node_t *node = va_arg(args, pcmk_node_t *);
const char *uname = node->priv->name;
const char *fn = va_arg(args, const char *);
xmlNodePtr xml_node = NULL;
xml_node = pcmk__output_create_xml_node(out, PCMK_XE_UTILIZATION,
PCMK_XA_RESOURCE, rsc->id,
PCMK_XA_NODE, uname,
PCMK_XA_FUNCTION, fn,
NULL);
g_hash_table_foreach(rsc->priv->utilization, add_dump_node, xml_node);
return pcmk_rc_ok;
}
static inline const char *
ticket_status(pcmk__ticket_t *ticket)
{
if (pcmk_is_set(ticket->flags, pcmk__ticket_granted)) {
return PCMK_VALUE_GRANTED;
}
return PCMK_VALUE_REVOKED;
}
static inline const char *
ticket_standby_text(pcmk__ticket_t *ticket)
{
return pcmk_is_set(ticket->flags, pcmk__ticket_standby)? " [standby]" : "";
}
PCMK__OUTPUT_ARGS("ticket", "pcmk__ticket_t *", "bool", "bool")
static int
ticket_default(pcmk__output_t *out, va_list args) {
pcmk__ticket_t *ticket = va_arg(args, pcmk__ticket_t *);
bool raw = va_arg(args, int);
bool details = va_arg(args, int);
GString *detail_str = NULL;
if (raw) {
out->list_item(out, ticket->id, "%s", ticket->id);
return pcmk_rc_ok;
}
if (details && g_hash_table_size(ticket->state) > 0) {
GHashTableIter iter;
const char *name = NULL;
const char *value = NULL;
bool already_added = false;
detail_str = g_string_sized_new(100);
pcmk__g_strcat(detail_str, "\t(", NULL);
g_hash_table_iter_init(&iter, ticket->state);
while (g_hash_table_iter_next(&iter, (void **) &name, (void **) &value)) {
if (already_added) {
g_string_append_printf(detail_str, ", %s=", name);
} else {
g_string_append_printf(detail_str, "%s=", name);
already_added = true;
}
if (pcmk__str_any_of(name, PCMK_XA_LAST_GRANTED, "expires", NULL)) {
char *epoch_str = NULL;
long long time_ll;
(void) pcmk__scan_ll(value, &time_ll, 0);
epoch_str = pcmk__epoch2str((const time_t *) &time_ll, 0);
pcmk__g_strcat(detail_str, epoch_str, NULL);
free(epoch_str);
} else {
pcmk__g_strcat(detail_str, value, NULL);
}
}
pcmk__g_strcat(detail_str, ")", NULL);
}
if (ticket->last_granted > -1) {
/* Prior to the introduction of the details & raw arguments to this
* function, last-granted would always be added in this block. We need
* to preserve that behavior. At the same time, we also need to preserve
* the existing behavior from crm_ticket, which would include last-granted
* as part of the (...) detail string.
*
* Luckily we can check detail_str - if it's NULL, either there were no
* details, or we are preserving the previous behavior of this function.
* If it's not NULL, we are either preserving the previous behavior of
* crm_ticket or we were given details=true as an argument.
*/
if (detail_str == NULL) {
char *epoch_str = pcmk__epoch2str(&(ticket->last_granted), 0);
out->list_item(out, NULL, "%s\t%s%s last-granted=\"%s\"",
ticket->id, ticket_status(ticket),
ticket_standby_text(ticket), pcmk__s(epoch_str, ""));
free(epoch_str);
} else {
out->list_item(out, NULL, "%s\t%s%s %s",
ticket->id, ticket_status(ticket),
ticket_standby_text(ticket), detail_str->str);
}
} else {
out->list_item(out, NULL, "%s\t%s%s%s", ticket->id,
ticket_status(ticket),
ticket_standby_text(ticket),
detail_str != NULL ? detail_str->str : "");
}
if (detail_str != NULL) {
g_string_free(detail_str, TRUE);
}
return pcmk_rc_ok;
}
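For orientation, roughly what the three branches above emit (ticket ID, state, and values invented; "\t" marks the literal tab in the format strings):

/* raw:             ticketA
 * without details: ticketA \t granted last-granted="<timestamp>"
 * with details:    ticketA \t granted \t (owner=node1, expires=<timestamp>)
 */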
PCMK__OUTPUT_ARGS("ticket", "pcmk__ticket_t *", "bool", "bool")
static int
ticket_xml(pcmk__output_t *out, va_list args) {
pcmk__ticket_t *ticket = va_arg(args, pcmk__ticket_t *);
bool raw G_GNUC_UNUSED = va_arg(args, int);
bool details G_GNUC_UNUSED = va_arg(args, int);
const char *standby = pcmk__flag_text(ticket->flags, pcmk__ticket_standby);
xmlNodePtr node = NULL;
GHashTableIter iter;
const char *name = NULL;
const char *value = NULL;
node = pcmk__output_create_xml_node(out, PCMK_XE_TICKET,
PCMK_XA_ID, ticket->id,
PCMK_XA_STATUS, ticket_status(ticket),
PCMK_XA_STANDBY, standby,
NULL);
if (ticket->last_granted > -1) {
char *buf = pcmk__epoch2str(&ticket->last_granted, 0);
crm_xml_add(node, PCMK_XA_LAST_GRANTED, buf);
free(buf);
}
g_hash_table_iter_init(&iter, ticket->state);
while (g_hash_table_iter_next(&iter, (void **) &name, (void **) &value)) {
/* PCMK_XA_LAST_GRANTED and "expires" are already added by the check
* for ticket->last_granted above.
*/
if (pcmk__str_any_of(name, PCMK_XA_LAST_GRANTED, PCMK_XA_EXPIRES,
NULL)) {
continue;
}
crm_xml_add(node, name, value);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("ticket-list", "GHashTable *", "bool", "bool", "bool")
static int
ticket_list(pcmk__output_t *out, va_list args) {
GHashTable *tickets = va_arg(args, GHashTable *);
bool print_spacer = va_arg(args, int);
bool raw = va_arg(args, int);
bool details = va_arg(args, int);
GHashTableIter iter;
gpointer value;
if (g_hash_table_size(tickets) == 0) {
return pcmk_rc_no_output;
}
PCMK__OUTPUT_SPACER_IF(out, print_spacer);
/* Print section heading */
out->begin_list(out, NULL, NULL, "Tickets");
/* Print each ticket */
g_hash_table_iter_init(&iter, tickets);
while (g_hash_table_iter_next(&iter, NULL, &value)) {
pcmk__ticket_t *ticket = (pcmk__ticket_t *) value;
out->message(out, "ticket", ticket, raw, details);
}
/* Close section */
out->end_list(out);
return pcmk_rc_ok;
}
static pcmk__message_entry_t fmt_functions[] = {
{ "ban", "default", ban_text },
{ "ban", "html", ban_html },
{ "ban", "xml", ban_xml },
{ "ban-list", "default", ban_list },
{ "bundle", "default", pe__bundle_text },
{ "bundle", "xml", pe__bundle_xml },
{ "bundle", "html", pe__bundle_html },
{ "clone", "default", pe__clone_default },
{ "clone", "xml", pe__clone_xml },
{ "cluster-counts", "default", cluster_counts_text },
{ "cluster-counts", "html", cluster_counts_html },
{ "cluster-counts", "xml", cluster_counts_xml },
{ "cluster-dc", "default", cluster_dc_text },
{ "cluster-dc", "html", cluster_dc_html },
{ "cluster-dc", "xml", cluster_dc_xml },
{ "cluster-options", "default", cluster_options_text },
{ "cluster-options", "html", cluster_options_html },
{ "cluster-options", "log", cluster_options_log },
{ "cluster-options", "xml", cluster_options_xml },
{ "cluster-summary", "default", cluster_summary },
{ "cluster-summary", "html", cluster_summary_html },
{ "cluster-stack", "default", cluster_stack_text },
{ "cluster-stack", "html", cluster_stack_html },
{ "cluster-stack", "xml", cluster_stack_xml },
{ "cluster-times", "default", cluster_times_text },
{ "cluster-times", "html", cluster_times_html },
{ "cluster-times", "xml", cluster_times_xml },
{ "failed-action", "default", failed_action_default },
{ "failed-action", "xml", failed_action_xml },
{ "failed-action-list", "default", failed_action_list },
{ "group", "default", pe__group_default},
{ "group", "xml", pe__group_xml },
{ "maint-mode", "text", cluster_maint_mode_text },
{ "node", "default", node_text },
{ "node", "html", node_html },
{ "node", "xml", node_xml },
{ "node-and-op", "default", node_and_op },
{ "node-and-op", "xml", node_and_op_xml },
{ "node-capacity", "default", node_capacity },
{ "node-capacity", "xml", node_capacity_xml },
{ "node-history-list", "default", node_history_list },
{ "node-list", "default", node_list_text },
{ "node-list", "html", node_list_html },
{ "node-list", "xml", node_list_xml },
{ "node-weight", "default", node_weight },
{ "node-weight", "xml", node_weight_xml },
{ "node-attribute", "default", node_attribute_text },
{ "node-attribute", "html", node_attribute_html },
{ "node-attribute", "xml", node_attribute_xml },
{ "node-attribute-list", "default", node_attribute_list },
{ "node-summary", "default", node_summary },
{ "op-history", "default", op_history_text },
{ "op-history", "xml", op_history_xml },
{ "primitive", "default", pe__resource_text },
{ "primitive", "xml", pe__resource_xml },
{ "primitive", "html", pe__resource_html },
{ "promotion-score", "default", promotion_score },
{ "promotion-score", "xml", promotion_score_xml },
{ "resource-config", "default", resource_config },
{ "resource-config", "text", resource_config_text },
{ "resource-history", "default", resource_history_text },
{ "resource-history", "xml", resource_history_xml },
{ "resource-list", "default", resource_list },
{ "resource-operation-list", "default", resource_operation_list },
{ "resource-util", "default", resource_util },
{ "resource-util", "xml", resource_util_xml },
{ "ticket", "default", ticket_default },
{ "ticket", "xml", ticket_xml },
{ "ticket-list", "default", ticket_list },
{ NULL, NULL, NULL }
};
void
pe__register_messages(pcmk__output_t *out) {
pcmk__register_messages(out, fmt_functions);
}
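As a usage sketch (hedged: assumes an out object created elsewhere, e.g. via pcmk__output_new(), and argument lists matching the PCMK__OUTPUT_ARGS declarations above):

    pe__register_messages(out);
    out->message(out, "node-list", scheduler->nodes, only_node, only_rsc,
                 show_opts, false);
    out->message(out, "ticket-list", scheduler->priv->ticket_constraints,
                 false, false, false);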
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 0f0a5b5302..a58bde754b 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,5146 +1,5145 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
CRM_TRACE_INIT_DATA(pe_status);
// A (parsed) resource action history entry
struct action_history {
pcmk_resource_t *rsc; // Resource that history is for
pcmk_node_t *node; // Node that history is for
xmlNode *xml; // History entry XML
// Parsed from entry XML
const char *id; // XML ID of history entry
const char *key; // Operation key of action
const char *task; // Action name
const char *exit_reason; // Exit reason given for result
guint interval_ms; // Action interval
int call_id; // Call ID of action
int expected_exit_status; // Expected exit status of action
int exit_status; // Actual exit status of action
int execution_status; // Execution status of action
};
/* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
* use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
* flag is stringified more readably in log messages.
*/
#define set_config_flag(scheduler, option, flag) do { \
GHashTable *config_hash = (scheduler)->priv->options; \
const char *scf_value = pcmk__cluster_option(config_hash, (option)); \
\
if (scf_value != NULL) { \
if (crm_is_true(scf_value)) { \
(scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Scheduler", \
crm_system_name, (scheduler)->flags, \
(flag), #flag); \
} else { \
(scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Scheduler", \
crm_system_name, (scheduler)->flags, \
(flag), #flag); \
} \
} \
} while(0)
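Because the macro stringifies the flag for trace logging, call sites pass the option name and flag directly, as unpack_config() does below:

    set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
                    pcmk__sched_fencing_enabled);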
static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
xmlNode *xml_op, xmlNode **last_failure,
enum pcmk__on_fail *failed);
static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
pcmk_node_t *this_node);
static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
bool overwrite, pcmk_scheduler_t *scheduler);
static void determine_online_status(const xmlNode *node_state,
pcmk_node_t *this_node,
pcmk_scheduler_t *scheduler);
static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
pcmk_scheduler_t *scheduler);
/*!
* \internal
* \brief Check whether a node is a dangling guest node
*
* \param[in] node Node to check
*
* \return true if \p node had a Pacemaker Remote connection resource with a
* launcher that was removed from the CIB, otherwise false.
*/
static bool
is_dangling_guest_node(pcmk_node_t *node)
{
return pcmk__is_pacemaker_remote_node(node)
&& (node->priv->remote != NULL)
&& (node->priv->remote->priv->launcher == NULL)
&& pcmk_is_set(node->priv->remote->flags,
pcmk__rsc_removed_launched);
}
/*!
* \brief Schedule a fence action for a node
*
* \param[in,out] scheduler Scheduler data
* \param[in,out] node Node to fence
* \param[in] reason Text description of why fencing is needed
* \param[in] priority_delay Whether to consider
* \c PCMK_OPT_PRIORITY_FENCING_DELAY
*/
void
pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
const char *reason, bool priority_delay)
{
CRM_CHECK(node, return);
if (pcmk__is_guest_or_bundle_node(node)) {
// Fence a guest or bundle node by marking its launcher as failed
pcmk_resource_t *rsc = node->priv->remote->priv->launcher;
if (!pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
crm_notice("Not fencing guest node %s "
"(otherwise would because %s): "
"its guest resource %s is unmanaged",
pcmk__node_name(node), reason, rsc->id);
} else {
pcmk__sched_warn(scheduler,
"Guest node %s will be fenced "
"(by recovering its guest resource %s): %s",
pcmk__node_name(node), rsc->id, reason);
/* We don't mark the node as unclean because that would prevent the
* node from running resources. We want to allow it to run resources
* in this transition if the recovery succeeds.
*/
pcmk__set_node_flags(node, pcmk__node_remote_reset);
pcmk__set_rsc_flags(rsc,
pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
}
}
} else if (is_dangling_guest_node(node)) {
crm_info("Cleaning up dangling connection for guest node %s: "
"fencing was already done because %s, "
"and guest resource no longer exists",
pcmk__node_name(node), reason);
pcmk__set_rsc_flags(node->priv->remote,
pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
} else if (pcmk__is_remote_node(node)) {
pcmk_resource_t *rsc = node->priv->remote;
if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
crm_notice("Not fencing remote node %s "
"(otherwise would because %s): connection is unmanaged",
pcmk__node_name(node), reason);
} else if (!pcmk_is_set(node->priv->flags, pcmk__node_remote_reset)) {
pcmk__set_node_flags(node, pcmk__node_remote_reset);
pcmk__sched_warn(scheduler, "Remote node %s %s: %s",
pcmk__node_name(node),
pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
reason);
}
node->details->unclean = TRUE;
// No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
} else if (node->details->unclean) {
crm_trace("Cluster node %s %s because %s",
pcmk__node_name(node),
pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
reason);
} else {
pcmk__sched_warn(scheduler, "Cluster node %s %s: %s",
pcmk__node_name(node),
pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
reason);
node->details->unclean = TRUE;
pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
}
}
// @TODO xpaths can't handle templates, rules, or id-refs
// nvpair with provides or requires set to unfencing
#define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \
"[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \
"or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \
"and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
// unfencing in rsc_defaults or any resource
#define XPATH_ENABLE_UNFENCING \
"/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \
"//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \
"|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
"/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
static void
set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
{
xmlXPathObjectPtr result = NULL;
if (!pcmk_is_set(scheduler->flags, flag)) {
result = xpath_search(scheduler->input, xpath);
if (result && (numXpathResults(result) > 0)) {
pcmk__set_scheduler_flags(scheduler, flag);
}
freeXpathObject(result);
}
}
gboolean
unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
{
const char *value = NULL;
GHashTable *config_hash = pcmk__strkey_table(free, free);
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.now = scheduler->priv->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
scheduler->priv->options = config_hash;
pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data,
config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS,
scheduler);
pcmk__validate_cluster_options(config_hash);
set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES,
pcmk__sched_probe_resources);
if (!pcmk_is_set(scheduler->flags, pcmk__sched_probe_resources)) {
crm_info("Startup probes: disabled (dangerous)");
}
value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
if (value && crm_is_true(value)) {
crm_info("Watchdog-based self-fencing will be performed via SBD if "
"fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
" is nonzero");
pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing);
}
/* Set certain flags via xpath here, so they can be used before the relevant
* configuration sections are unpacked.
*/
set_if_xpath(pcmk__sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
scheduler);
value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT);
pcmk_parse_interval_spec(value, &(scheduler->priv->fence_timeout_ms));
crm_debug("Default fencing action timeout: %s",
pcmk__readable_interval(scheduler->priv->fence_timeout_ms));
set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
pcmk__sched_fencing_enabled);
if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
crm_debug("STONITH of failed nodes is enabled");
} else {
crm_debug("STONITH of failed nodes is disabled");
}
scheduler->priv->fence_action =
pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_ACTION);
if (!strcmp(scheduler->priv->fence_action, PCMK__ACTION_POWEROFF)) {
pcmk__warn_once(pcmk__wo_poweroff,
"Support for " PCMK_OPT_STONITH_ACTION " of "
"'" PCMK__ACTION_POWEROFF "' is deprecated and will be "
"removed in a future release "
"(use '" PCMK_ACTION_OFF "' instead)");
scheduler->priv->fence_action = PCMK_ACTION_OFF;
}
crm_trace("STONITH will %s nodes", scheduler->priv->fence_action);
set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING,
pcmk__sched_concurrent_fencing);
if (pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) {
crm_debug("Concurrent fencing is enabled");
} else {
crm_debug("Concurrent fencing is disabled");
}
value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
if (value) {
pcmk_parse_interval_spec(value,
&(scheduler->priv->priority_fencing_ms));
crm_trace("Priority fencing delay is %s",
pcmk__readable_interval(scheduler->priv->priority_fencing_ms));
}
set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
pcmk__sched_stop_all);
crm_debug("Stop all active resources: %s",
pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all));
set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
pcmk__sched_symmetric_cluster);
if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) {
crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
}
value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
} else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
} else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
scheduler->no_quorum_policy = pcmk_no_quorum_demote;
} else if (pcmk__strcase_any_of(value, PCMK_VALUE_FENCE,
PCMK_VALUE_FENCE_LEGACY, NULL)) {
if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
int do_panic = 0;
crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
&do_panic);
if (do_panic
|| pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) {
scheduler->no_quorum_policy = pcmk_no_quorum_fence;
} else {
crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY
" to 'stop': cluster has never had quorum");
scheduler->no_quorum_policy = pcmk_no_quorum_stop;
}
} else {
pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
" to 'stop' because fencing is disabled");
scheduler->no_quorum_policy = pcmk_no_quorum_stop;
}
} else {
scheduler->no_quorum_policy = pcmk_no_quorum_stop;
}
switch (scheduler->no_quorum_policy) {
case pcmk_no_quorum_freeze:
crm_debug("On loss of quorum: Freeze resources");
break;
case pcmk_no_quorum_stop:
crm_debug("On loss of quorum: Stop ALL resources");
break;
case pcmk_no_quorum_demote:
crm_debug("On loss of quorum: "
"Demote promotable resources and stop other resources");
break;
case pcmk_no_quorum_fence:
crm_notice("On loss of quorum: Fence all remaining nodes");
break;
case pcmk_no_quorum_ignore:
crm_notice("On loss of quorum: Ignore");
break;
}
set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES,
pcmk__sched_stop_removed_resources);
if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
crm_trace("Orphan resources are stopped");
} else {
crm_trace("Orphan resources are ignored");
}
set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS,
pcmk__sched_cancel_removed_actions);
if (pcmk_is_set(scheduler->flags, pcmk__sched_cancel_removed_actions)) {
crm_trace("Orphan resource actions are stopped");
} else {
crm_trace("Orphan resource actions are ignored");
}
value = pcmk__cluster_option(config_hash, PCMK__OPT_REMOVE_AFTER_STOP);
if (value != NULL) {
if (crm_is_true(value)) {
pcmk__set_scheduler_flags(scheduler, pcmk__sched_remove_after_stop);
pcmk__warn_once(pcmk__wo_remove_after,
"Support for the " PCMK__OPT_REMOVE_AFTER_STOP
" cluster property is deprecated and will be "
"removed in a future release");
} else {
pcmk__clear_scheduler_flags(scheduler,
pcmk__sched_remove_after_stop);
}
}
set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
pcmk__sched_in_maintenance);
crm_trace("Maintenance mode: %s",
pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance));
set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
pcmk__sched_start_failure_fatal);
if (pcmk_is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) {
crm_trace("Start failures are always fatal");
} else {
crm_trace("Start failures are handled by failcount");
}
if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
pcmk__sched_startup_fencing);
}
if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
crm_trace("Unseen nodes will be fenced");
} else {
pcmk__warn_once(pcmk__wo_blind,
"Blind faith: not fencing unseen nodes");
}
pe__unpack_node_health_scores(scheduler);
scheduler->priv->placement_strategy =
pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
crm_trace("Placement strategy: %s", scheduler->priv->placement_strategy);
set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
pcmk__sched_shutdown_lock);
if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
pcmk_parse_interval_spec(value, &(scheduler->priv->shutdown_lock_ms));
crm_trace("Resources will be locked to nodes that were cleanly "
"shut down (locks expire after %s)",
pcmk__readable_interval(scheduler->priv->shutdown_lock_ms));
} else {
crm_trace("Resources will not be locked to nodes that were cleanly "
"shut down");
}
value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
pcmk_parse_interval_spec(value, &(scheduler->priv->node_pending_ms));
if (scheduler->priv->node_pending_ms == 0U) {
crm_trace("Do not fence pending nodes");
} else {
crm_trace("Fence pending nodes after %s",
pcmk__readable_interval(scheduler->priv->node_pending_ms));
}
return TRUE;
}
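For reference, a hypothetical cluster_property_set of the kind pe__unpack_dataset_nvpairs() consumes at the top of this function (option names taken from the PCMK_OPT_* constants used above; IDs invented):

/* <crm_config>
 *   <cluster_property_set id="cib-bootstrap-options">
 *     <nvpair id="opt-fencing" name="stonith-enabled" value="true"/>
 *     <nvpair id="opt-quorum" name="no-quorum-policy" value="stop"/>
 *   </cluster_property_set>
 * </crm_config>
 */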
/*!
* \internal
* \brief Create a new node object in scheduler data
*
* \param[in] id ID of new node
* \param[in] uname Name of new node
* \param[in] type Type of new node
* \param[in] score Score of new node
* \param[in,out] scheduler Scheduler data
*
* \return Newly created node object
* \note The returned object is part of the scheduler data and should not be
* freed separately.
*/
pcmk_node_t *
pe_create_node(const char *id, const char *uname, const char *type,
int score, pcmk_scheduler_t *scheduler)
{
enum pcmk__node_variant variant = pcmk__node_variant_cluster;
pcmk_node_t *new_node = NULL;
if (pcmk_find_node(scheduler, uname) != NULL) {
pcmk__config_warn("More than one node entry has name '%s'", uname);
}
if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
pcmk__str_null_matches|pcmk__str_casei)) {
variant = pcmk__node_variant_cluster;
} else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
variant = pcmk__node_variant_remote;
} else {
pcmk__config_err("Ignoring node %s with unrecognized type '%s'",
pcmk__s(uname, "without name"), type);
return NULL;
}
new_node = calloc(1, sizeof(pcmk_node_t));
if (new_node == NULL) {
pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
uname);
return NULL;
}
new_node->assign = calloc(1, sizeof(struct pcmk__node_assignment));
new_node->details = calloc(1, sizeof(struct pcmk__node_details));
new_node->priv = calloc(1, sizeof(pcmk__node_private_t));
if ((new_node->assign == NULL) || (new_node->details == NULL)
|| (new_node->priv == NULL)) {
free(new_node->assign);
free(new_node->details);
free(new_node->priv);
free(new_node);
pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
uname);
return NULL;
}
crm_trace("Creating node for entry %s/%s", uname, id);
new_node->assign->score = score;
new_node->priv->id = id;
new_node->priv->name = uname;
new_node->priv->flags = pcmk__node_probes_allowed;
new_node->details->online = FALSE;
new_node->details->shutdown = FALSE;
new_node->details->running_rsc = NULL;
new_node->priv->scheduler = scheduler;
new_node->priv->variant = variant;
new_node->priv->attrs = pcmk__strkey_table(free, free);
new_node->priv->utilization = pcmk__strkey_table(free, free);
new_node->priv->digest_cache = pcmk__strkey_table(free, pe__free_digests);
if (pcmk__is_pacemaker_remote_node(new_node)) {
pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote");
pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_remote_nodes);
} else {
pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster");
}
scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
pe__cmp_node_name);
return new_node;
}
static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
{
xmlNode *attr_set = NULL;
xmlNode *attr = NULL;
const char *container_id = pcmk__xe_id(xml_obj);
const char *remote_name = NULL;
const char *remote_server = NULL;
const char *remote_port = NULL;
const char *connect_timeout = "60s";
    const char *remote_allow_migrate = NULL;
const char *is_managed = NULL;
for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) {
if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) {
continue;
}
for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
attr != NULL; attr = pcmk__xe_next(attr)) {
const char *value = crm_element_value(attr, PCMK_XA_VALUE);
const char *name = crm_element_value(attr, PCMK_XA_NAME);
if (name == NULL) { // Sanity
continue;
}
if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
remote_name = value;
} else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
remote_server = value;
} else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
remote_port = value;
} else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
connect_timeout = value;
} else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
remote_allow_migrate = value;
} else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
is_managed = value;
}
}
}
if (remote_name == NULL) {
return NULL;
}
if (pe_find_resource(data->priv->resources, remote_name) != NULL) {
return NULL;
}
pe_create_remote_xml(parent, remote_name, container_id,
remote_allow_migrate, is_managed,
connect_timeout, remote_server, remote_port);
return remote_name;
}
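A hypothetical guest-node definition that this function recognizes (meta-attribute names from the PCMK_META_* constants matched above; IDs and values invented); pe_create_remote_xml() then injects a matching ocf:pacemaker:remote primitive into the parent:

/* <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
 *   <meta_attributes id="vm1-meta">
 *     <nvpair id="vm1-node" name="remote-node" value="guest1"/>
 *     <nvpair id="vm1-addr" name="remote-addr" value="192.168.122.50"/>
 *   </meta_attributes>
 * </primitive>
 */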
static void
handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
{
if ((new_node->priv->variant == pcmk__node_variant_remote)
&& (new_node->priv->remote == NULL)) {
/* Ignore fencing for remote nodes that don't have a connection resource
* associated with them. This happens when remote node entries get left
* in the nodes section after the connection resource is removed.
*/
return;
}
if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
// All nodes are unclean until we've seen their status entry
new_node->details->unclean = TRUE;
} else {
// Blind faith ...
new_node->details->unclean = FALSE;
}
}
gboolean
unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_obj = NULL;
pcmk_node_t *new_node = NULL;
const char *id = NULL;
const char *uname = NULL;
const char *type = NULL;
for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL);
xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) {
int score = 0;
int rc = pcmk__xe_get_score(xml_obj, PCMK_XA_SCORE, &score, 0);
new_node = NULL;
id = crm_element_value(xml_obj, PCMK_XA_ID);
uname = crm_element_value(xml_obj, PCMK_XA_UNAME);
type = crm_element_value(xml_obj, PCMK_XA_TYPE);
crm_trace("Processing node %s/%s", uname, id);
if (id == NULL) {
pcmk__config_err("Ignoring <" PCMK_XE_NODE
"> entry in configuration without id");
continue;
}
if (rc != pcmk_rc_ok) {
// Not possible with schema validation enabled
pcmk__config_warn("Using 0 as score for node %s "
"because '%s' is not a valid score: %s",
pcmk__s(uname, "without name"),
crm_element_value(xml_obj, PCMK_XA_SCORE),
pcmk_rc_str(rc));
}
new_node = pe_create_node(id, uname, type, score, scheduler);
if (new_node == NULL) {
return FALSE;
}
handle_startup_fencing(scheduler, new_node);
add_node_attrs(xml_obj, new_node, FALSE, scheduler);
crm_trace("Done with node %s",
crm_element_value(xml_obj, PCMK_XA_UNAME));
}
}
return TRUE;
}
static void
unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
{
const char *launcher_id = NULL;
if (rsc->priv->children != NULL) {
g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher,
scheduler);
return;
}
launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER);
if ((launcher_id != NULL)
&& !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) {
pcmk_resource_t *launcher = pe_find_resource(scheduler->priv->resources,
launcher_id);
if (launcher != NULL) {
rsc->priv->launcher = launcher;
launcher->priv->launched =
g_list_append(launcher->priv->launched, rsc);
pcmk__rsc_trace(rsc, "Resource %s's launcher is %s",
rsc->id, launcher_id);
} else {
pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s",
rsc->id, launcher_id);
}
}
}
gboolean
unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_obj = NULL;
/* Create remote nodes and guest nodes from the resource configuration
* before unpacking resources.
*/
for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
const char *new_node_id = NULL;
/* Check for remote nodes, which are defined by ocf:pacemaker:remote
* primitives.
*/
if (xml_contains_remote_node(xml_obj)) {
new_node_id = pcmk__xe_id(xml_obj);
/* The pcmk_find_node() check ensures we don't iterate over an
* expanded node that has already been added to the node list
*/
if (new_node_id
&& (pcmk_find_node(scheduler, new_node_id) == NULL)) {
crm_trace("Found remote node %s defined by resource %s",
new_node_id, pcmk__xe_id(xml_obj));
pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
0, scheduler);
}
continue;
}
/* Check for guest nodes, which are defined by special meta-attributes
* of a primitive of any type (for example, VirtualDomain or Xen).
*/
if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
/* This will add an ocf:pacemaker:remote primitive to the
* configuration for the guest node's connection, to be unpacked
* later.
*/
new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
scheduler);
if (new_node_id
&& (pcmk_find_node(scheduler, new_node_id) == NULL)) {
crm_trace("Found guest node %s in resource %s",
new_node_id, pcmk__xe_id(xml_obj));
pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
0, scheduler);
}
continue;
}
/* Check for guest nodes inside a group. Clones are currently not
* supported as guest nodes.
*/
if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
xmlNode *xml_obj2 = NULL;
for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) {
new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
scheduler);
if (new_node_id
&& (pcmk_find_node(scheduler, new_node_id) == NULL)) {
crm_trace("Found guest node %s in resource %s inside group %s",
new_node_id, pcmk__xe_id(xml_obj2),
pcmk__xe_id(xml_obj));
pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
0, scheduler);
}
}
}
}
return TRUE;
}
/* Call this after all the nodes and resources have been
* unpacked, but before the status section is read.
*
* A remote node's online status is reflected by the state
* of the remote node's connection resource. We need to link
* the remote node to this connection resource so we can have
* easy access to the connection resource during the scheduler calculations.
*/
static void
link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
{
pcmk_node_t *remote_node = NULL;
if (!pcmk_is_set(new_rsc->flags, pcmk__rsc_is_remote_connection)) {
return;
}
if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
/* remote_nodes and remote_resources are not linked in quick location calculations */
return;
}
remote_node = pcmk_find_node(scheduler, new_rsc->id);
CRM_CHECK(remote_node != NULL, return);
pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
new_rsc->id, pcmk__node_name(remote_node));
remote_node->priv->remote = new_rsc;
if (new_rsc->priv->launcher == NULL) {
/* Handle start-up fencing for remote nodes (as opposed to guest nodes)
* the same as is done for cluster nodes.
*/
handle_startup_fencing(scheduler, remote_node);
} else {
/* pe_create_node() marks the new node as "remote" or "cluster"; now
* that we know the node is a guest node, update it correctly.
*/
pcmk__insert_dup(remote_node->priv->attrs,
CRM_ATTR_KIND, "container");
}
}
/*!
* \internal
* \brief Parse configuration XML for resource information
*
* \param[in] xml_resources Top of resource configuration XML
* \param[in,out] scheduler Scheduler data
*
* \return TRUE
*
* \note unpack_remote_nodes() MUST be called before this, so that the nodes can
* be used when pe__unpack_resource() calls resource_location()
*/
gboolean
unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_obj = NULL;
GList *gIter = NULL;
scheduler->priv->templates = pcmk__strkey_table(free, pcmk__free_idref);
for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
pcmk_resource_t *new_rsc = NULL;
const char *id = pcmk__xe_id(xml_obj);
if (pcmk__str_empty(id)) {
pcmk__config_err("Ignoring <%s> resource without ID",
xml_obj->name);
continue;
}
if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
if (g_hash_table_lookup_extended(scheduler->priv->templates, id,
NULL, NULL) == FALSE) {
                /* Record the template's ID anyway, so we know it exists even
                 * before any resource references it. */
pcmk__insert_dup(scheduler->priv->templates, id, NULL);
}
continue;
}
crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
scheduler) == pcmk_rc_ok) {
scheduler->priv->resources =
g_list_append(scheduler->priv->resources, new_rsc);
pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
} else {
pcmk__config_err("Ignoring <%s> resource '%s' "
"because configuration is invalid",
xml_obj->name, id);
}
}
for (gIter = scheduler->priv->resources;
gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
unpack_launcher(rsc, scheduler);
link_rsc2remotenode(scheduler, rsc);
}
scheduler->priv->resources = g_list_sort(scheduler->priv->resources,
pe__cmp_rsc_priority);
if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
/* Ignore */
} else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)
&& !pcmk_is_set(scheduler->flags, pcmk__sched_have_fencing)) {
pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
pcmk__config_err("Either configure some or disable STONITH with the "
PCMK_OPT_STONITH_ENABLED " option");
pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
}
return TRUE;
}
/*!
* \internal
* \brief Parse configuration XML for fencing topology information
*
* \param[in] xml_fencing_topology Top of fencing topology configuration XML
* \param[in,out] scheduler Scheduler data
*
* \return void
*/
void
pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_obj = NULL;
int id = 0;
for (xml_obj = pcmk__xe_first_child(xml_fencing_topology, PCMK_XE_FENCING_LEVEL, NULL, NULL);
xml_obj != NULL; xml_obj = pcmk__xe_next_same(xml_obj)) {
crm_element_value_int(xml_obj, PCMK_XA_INDEX, &id);
// Ensure an ID was given
if (pcmk__str_empty(pcmk__xe_id(xml_obj))) {
pcmk__config_warn("Ignoring registration for topology level without ID");
continue;
}
// Ensure level ID is in allowed range
if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
pcmk__config_warn("Ignoring topology registration with invalid level %d",
id);
continue;
}
}
}
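An illustrative entry for the validation loop above (element and attribute names from PCMK_XE_FENCING_LEVEL and PCMK_XA_INDEX; target and device invented):

/* <fencing-topology>
 *   <fencing-level id="fl-node1-1" target="node1" index="1"
 *                  devices="fence_ipmi"/>
 * </fencing-topology>
 */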
gboolean
unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_tag = NULL;
scheduler->priv->tags = pcmk__strkey_table(free, pcmk__free_idref);
for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL);
xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) {
xmlNode *xml_obj_ref = NULL;
const char *tag_id = pcmk__xe_id(xml_tag);
if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) {
continue;
}
if (tag_id == NULL) {
pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
(const char *) xml_tag->name);
continue;
}
for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL);
xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
const char *obj_ref = pcmk__xe_id(xml_obj_ref);
if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) {
continue;
}
if (obj_ref == NULL) {
pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
xml_obj_ref->name, tag_id);
continue;
}
pcmk__add_idref(scheduler->priv->tags, tag_id, obj_ref);
}
}
return TRUE;
}
/* The ticket state section:
* "/cib/status/tickets/ticket_state" */
static gboolean
unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
{
const char *ticket_id = NULL;
const char *granted = NULL;
const char *last_granted = NULL;
const char *standby = NULL;
xmlAttrPtr xIter = NULL;
pcmk__ticket_t *ticket = NULL;
ticket_id = pcmk__xe_id(xml_ticket);
if (pcmk__str_empty(ticket_id)) {
return FALSE;
}
crm_trace("Processing ticket state for %s", ticket_id);
ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints,
ticket_id);
if (ticket == NULL) {
ticket = ticket_new(ticket_id, scheduler);
if (ticket == NULL) {
return FALSE;
}
}
for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = pcmk__xml_attr_value(xIter);
if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
continue;
}
pcmk__insert_dup(ticket->state, prop_name, prop_value);
}
granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
if (granted && crm_is_true(granted)) {
pcmk__set_ticket_flags(ticket, pcmk__ticket_granted);
crm_info("We have ticket '%s'", ticket->id);
} else {
pcmk__clear_ticket_flags(ticket, pcmk__ticket_granted);
crm_info("We do not have ticket '%s'", ticket->id);
}
last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
if (last_granted) {
long long last_granted_ll = 0LL;
int rc = pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
if (rc != pcmk_rc_ok) {
crm_warn("Using %lld instead of invalid " PCMK_XA_LAST_GRANTED
" value '%s' in state for ticket %s: %s",
last_granted_ll, last_granted, ticket->id,
pcmk_rc_str(rc));
}
ticket->last_granted = (time_t) last_granted_ll;
}
standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
if (standby && crm_is_true(standby)) {
pcmk__set_ticket_flags(ticket, pcmk__ticket_standby);
if (pcmk_is_set(ticket->flags, pcmk__ticket_granted)) {
crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
}
} else {
pcmk__clear_ticket_flags(ticket, pcmk__ticket_standby);
}
crm_trace("Done with ticket state for %s", ticket_id);
return TRUE;
}
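/* Illustrative sketch (hypothetical ticket and values): a status entry such as
 *
 *   <ticket_state id="ticketA" granted="true" last-granted="1700000000"
 *                 standby="false"/>
 *
 * would set pcmk__ticket_granted, record last_granted as an epoch time, and
 * leave pcmk__ticket_standby cleared for ticketA.
 */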
static gboolean
unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_obj = NULL;
for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL);
xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) {
continue;
}
unpack_ticket_state(xml_obj, scheduler);
}
return TRUE;
}
static void
unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
pcmk_scheduler_t *scheduler)
{
const char *discovery = NULL;
const xmlNode *attrs = NULL;
pcmk_resource_t *rsc = NULL;
int maint = 0;
if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
return;
}
if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
return;
}
crm_trace("Processing Pacemaker Remote node %s",
pcmk__node_name(this_node));
pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE),
&maint, 0);
if (maint) {
pcmk__set_node_flags(this_node, pcmk__node_remote_maint);
} else {
pcmk__clear_node_flags(this_node, pcmk__node_remote_maint);
}
rsc = this_node->priv->remote;
if (!pcmk_is_set(this_node->priv->flags, pcmk__node_remote_reset)) {
this_node->details->unclean = FALSE;
pcmk__set_node_flags(this_node, pcmk__node_seen);
}
attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
NULL);
add_node_attrs(attrs, this_node, TRUE, scheduler);
if (pe__shutdown_requested(this_node)) {
crm_info("%s is shutting down", pcmk__node_name(this_node));
this_node->details->shutdown = TRUE;
}
if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
pcmk__rsc_node_current))) {
crm_info("%s is in standby mode", pcmk__node_name(this_node));
pcmk__set_node_flags(this_node, pcmk__node_standby);
}
if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
pcmk__rsc_node_current))
|| ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed))) {
crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
this_node->details->maintenance = TRUE;
}
discovery = pcmk__node_attr(this_node,
PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
NULL, pcmk__rsc_node_current);
if ((discovery != NULL) && !crm_is_true(discovery)) {
pcmk__warn_once(pcmk__wo_rdisc_enabled,
"Support for the "
PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
" node attribute is deprecated and will be removed"
" (and behave as 'true') in a future release.");
if (pcmk__is_remote_node(this_node)
&& !pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
pcmk__config_warn("Ignoring "
PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
" attribute on Pacemaker Remote node %s"
" because fencing is disabled",
pcmk__node_name(this_node));
} else {
/* This is either a remote node with fencing enabled, or a guest
* node. We don't care whether fencing is enabled when fencing guest
* nodes, because they are "fenced" by recovering their containing
* resource.
*/
crm_info("%s has resource discovery disabled",
pcmk__node_name(this_node));
pcmk__clear_node_flags(this_node, pcmk__node_probes_allowed);
}
}
}
/*!
* \internal
* \brief Unpack a cluster node's transient attributes
*
* \param[in] state CIB node state XML
* \param[in,out] node Cluster node whose attributes are being unpacked
* \param[in,out] scheduler Scheduler data
*/
static void
unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
pcmk_scheduler_t *scheduler)
{
const char *discovery = NULL;
const xmlNode *attrs = pcmk__xe_first_child(state,
PCMK__XE_TRANSIENT_ATTRIBUTES,
NULL, NULL);
add_node_attrs(attrs, node, TRUE, scheduler);
if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
pcmk__rsc_node_current))) {
crm_info("%s is in standby mode", pcmk__node_name(node));
pcmk__set_node_flags(node, pcmk__node_standby);
}
if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
pcmk__rsc_node_current))) {
crm_info("%s is in maintenance mode", pcmk__node_name(node));
node->details->maintenance = TRUE;
}
discovery = pcmk__node_attr(node,
PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
NULL, pcmk__rsc_node_current);
if ((discovery != NULL) && !crm_is_true(discovery)) {
pcmk__config_warn("Ignoring "
PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
" attribute for %s because disabling resource"
" discovery is not allowed for cluster nodes",
pcmk__node_name(node));
}
}
/*!
* \internal
* \brief Unpack a node state entry (first pass)
*
* Unpack one node state entry from status. This unpacks information from the
 * \c PCMK__XE_NODE_STATE element itself and node attributes inside it, but not
* the resource history inside it. Multiple passes through the status are needed
* to fully unpack everything.
*
* \param[in] state CIB node state XML
* \param[in,out] scheduler Scheduler data
*/
static void
unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
{
const char *id = NULL;
const char *uname = NULL;
pcmk_node_t *this_node = NULL;
id = crm_element_value(state, PCMK_XA_ID);
if (id == NULL) {
pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
PCMK_XA_ID);
crm_log_xml_info(state, "missing-id");
return;
}
uname = crm_element_value(state, PCMK_XA_UNAME);
if (uname == NULL) {
/* If a joining peer makes the cluster acquire the quorum from Corosync
* but has not joined the controller CPG membership yet, it's possible
* that the created PCMK__XE_NODE_STATE entry doesn't have a
* PCMK_XA_UNAME yet. Recognize the node as pending and wait for it to
* join CPG.
*/
crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
"without " PCMK_XA_UNAME,
id);
}
this_node = pe_find_node_any(scheduler->nodes, id, uname);
if (this_node == NULL) {
crm_notice("Ignoring recorded state for removed node with name %s and "
PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
return;
}
if (pcmk__is_pacemaker_remote_node(this_node)) {
int remote_fenced = 0;
/* We can't determine the online status of Pacemaker Remote nodes until
* after all resource history has been unpacked. In this first pass, we
* do need to mark whether the node has been fenced, as this plays a
* role during unpacking cluster node resource state.
*/
pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED),
&remote_fenced, 0);
if (remote_fenced) {
pcmk__set_node_flags(this_node, pcmk__node_remote_fenced);
} else {
pcmk__clear_node_flags(this_node, pcmk__node_remote_fenced);
}
return;
}
unpack_transient_attributes(state, this_node, scheduler);
/* Provisionally mark this cluster node as clean. We have at least seen it
* in the current cluster's lifetime.
*/
this_node->details->unclean = FALSE;
pcmk__set_node_flags(this_node, pcmk__node_seen);
crm_trace("Determining online status of cluster node %s (id %s)",
pcmk__node_name(this_node), id);
determine_online_status(state, this_node, scheduler);
if (!pcmk_is_set(scheduler->flags, pcmk__sched_quorate)
&& this_node->details->online
&& (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
/* Everything else should flow from this automatically
* (at least until the scheduler becomes able to migrate off
* healthy resources)
*/
pe_fence_node(scheduler, this_node, "cluster does not have quorum",
FALSE);
}
}
/*!
* \internal
* \brief Unpack nodes' resource history as much as possible
*
* Unpack as many nodes' resource history as possible in one pass through the
* status. We need to process Pacemaker Remote nodes' connections/containers
* before unpacking their history; the connection/container history will be
* in another node's history, so it might take multiple passes to unpack
* everything.
*
* \param[in] status CIB XML status section
* \param[in] fence If true, treat any not-yet-unpacked nodes as unseen
* \param[in,out] scheduler Scheduler data
*
* \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
* or EAGAIN if more unpacking remains to be done)
*/
static int
unpack_node_history(const xmlNode *status, bool fence,
pcmk_scheduler_t *scheduler)
{
int rc = pcmk_rc_ok;
// Loop through all PCMK__XE_NODE_STATE entries in CIB status
for (const xmlNode *state = pcmk__xe_first_child(status,
PCMK__XE_NODE_STATE, NULL,
NULL);
state != NULL; state = pcmk__xe_next_same(state)) {
const char *id = pcmk__xe_id(state);
const char *uname = crm_element_value(state, PCMK_XA_UNAME);
pcmk_node_t *this_node = NULL;
if ((id == NULL) || (uname == NULL)) {
// Warning already logged in first pass through status section
crm_trace("Not unpacking resource history from malformed "
PCMK__XE_NODE_STATE " without id and/or uname");
continue;
}
this_node = pe_find_node_any(scheduler->nodes, id, uname);
if (this_node == NULL) {
// Warning already logged in first pass through status section
crm_trace("Not unpacking resource history for node %s because "
"no longer in configuration", id);
continue;
}
if (pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) {
crm_trace("Not unpacking resource history for node %s because "
"already unpacked", id);
continue;
}
if (fence) {
// We're processing all remaining nodes
} else if (pcmk__is_guest_or_bundle_node(this_node)) {
/* We can unpack a guest node's history only after we've unpacked
* other resource history to the point that we know that the node's
* connection and containing resource are both up.
*/
const pcmk_resource_t *remote = this_node->priv->remote;
const pcmk_resource_t *launcher = remote->priv->launcher;
if ((remote->priv->orig_role != pcmk_role_started)
|| (launcher->priv->orig_role != pcmk_role_started)) {
crm_trace("Not unpacking resource history for guest node %s "
"because launcher and connection are not known to "
"be up", id);
continue;
}
} else if (pcmk__is_remote_node(this_node)) {
/* We can unpack a remote node's history only after we've unpacked
* other resource history to the point that we know that the node's
* connection is up, with the exception of when shutdown locks are
* in use.
*/
pcmk_resource_t *rsc = this_node->priv->remote;
if ((rsc == NULL)
|| (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)
&& (rsc->priv->orig_role != pcmk_role_started))) {
crm_trace("Not unpacking resource history for remote node %s "
"because connection is not known to be up", id);
continue;
}
/* If fencing and shutdown locks are disabled and we're not processing
* unseen nodes, then we don't want to unpack offline nodes until online
* nodes have been unpacked. This allows us to number active clone
* instances first.
*/
} else if (!pcmk_any_flags_set(scheduler->flags,
pcmk__sched_fencing_enabled
|pcmk__sched_shutdown_lock)
&& !this_node->details->online) {
crm_trace("Not unpacking resource history for offline "
"cluster node %s", id);
continue;
}
if (pcmk__is_pacemaker_remote_node(this_node)) {
determine_remote_online_status(scheduler, this_node);
unpack_handle_remote_attrs(this_node, state, scheduler);
}
crm_trace("Unpacking resource history for %snode %s",
(fence? "unseen " : ""), id);
pcmk__set_node_flags(this_node, pcmk__node_unpacked);
unpack_node_lrm(this_node, state, scheduler);
rc = EAGAIN; // Other node histories might depend on this one
}
return rc;
}
/* TODO:
 * - Remove nodes that are down or stopping
 * - Create positive rsc_to_node constraints between resources and the nodes
 *   they are running on
 * - Anything else?
 */
gboolean
unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
{
xmlNode *state = NULL;
crm_trace("Beginning unpack");
if (scheduler->priv->ticket_constraints == NULL) {
scheduler->priv->ticket_constraints =
pcmk__strkey_table(free, destroy_ticket);
}
for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
state = pcmk__xe_next(state)) {
if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
unpack_tickets_state((xmlNode *) state, scheduler);
} else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
unpack_node_state(state, scheduler);
}
}
while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
crm_trace("Another pass through node resource histories is needed");
}
// Now catch any nodes we didn't see
unpack_node_history(status,
pcmk_is_set(scheduler->flags,
pcmk__sched_fencing_enabled),
scheduler);
/* Now that we know where resources are, we can schedule stops of containers
* with failed bundle connections
*/
if (scheduler->priv->stop_needed != NULL) {
for (GList *item = scheduler->priv->stop_needed;
item != NULL; item = item->next) {
pcmk_resource_t *container = item->data;
pcmk_node_t *node = pcmk__current_node(container);
if (node) {
stop_action(container, node, FALSE);
}
}
g_list_free(scheduler->priv->stop_needed);
scheduler->priv->stop_needed = NULL;
}
/* Now that we know status of all Pacemaker Remote connections and nodes,
* we can stop connections for node shutdowns, and check the online status
* of remote/guest nodes that didn't have any node history to unpack.
*/
for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *this_node = gIter->data;
if (!pcmk__is_pacemaker_remote_node(this_node)) {
continue;
}
if (this_node->details->shutdown
&& (this_node->priv->remote != NULL)) {
pe__set_next_role(this_node->priv->remote, pcmk_role_stopped,
"remote shutdown");
}
if (!pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) {
determine_remote_online_status(scheduler, this_node);
}
}
return TRUE;
}
/*!
* \internal
* \brief Unpack node's time when it became a member at the cluster layer
*
* \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry
* \param[in,out] scheduler Scheduler data
*
* \return Epoch time when node became a cluster member
* (or scheduler effective time for legacy entries) if a member,
* 0 if not a member, or -1 if no valid information available
*/
static long long
unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
{
const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
int member = 0;
if (member_time == NULL) {
return -1LL;
} else if (crm_str_to_boolean(member_time, &member) == 1) {
/* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
* recorded as a boolean for a DC < 2.1.7, or the node is pending
* shutdown and has left the CPG, in which case it was set to 1 to avoid
* fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
*
* We return the effective time for in_ccm=1 because what's important to
* avoid fencing is that effective time minus this value is less than
* the pending node timeout.
*/
return member? (long long) get_effective_time(scheduler) : 0LL;
} else {
long long when_member = 0LL;
if ((pcmk__scan_ll(member_time, &when_member,
0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
" in " PCMK__XE_NODE_STATE " entry", member_time);
return -1LL;
}
return when_member;
}
}
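/* Illustrative value mapping (hypothetical inputs) for unpack_node_member():
 *   in_ccm absent        -> -1 (no valid information)
 *   in_ccm="false"/"0"   -> 0 (not a cluster member)
 *   in_ccm="true"        -> scheduler effective time (legacy boolean entry)
 *   in_ccm="1700000000"  -> 1700000000 (epoch time membership began)
 */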
/*!
* \internal
* \brief Unpack node's time when it became online in process group
*
* \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry
*
* \return Epoch time when node became online in process group (or 0 if not
* online, or 1 for legacy online entries)
*/
static long long
unpack_node_online(const xmlNode *node_state)
{
const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
// @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
pcmk__str_casei|pcmk__str_null_matches)) {
return 0LL;
} else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
return 1LL;
} else {
long long when_online = 0LL;
if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
|| (when_online < 0)) {
crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
return 0LL;
}
return when_online;
}
}
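/* Illustrative value mapping (hypothetical inputs) for unpack_node_online():
 *   crmd absent/"offline" -> 0 (offline)
 *   crmd="online"         -> 1 (legacy online entry)
 *   crmd="1700000000"     -> 1700000000 (epoch time of CPG join)
 *   unparsable value      -> 0, with a warning
 */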
/*!
* \internal
* \brief Unpack node attribute for user-requested fencing
*
* \param[in] node Node to check
* \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry in CIB status
*
* \return \c true if fencing has been requested for \p node, otherwise \c false
*/
static bool
unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
{
long long value = 0LL;
int value_i = 0;
int rc = pcmk_rc_ok;
const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
NULL, pcmk__rsc_node_current);
// Value may be boolean or an epoch time
if (crm_str_to_boolean(value_s, &value_i) == 1) {
return (value_i != 0);
}
rc = pcmk__scan_ll(value_s, &value, 0LL);
if (rc == pcmk_rc_ok) {
return (value > 0);
}
crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
"node attribute for %s: %s",
value_s, pcmk__node_name(node), pcmk_rc_str(rc));
return false;
}
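/* Illustrative value mapping (hypothetical inputs) for the
 * PCMK_NODE_ATTR_TERMINATE attribute checked above:
 *   "true"/"yes"/"1" -> fencing requested
 *   "false"/"0"      -> not requested
 *   "1700000000"     -> fencing requested (positive epoch time)
 *   "never"          -> not requested, with a warning
 */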
static gboolean
determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
const xmlNode *node_state,
pcmk_node_t *this_node)
{
gboolean online = FALSE;
const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
long long when_member = unpack_node_member(node_state, scheduler);
long long when_online = unpack_node_online(node_state);
if (when_member <= 0) {
crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
((when_member < 0)? "presumed " : ""));
} else if (when_online > 0) {
if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
online = TRUE;
} else {
crm_debug("Node %s is not ready to run resources: %s",
pcmk__node_name(this_node), join);
}
} else if (!pcmk_is_set(this_node->priv->flags,
pcmk__node_expected_up)) {
crm_trace("Node %s controller is down: "
"member@%lld online@%lld join=%s expected=%s",
pcmk__node_name(this_node), when_member, when_online,
pcmk__s(join, ""), pcmk__s(exp_state, ""));
} else {
/* mark it unclean */
pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
pcmk__node_name(this_node), when_member, when_online,
pcmk__s(join, ""), pcmk__s(exp_state, ""));
}
return online;
}
/*!
* \internal
* \brief Check whether a node has taken too long to join controller group
*
* \param[in,out] scheduler Scheduler data
* \param[in] node Node to check
* \param[in] when_member Epoch time when node became a cluster member
* \param[in] when_online Epoch time when node joined controller group
*
* \return true if node has been pending (on the way up) longer than
* \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
* \note This will also update the cluster's recheck time if appropriate.
*/
static inline bool
pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
long long when_member, long long when_online)
{
if ((scheduler->priv->node_pending_ms > 0U)
&& (when_member > 0) && (when_online <= 0)) {
// There is a timeout on pending nodes, and node is pending
time_t timeout = when_member
+ (scheduler->priv->node_pending_ms / 1000U);
if (get_effective_time(node->priv->scheduler) >= timeout) {
return true; // Node has timed out
}
// Node is pending, but still has time
pe__update_recheck_time(timeout, scheduler, "pending node timeout");
}
return false;
}
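/* Worked example (hypothetical numbers): with node_pending_ms=120000,
 * when_member=1000, and when_online=0, the node times out once the effective
 * time reaches 1000 + 120 = 1120; before that, the cluster recheck time is
 * pulled in to 1120 so the timeout is re-evaluated then.
 */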
static bool
determine_online_status_fencing(pcmk_scheduler_t *scheduler,
const xmlNode *node_state,
pcmk_node_t *this_node)
{
bool termination_requested = unpack_node_terminate(this_node, node_state);
const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
long long when_member = unpack_node_member(node_state, scheduler);
long long when_online = unpack_node_online(node_state);
    /*
     - PCMK__XA_JOIN ::= member|down|pending|banned
     - PCMK_XA_EXPECTED ::= member|down

     @COMPAT with entries recorded for DCs < 2.1.7
     - PCMK__XA_IN_CCM ::= true|false
     - PCMK_XA_CRMD ::= online|offline

     Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
     - PCMK__XA_IN_CCM ::= <timestamp>|0
       Since when the node has been a cluster member. A value of 0 means the
       node is not a cluster member.

     - PCMK_XA_CRMD ::= <timestamp>|0
       Since when the peer has been online in CPG. A value of 0 means the peer
       is offline in CPG.
     */
crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
pcmk__node_name(this_node), when_member, when_online,
pcmk__s(join, ""), pcmk__s(exp_state, ""),
(termination_requested? " (termination requested)" : ""));
if (this_node->details->shutdown) {
crm_debug("%s is shutting down", pcmk__node_name(this_node));
/* Slightly different criteria since we can't shut down a dead peer */
return (when_online > 0);
}
if (when_member < 0) {
pe_fence_node(scheduler, this_node,
"peer has not been seen by the cluster", FALSE);
return false;
}
if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
pe_fence_node(scheduler, this_node,
"peer failed Pacemaker membership criteria", FALSE);
} else if (termination_requested) {
if ((when_member <= 0) && (when_online <= 0)
&& pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
crm_info("%s was fenced as requested", pcmk__node_name(this_node));
return false;
}
pe_fence_node(scheduler, this_node, "fencing was requested", false);
} else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
pcmk__str_null_matches)) {
if (pending_too_long(scheduler, this_node, when_member, when_online)) {
pe_fence_node(scheduler, this_node,
"peer pending timed out on joining the process group",
FALSE);
} else if ((when_member > 0) || (when_online > 0)) {
crm_info("- %s is not ready to run resources",
pcmk__node_name(this_node));
pcmk__set_node_flags(this_node, pcmk__node_standby);
this_node->details->pending = TRUE;
} else {
crm_trace("%s is down or still coming up",
pcmk__node_name(this_node));
}
} else if (when_member <= 0) {
// Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
pe_fence_node(scheduler, this_node,
"peer is no longer part of the cluster", TRUE);
} else if (when_online <= 0) {
pe_fence_node(scheduler, this_node,
"peer process is no longer available", FALSE);
/* Everything is running at this point, now check join state */
} else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
crm_info("%s is active", pcmk__node_name(this_node));
} else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
CRMD_JOINSTATE_DOWN, NULL)) {
crm_info("%s is not ready to run resources",
pcmk__node_name(this_node));
pcmk__set_node_flags(this_node, pcmk__node_standby);
this_node->details->pending = TRUE;
} else {
pe_fence_node(scheduler, this_node, "peer was in an unknown state",
FALSE);
}
return (when_member > 0);
}
static void
determine_remote_online_status(pcmk_scheduler_t *scheduler,
pcmk_node_t *this_node)
{
pcmk_resource_t *rsc = this_node->priv->remote;
pcmk_resource_t *launcher = NULL;
pcmk_node_t *host = NULL;
const char *node_type = "Remote";
if (rsc == NULL) {
/* This is a leftover node state entry for a former Pacemaker Remote
* node whose connection resource was removed. Consider it offline.
*/
crm_trace("Pacemaker Remote node %s is considered OFFLINE because "
"its connection resource has been removed from the CIB",
this_node->priv->id);
this_node->details->online = FALSE;
return;
}
launcher = rsc->priv->launcher;
if (launcher != NULL) {
node_type = "Guest";
if (pcmk__list_of_1(rsc->priv->active_nodes)) {
host = rsc->priv->active_nodes->data;
}
}
/* If the resource is currently started, mark it online. */
if (rsc->priv->orig_role == pcmk_role_started) {
this_node->details->online = TRUE;
}
/* consider this node shutting down if transitioning start->stop */
if ((rsc->priv->orig_role == pcmk_role_started)
&& (rsc->priv->next_role == pcmk_role_stopped)) {
crm_trace("%s node %s shutting down because connection resource is stopping",
node_type, this_node->priv->id);
this_node->details->shutdown = TRUE;
}
/* Now check all the failure conditions. */
if ((launcher != NULL) && pcmk_is_set(launcher->flags, pcmk__rsc_failed)) {
crm_trace("Guest node %s UNCLEAN because guest resource failed",
this_node->priv->id);
this_node->details->online = FALSE;
pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
} else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
crm_trace("%s node %s OFFLINE because connection resource failed",
node_type, this_node->priv->id);
this_node->details->online = FALSE;
} else if ((rsc->priv->orig_role == pcmk_role_stopped)
|| ((launcher != NULL)
&& (launcher->priv->orig_role == pcmk_role_stopped))) {
crm_trace("%s node %s OFFLINE because its resource is stopped",
node_type, this_node->priv->id);
this_node->details->online = FALSE;
pcmk__clear_node_flags(this_node, pcmk__node_remote_reset);
} else if (host && (host->details->online == FALSE)
&& host->details->unclean) {
crm_trace("Guest node %s UNCLEAN because host is unclean",
this_node->priv->id);
this_node->details->online = FALSE;
pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
} else {
crm_trace("%s node %s is %s",
node_type, this_node->priv->id,
this_node->details->online? "ONLINE" : "OFFLINE");
}
}
static void
determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
pcmk_scheduler_t *scheduler)
{
gboolean online = FALSE;
const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
CRM_CHECK(this_node != NULL, return);
this_node->details->shutdown = FALSE;
if (pe__shutdown_requested(this_node)) {
this_node->details->shutdown = TRUE;
} else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
pcmk__set_node_flags(this_node, pcmk__node_expected_up);
}
if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
online = determine_online_status_no_fencing(scheduler, node_state,
this_node);
} else {
online = determine_online_status_fencing(scheduler, node_state,
this_node);
}
if (online) {
this_node->details->online = TRUE;
} else {
/* remove node from contention */
this_node->assign->score = -PCMK_SCORE_INFINITY;
}
if (online && this_node->details->shutdown) {
/* don't run resources here */
this_node->assign->score = -PCMK_SCORE_INFINITY;
}
if (this_node->details->unclean) {
pcmk__sched_warn(scheduler, "%s is unclean",
pcmk__node_name(this_node));
} else if (!this_node->details->online) {
crm_trace("%s is offline", pcmk__node_name(this_node));
} else if (this_node->details->shutdown) {
crm_info("%s is shutting down", pcmk__node_name(this_node));
} else if (this_node->details->pending) {
crm_info("%s is pending", pcmk__node_name(this_node));
} else if (pcmk_is_set(this_node->priv->flags, pcmk__node_standby)) {
crm_info("%s is in standby", pcmk__node_name(this_node));
} else if (this_node->details->maintenance) {
crm_info("%s is in maintenance", pcmk__node_name(this_node));
} else {
crm_info("%s is online", pcmk__node_name(this_node));
}
}
/*!
* \internal
* \brief Find the end of a resource's name, excluding any clone suffix
*
* \param[in] id Resource ID to check
*
* \return Pointer to last character of resource's base name
*/
const char *
pe_base_name_end(const char *id)
{
if (!pcmk__str_empty(id)) {
const char *end = id + strlen(id) - 1;
for (const char *s = end; s > id; --s) {
switch (*s) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
case ':':
return (s == end)? s : (s - 1);
default:
return end;
}
}
return end;
}
return NULL;
}
/*!
* \internal
* \brief Get a resource name excluding any clone suffix
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_strip(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
char *basename = NULL;
pcmk__assert(end != NULL);
basename = strndup(last_rsc_id, end - last_rsc_id + 1);
pcmk__assert(basename != NULL);
return basename;
}
/*!
* \internal
* \brief Get the name of the first instance of a cloned resource
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name plus :0
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_zero(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;
    pcmk__assert(end != NULL);
    base_name_len = end - last_rsc_id + 1;
zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
memcpy(zero, last_rsc_id, base_name_len);
zero[base_name_len] = ':';
zero[base_name_len + 1] = '0';
return zero;
}
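/* Illustrative mappings for the three helpers above (hypothetical IDs):
 *   pe_base_name_end("myclone:10") -> pointer to the final 'e' of "myclone"
 *   clone_strip("myclone:10")      -> "myclone" (caller must free())
 *   clone_zero("myclone:10")       -> "myclone:0" (caller must free())
 * IDs without a clone suffix pass through: clone_strip("rsc1") -> "rsc1".
 */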
static pcmk_resource_t *
create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
pcmk_scheduler_t *scheduler)
{
pcmk_resource_t *rsc = NULL;
xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
crm_log_xml_debug(xml_rsc, "Orphan resource");
if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
return NULL;
}
if (xml_contains_remote_node(xml_rsc)) {
pcmk_node_t *node;
crm_debug("Detected orphaned remote node %s", rsc_id);
node = pcmk_find_node(scheduler, rsc_id);
if (node == NULL) {
node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, 0,
scheduler);
}
link_rsc2remotenode(scheduler, rsc);
if (node) {
crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
node->details->shutdown = TRUE;
}
}
if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
// This removed resource needs to be mapped to a launcher
crm_trace("Launched resource %s was removed from the configuration",
rsc_id);
pcmk__set_rsc_flags(rsc, pcmk__rsc_removed_launched);
}
pcmk__set_rsc_flags(rsc, pcmk__rsc_removed);
scheduler->priv->resources = g_list_append(scheduler->priv->resources, rsc);
return rsc;
}
/*!
* \internal
* \brief Create orphan instance for anonymous clone resource history
*
* \param[in,out] parent Clone resource that orphan will be added to
* \param[in] rsc_id Orphan's resource ID
* \param[in] node Where orphan is active (for logging only)
* \param[in,out] scheduler Scheduler data
*
* \return Newly added orphaned instance of \p parent
*/
static pcmk_resource_t *
create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
{
pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
pcmk_resource_t *orphan = NULL;
// find_rsc() because we might be a cloned group
orphan = top->priv->fns->find_rsc(top, rsc_id, NULL,
pcmk_rsc_match_clone_only);
pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
top->id, parent->id, rsc_id, pcmk__node_name(node));
return orphan;
}
/*!
* \internal
* \brief Check a node for an instance of an anonymous clone
*
* Return a child instance of the specified anonymous clone, in order of
* preference: (1) the instance running on the specified node, if any;
* (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX
* instances); (3) a newly created orphan (that is, \c PCMK_META_CLONE_MAX
* instances are already active).
*
* \param[in,out] scheduler Scheduler data
* \param[in] node Node on which to check for instance
* \param[in,out] parent Clone to check
* \param[in] rsc_id Name of cloned resource in history (no instance)
*/
static pcmk_resource_t *
find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
pcmk_resource_t *parent, const char *rsc_id)
{
GList *rIter = NULL;
pcmk_resource_t *rsc = NULL;
pcmk_resource_t *inactive_instance = NULL;
gboolean skip_inactive = FALSE;
pcmk__assert(pcmk__is_anonymous_clone(parent));
// Check for active (or partially active, for cloned groups) instance
pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
rsc_id, pcmk__node_name(node), parent->id);
for (rIter = parent->priv->children;
(rIter != NULL) && (rsc == NULL); rIter = rIter->next) {
GList *locations = NULL;
pcmk_resource_t *child = rIter->data;
/* Check whether this instance is already known to be active or pending
* anywhere, at this stage of unpacking. Because this function is called
* for a resource before the resource's individual operation history
* entries are unpacked, locations will generally not contain the
* desired node.
*
* However, there are three exceptions:
* (1) when child is a cloned group and we have already unpacked the
* history of another member of the group on the same node;
* (2) when we've already unpacked the history of another numbered
* instance on the same node (which can happen if
* PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
* (3) when we re-run calculations on the same scheduler data as part of
* a simulation.
*/
child->priv->fns->location(child, &locations, pcmk__rsc_node_current
|pcmk__rsc_node_pending);
if (locations) {
/* We should never associate the same numbered anonymous clone
* instance with multiple nodes, and clone instances can't migrate,
* so there must be only one location, regardless of history.
*/
CRM_LOG_ASSERT(locations->next == NULL);
if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
/* This child instance is active on the requested node, so check
* for a corresponding configured resource. We use find_rsc()
* instead of child because child may be a cloned group, and we
* need the particular member corresponding to rsc_id.
*
* If the history entry is orphaned, rsc will be NULL.
*/
rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL,
pcmk_rsc_match_clone_only);
if (rsc) {
/* If there are multiple instance history entries for an
* anonymous clone in a single node's history (which can
* happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
* to false), we want to consider the instances beyond the
* first as orphans, even if there are inactive instance
* numbers available.
*/
if (rsc->priv->active_nodes != NULL) {
crm_notice("Active (now-)anonymous clone %s has "
"multiple (orphan) instance histories on %s",
parent->id, pcmk__node_name(node));
skip_inactive = TRUE;
rsc = NULL;
} else {
pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
}
}
}
g_list_free(locations);
} else {
pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
if (!skip_inactive && !inactive_instance
&& !pcmk_is_set(child->flags, pcmk__rsc_blocked)) {
// Remember one inactive instance in case we don't find active
inactive_instance =
parent->priv->fns->find_rsc(child, rsc_id, NULL,
pcmk_rsc_match_clone_only);
/* ... but don't use it if it was already associated with a
* pending action on another node
*/
if (inactive_instance != NULL) {
const pcmk_node_t *pending_node = NULL;
pending_node = inactive_instance->priv->pending_node;
if ((pending_node != NULL)
&& !pcmk__same_node(pending_node, node)) {
inactive_instance = NULL;
}
}
}
}
}
if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
pcmk__rsc_trace(parent, "Resource %s, empty slot",
inactive_instance->id);
rsc = inactive_instance;
}
/* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
* PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
* don't want to consume a valid instance number for unclean nodes. Such
* instances may appear to be active according to the history, but should be
* considered inactive, so we can start an instance elsewhere. Treat such
* instances as orphans.
*
* An exception is instances running on guest nodes -- since guest node
* "fencing" is actually just a resource stop, requires shouldn't apply.
*
* @TODO Ideally, we'd use an inactive instance number if it is not needed
* for any clean instances. However, we don't know that at this point.
*/
if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)
&& (!node->details->online || node->details->unclean)
&& !pcmk__is_guest_or_bundle_node(node)
&& !pe__is_universal_clone(parent, scheduler)) {
rsc = NULL;
}
if (rsc == NULL) {
rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
}
return rsc;
}
static pcmk_resource_t *
unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
const char *rsc_id)
{
pcmk_resource_t *rsc = NULL;
pcmk_resource_t *parent = NULL;
crm_trace("looking for %s", rsc_id);
rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
if (rsc == NULL) {
/* If we didn't find the resource by its name in the operation history,
* check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
* we create a single :0 orphan to match against here.
*/
char *clone0_id = clone_zero(rsc_id);
pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources,
clone0_id);
if (clone0 && !pcmk_is_set(clone0->flags, pcmk__rsc_unique)) {
rsc = clone0;
parent = uber_parent(clone0);
crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
} else {
crm_trace("%s is not known as %s either (orphan)",
rsc_id, clone0_id);
}
free(clone0_id);
} else if (rsc->priv->variant > pcmk__rsc_variant_primitive) {
crm_trace("Resource history for %s is orphaned "
"because it is no longer primitive", rsc_id);
return NULL;
} else {
parent = uber_parent(rsc);
}
if (pcmk__is_anonymous_clone(parent)) {
if (pcmk__is_bundled(parent)) {
rsc = pe__find_bundle_replica(parent->priv->parent, node);
} else {
char *base = clone_strip(rsc_id);
rsc = find_anonymous_clone(scheduler, node, parent, base);
free(base);
pcmk__assert(rsc != NULL);
}
}
if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
&& !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) {
pcmk__str_update(&(rsc->priv->history_id), rsc_id);
pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
rsc_id, pcmk__node_name(node), rsc->id,
pcmk_is_set(rsc->flags, pcmk__rsc_removed)? " (ORPHAN)" : "");
}
return rsc;
}
static pcmk_resource_t *
process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
pcmk_scheduler_t *scheduler)
{
pcmk_resource_t *rsc = NULL;
const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
crm_debug("Detected orphan resource %s on %s",
rsc_id, pcmk__node_name(node));
rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
if (rsc == NULL) {
return NULL;
}
if (!pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
} else {
CRM_CHECK(rsc != NULL, return NULL);
pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
"__orphan_do_not_run__", scheduler);
}
return rsc;
}
static void
process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
enum pcmk__on_fail on_fail)
{
pcmk_node_t *tmpnode = NULL;
char *reason = NULL;
enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore;
pcmk_scheduler_t *scheduler = NULL;
bool known_active = false;
pcmk__assert(rsc != NULL);
scheduler = rsc->priv->scheduler;
known_active = (rsc->priv->orig_role > pcmk_role_stopped);
pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
rsc->id, pcmk_role_text(rsc->priv->orig_role),
pcmk__node_name(node), pcmk__on_fail_text(on_fail));
/* process current state */
if (rsc->priv->orig_role != pcmk_role_unknown) {
pcmk_resource_t *iter = rsc;
while (iter) {
if (g_hash_table_lookup(iter->priv->probed_nodes,
node->priv->id) == NULL) {
pcmk_node_t *n = pe__copy_node(node);
pcmk__rsc_trace(rsc, "%s (%s in history) known on %s",
rsc->id,
pcmk__s(rsc->priv->history_id, "the same"),
pcmk__node_name(n));
g_hash_table_insert(iter->priv->probed_nodes,
(gpointer) n->priv->id, n);
}
if (pcmk_is_set(iter->flags, pcmk__rsc_unique)) {
break;
}
iter = iter->priv->parent;
}
}
/* If a managed resource is believed to be running, but node is down ... */
if (known_active && !node->details->online && !node->details->maintenance
&& pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
gboolean should_fence = FALSE;
/* If this is a guest node, fence it (regardless of whether fencing is
* enabled, because guest node fencing is done by recovery of the
* container resource rather than by the fencer). Mark the resource
* we're processing as failed. When the guest comes back up, its
* operation history in the CIB will be cleared, freeing the affected
* resource to run again once we are sure we know its state.
*/
if (pcmk__is_guest_or_bundle_node(node)) {
pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
should_fence = TRUE;
} else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
if (pcmk__is_remote_node(node)
&& (node->priv->remote != NULL)
&& !pcmk_is_set(node->priv->remote->flags,
pcmk__rsc_failed)) {
/* Setting unseen means that fencing of the remote node will
* occur only if the connection resource is not going to start
* somewhere. This allows connection resources on a failed
* cluster node to move to another node without requiring the
* remote nodes to be fenced as well.
*/
pcmk__clear_node_flags(node, pcmk__node_seen);
reason = crm_strdup_printf("%s is active there (fencing will be"
" revoked if remote connection can "
"be re-established elsewhere)",
rsc->id);
}
should_fence = TRUE;
}
if (should_fence) {
if (reason == NULL) {
reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
}
pe_fence_node(scheduler, node, reason, FALSE);
}
free(reason);
}
    /* In order to calculate priority_fencing_delay correctly, save the
     * failure information and pass it to native_add_running().
     */
save_on_fail = on_fail;
if (node->details->unclean) {
/* No extra processing needed
* Also allows resources to be started again after a node is shot
*/
on_fail = pcmk__on_fail_ignore;
}
switch (on_fail) {
case pcmk__on_fail_ignore:
/* nothing to do */
break;
case pcmk__on_fail_demote:
pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
demote_action(rsc, node, FALSE);
break;
case pcmk__on_fail_fence_node:
/* treat it as if it is still running
* but also mark the node as unclean
*/
reason = crm_strdup_printf("%s failed there", rsc->id);
pe_fence_node(scheduler, node, reason, FALSE);
free(reason);
break;
case pcmk__on_fail_standby_node:
pcmk__set_node_flags(node,
pcmk__node_standby|pcmk__node_fail_standby);
break;
case pcmk__on_fail_block:
/* is_managed == FALSE will prevent any
* actions being sent for the resource
*/
pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked);
break;
case pcmk__on_fail_ban:
/* make sure it comes up somewhere else
* or not at all
*/
resource_location(rsc, node, -PCMK_SCORE_INFINITY,
"__action_migration_auto__", scheduler);
break;
case pcmk__on_fail_stop:
pe__set_next_role(rsc, pcmk_role_stopped,
PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
break;
case pcmk__on_fail_restart:
if (known_active) {
pcmk__set_rsc_flags(rsc,
pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
stop_action(rsc, node, FALSE);
}
break;
case pcmk__on_fail_restart_container:
pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) {
/* A bundle's remote connection can run on a different node than
* the bundle's container. We don't necessarily know where the
* container is running yet, so remember it and add a stop
* action for it later.
*/
scheduler->priv->stop_needed =
g_list_prepend(scheduler->priv->stop_needed,
rsc->priv->launcher);
} else if (rsc->priv->launcher != NULL) {
stop_action(rsc->priv->launcher, node, FALSE);
} else if (known_active) {
stop_action(rsc, node, FALSE);
}
break;
case pcmk__on_fail_reset_remote:
pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
tmpnode = NULL;
if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
tmpnode = pcmk_find_node(scheduler, rsc->id);
}
if (pcmk__is_remote_node(tmpnode)
&& !pcmk_is_set(tmpnode->priv->flags,
pcmk__node_remote_fenced)) {
/* The remote connection resource failed in a way that
* should result in fencing the remote node.
*/
pe_fence_node(scheduler, tmpnode,
"remote connection is unrecoverable", FALSE);
}
}
            /* Require the stop action regardless of whether fencing is
             * occurring or not.
             */
if (known_active) {
stop_action(rsc, node, FALSE);
}
/* if reconnect delay is in use, prevent the connection from exiting the
* "STOPPED" role until the failure is cleared by the delay timeout. */
if (rsc->priv->remote_reconnect_ms > 0U) {
pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
}
break;
}
/* Ensure a remote connection failure forces an unclean Pacemaker Remote
* node to be fenced. By marking the node as seen, the failure will result
* in a fencing operation regardless if we're going to attempt to reconnect
* in this transition.
*/
if (pcmk_all_flags_set(rsc->flags,
pcmk__rsc_failed|pcmk__rsc_is_remote_connection)) {
tmpnode = pcmk_find_node(scheduler, rsc->id);
if (tmpnode && tmpnode->details->unclean) {
pcmk__set_node_flags(tmpnode, pcmk__node_seen);
}
}
if (known_active) {
if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
crm_notice("Removed resource %s is active on %s and will be "
"stopped when possible",
rsc->id, pcmk__node_name(node));
} else {
crm_notice("Removed resource %s must be stopped manually on %s "
"because " PCMK_OPT_STOP_ORPHAN_RESOURCES
" is set to false", rsc->id, pcmk__node_name(node));
}
}
native_add_running(rsc, node, scheduler,
(save_on_fail != pcmk__on_fail_ignore));
switch (on_fail) {
case pcmk__on_fail_ignore:
break;
case pcmk__on_fail_demote:
case pcmk__on_fail_block:
pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
break;
default:
pcmk__set_rsc_flags(rsc,
pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
break;
}
} else if ((rsc->priv->history_id != NULL)
&& (strchr(rsc->priv->history_id, ':') != NULL)) {
/* Only do this for older status sections that included instance numbers
* Otherwise stopped instances will appear as orphans
*/
pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)",
rsc->priv->history_id, rsc->id);
free(rsc->priv->history_id);
rsc->priv->history_id = NULL;
} else {
GList *possible_matches = pe__resource_actions(rsc, node,
PCMK_ACTION_STOP, FALSE);
GList *gIter = possible_matches;
for (; gIter != NULL; gIter = gIter->next) {
pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
pcmk__set_action_flags(stop, pcmk__action_optional);
}
g_list_free(possible_matches);
}
/* A successful stop after migrate_to on the migration source doesn't make
* the partially migrated resource stopped on the migration target.
*/
if ((rsc->priv->orig_role == pcmk_role_stopped)
&& (rsc->priv->active_nodes != NULL)
&& (rsc->priv->partial_migration_target != NULL)
&& pcmk__same_node(rsc->priv->partial_migration_source, node)) {
rsc->priv->orig_role = pcmk_role_started;
}
}
/* create active recurring operations as optional */
static void
process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
int start_index, int stop_index,
GList *sorted_op_list, pcmk_scheduler_t *scheduler)
{
int counter = -1;
const char *task = NULL;
const char *status = NULL;
GList *gIter = sorted_op_list;
pcmk__assert(rsc != NULL);
pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
rsc->id, start_index, stop_index);
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
guint interval_ms = 0;
char *key = NULL;
const char *id = pcmk__xe_id(rsc_op);
counter++;
if (node->details->online == FALSE) {
pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
rsc->id, pcmk__node_name(node));
break;
/* Need to check if there's a monitor for role="Stopped" */
} else if (start_index < stop_index && counter <= stop_index) {
pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
id, pcmk__node_name(node));
continue;
} else if (counter < start_index) {
pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
id, pcmk__node_name(node), counter);
continue;
}
crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
if (interval_ms == 0) {
pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
id, pcmk__node_name(node));
continue;
}
status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
id, pcmk__node_name(node));
continue;
}
task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
/* create the action */
key = pcmk__op_key(rsc->id, task, interval_ms);
pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
custom_action(rsc, key, task, node, TRUE, scheduler);
}
}
void
calculate_active_ops(const GList *sorted_op_list, int *start_index,
int *stop_index)
{
int counter = -1;
int implied_monitor_start = -1;
int implied_clone_start = -1;
const char *task = NULL;
const char *status = NULL;
*stop_index = -1;
*start_index = -1;
for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
const xmlNode *rsc_op = (const xmlNode *) iter->data;
counter++;
task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
&& pcmk__str_eq(status, "0", pcmk__str_casei)) {
*stop_index = counter;
} else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
*start_index = counter;
} else if ((implied_monitor_start <= *stop_index)
&& pcmk__str_eq(task, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
implied_monitor_start = counter;
}
} else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
PCMK_ACTION_DEMOTE, NULL)) {
implied_clone_start = counter;
}
}
if (*start_index == -1) {
if (implied_clone_start != -1) {
*start_index = implied_clone_start;
} else if (implied_monitor_start != -1) {
*start_index = implied_monitor_start;
}
}
}
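/* Worked example (hypothetical history): given a sorted op list of
 *   0: start, 1: monitor (rc-code=0), 2: stop (op-status=0)
 * the loop sets *start_index=0, implied_monitor_start=1, and *stop_index=2.
 * Because the most recent completed stop follows the start, callers such as
 * process_recurring() treat the resource as inactive on that node.
 */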
// If resource history entry has shutdown lock, remember lock node and time
static void
unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
{
time_t lock_time = 0; // When lock started (i.e. node shutdown time)
if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK,
&lock_time) == pcmk_ok) && (lock_time != 0)) {
if ((scheduler->priv->shutdown_lock_ms > 0U)
&& (get_effective_time(scheduler)
> (lock_time + (scheduler->priv->shutdown_lock_ms / 1000U)))) {
pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
rsc->id, pcmk__node_name(node));
pe__clear_resource_history(rsc, node);
} else {
rsc->priv->lock_node = node;
rsc->priv->lock_time = lock_time;
}
}
}
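/* Illustrative sketch (hypothetical ID and timestamp): a history entry such as
 *
 *   <lrm_resource id="rsc1" ... shutdown-lock="1700000000">
 *
 * locks rsc1 to this node until 1700000000 plus the configured
 * shutdown-lock-limit; once the effective time passes that point, the lock
 * expires and the resource's history on the node is cleared.
 */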
/*!
* \internal
* \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status
*
* \param[in,out] node Node whose status is being unpacked
 * \param[in] lrm_resource \c PCMK__XE_LRM_RESOURCE XML being unpacked
* \param[in,out] scheduler Scheduler data
*
* \return Resource corresponding to the entry, or NULL if no operation history
*/
static pcmk_resource_t *
unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
pcmk_scheduler_t *scheduler)
{
GList *gIter = NULL;
int stop_index = -1;
int start_index = -1;
enum rsc_role_e req_role = pcmk_role_unknown;
const char *rsc_id = pcmk__xe_id(lrm_resource);
pcmk_resource_t *rsc = NULL;
GList *op_list = NULL;
GList *sorted_op_list = NULL;
xmlNode *rsc_op = NULL;
xmlNode *last_failure = NULL;
enum pcmk__on_fail on_fail = pcmk__on_fail_ignore;
enum rsc_role_e saved_role = pcmk_role_unknown;
if (rsc_id == NULL) {
pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
" entry: No " PCMK_XA_ID);
crm_log_xml_info(lrm_resource, "missing-id");
return NULL;
}
crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
rsc_id, pcmk__node_name(node));
/* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
* them
*/
for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
NULL);
rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) {
op_list = g_list_prepend(op_list, rsc_op);
}
if (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
if (op_list == NULL) {
// If there are no operations, there is nothing to do
return NULL;
}
}
/* find the resource */
rsc = unpack_find_resource(scheduler, node, rsc_id);
if (rsc == NULL) {
if (op_list == NULL) {
// If there are no operations, there is nothing to do
return NULL;
} else {
rsc = process_orphan_resource(lrm_resource, node, scheduler);
}
}
pcmk__assert(rsc != NULL);
// Check whether the resource is "shutdown-locked" to this node
if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
}
/* process operations */
saved_role = rsc->priv->orig_role;
rsc->priv->orig_role = pcmk_role_unknown;
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
}
/* create active recurring operations as optional */
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
scheduler);
/* no need to free the contents */
g_list_free(sorted_op_list);
process_rsc_state(rsc, node, on_fail);
if (get_target_role(rsc, &req_role)) {
if ((rsc->priv->next_role == pcmk_role_unknown)
|| (req_role < rsc->priv->next_role)) {
pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
} else if (req_role > rsc->priv->next_role) {
pcmk__rsc_info(rsc,
"%s: Not overwriting calculated next role %s"
" with requested next role %s",
rsc->id, pcmk_role_text(rsc->priv->next_role),
pcmk_role_text(req_role));
}
}
if (saved_role > rsc->priv->orig_role) {
rsc->priv->orig_role = saved_role;
}
return rsc;
}
static void
handle_removed_launched_resources(const xmlNode *lrm_rsc_list,
pcmk_scheduler_t *scheduler)
{
for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL,
NULL, NULL);
rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
pcmk_resource_t *rsc;
pcmk_resource_t *launcher = NULL;
const char *rsc_id;
const char *launcher_id = NULL;
if (!pcmk__xe_is(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
continue;
}
launcher_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
if ((launcher_id == NULL) || (rsc_id == NULL)) {
continue;
}
launcher = pe_find_resource(scheduler->priv->resources, launcher_id);
if (launcher == NULL) {
continue;
}
rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
if ((rsc == NULL) || (rsc->priv->launcher != NULL)
|| !pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) {
continue;
}
pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s",
rsc->id, launcher_id);
rsc->priv->launcher = launcher;
launcher->priv->launched = g_list_append(launcher->priv->launched,
rsc);
}
}
/*!
* \internal
* \brief Unpack one node's lrm status section
*
* \param[in,out] node Node whose status is being unpacked
* \param[in] xml CIB node state XML
* \param[in,out] scheduler Scheduler data
*/
static void
unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
pcmk_scheduler_t *scheduler)
{
bool found_removed_launched_resource = false;
// Drill down to PCMK__XE_LRM_RESOURCES section
xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
if (xml == NULL) {
return;
}
xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
if (xml == NULL) {
return;
}
// Unpack each PCMK__XE_LRM_RESOURCE entry
for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
PCMK__XE_LRM_RESOURCE,
NULL, NULL);
rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) {
pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
if ((rsc != NULL)
&& pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) {
found_removed_launched_resource = true;
}
}
/* Now that all resource state has been unpacked for this node, map any
* removed launched resources to their launchers.
*/
if (found_removed_launched_resource) {
handle_removed_launched_resources(xml, scheduler);
}
}
static void
set_active(pcmk_resource_t *rsc)
{
const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
if (top && pcmk_is_set(top->flags, pcmk__rsc_promotable)) {
rsc->priv->orig_role = pcmk_role_unpromoted;
} else {
rsc->priv->orig_role = pcmk_role_started;
}
}
static void
set_node_score(gpointer key, gpointer value, gpointer user_data)
{
pcmk_node_t *node = value;
int *score = user_data;
node->assign->score = *score;
}
#define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
"/" PCMK__XE_NODE_STATE
#define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \
"/" PCMK__XE_LRM_RESOURCES \
"/" PCMK__XE_LRM_RESOURCE
#define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
static xmlNode *
find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
int target_rc, pcmk_scheduler_t *scheduler)
{
GString *xpath = NULL;
xmlNode *xml = NULL;
CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
return NULL);
xpath = g_string_sized_new(256);
pcmk__g_strcat(xpath,
XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
NULL);
/* Need to check against transition_magic too? */
if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
pcmk__g_strcat(xpath,
" and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
NULL);
} else if ((source != NULL)
&& (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
pcmk__g_strcat(xpath,
" and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
NULL);
} else {
g_string_append_c(xpath, ']');
}
xml = get_xpath_object((const char *) xpath->str, scheduler->input,
LOG_DEBUG);
g_string_free(xpath, TRUE);
if (xml && target_rc >= 0) {
int rc = PCMK_OCF_UNKNOWN_ERROR;
int status = PCMK_EXEC_ERROR;
crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc);
crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status);
if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
return NULL;
}
}
return xml;
}
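/*!
* \internal
* \brief Find a resource's history on a given node
*
* \param[in] rsc_id ID of resource whose history to find
* \param[in] node_name Name of node whose history to search
* \param[in,out] scheduler Scheduler data
*
* \return Resource's PCMK__XE_LRM_RESOURCE history XML on node if found,
* otherwise NULL
*/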
static xmlNode *
find_lrm_resource(const char *rsc_id, const char *node_name,
pcmk_scheduler_t *scheduler)
{
GString *xpath = NULL;
xmlNode *xml = NULL;
CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
xpath = g_string_sized_new(256);
pcmk__g_strcat(xpath,
XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
NULL);
xml = get_xpath_object((const char *) xpath->str, scheduler->input,
LOG_DEBUG);
g_string_free(xpath, TRUE);
return xml;
}
/*!
* \internal
* \brief Check whether a resource has no completed action history on a node
*
* \param[in,out] rsc Resource to check
* \param[in] node_name Node to check
*
* \return true if \p rsc is unknown on \p node_name, otherwise false
*/
static bool
unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
{
bool result = false;
xmlXPathObjectPtr search;
char *xpath = NULL;
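    /* Match any completed history entry, that is, any entry whose rc-code is
     * something other than PCMK_OCF_UNKNOWN (which is what pending results
     * are typically recorded with)
     */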
xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']"
SUB_XPATH_LRM_RSC_OP
"[@" PCMK__XA_RC_CODE "!='%d']",
node_name, rsc->id, PCMK_OCF_UNKNOWN);
search = xpath_search(rsc->priv->scheduler->input, xpath);
result = (numXpathResults(search) == 0);
freeXpathObject(search);
free(xpath);
return result;
}
/*!
* \internal
* \brief Check whether a probe/monitor indicating the resource was not running
* on a node happened after some event
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] xml_op Event that monitor is being compared to
* \param[in,out] scheduler Scheduler data
*
* \return true if such a monitor happened after the event, otherwise false
*/
static bool
monitor_not_running_after(const char *rsc_id, const char *node_name,
const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
{
/* Any probe/monitor operation on the node indicating it was not running
* there
*/
xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
NULL, PCMK_OCF_NOT_RUNNING, scheduler);
return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0);
}
/*!
* \internal
* \brief Check whether any non-monitor operation on a node happened after some
* event
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] xml_op Event that non-monitor is being compared to
* \param[in,out] scheduler Scheduler data
*
* \return true if such an operation happened after the event, otherwise false
*/
static bool
non_monitor_after(const char *rsc_id, const char *node_name,
const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
{
xmlNode *lrm_resource = NULL;
lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
if (lrm_resource == NULL) {
return false;
}
for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
NULL, NULL);
op != NULL; op = pcmk__xe_next_same(op)) {
const char *task = NULL;
if (op == xml_op) {
continue;
}
task = crm_element_value(op, PCMK_XA_OPERATION);
if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
NULL)
&& pe__is_newer_op(op, xml_op) > 0) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether the resource has newer state on a node after a migration
* attempt
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] migrate_to Any migrate_to event that is being compared to
* \param[in] migrate_from Any migrate_from event that is being compared to
* \param[in,out] scheduler Scheduler data
*
* \return true if the resource has newer state on the node, otherwise false
*/
static bool
newer_state_after_migrate(const char *rsc_id, const char *node_name,
const xmlNode *migrate_to,
const xmlNode *migrate_from,
pcmk_scheduler_t *scheduler)
{
const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to;
const char *source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE);
/* Prefer comparing to the migration event on the same node, if one exists,
* since call IDs are more reliable there.
*/
if ((xml_op != migrate_to) && (migrate_to != NULL)
&& pcmk__str_eq(node_name, source, pcmk__str_casei)) {
xml_op = migrate_to;
}
/* If there's any newer non-monitor operation on the node, or any newer
* probe/monitor operation on the node indicating it was not running there,
* the migration events potentially no longer matter for the node.
*/
return non_monitor_after(rsc_id, node_name, xml_op, scheduler)
|| monitor_not_running_after(rsc_id, node_name, xml_op, scheduler);
}
/*!
* \internal
* \brief Parse migration source and target node names from history entry
*
* \param[in] entry Resource history entry for a migration action
* \param[in] source_node If not NULL, source must match this node
* \param[in] target_node If not NULL, target must match this node
* \param[out] source_name Where to store migration source node name
* \param[out] target_name Where to store migration target node name
*
* \return Standard Pacemaker return code
*/
static int
get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
const pcmk_node_t *target_node,
const char **source_name, const char **target_name)
{
*source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
*target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
if ((*source_name == NULL) || (*target_name == NULL)) {
pcmk__config_err("Ignoring resource history entry %s without "
PCMK__META_MIGRATE_SOURCE " and "
PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
return pcmk_rc_unpack_error;
}
if ((source_node != NULL)
&& !pcmk__str_eq(*source_name, source_node->priv->name,
pcmk__str_casei|pcmk__str_null_matches)) {
pcmk__config_err("Ignoring resource history entry %s because "
PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
pcmk__xe_id(entry), *source_name,
pcmk__node_name(source_node));
return pcmk_rc_unpack_error;
}
if ((target_node != NULL)
&& !pcmk__str_eq(*target_name, target_node->priv->name,
pcmk__str_casei|pcmk__str_null_matches)) {
pcmk__config_err("Ignoring resource history entry %s because "
PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
pcmk__xe_id(entry), *target_name,
pcmk__node_name(target_node));
return pcmk_rc_unpack_error;
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Add a migration source to a resource's list of dangling migrations
*
* If the migrate_to and migrate_from actions in a live migration both
* succeeded, but there is no stop on the source, the migration is considered
* "dangling." Add the source to the resource's dangling migration list, which
* will be used to schedule a stop on the source without affecting the target.
*
* \param[in,out] rsc Resource involved in migration
* \param[in] node Migration source
*/
static void
add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
{
pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
rsc->id, pcmk__node_name(node));
rsc->priv->orig_role = pcmk_role_stopped;
rsc->priv->dangling_migration_sources =
g_list_prepend(rsc->priv->dangling_migration_sources,
(gpointer) node);
}
/*!
* \internal
* \brief Update resource role etc. after a successful migrate_to action
*
* \param[in,out] history Parsed action result history
*/
static void
unpack_migrate_to_success(struct action_history *history)
{
/* A complete migration sequence is:
* 1. migrate_to on source node (which succeeded if we get to this function)
* 2. migrate_from on target node
* 3. stop on source node
*
* If no migrate_from has happened, the migration is considered to be
* "partial". If the migrate_from succeeded but no stop has happened, the
* migration is considered to be "dangling".
*
* If a successful migrate_to and stop have happened on the source node, we
* still need to check for a partial migration, due to scenarios (easier to
* produce with batch-limit=1) like:
*
* - A resource is migrating from node1 to node2, and a migrate_to is
* initiated for it on node1.
*
* - node2 goes into standby mode while the migrate_to is pending, which
* aborts the transition.
*
* - Upon completion of the migrate_to, a new transition schedules a stop
* on both nodes and a start on node1.
*
* - If the new transition is aborted for any reason while the resource is
* stopping on node1, the transition after that stop completes will see
* the migrate_to and stop on the source, but it's still a partial
* migration, and the resource must be stopped on node2 because it is
* potentially active there due to the migrate_to.
*
* We also need to take into account that either node's history may be
* cleared at any point in the migration process.
*/
int from_rc = PCMK_OCF_OK;
int from_status = PCMK_EXEC_PENDING;
pcmk_node_t *target_node = NULL;
xmlNode *migrate_from = NULL;
const char *source = NULL;
const char *target = NULL;
bool source_newer_op = false;
bool target_newer_state = false;
bool active_on_target = false;
pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
// Get source and target node names from XML
if (get_migration_node_names(history->xml, history->node, NULL, &source,
&target) != pcmk_rc_ok) {
return;
}
// Check for newer state on the source
source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
scheduler);
// Check for a migrate_from action from this source on the target
migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
target, source, -1, scheduler);
if (migrate_from != NULL) {
if (source_newer_op) {
/* There's a newer non-monitor operation on the source and a
* migrate_from on the target, so this migrate_to is irrelevant to
* the resource's state.
*/
return;
}
crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
}
/* If the resource has newer state on both the source and target after the
* migration events, this migrate_to is irrelevant to the resource's state.
*/
target_newer_state = newer_state_after_migrate(history->rsc->id, target,
history->xml, migrate_from,
scheduler);
if (source_newer_op && target_newer_state) {
return;
}
/* Check for dangling migration (migrate_from succeeded but stop not done).
* We know there's no stop because we already returned if the target has a
* migrate_from and the source has any newer non-monitor operation.
*/
if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
add_dangling_migration(history->rsc, history->node);
return;
}
/* Without newer state, this migrate_to implies the resource is active.
* (Clones are not allowed to migrate, so role can't be promoted.)
*/
history->rsc->priv->orig_role = pcmk_role_started;
target_node = pcmk_find_node(scheduler, target);
active_on_target = !target_newer_state && (target_node != NULL)
&& target_node->details->online;
if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
if (active_on_target) {
native_add_running(history->rsc, target_node, scheduler, TRUE);
} else {
// Mark resource as failed, require recovery, and prevent migration
pcmk__set_rsc_flags(history->rsc,
pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
}
return;
}
// The migrate_from is pending, complete but erased, or to be scheduled
/* If there is no history at all for the resource on an online target, then
* it was likely cleaned. Just return, and we'll schedule a probe. Once we
* have the probe result, it will be reflected in target_newer_state.
*/
if ((target_node != NULL) && target_node->details->online
&& unknown_on_node(history->rsc, target)) {
return;
}
if (active_on_target) {
pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
native_add_running(history->rsc, target_node, scheduler, FALSE);
if ((source_node != NULL) && source_node->details->online) {
/* This is a partial migration: the migrate_to completed
* successfully on the source, but the migrate_from has not
* completed. Remember the source and target; if the newly
* chosen target remains the same when we schedule actions
* later, we may continue with the migration.
*/
history->rsc->priv->partial_migration_target = target_node;
history->rsc->priv->partial_migration_source = source_node;
}
} else if (!source_newer_op) {
// Mark resource as failed, require recovery, and prevent migration
pcmk__set_rsc_flags(history->rsc,
pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
}
}
/*!
* \internal
* \brief Update resource role etc. after a failed migrate_to action
*
* \param[in,out] history Parsed action result history
*/
static void
unpack_migrate_to_failure(struct action_history *history)
{
xmlNode *target_migrate_from = NULL;
const char *source = NULL;
const char *target = NULL;
pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
// Get source and target node names from XML
if (get_migration_node_names(history->xml, history->node, NULL, &source,
&target) != pcmk_rc_ok) {
return;
}
/* If a migration failed, we have to assume the resource is active. Clones
* are not allowed to migrate, so role can't be promoted.
*/
history->rsc->priv->orig_role = pcmk_role_started;
// Check for migrate_from on the target
target_migrate_from = find_lrm_op(history->rsc->id,
PCMK_ACTION_MIGRATE_FROM, target, source,
PCMK_OCF_OK, scheduler);
if (/* If the resource state is unknown on the target, it will likely be
* probed there, so don't just consider it running there. If the probe
* detects it running there, we will get back here anyway.
*/
!unknown_on_node(history->rsc, target)
/* If the resource has newer state on the target after the migration
* events, this migrate_to no longer matters for the target.
*/
&& !newer_state_after_migrate(history->rsc->id, target, history->xml,
target_migrate_from, scheduler)) {
/* The resource has no newer state on the target, so assume it's still
* active there (if the node is up).
*/
pcmk_node_t *target_node = pcmk_find_node(scheduler, target);
if (target_node && target_node->details->online) {
native_add_running(history->rsc, target_node, scheduler, FALSE);
}
} else if (!non_monitor_after(history->rsc->id, source, history->xml,
scheduler)) {
/* We know the resource has newer state on the target, but this
* migrate_to still matters for the source as long as there's no newer
* non-monitor operation there.
*/
// Mark node as having dangling migration so we can force a stop later
history->rsc->priv->dangling_migration_sources =
g_list_prepend(history->rsc->priv->dangling_migration_sources,
(gpointer) history->node);
}
}
/*!
* \internal
* \brief Update resource role etc. after a failed migrate_from action
*
* \param[in,out] history Parsed action result history
*/
static void
unpack_migrate_from_failure(struct action_history *history)
{
xmlNode *source_migrate_to = NULL;
const char *source = NULL;
const char *target = NULL;
pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
// Get source and target node names from XML
if (get_migration_node_names(history->xml, NULL, history->node, &source,
&target) != pcmk_rc_ok) {
return;
}
/* If a migration failed, we have to assume the resource is active. Clones
* are not allowed to migrate, so role can't be promoted.
*/
history->rsc->priv->orig_role = pcmk_role_started;
// Check for a migrate_to on the source
source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
source, target, PCMK_OCF_OK, scheduler);
if (/* If the resource state is unknown on the source, it will likely be
* probed there, so don't just consider it running there. If the probe
* detects it running there, we will get back here anyway.
*/
!unknown_on_node(history->rsc, source)
/* If the resource has newer state on the source after the migration
* events, this migrate_from no longer matters for the source.
*/
&& !newer_state_after_migrate(history->rsc->id, source,
source_migrate_to, history->xml,
scheduler)) {
/* The resource has no newer state on the source, so assume it's still
* active there (if it is up).
*/
pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
if (source_node && source_node->details->online) {
native_add_running(history->rsc, source_node, scheduler, TRUE);
}
}
}
/*!
* \internal
* \brief Add an action to cluster's list of failed actions
*
* \param[in,out] history Parsed action result history
*/
static void
record_failed_op(struct action_history *history)
{
const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
if (!(history->node->details->online)) {
return;
}
for (const xmlNode *xIter = scheduler->priv->failed->children;
xIter != NULL; xIter = xIter->next) {
const char *key = pcmk__xe_history_key(xIter);
const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
if (pcmk__str_eq(history->key, key, pcmk__str_none)
&& pcmk__str_eq(uname, history->node->priv->name,
pcmk__str_casei)) {
crm_trace("Skipping duplicate entry %s on %s",
history->key, pcmk__node_name(history->node));
return;
}
}
crm_trace("Adding entry for %s on %s to failed action list",
history->key, pcmk__node_name(history->node));
crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name);
crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
pcmk__xml_copy(scheduler->priv->failed, history->xml);
}
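/*!
* \internal
* \brief Get a displayable string for an action's last change time
*
* \param[in] xml_op Action history entry
*
* \return Newly allocated string with the time the action result last changed
* (without the day of week), or "unknown_time" if unavailable
* \note The caller is responsible for freeing the return value.
*/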
static char *
last_change_str(const xmlNode *xml_op)
{
time_t when;
char *result = NULL;
if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
&when) == pcmk_ok) {
char *when_s = pcmk__epoch2str(&when, 0);
const char *p = strchr(when_s, ' ');
// Skip day of week to make message shorter
if ((p != NULL) && (*(++p) != '\0')) {
result = pcmk__str_copy(p);
}
free(when_s);
}
if (result == NULL) {
result = pcmk__str_copy("unknown_time");
}
return result;
}
/*!
* \internal
* \brief Ban a resource (or its clone if an anonymous instance) from all nodes
*
* \param[in,out] rsc Resource to ban
*/
static void
ban_from_all_nodes(pcmk_resource_t *rsc)
{
int score = -PCMK_SCORE_INFINITY;
const pcmk_scheduler_t *scheduler = rsc->priv->scheduler;
if (rsc->priv->parent != NULL) {
pcmk_resource_t *parent = uber_parent(rsc);
if (pcmk__is_anonymous_clone(parent)) {
/* For anonymous clones, if an operation with
* PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
* entire clone must stop.
*/
rsc = parent;
}
}
// Ban the resource from all nodes
crm_notice("%s will not be started under current conditions", rsc->id);
if (rsc->priv->allowed_nodes != NULL) {
g_hash_table_destroy(rsc->priv->allowed_nodes);
}
rsc->priv->allowed_nodes = pe__node_list2table(scheduler->nodes);
g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score);
}
/*!
* \internal
* \brief Get configured failure handling and role after failure for an action
*
* \param[in,out] history Unpacked action history entry
* \param[out] on_fail Where to set configured failure handling
* \param[out] fail_role Where to set role after failure
*/
static void
unpack_failure_handling(struct action_history *history,
enum pcmk__on_fail *on_fail,
enum rsc_role_e *fail_role)
{
xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
history->interval_ms, true);
GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
history->task,
history->interval_ms, config);
const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
*on_fail = pcmk__parse_on_fail(history->rsc, history->task,
history->interval_ms, on_fail_str);
*fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
meta);
g_hash_table_destroy(meta);
}
/*!
* \internal
* \brief Update resource role, failure handling, etc., after a failed action
*
* \param[in,out] history Parsed action result history
* \param[in] config_on_fail Action failure handling from configuration
* \param[in] fail_role Resource's role after failure of this action
* \param[out] last_failure This will be set to the history XML
* \param[in,out] on_fail Actual handling of action result
*/
static void
unpack_rsc_op_failure(struct action_history *history,
enum pcmk__on_fail config_on_fail,
enum rsc_role_e fail_role, xmlNode **last_failure,
enum pcmk__on_fail *on_fail)
{
bool is_probe = false;
char *last_change_s = NULL;
pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
*last_failure = history->xml;
is_probe = pcmk_xe_is_probe(history->xml);
last_change_s = last_change_str(history->xml);
if (!pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)
&& (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
crm_trace("Unexpected result (%s%s%s) was recorded for "
"%s of %s on %s at %s " QB_XS " exit-status=%d id=%s",
- services_ocf_exitcode_str(history->exit_status),
+ crm_exit_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""),
(is_probe? "probe" : history->task), history->rsc->id,
pcmk__node_name(history->node), last_change_s,
history->exit_status, history->id);
} else {
pcmk__sched_warn(scheduler,
"Unexpected result (%s%s%s) was recorded for %s of "
"%s on %s at %s " QB_XS " exit-status=%d id=%s",
- services_ocf_exitcode_str(history->exit_status),
+ crm_exit_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""),
(is_probe? "probe" : history->task), history->rsc->id,
pcmk__node_name(history->node), last_change_s,
history->exit_status, history->id);
if (is_probe && (history->exit_status != PCMK_OCF_OK)
&& (history->exit_status != PCMK_OCF_NOT_RUNNING)
&& (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
/* A failed (not just unexpected) probe result could mean the user
* didn't know resources will be probed even where they can't run.
*/
crm_notice("If it is not possible for %s to run on %s, see "
"the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
"constraints",
history->rsc->id, pcmk__node_name(history->node));
}
record_failed_op(history);
}
free(last_change_s);
if (*on_fail < config_on_fail) {
pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
pcmk__on_fail_text(*on_fail),
pcmk__on_fail_text(config_on_fail), history->key);
*on_fail = config_on_fail;
}
if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
"__stop_fail__", scheduler);
} else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
unpack_migrate_to_failure(history);
} else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
unpack_migrate_from_failure(history);
} else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
history->rsc->priv->orig_role = pcmk_role_promoted;
} else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
if (config_on_fail == pcmk__on_fail_block) {
history->rsc->priv->orig_role = pcmk_role_promoted;
pe__set_next_role(history->rsc, pcmk_role_stopped,
"demote with " PCMK_META_ON_FAIL "=block");
} else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
history->rsc->priv->orig_role = pcmk_role_stopped;
} else {
/* Staying in the promoted role would put the scheduler and
* controller into a loop. Setting the role to unpromoted is not
* dangerous because the resource will be stopped as part of
* recovery, and any promotion will be ordered after that stop.
*/
history->rsc->priv->orig_role = pcmk_role_unpromoted;
}
}
if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
/* leave stopped */
pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
history->rsc->priv->orig_role = pcmk_role_stopped;
} else if (history->rsc->priv->orig_role < pcmk_role_started) {
pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
set_active(history->rsc);
}
pcmk__rsc_trace(history->rsc,
"Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
history->rsc->id,
pcmk_role_text(history->rsc->priv->orig_role),
pcmk__btoa(history->node->details->unclean),
pcmk__on_fail_text(config_on_fail),
pcmk_role_text(fail_role));
if ((fail_role != pcmk_role_started)
&& (history->rsc->priv->next_role < fail_role)) {
pe__set_next_role(history->rsc, fail_role, "failure");
}
if (fail_role == pcmk_role_stopped) {
ban_from_all_nodes(history->rsc);
}
}
/*!
* \internal
* \brief Block a resource with a failed action if it cannot be recovered
*
* If resource action is a failed stop and fencing is not possible, mark the
* resource as unmanaged and blocked, since recovery cannot be done.
*
* \param[in,out] history Parsed action history entry
*/
static void
block_if_unrecoverable(struct action_history *history)
{
char *last_change_s = NULL;
if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
return; // All actions besides stop are always recoverable
}
if (pe_can_fence(history->node->priv->scheduler, history->node)) {
return; // Failed stops are recoverable via fencing
}
last_change_s = last_change_str(history->xml);
pcmk__sched_err(history->node->priv->scheduler,
"No further recovery can be attempted for %s "
"because %s on %s failed (%s%s%s) at %s "
QB_XS " rc=%d id=%s",
history->rsc->id, history->task,
pcmk__node_name(history->node),
- services_ocf_exitcode_str(history->exit_status),
+ crm_exit_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""),
last_change_s, history->exit_status, history->id);
free(last_change_s);
pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_managed);
pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked);
}
/*!
* \internal
* \brief Update action history's execution status and why
*
* \param[in,out] history Parsed action history entry
* \param[out] why Where to store reason for update
* \param[in] value New value
* \param[in] reason Description of why value was changed
*/
static inline void
remap_because(struct action_history *history, const char **why, int value,
const char *reason)
{
if (history->execution_status != value) {
history->execution_status = value;
*why = reason;
}
}
/*!
* \internal
* \brief Remap informational monitor results and operation status
*
* For monitor results, certain OCF codes provide extended information to the
* user about services that are not failing but are not entirely healthy
* either. Pacemaker must treat these as the equivalent "normal" result.
*
* For operation status, the action result can be used to determine an
* appropriate status for the purpose of responding to the action. The status
* provided by the executor is not directly usable, since the executor does
* not know what result was expected.
*
* \param[in,out] history Parsed action history entry
* \param[in,out] on_fail What should be done about the result
* \param[in] expired Whether result is expired
*
* \note If the result is remapped and the node is not shutting down or failed,
* the operation will be recorded in the scheduler data's list of failed
* operations to highlight it for the user.
*
* \note This may update the resource's current and next role.
*/
static void
remap_operation(struct action_history *history,
enum pcmk__on_fail *on_fail, bool expired)
{
bool is_probe = false;
int orig_exit_status = history->exit_status;
int orig_exec_status = history->execution_status;
const char *why = NULL;
const char *task = history->task;
// Remap degraded results to their successful counterparts
history->exit_status = pcmk__effective_rc(history->exit_status);
if (history->exit_status != orig_exit_status) {
why = "degraded result";
if (!expired && (!history->node->details->shutdown
|| history->node->details->online)) {
record_failed_op(history);
}
}
if (!pcmk__is_bundled(history->rsc)
&& pcmk_xe_mask_probe_failure(history->xml)
&& ((history->execution_status != PCMK_EXEC_DONE)
|| (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
history->execution_status = PCMK_EXEC_DONE;
history->exit_status = PCMK_OCF_NOT_RUNNING;
why = "equivalent probe result";
}
/* If the executor reported an execution status of anything but done or
* error, consider that final. But for done or error, we know better whether
* it should be treated as a failure or not, because we know the expected
* result.
*/
switch (history->execution_status) {
case PCMK_EXEC_DONE:
case PCMK_EXEC_ERROR:
break;
// These should be treated as node-fatal
case PCMK_EXEC_NO_FENCE_DEVICE:
case PCMK_EXEC_NO_SECRETS:
remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
"node-fatal error");
goto remap_done;
default:
goto remap_done;
}
is_probe = pcmk_xe_is_probe(history->xml);
if (is_probe) {
task = "probe";
}
if (history->expected_exit_status < 0) {
/* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
* Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
* expected exit status in the transition key, which (along with the
* similar case of a corrupted transition key in the CIB) will be
* reported to this function as -1. Pacemaker 2.0+ does not support
* rolling upgrades from those versions or processing of saved CIB files
* from those versions, so we do not need to care much about this case.
*/
remap_because(history, &why, PCMK_EXEC_ERROR,
"obsolete history format");
pcmk__config_warn("Expected result not found for %s on %s "
"(corrupt or obsolete CIB?)",
history->key, pcmk__node_name(history->node));
} else if (history->exit_status == history->expected_exit_status) {
remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
} else {
remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
pcmk__rsc_debug(history->rsc,
"%s on %s: expected %d (%s), got %d (%s%s%s)",
history->key, pcmk__node_name(history->node),
history->expected_exit_status,
- services_ocf_exitcode_str(history->expected_exit_status),
+ crm_exit_str(history->expected_exit_status),
history->exit_status,
- services_ocf_exitcode_str(history->exit_status),
+ crm_exit_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""));
}
switch (history->exit_status) {
case PCMK_OCF_OK:
if (is_probe
&& (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
char *last_change_s = last_change_str(history->xml);
remap_because(history, &why, PCMK_EXEC_DONE, "probe");
pcmk__rsc_info(history->rsc,
"Probe found %s active on %s at %s",
history->rsc->id, pcmk__node_name(history->node),
last_change_s);
free(last_change_s);
}
break;
case PCMK_OCF_NOT_RUNNING:
if (is_probe
|| (history->expected_exit_status == history->exit_status)
|| !pcmk_is_set(history->rsc->flags, pcmk__rsc_managed)) {
/* For probes, recurring monitors for the Stopped role, and
* unmanaged resources, "not running" is not considered a
* failure.
*/
remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
history->rsc->priv->orig_role = pcmk_role_stopped;
*on_fail = pcmk__on_fail_ignore;
pe__set_next_role(history->rsc, pcmk_role_unknown,
"not running");
}
break;
case PCMK_OCF_RUNNING_PROMOTED:
if (is_probe
&& (history->exit_status != history->expected_exit_status)) {
char *last_change_s = last_change_str(history->xml);
remap_because(history, &why, PCMK_EXEC_DONE, "probe");
pcmk__rsc_info(history->rsc,
"Probe found %s active and promoted on %s at %s",
history->rsc->id,
pcmk__node_name(history->node), last_change_s);
free(last_change_s);
}
if (!expired
|| (history->exit_status == history->expected_exit_status)) {
history->rsc->priv->orig_role = pcmk_role_promoted;
}
break;
case PCMK_OCF_FAILED_PROMOTED:
if (!expired) {
history->rsc->priv->orig_role = pcmk_role_promoted;
}
remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
break;
case PCMK_OCF_NOT_CONFIGURED:
remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
break;
case PCMK_OCF_UNIMPLEMENT_FEATURE:
{
guint interval_ms = 0;
crm_element_value_ms(history->xml, PCMK_META_INTERVAL,
&interval_ms);
if (interval_ms == 0) {
if (!expired) {
block_if_unrecoverable(history);
}
remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
"exit status");
} else {
remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
"exit status");
}
}
break;
case PCMK_OCF_NOT_INSTALLED:
case PCMK_OCF_INVALID_PARAM:
case PCMK_OCF_INSUFFICIENT_PRIV:
if (!expired) {
block_if_unrecoverable(history);
}
remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
break;
default:
if (history->execution_status == PCMK_EXEC_DONE) {
char *last_change_s = last_change_str(history->xml);
crm_info("Treating unknown exit status %d from %s of %s "
"on %s at %s as failure",
history->exit_status, task, history->rsc->id,
pcmk__node_name(history->node), last_change_s);
remap_because(history, &why, PCMK_EXEC_ERROR,
"unknown exit status");
free(last_change_s);
}
break;
}
remap_done:
if (why != NULL) {
pcmk__rsc_trace(history->rsc,
"Remapped %s result from [%s: %s] to [%s: %s] "
"because of %s",
history->key, pcmk_exec_status_str(orig_exec_status),
crm_exit_str(orig_exit_status),
pcmk_exec_status_str(history->execution_status),
crm_exit_str(history->exit_status), why);
}
}
// Return TRUE if a last failure of a start or monitor should be cleared
// because the resource's parameters have changed since the failure
static bool
should_clear_for_param_change(const xmlNode *xml_op, const char *task,
pcmk_resource_t *rsc, pcmk_node_t *node)
{
if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
if (pe__bundle_needs_remote_name(rsc)) {
/* We haven't allocated resources yet, so we can't reliably
* substitute addr parameters for the REMOTE_CONTAINER_HACK.
* When that's needed, defer the check until later.
*/
pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
rsc->priv->scheduler);
} else {
pcmk__op_digest_t *digest_data = NULL;
digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
rsc->priv->scheduler);
switch (digest_data->rc) {
case pcmk__digest_unknown:
crm_trace("Resource %s history entry %s on %s"
" has no digest to compare",
rsc->id, pcmk__xe_history_key(xml_op),
node->priv->id);
break;
case pcmk__digest_match:
break;
default:
return TRUE;
}
}
}
return FALSE;
}
// Order action after fencing of remote node, given connection rsc
static void
order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
pcmk_scheduler_t *scheduler)
{
pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
if (remote_node) {
pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
FALSE, scheduler);
order_actions(fence, action, pcmk__ar_first_implies_then);
}
}
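/*!
* \internal
* \brief Check whether a failure timeout should be ignored for an action
*
* \param[in] rsc Resource that action is for
* \param[in] task Action name
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] is_last_failure Whether this is a last_failure entry
*
* \return TRUE if the failure timeout should be ignored, otherwise FALSE
*/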
static bool
should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
guint interval_ms, bool is_last_failure)
{
/* Clearing failures of recurring monitors has special concerns. The
* executor reports only changes in the monitor result, so if the
* monitor is still active and still getting the same failure result,
* that will go undetected after the failure is cleared.
*
* Also, the operation history will have the time when the recurring
* monitor result changed to the given code, not the time when the
* result last happened.
*
* @TODO We probably should clear such failures only when the failure
* timeout has passed since the last occurrence of the failed result.
* However we don't record that information. We could maybe approximate
* that by clearing only if there is a more recent successful monitor or
* stop result, but we don't even have that information at this point
* since we are still unpacking the resource's operation history.
*
* This is especially important for remote connection resources with a
* reconnect interval, so in that case, we skip clearing failures
* if the remote node hasn't been fenced.
*/
if ((rsc->priv->remote_reconnect_ms > 0U)
&& pcmk_is_set(rsc->priv->scheduler->flags,
pcmk__sched_fencing_enabled)
&& (interval_ms != 0)
&& pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler,
rsc->id);
if (remote_node && !pcmk_is_set(remote_node->priv->flags,
pcmk__node_remote_fenced)) {
if (is_last_failure) {
crm_info("Waiting to clear monitor failure for remote node %s"
" until fencing has occurred", rsc->id);
}
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Check operation age and schedule failure clearing when appropriate
*
* This function has two distinct purposes. The first is to check whether an
* operation history entry is expired (i.e. the resource has a failure timeout,
* the entry is older than the timeout, and the resource either has no fail
* count or its fail count is entirely older than the timeout). The second is to
* schedule fail count clearing when appropriate (i.e. the operation is expired
* and either the resource has an expired fail count or the operation is a
* last_failure for a remote connection resource with a reconnect interval,
* or the operation is a last_failure for a start or monitor operation and the
* resource's parameters have changed since the operation).
*
* \param[in,out] history Parsed action result history
*
* \return true if operation history entry is expired, otherwise false
*/
static bool
check_operation_expiry(struct action_history *history)
{
bool expired = false;
bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
time_t last_run = 0;
int unexpired_fail_count = 0;
const char *clear_reason = NULL;
const guint expiration_sec =
history->rsc->priv->failure_expiration_ms / 1000;
pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
pcmk__rsc_trace(history->rsc,
"Resource history entry %s on %s is not expired: "
"Not Installed does not expire",
history->id, pcmk__node_name(history->node));
return false; // "Not installed" must always be cleared manually
}
if ((expiration_sec > 0)
&& (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE,
&last_run) == 0)) {
/* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
* timestamp
*/
time_t now = get_effective_time(scheduler);
time_t last_failure = 0;
// Is this particular operation history older than the failure timeout?
if ((now >= (last_run + expiration_sec))
&& !should_ignore_failure_timeout(history->rsc, history->task,
history->interval_ms,
is_last_failure)) {
expired = true;
}
// Does the resource as a whole have an unexpired fail count?
unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
&last_failure,
pcmk__fc_effective,
history->xml);
// Update scheduler recheck time according to *last* failure
crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d "
"expiration=%s last-failure@%lld",
history->id, (long long) last_run, (expired? "" : "not "),
(long long) now, unexpired_fail_count,
pcmk__readable_interval(expiration_sec * 1000),
(long long) last_failure);
last_failure += expiration_sec + 1;
if (unexpired_fail_count && (now < last_failure)) {
pe__update_recheck_time(last_failure, scheduler,
"fail count expiration");
}
}
if (expired) {
if (pe_get_failcount(history->node, history->rsc, NULL,
pcmk__fc_default, history->xml)) {
// There is a fail count ignoring timeout
if (unexpired_fail_count == 0) {
// There is no fail count considering timeout
clear_reason = "it expired";
} else {
/* This operation is old, but there is an unexpired fail count.
* In a properly functioning cluster, this should only be
* possible if this operation is not a failure (otherwise the
* fail count should be expired too), so this is really just a
* failsafe.
*/
pcmk__rsc_trace(history->rsc,
"Resource history entry %s on %s is not "
"expired: Unexpired fail count",
history->id, pcmk__node_name(history->node));
expired = false;
}
} else if (is_last_failure
&& (history->rsc->priv->remote_reconnect_ms > 0U)) {
/* Clear any expired last failure when reconnect interval is set,
* even if there is no fail count.
*/
clear_reason = "reconnect interval is set";
}
}
if (!expired && is_last_failure
&& should_clear_for_param_change(history->xml, history->task,
history->rsc, history->node)) {
clear_reason = "resource parameters have changed";
}
if (clear_reason != NULL) {
pcmk_action_t *clear_op = NULL;
// Schedule clearing of the fail count
clear_op = pe__clear_failcount(history->rsc, history->node,
clear_reason, scheduler);
if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)
&& (history->rsc->priv->remote_reconnect_ms > 0)) {
/* If we're clearing a remote connection due to a reconnect
* interval, we want to wait until any scheduled fencing
* completes.
*
* We could limit this to remote_node->details->unclean, but at
* this point, that's always true (it won't be reliable until
* after unpack_node_history() is done).
*/
crm_info("Clearing %s failure will wait until any scheduled "
"fencing of %s completes",
history->task, history->rsc->id);
order_after_remote_fencing(clear_op, history->rsc, scheduler);
}
}
if (expired && (history->interval_ms == 0)
&& pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
switch (history->exit_status) {
case PCMK_OCF_OK:
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_PROMOTED:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_PROMOTED:
// Don't expire probes that return these values
pcmk__rsc_trace(history->rsc,
"Resource history entry %s on %s is not "
"expired: Probe result",
history->id, pcmk__node_name(history->node));
expired = false;
break;
}
}
return expired;
}
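/*!
* \internal
* \brief Parse an action's expected exit status from its history entry
*
* \param[in] xml_op Action history entry
*
* \return Expected exit status from the entry's transition key, or -1 if the
* entry has no transition key
*/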
int
pe__target_rc_from_xml(const xmlNode *xml_op)
{
int target_rc = 0;
const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
if (key == NULL) {
return -1;
}
decode_transition_key(key, NULL, NULL, NULL, &target_rc);
return target_rc;
}
/*!
* \internal
* \brief Update a resource's state for an action result
*
* \param[in,out] history Parsed action history entry
* \param[in] exit_status Exit status to base new state on
* \param[in] last_failure Resource's last_failure entry, if known
* \param[in,out] on_fail Resource's current failure handling
*/
static void
update_resource_state(struct action_history *history, int exit_status,
const xmlNode *last_failure,
enum pcmk__on_fail *on_fail)
{
bool clear_past_failure = false;
if ((exit_status == PCMK_OCF_NOT_INSTALLED)
|| (!pcmk__is_bundled(history->rsc)
&& pcmk_xe_mask_probe_failure(history->xml))) {
history->rsc->priv->orig_role = pcmk_role_stopped;
} else if (exit_status == PCMK_OCF_NOT_RUNNING) {
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
pcmk__str_none)) {
if ((last_failure != NULL)
&& pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
pcmk__str_none)) {
clear_past_failure = true;
}
if (history->rsc->priv->orig_role < pcmk_role_started) {
set_active(history->rsc);
}
} else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
history->rsc->priv->orig_role = pcmk_role_started;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
history->rsc->priv->orig_role = pcmk_role_stopped;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
pcmk__str_none)) {
history->rsc->priv->orig_role = pcmk_role_promoted;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
pcmk__str_none)) {
if (*on_fail == pcmk__on_fail_demote) {
/* Demote clears an error only if
* PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
*/
clear_past_failure = true;
}
history->rsc->priv->orig_role = pcmk_role_unpromoted;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_none)) {
history->rsc->priv->orig_role = pcmk_role_started;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
pcmk__str_none)) {
unpack_migrate_to_success(history);
} else if (history->rsc->priv->orig_role < pcmk_role_started) {
pcmk__rsc_trace(history->rsc, "%s active on %s",
history->rsc->id, pcmk__node_name(history->node));
set_active(history->rsc);
}
if (!clear_past_failure) {
return;
}
switch (*on_fail) {
case pcmk__on_fail_stop:
case pcmk__on_fail_ban:
case pcmk__on_fail_standby_node:
case pcmk__on_fail_fence_node:
pcmk__rsc_trace(history->rsc,
"%s (%s) is not cleared by a completed %s",
history->rsc->id, pcmk__on_fail_text(*on_fail),
history->task);
break;
case pcmk__on_fail_block:
case pcmk__on_fail_ignore:
case pcmk__on_fail_demote:
case pcmk__on_fail_restart:
case pcmk__on_fail_restart_container:
*on_fail = pcmk__on_fail_ignore;
pe__set_next_role(history->rsc, pcmk_role_unknown,
"clear past failures");
break;
case pcmk__on_fail_reset_remote:
if (history->rsc->priv->remote_reconnect_ms == 0U) {
/* With no reconnect interval, the connection is allowed to
* start again after the remote node is fenced and
* completely stopped. (With a reconnect interval, we wait
* for the failure to be cleared entirely before attempting
* to reconnect.)
*/
*on_fail = pcmk__on_fail_ignore;
pe__set_next_role(history->rsc, pcmk_role_unknown,
"clear past failures and reset remote");
}
break;
}
}
/*!
* \internal
* \brief Check whether a given history entry matters for resource state
*
* \param[in] history Parsed action history entry
*
* \return true if action can affect resource state, otherwise false
*/
static inline bool
can_affect_state(struct action_history *history)
{
return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
PCMK_ACTION_START, PCMK_ACTION_STOP,
PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
"asyncmon", NULL);
}
/*!
* \internal
* \brief Unpack execution/exit status and exit reason from a history entry
*
* \param[in,out] history Action history entry to unpack
*
* \return Standard Pacemaker return code
*/
static int
unpack_action_result(struct action_history *history)
{
if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
&(history->execution_status)) < 0)
|| (history->execution_status < PCMK_EXEC_PENDING)
|| (history->execution_status > PCMK_EXEC_MAX)
|| (history->execution_status == PCMK_EXEC_CANCELLED)) {
pcmk__config_err("Ignoring resource history entry %s for %s on %s "
"with invalid " PCMK__XA_OP_STATUS " '%s'",
history->id, history->rsc->id,
pcmk__node_name(history->node),
pcmk__s(crm_element_value(history->xml,
PCMK__XA_OP_STATUS),
""));
return pcmk_rc_unpack_error;
}
if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
&(history->exit_status)) < 0)
|| (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
pcmk__config_err("Ignoring resource history entry %s for %s on %s "
"with invalid " PCMK__XA_RC_CODE " '%s'",
history->id, history->rsc->id,
pcmk__node_name(history->node),
pcmk__s(crm_element_value(history->xml,
PCMK__XA_RC_CODE),
""));
return pcmk_rc_unpack_error;
}
history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Process an action history entry whose result expired
*
* \param[in,out] history Parsed action history entry
* \param[in] orig_exit_status Action exit status before remapping
*
* \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
* entry needs no further processing)
*/
static int
process_expired_result(struct action_history *history, int orig_exit_status)
{
if (!pcmk__is_bundled(history->rsc)
&& pcmk_xe_mask_probe_failure(history->xml)
&& (orig_exit_status != history->expected_exit_status)) {
if (history->rsc->priv->orig_role <= pcmk_role_stopped) {
history->rsc->priv->orig_role = pcmk_role_unknown;
}
crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
"Masked failure expired",
history->id, history->rsc->id,
pcmk__node_name(history->node));
return pcmk_rc_ok;
}
if (history->exit_status == history->expected_exit_status) {
return pcmk_rc_undetermined; // Only failures expire
}
if (history->interval_ms == 0) {
crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
"Expired failure",
history->id, history->task, history->rsc->id,
pcmk__node_name(history->node));
return pcmk_rc_ok;
}
if (history->node->details->online && !history->node->details->unclean) {
/* Reschedule the recurring action. schedule_cancel() won't work at
* this stage, so as a hacky workaround, forcibly change the restart
* digest so pcmk__check_action_config() does what we want later.
*
* @TODO We should skip this if there is a newer successful monitor.
* Also, this causes rescheduling only if the history entry
* has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
* scheduler regression test doesn't, but that may not be a
* realistic scenario in production).
*/
crm_notice("Rescheduling %s-interval %s of %s on %s "
"after failure expired",
pcmk__readable_interval(history->interval_ms), history->task,
history->rsc->id, pcmk__node_name(history->node));
crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST,
"calculated-failure-timeout");
return pcmk_rc_ok;
}
return pcmk_rc_undetermined;
}
/*!
* \internal
* \brief Process a masked probe failure
*
* \param[in,out] history Parsed action history entry
* \param[in] orig_exit_status Action exit status before remapping
* \param[in] last_failure Resource's last_failure entry, if known
* \param[in,out] on_fail Resource's current failure handling
*/
static void
mask_probe_failure(struct action_history *history, int orig_exit_status,
const xmlNode *last_failure,
enum pcmk__on_fail *on_fail)
{
pcmk_resource_t *ban_rsc = history->rsc;
if (!pcmk_is_set(history->rsc->flags, pcmk__rsc_unique)) {
ban_rsc = uber_parent(history->rsc);
}
crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
- services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
+ crm_exit_str(orig_exit_status), history->rsc->id,
pcmk__node_name(history->node));
update_resource_state(history, history->expected_exit_status, last_failure,
on_fail);
crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name);
record_failed_op(history);
resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
"masked-probe-failure", ban_rsc->priv->scheduler);
}
/*!
* \internal Check whether a given failure is for a given pending action
*
* \param[in] history Parsed history entry for pending action
* \param[in] last_failure Resource's last_failure entry, if known
*
* \return true if \p last_failure is the failure of the pending action in
* \p history, otherwise false
* \note Both \p history and \p last_failure must come from the same
* \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be
* the same.
*/
static bool
failure_is_newer(const struct action_history *history,
const xmlNode *last_failure)
{
guint failure_interval_ms = 0U;
long long failure_change = 0LL;
long long this_change = 0LL;
if (last_failure == NULL) {
return false; // Resource has no last_failure entry
}
if (!pcmk__str_eq(history->task,
crm_element_value(last_failure, PCMK_XA_OPERATION),
pcmk__str_none)) {
return false; // last_failure is for different action
}
if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
&failure_interval_ms) != pcmk_ok)
|| (history->interval_ms != failure_interval_ms)) {
return false; // last_failure is for action with different interval
}
if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE),
&this_change, 0LL) != pcmk_rc_ok)
|| (pcmk__scan_ll(crm_element_value(last_failure,
PCMK_XA_LAST_RC_CHANGE),
&failure_change, 0LL) != pcmk_rc_ok)
|| (failure_change < this_change)) {
return false; // Failure is not known to be newer
}
return true;
}
/*!
* \internal
* \brief Update a resource's role etc. for a pending action
*
* \param[in,out] history Parsed history entry for pending action
* \param[in] last_failure Resource's last_failure entry, if known
*/
static void
process_pending_action(struct action_history *history,
const xmlNode *last_failure)
{
/* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
* and there might be a RSC_monitor_INTERVAL entry with the last successful
* or pending result.
*
* If last_failure contains the failure of the pending recurring monitor
* we're processing here, and is newer, the action is no longer pending.
* (Pending results have call ID -1, which sorts last, so the last failure
* if any should be known.)
*/
if (failure_is_newer(history, last_failure)) {
return;
}
if (strcmp(history->task, PCMK_ACTION_START) == 0) {
pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending);
set_active(history->rsc);
} else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
history->rsc->priv->orig_role = pcmk_role_promoted;
} else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
&& history->node->details->unclean) {
/* A migrate_to action is pending on an unclean source, so force a stop
* on the target.
*/
const char *migrate_target = NULL;
pcmk_node_t *target = NULL;
migrate_target = crm_element_value(history->xml,
PCMK__META_MIGRATE_TARGET);
target = pcmk_find_node(history->rsc->priv->scheduler,
migrate_target);
if (target != NULL) {
stop_action(history->rsc, target, FALSE);
}
}
if (history->rsc->priv->pending_action != NULL) {
/* There should never be multiple pending actions, but as a failsafe,
* just remember the first one processed for display purposes.
*/
return;
}
if (pcmk_is_probe(history->task, history->interval_ms)) {
/* Pending probes are currently never displayed, even if pending
* operations are requested. If we ever want to change that,
* enable the below and the corresponding part of
* native.c:native_pending_action().
*/
#if 0
history->rsc->priv->pending_action = strdup("probe");
history->rsc->priv->pending_node = history->node;
#endif
} else {
history->rsc->priv->pending_action = strdup(history->task);
history->rsc->priv->pending_node = history->node;
}
}
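/*!
* \internal
* \brief Unpack a single action history entry for a resource on a node
*
* \param[in,out] rsc Resource that history entry is for
* \param[in,out] node Node that history entry is for
* \param[in,out] xml_op Action history entry XML
* \param[out] last_failure Where to store history entry XML if this entry
* represents a failed action
* \param[in,out] on_fail Where to update resource's failure handling
*/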
static void
unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
xmlNode **last_failure, enum pcmk__on_fail *on_fail)
{
int old_rc = 0;
bool expired = false;
pcmk_resource_t *parent = rsc;
enum rsc_role_e fail_role = pcmk_role_unknown;
enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart;
struct action_history history = {
.rsc = rsc,
.node = node,
.xml = xml_op,
.execution_status = PCMK_EXEC_UNKNOWN,
};
CRM_CHECK(rsc && node && xml_op, return);
history.id = pcmk__xe_id(xml_op);
if (history.id == NULL) {
pcmk__config_err("Ignoring resource history entry for %s on %s "
"without ID", rsc->id, pcmk__node_name(node));
return;
}
// Task and interval
history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
if (history.task == NULL) {
pcmk__config_err("Ignoring resource history entry %s for %s on %s "
"without " PCMK_XA_OPERATION,
history.id, rsc->id, pcmk__node_name(node));
return;
}
crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
if (!can_affect_state(&history)) {
pcmk__rsc_trace(rsc,
"Ignoring resource history entry %s for %s on %s "
"with irrelevant action '%s'",
history.id, rsc->id, pcmk__node_name(node),
history.task);
return;
}
if (unpack_action_result(&history) != pcmk_rc_ok) {
return; // Error already logged
}
history.expected_exit_status = pe__target_rc_from_xml(xml_op);
history.key = pcmk__xe_history_key(xml_op);
crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
history.id, history.task, history.call_id,
pcmk__node_name(node),
pcmk_exec_status_str(history.execution_status),
crm_exit_str(history.exit_status));
if (node->details->unclean) {
pcmk__rsc_trace(rsc,
"%s is running on %s, which is unclean (further action "
"depends on value of stop's on-fail attribute)",
rsc->id, pcmk__node_name(node));
}
expired = check_operation_expiry(&history);
old_rc = history.exit_status;
remap_operation(&history, on_fail, expired);
if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
goto done;
}
if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
mask_probe_failure(&history, old_rc, *last_failure, on_fail);
goto done;
}
if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) {
parent = uber_parent(rsc);
}
switch (history.execution_status) {
case PCMK_EXEC_PENDING:
process_pending_action(&history, *last_failure);
goto done;
case PCMK_EXEC_DONE:
update_resource_state(&history, history.exit_status, *last_failure,
on_fail);
goto done;
case PCMK_EXEC_NOT_INSTALLED:
unpack_failure_handling(&history, &failure_strategy, &fail_role);
if (failure_strategy == pcmk__on_fail_ignore) {
crm_warn("Cannot ignore failed %s of %s on %s: "
"Resource agent doesn't exist "
QB_XS " status=%d rc=%d id=%s",
history.task, rsc->id, pcmk__node_name(node),
history.execution_status, history.exit_status,
history.id);
/* Also mark it as pcmk__rsc_failed later, so that it is
* displayed as "FAILED"
*/
*on_fail = pcmk__on_fail_ban;
}
resource_location(parent, node, -PCMK_SCORE_INFINITY,
"hard-error", rsc->priv->scheduler);
unpack_rsc_op_failure(&history, failure_strategy, fail_role,
last_failure, on_fail);
goto done;
case PCMK_EXEC_NOT_CONNECTED:
if (pcmk__is_pacemaker_remote_node(node)
&& pcmk_is_set(node->priv->remote->flags,
pcmk__rsc_managed)) {
/* We should never get into a situation where a managed remote
* connection resource is considered OK but a resource action
* behind the connection gets a "not connected" status. But as a
* fail-safe in case a bug or unusual circumstances do lead to
* that, ensure the remote connection is considered failed.
*/
pcmk__set_rsc_flags(node->priv->remote,
pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
}
break; // Not done, do error handling
case PCMK_EXEC_ERROR:
case PCMK_EXEC_ERROR_HARD:
case PCMK_EXEC_ERROR_FATAL:
case PCMK_EXEC_TIMEOUT:
case PCMK_EXEC_NOT_SUPPORTED:
case PCMK_EXEC_INVALID:
break; // Not done, do error handling
default: // No other value should be possible at this point
break;
}
unpack_failure_handling(&history, &failure_strategy, &fail_role);
if ((failure_strategy == pcmk__on_fail_ignore)
|| ((failure_strategy == pcmk__on_fail_restart_container)
&& (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
char *last_change_s = last_change_str(xml_op);
crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
QB_XS " %s",
- history.task, services_ocf_exitcode_str(history.exit_status),
+ history.task, crm_exit_str(history.exit_status),
(pcmk__str_empty(history.exit_reason)? "" : ": "),
pcmk__s(history.exit_reason, ""), rsc->id,
pcmk__node_name(node), last_change_s, history.id);
free(last_change_s);
update_resource_state(&history, history.expected_exit_status,
*last_failure, on_fail);
crm_xml_add(xml_op, PCMK_XA_UNAME, node->priv->name);
pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure);
record_failed_op(&history);
if ((failure_strategy == pcmk__on_fail_restart_container)
&& (*on_fail <= pcmk__on_fail_restart)) {
*on_fail = failure_strategy;
}
} else {
unpack_rsc_op_failure(&history, failure_strategy, fail_role,
last_failure, on_fail);
if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
uint8_t log_level = LOG_ERR;
if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
log_level = LOG_NOTICE;
}
do_crm_log(log_level,
"Preventing %s from restarting on %s because "
"of hard failure (%s%s%s) " QB_XS " %s",
parent->id, pcmk__node_name(node),
- services_ocf_exitcode_str(history.exit_status),
+ crm_exit_str(history.exit_status),
(pcmk__str_empty(history.exit_reason)? "" : ": "),
pcmk__s(history.exit_reason, ""), history.id);
resource_location(parent, node, -PCMK_SCORE_INFINITY,
"hard-error", rsc->priv->scheduler);
} else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
pcmk__sched_err(rsc->priv->scheduler,
"Preventing %s from restarting anywhere because "
"of fatal failure (%s%s%s) " QB_XS " %s",
- parent->id,
- services_ocf_exitcode_str(history.exit_status),
+ parent->id, crm_exit_str(history.exit_status),
(pcmk__str_empty(history.exit_reason)? "" : ": "),
pcmk__s(history.exit_reason, ""), history.id);
resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
"fatal-error", rsc->priv->scheduler);
}
}
done:
pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
rsc->id, pcmk__node_name(node), history.id,
pcmk_role_text(rsc->priv->orig_role),
pcmk_role_text(rsc->priv->next_role));
}
/*!
 * \internal
 * \brief Insert a node attribute and its value into a \c GHashTable
 *
 * \param[in,out] key        Key to insert (either freed or owned by
 *                           \p user_data upon return)
 * \param[in]     value      Value to insert (owned by \p user_data upon return)
 * \param[in]     user_data  \c GHashTable to insert into
 *
 * \return \c TRUE (so that \c g_hash_table_foreach_steal() removes the entry
 *         from the source table)
 */
static gboolean
insert_attr(gpointer key, gpointer value, gpointer user_data)
{
GHashTable *table = user_data;
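    /* When used with g_hash_table_foreach_steal(), the entry is removed from
     * the source table without its destroy functions being called, so the key
     * and value now belong to this table
     */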
g_hash_table_insert(table, key, value);
return TRUE;
}
static void
add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
pcmk_scheduler_t *scheduler)
{
const char *cluster_name = NULL;
const char *dc_id = crm_element_value(scheduler->input, PCMK_XA_DC_UUID);
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.now = scheduler->priv->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
pcmk__insert_dup(node->priv->attrs,
CRM_ATTR_UNAME, node->priv->name);
pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id);
if ((scheduler->dc_node == NULL)
&& pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) {
scheduler->dc_node = node;
pcmk__insert_dup(node->priv->attrs,
CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);
} else if (!pcmk__same_node(node, scheduler->dc_node)) {
pcmk__insert_dup(node->priv->attrs,
CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
}
cluster_name = g_hash_table_lookup(scheduler->priv->options,
PCMK_OPT_CLUSTER_NAME);
if (cluster_name) {
pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME,
cluster_name);
}
if (overwrite) {
/* @TODO Try to reorder some unpacking so that we don't need the
* overwrite argument or to unpack into a temporary table
*/
GHashTable *unpacked = pcmk__strkey_table(free, free);
pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
&rule_data, unpacked, NULL, scheduler);
g_hash_table_foreach_steal(unpacked, insert_attr, node->priv->attrs);
g_hash_table_destroy(unpacked);
} else {
pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
&rule_data, node->priv->attrs, NULL,
scheduler);
}
pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data,
node->priv->utilization, NULL, scheduler);
if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
pcmk__rsc_node_current) == NULL) {
const char *site_name = pcmk__node_attr(node, "site-name", NULL,
pcmk__rsc_node_current);
if (site_name) {
pcmk__insert_dup(node->priv->attrs,
CRM_ATTR_SITE_NAME, site_name);
} else if (cluster_name) {
/* Default to cluster-name if unset */
pcmk__insert_dup(node->priv->attrs,
CRM_ATTR_SITE_NAME, cluster_name);
}
}
}
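/*!
 * \internal
 * \brief Extract a resource's operation history entries from its CIB XML
 *
 * \param[in]     node           Name of node that the history entries are for
 * \param[in]     rsc            ID of resource that the history entries are for
 * \param[in,out] rsc_entry      Resource's \c PCMK__XE_LRM_RESOURCE XML
 * \param[in]     active_filter  If true, return only active operations
 *
 * \return List of XML operation history entries
 * \note The caller is responsible for freeing the result with
 *       \c g_list_free().
 */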
static GList *
extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
{
int counter = -1;
int stop_index = -1;
int start_index = -1;
xmlNode *rsc_op = NULL;
GList *gIter = NULL;
GList *op_list = NULL;
GList *sorted_op_list = NULL;
    /* Extract operations */
for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL);
rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) {
crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
op_list = g_list_prepend(op_list, rsc_op);
}
}
if (op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
    /* Return everything (sorted by call ID) if not filtering to active ops */
if (active_filter == FALSE) {
return sorted_op_list;
}
op_list = NULL;
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
if (start_index < stop_index) {
crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
break;
} else if (counter < start_index) {
crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
continue;
}
op_list = g_list_append(op_list, rsc_op);
}
g_list_free(sorted_op_list);
return op_list;
}
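/*!
 * \internal
 * \brief Find resource operation history entries in the scheduler input
 *
 * \param[in]     rsc            If not NULL, include only this resource's entries
 * \param[in]     node           If not NULL, include only this node's entries
 * \param[in]     active_filter  If true, include only active operations
 * \param[in,out] scheduler      Scheduler data (with input CIB)
 *
 * \return List of matching XML operation history entries
 * \note Entries for offline nodes are skipped unless fencing is enabled.
 */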
GList *
find_operations(const char *rsc, const char *node, gboolean active_filter,
pcmk_scheduler_t *scheduler)
{
GList *output = NULL;
GList *intermediate = NULL;
xmlNode *tmp = NULL;
xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
NULL, NULL);
pcmk_node_t *this_node = NULL;
xmlNode *node_state = NULL;
CRM_CHECK(status != NULL, return NULL);
for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL);
node_state != NULL; node_state = pcmk__xe_next(node_state)) {
if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) {
const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
continue;
}
this_node = pcmk_find_node(scheduler, uname);
if(this_node == NULL) {
CRM_LOG_ASSERT(this_node != NULL);
continue;
} else if (pcmk__is_pacemaker_remote_node(this_node)) {
determine_remote_online_status(scheduler, this_node);
} else {
determine_online_status(node_state, this_node, scheduler);
}
if (this_node->details->online
|| pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
            /* Offline nodes run no resources, unless stonith is enabled, in
             * which case resource start events must be ordered after the
             * stonith
             */
xmlNode *lrm_rsc = NULL;
tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
NULL);
tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
NULL);
for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL);
lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) {
if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
const char *rsc_id = crm_element_value(lrm_rsc,
PCMK_XA_ID);
if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
continue;
}
intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
output = g_list_concat(output, intermediate);
}
}
}
}
}
return output;
}
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
index 0e4b80f20f..433b0cb681 100644
--- a/tools/crm_mon.c
+++ b/tools/crm_mon.c
@@ -1,2188 +1,2188 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include // pcmk__ends_with_ext()
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include // stonith__*
#include "crm_mon.h"
#define SUMMARY "Provides a summary of cluster's current state.\n\n" \
"Outputs varying levels of detail in a number of different formats."
/*
* Definitions indicating which items to print
*/
static uint32_t show;
static uint32_t show_opts = pcmk_show_pending;
/*
* Definitions indicating how to output
*/
static mon_output_format_t output_format = mon_output_unset;
/* other globals */
static GIOChannel *io_channel = NULL;
static GMainLoop *mainloop = NULL;
static guint reconnect_timer = 0;
static mainloop_timer_t *refresh_timer = NULL;
static enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid;
static cib_t *cib = NULL;
static stonith_t *st = NULL;
static xmlNode *current_cib = NULL;
static GError *error = NULL;
static pcmk__common_args_t *args = NULL;
static pcmk__output_t *out = NULL;
static GOptionContext *context = NULL;
static gchar **processed_args = NULL;
static time_t last_refresh = 0;
volatile crm_trigger_t *refresh_trigger = NULL;
static pcmk_scheduler_t *scheduler = NULL;
static enum pcmk__fence_history fence_history = pcmk__fence_history_none;
int interactive_fence_level = 0;
static pcmk__supported_format_t formats[] = {
#if CURSES_ENABLED
CRM_MON_SUPPORTED_FORMAT_CURSES,
#endif
PCMK__SUPPORTED_FORMAT_HTML,
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
"enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_default(pcmk__output_t *out, va_list args)
{
return pcmk_rc_no_output;
}
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
"enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_html(pcmk__output_t *out, va_list args)
{
const char *desc = va_arg(args, const char *);
enum pcmk_pacemakerd_state state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
if (out->dest != stdout) {
out->reset(out);
}
pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN,
"Not connected to CIB");
if (desc != NULL) {
pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, ": ");
pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, desc);
}
if (state != pcmk_pacemakerd_state_invalid) {
const char *state_s = pcmk__pcmkd_state_enum2friendly(state);
pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, " (");
pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, state_s);
pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, ")");
}
out->finish(out, CRM_EX_DISCONNECT, true, NULL);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
"enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_text(pcmk__output_t *out, va_list args)
{
const char *desc = va_arg(args, const char *);
enum pcmk_pacemakerd_state state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
int rc = pcmk_rc_ok;
if (out->dest != stdout) {
out->reset(out);
}
if (state != pcmk_pacemakerd_state_invalid) {
rc = out->info(out, "Not connected to CIB%s%s (%s)",
(desc != NULL)? ": " : "", pcmk__s(desc, ""),
pcmk__pcmkd_state_enum2friendly(state));
} else {
rc = out->info(out, "Not connected to CIB%s%s",
(desc != NULL)? ": " : "", pcmk__s(desc, ""));
}
out->finish(out, CRM_EX_DISCONNECT, true, NULL);
return rc;
}
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
"enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_xml(pcmk__output_t *out, va_list args)
{
const char *desc = va_arg(args, const char *);
enum pcmk_pacemakerd_state state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
const char *state_s = NULL;
if (out->dest != stdout) {
out->reset(out);
}
if (state != pcmk_pacemakerd_state_invalid) {
state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state);
}
pcmk__output_create_xml_node(out, PCMK_XE_CRM_MON_DISCONNECTED,
PCMK_XA_DESCRIPTION, desc,
PCMK_XA_PACEMAKERD_STATE, state_s,
NULL);
out->finish(out, CRM_EX_DISCONNECT, true, NULL);
return pcmk_rc_ok;
}
static pcmk__message_entry_t fmt_functions[] = {
{ "crm-mon-disconnected", "default", crm_mon_disconnected_default },
{ "crm-mon-disconnected", "html", crm_mon_disconnected_html },
{ "crm-mon-disconnected", "text", crm_mon_disconnected_text },
{ "crm-mon-disconnected", "xml", crm_mon_disconnected_xml },
{ NULL, NULL, NULL },
};
#define RECONNECT_MSECS 5000
struct {
guint reconnect_ms;
enum mon_exec_mode exec_mode;
gboolean fence_connect;
gboolean print_pending;
gboolean show_bans;
gboolean watch_fencing;
char *pid_file;
char *external_agent;
char *external_recipient;
char *neg_location_prefix;
char *only_node;
char *only_rsc;
GSList *user_includes_excludes;
GSList *includes_excludes;
} options = {
.reconnect_ms = RECONNECT_MSECS,
.exec_mode = mon_exec_unset,
.fence_connect = TRUE,
};
static crm_exit_t clean_up(crm_exit_t exit_code);
static void crm_diff_update(const char *event, xmlNode * msg);
static void clean_up_on_connection_failure(int rc);
static int mon_refresh_display(gpointer user_data);
static int setup_cib_connection(void);
static int setup_fencer_connection(void);
static int setup_api_connections(void);
static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
static void refresh_after_event(gboolean data_updated, gboolean enforce);
static uint32_t
all_includes(mon_output_format_t fmt) {
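    // Plain/console output never includes the configuration options section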
if ((fmt == mon_output_plain) || (fmt == mon_output_console)) {
return ~pcmk_section_options;
} else {
return pcmk_section_all;
}
}
static uint32_t
default_includes(mon_output_format_t fmt) {
switch (fmt) {
case mon_output_plain:
case mon_output_console:
case mon_output_html:
return pcmk_section_summary
|pcmk_section_nodes
|pcmk_section_resources
|pcmk_section_failures;
case mon_output_xml:
return all_includes(fmt);
default:
return 0;
}
}
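// Valid section names for --include/--exclude, matched case-insensitively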
struct {
const char *name;
uint32_t bit;
} sections[] = {
{ "attributes", pcmk_section_attributes },
{ "bans", pcmk_section_bans },
{ "counts", pcmk_section_counts },
{ "dc", pcmk_section_dc },
{ "failcounts", pcmk_section_failcounts },
{ "failures", pcmk_section_failures },
{ PCMK_VALUE_FENCING, pcmk_section_fencing_all },
{ "fencing-failed", pcmk_section_fence_failed },
{ "fencing-pending", pcmk_section_fence_pending },
{ "fencing-succeeded", pcmk_section_fence_worked },
{ "maint-mode", pcmk_section_maint_mode },
{ "nodes", pcmk_section_nodes },
{ "operations", pcmk_section_operations },
{ "options", pcmk_section_options },
{ "resources", pcmk_section_resources },
{ "stack", pcmk_section_stack },
{ "summary", pcmk_section_summary },
{ "tickets", pcmk_section_tickets },
{ "times", pcmk_section_times },
{ NULL }
};
static uint32_t
find_section_bit(const char *name) {
for (int i = 0; sections[i].name != NULL; i++) {
if (pcmk__str_eq(sections[i].name, name, pcmk__str_casei)) {
return sections[i].bit;
}
}
return 0;
}
static gboolean
apply_exclude(const gchar *excludes, GError **error) {
char **parts = NULL;
gboolean result = TRUE;
parts = g_strsplit(excludes, ",", 0);
for (char **s = parts; *s != NULL; s++) {
uint32_t bit = find_section_bit(*s);
if (pcmk__str_eq(*s, "all", pcmk__str_none)) {
show = 0;
} else if (pcmk__str_eq(*s, PCMK_VALUE_NONE, pcmk__str_none)) {
show = all_includes(output_format);
} else if (bit != 0) {
show &= ~bit;
} else {
g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--exclude options: all, attributes, bans, counts, dc, "
"failcounts, failures, fencing, fencing-failed, "
"fencing-pending, fencing-succeeded, maint-mode, nodes, "
PCMK_VALUE_NONE ", operations, options, resources, "
"stack, summary, tickets, times");
result = FALSE;
break;
}
}
g_strfreev(parts);
return result;
}
static gboolean
apply_include(const gchar *includes, GError **error) {
char **parts = NULL;
gboolean result = TRUE;
parts = g_strsplit(includes, ",", 0);
for (char **s = parts; *s != NULL; s++) {
uint32_t bit = find_section_bit(*s);
if (pcmk__str_eq(*s, "all", pcmk__str_none)) {
show = all_includes(output_format);
} else if (pcmk__starts_with(*s, "bans")) {
show |= pcmk_section_bans;
if (options.neg_location_prefix != NULL) {
free(options.neg_location_prefix);
options.neg_location_prefix = NULL;
}
if (strlen(*s) > 4 && (*s)[4] == ':') {
options.neg_location_prefix = strdup(*s+5);
}
} else if (pcmk__str_any_of(*s, PCMK_VALUE_DEFAULT, "defaults", NULL)) {
show |= default_includes(output_format);
} else if (pcmk__str_eq(*s, PCMK_VALUE_NONE, pcmk__str_none)) {
show = 0;
} else if (bit != 0) {
show |= bit;
} else {
g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--include options: all, attributes, bans[:PREFIX], counts, dc, "
PCMK_VALUE_DEFAULT ", failcounts, failures, fencing, "
"fencing-failed, fencing-pending, fencing-succeeded, "
"maint-mode, nodes, " PCMK_VALUE_NONE ", operations, "
"options, resources, stack, summary, tickets, times");
result = FALSE;
break;
}
}
g_strfreev(parts);
return result;
}
static gboolean
apply_include_exclude(GSList *lst, GError **error) {
gboolean rc = TRUE;
GSList *node = lst;
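    /* Entries are applied in list order, so later --include/--exclude options
     * override earlier ones (for example, "--include=none --include=nodes"
     * first clears the section set, then enables only the node section)
     */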
while (node != NULL) {
char *s = node->data;
if (pcmk__starts_with(s, "--include=")) {
rc = apply_include(s+10, error);
} else if (pcmk__starts_with(s, "-I=")) {
rc = apply_include(s+3, error);
} else if (pcmk__starts_with(s, "--exclude=")) {
rc = apply_exclude(s+10, error);
} else if (pcmk__starts_with(s, "-U=")) {
rc = apply_exclude(s+3, error);
}
if (rc != TRUE) {
break;
}
node = node->next;
}
return rc;
}
static gboolean
user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
char *s = crm_strdup_printf("%s=%s", option_name, optarg);
options.user_includes_excludes = g_slist_append(options.user_includes_excludes, s);
return TRUE;
}
static gboolean
include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
char *s = crm_strdup_printf("%s=%s", option_name, optarg);
options.includes_excludes = g_slist_append(options.includes_excludes, s);
return TRUE;
}
static gboolean
as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
pcmk__str_update(&args->output_ty, "xml");
output_format = mon_output_legacy_xml;
return TRUE;
}
static gboolean
fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
if (optarg == NULL) {
interactive_fence_level = 2;
} else {
pcmk__scan_min_int(optarg, &interactive_fence_level, 0);
}
switch (interactive_fence_level) {
case 3:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
return include_exclude_cb("--include", PCMK_VALUE_FENCING, data,
err);
case 2:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
return include_exclude_cb("--include", PCMK_VALUE_FENCING, data,
err);
case 1:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
return include_exclude_cb("--include", "fencing-failed,fencing-pending", data, err);
case 0:
options.fence_connect = FALSE;
fence_history = pcmk__fence_history_none;
return include_exclude_cb("--exclude", PCMK_VALUE_FENCING, data,
err);
default:
g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3");
return FALSE;
}
}
static gboolean
group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_rscs_by_node;
return TRUE;
}
static gboolean
hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--exclude", "summary", data, err);
}
static gboolean
inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_inactive_rscs;
return TRUE;
}
static gboolean
print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_brief;
return TRUE;
}
static gboolean
print_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_details;
return TRUE;
}
static gboolean
print_description_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_description;
return TRUE;
}
static gboolean
print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_timing;
return user_include_exclude_cb("--include", "operations", data, err);
}
static gboolean
reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
int rc = crm_get_msec(optarg);
if (rc == -1) {
g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg);
return FALSE;
} else {
pcmk_parse_interval_spec(optarg, &options.reconnect_ms);
if (options.exec_mode != mon_exec_daemonized) {
// Reconnect interval applies to daemonized too, so don't override
options.exec_mode = mon_exec_update;
}
}
return TRUE;
}
/*!
* \internal
* \brief Enable one-shot mode
*
* \param[in] option_name Name of option being parsed (ignored)
* \param[in] optarg Value to be parsed (ignored)
* \param[in] data User data (ignored)
* \param[out] err Where to store error (ignored)
*/
static gboolean
one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data,
GError **err)
{
options.exec_mode = mon_exec_one_shot;
return TRUE;
}
/*!
* \internal
* \brief Enable daemonized mode
*
* \param[in] option_name Name of option being parsed (ignored)
* \param[in] optarg Value to be parsed (ignored)
* \param[in] data User data (ignored)
* \param[out] err Where to store error (ignored)
*/
static gboolean
daemonize_cb(const gchar *option_name, const gchar *optarg, gpointer data,
GError **err)
{
options.exec_mode = mon_exec_daemonized;
return TRUE;
}
static gboolean
show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "attributes", data, err);
}
static gboolean
show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
if (optarg != NULL) {
char *s = crm_strdup_printf("bans:%s", optarg);
gboolean rc = user_include_exclude_cb("--include", s, data, err);
free(s);
return rc;
} else {
return user_include_exclude_cb("--include", "bans", data, err);
}
}
static gboolean
show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "failcounts", data, err);
}
static gboolean
show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "failcounts,operations", data, err);
}
static gboolean
show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "tickets", data, err);
}
static gboolean
use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
setenv("CIB_file", optarg, 1);
options.exec_mode = mon_exec_one_shot;
return TRUE;
}
#define INDENT " "
/* *INDENT-OFF* */
static GOptionEntry addl_entries[] = {
{ "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb,
"Update frequency (default is 5 seconds)",
"TIMESPEC" },
{ "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
one_shot_cb,
"Display the cluster status once and exit",
NULL },
{ "daemonize", 'd', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
daemonize_cb,
"Run in the background as a daemon.\n"
INDENT "Requires at least one of --output-to and --external-agent.",
NULL },
{ "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file,
"(Advanced) Daemon pid file location",
"FILE" },
{ "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent,
"A program to run when resource operations take place",
"FILE" },
{ "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient,
"A recipient for your program (assuming you want the program to send something to someone).",
"RCPT" },
{ "watch-fencing", 'W', 0, G_OPTION_ARG_NONE, &options.watch_fencing,
"Listen for fencing events. For use with --external-agent.",
NULL },
{ "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb,
NULL,
NULL },
{ NULL }
};
static GOptionEntry display_entries[] = {
{ "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb,
"A list of sections to include in the output.\n"
INDENT "See `Output Control` help for more information.",
"SECTION(s)" },
{ "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb,
"A list of sections to exclude from the output.\n"
INDENT "See `Output Control` help for more information.",
"SECTION(s)" },
{ "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node,
"When displaying information about nodes, show only what's related to the given\n"
INDENT "node, or to all nodes tagged with the given tag",
"NODE" },
{ "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc,
"When displaying information about resources, show only what's related to the given\n"
INDENT "resource, or to all resources tagged with the given tag",
"RSC" },
{ "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb,
"Group resources by node",
NULL },
{ "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb,
"Display inactive resources",
NULL },
{ "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb,
"Display resource fail counts",
NULL },
{ "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb,
"Display resource operation history",
NULL },
{ "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb,
"Display resource operation history with timing details",
NULL },
{ "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb,
"Display cluster tickets",
NULL },
{ "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb,
"Show fence history:\n"
INDENT "0=off, 1=failures and pending (default without option),\n"
INDENT "2=add successes (default without value for option),\n"
INDENT "3=show full history without reduction to most recent of each flavor",
"LEVEL" },
{ "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb,
"Display negative location constraints [optionally filtered by id prefix]",
NULL },
{ "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb,
"Display node attributes",
NULL },
{ "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb,
"Hide all headers",
NULL },
{ "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_detail_cb,
"Show more details (node IDs, individual clone instances)",
NULL },
{ "show-description", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_description_cb,
"Show resource descriptions",
NULL },
{ "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb,
"Brief output",
NULL },
{ "pending", 'j', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.print_pending,
"Display pending state if '" PCMK_META_RECORD_PENDING "' is enabled",
NULL },
{ NULL }
};
static GOptionEntry deprecated_entries[] = {
/* @COMPAT resource-agents <4.15.0 uses --as-xml, so removing this option
* must wait until we no longer support building on any platforms that ship
* the older agents.
*/
{ "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb,
"Write cluster status as XML to stdout. This will enable one-shot mode.\n"
INDENT "Use --output-as=xml instead.",
NULL },
{ NULL }
};
/* *INDENT-ON* */
/* Reconnect to the CIB and fencing agent after reconnect_ms has passed. This sounds
* like it would be more broadly useful, but only ever happens after a disconnect via
* mon_cib_connection_destroy.
*/
static gboolean
reconnect_after_timeout(gpointer data)
{
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
out->transient(out, "Reconnecting...");
if (setup_api_connections() == pcmk_rc_ok) {
// Trigger redrawing the screen (needs reconnect_timer == 0)
reconnect_timer = 0;
refresh_after_event(FALSE, TRUE);
return G_SOURCE_REMOVE;
}
out->message(out, "crm-mon-disconnected",
"Latest connection attempt failed", pcmkd_state);
reconnect_timer = g_timeout_add(options.reconnect_ms,
reconnect_after_timeout, NULL);
return G_SOURCE_REMOVE;
}
/* Called from various places when we are disconnected from the CIB or from the
* fencing agent. If the CIB connection is still valid, this function will also
* attempt to sign off and reconnect.
*/
static void
mon_cib_connection_destroy(gpointer user_data)
{
const char *msg = "Connection to the cluster lost";
pcmkd_state = pcmk_pacemakerd_state_invalid;
/* No crm-mon-disconnected message for console; a working implementation
* is not currently worth the effort
*/
out->transient(out, "%s", msg);
out->message(out, "crm-mon-disconnected", msg, pcmkd_state);
if (refresh_timer != NULL) {
/* we'll trigger a refresh after reconnect */
mainloop_timer_stop(refresh_timer);
}
if (reconnect_timer) {
/* we'll trigger a new reconnect-timeout at the end */
g_source_remove(reconnect_timer);
reconnect_timer = 0;
}
/* the client API won't properly reconnect notifications if they are still
* in the table - so remove them
*/
if (st != NULL) {
if (st->state != stonith_disconnected) {
st->cmds->disconnect(st);
}
st->cmds->remove_notification(st, NULL);
}
if (cib) {
cib->cmds->signoff(cib);
reconnect_timer = g_timeout_add(options.reconnect_ms,
reconnect_after_timeout, NULL);
}
}
/* Signal handler installed into the mainloop for normal program shutdown */
static void
mon_shutdown(int nsig)
{
clean_up(CRM_EX_OK);
}
#if CURSES_ENABLED
static volatile sighandler_t ncurses_winch_handler;
/* Signal handler installed the regular way (not into the main loop) for when
* the screen is resized. Commonly, this happens when running in an xterm and
* the user changes its size.
*/
static void
mon_winresize(int nsig)
{
static int not_done;
int lines = 0, cols = 0;
if (!not_done++) {
if (ncurses_winch_handler)
/* the original ncurses WINCH signal handler does the
* magic of retrieving the new window size;
* otherwise, we'd have to use ioctl or tgetent */
(*ncurses_winch_handler) (SIGWINCH);
getmaxyx(stdscr, lines, cols);
resizeterm(lines, cols);
/* Alert the mainloop code we'd like the refresh_trigger to run next
* time the mainloop gets around to checking.
*/
mainloop_set_trigger((crm_trigger_t *) refresh_trigger);
}
not_done--;
}
#endif
static int
setup_fencer_connection(void)
{
int rc = pcmk_ok;
if (options.fence_connect && st == NULL) {
st = stonith_api_new();
}
if (!options.fence_connect || st == NULL || st->state != stonith_disconnected) {
return rc;
}
rc = st->cmds->connect(st, crm_system_name, NULL);
if (rc == pcmk_ok) {
crm_trace("Setting up stonith callbacks");
if (options.watch_fencing) {
st->cmds->register_notification(st,
PCMK__VALUE_ST_NOTIFY_DISCONNECT,
mon_st_callback_event);
st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE,
mon_st_callback_event);
} else {
st->cmds->register_notification(st,
PCMK__VALUE_ST_NOTIFY_DISCONNECT,
mon_st_callback_display);
st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY,
mon_st_callback_display);
}
} else {
stonith_api_delete(st);
st = NULL;
}
return rc;
}
static int
setup_cib_connection(void)
{
int rc = pcmk_rc_ok;
CRM_CHECK(cib != NULL, return EINVAL);
if (cib->state != cib_disconnected) {
// Already connected with notifications registered for CIB updates
return rc;
}
    rc = cib__signon_query(out, &cib, &current_cib);
if (rc == pcmk_rc_ok) {
rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib,
mon_cib_connection_destroy));
if (rc == EPROTONOSUPPORT) {
out->err(out,
"CIB client does not support connection loss "
"notifications; crm_mon will be unable to reconnect after "
"connection loss");
rc = pcmk_rc_ok;
}
if (rc == pcmk_rc_ok) {
cib->cmds->del_notify_callback(cib, PCMK__VALUE_CIB_DIFF_NOTIFY,
crm_diff_update);
rc = cib->cmds->add_notify_callback(cib, PCMK__VALUE_CIB_DIFF_NOTIFY,
crm_diff_update);
rc = pcmk_legacy2rc(rc);
}
if (rc != pcmk_rc_ok) {
if (rc == EPROTONOSUPPORT) {
out->err(out,
"CIB client does not support CIB diff "
"notifications");
} else {
out->err(out, "CIB diff notification setup failed");
}
out->err(out, "Cannot monitor CIB changes; exiting");
cib__clean_up_connection(&cib);
stonith_api_delete(st);
st = NULL;
}
}
return rc;
}
/* This is used to set up the fencing options after the interactive UI has been
 * started. fence_history_cb can't be used because it builds up a list of
 * includes/excludes that would then have to be processed with
 * apply_include_exclude, which could affect other things.
 */
static void
set_fencing_options(int level)
{
switch (level) {
case 3:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
show |= pcmk_section_fencing_all;
break;
case 2:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
show |= pcmk_section_fencing_all;
break;
case 1:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
show |= pcmk_section_fence_failed | pcmk_section_fence_pending;
break;
default:
interactive_fence_level = 0;
options.fence_connect = FALSE;
fence_history = pcmk__fence_history_none;
show &= ~pcmk_section_fencing_all;
break;
}
}
static int
setup_api_connections(void)
{
int rc = pcmk_rc_ok;
CRM_CHECK(cib != NULL, return EINVAL);
if (cib->state != cib_disconnected) {
return rc;
}
if (cib->variant == cib_native) {
rc = pcmk__pacemakerd_status(out, crm_system_name,
options.reconnect_ms / 2, false,
&pcmkd_state);
if (rc != pcmk_rc_ok) {
return rc;
}
switch (pcmkd_state) {
case pcmk_pacemakerd_state_running:
case pcmk_pacemakerd_state_remote:
case pcmk_pacemakerd_state_shutting_down:
/* Fencer and CIB may still be available while shutting down or
* running on a Pacemaker Remote node
*/
break;
default:
// Fencer and CIB are definitely unavailable
return ENOTCONN;
}
setup_fencer_connection();
}
rc = setup_cib_connection();
return rc;
}
#if CURSES_ENABLED
static const char *
get_option_desc(char c)
{
const char *desc = "No help available";
    for (GOptionEntry *entry = display_entries; entry->long_name != NULL; entry++) {
if (entry->short_name == c) {
desc = entry->description;
break;
}
}
return desc;
}
#define print_option_help(out, option, condition) \
curses_formatted_printf(out, "%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option));
/* This function is called from the main loop when there is something to be read
 * on stdin, like an interactive user's keystroke. All it does is read the
 * keystroke, set flags (or show the page listing which keystrokes are valid),
 * and redraw the screen. It does not do anything with connections to the CIB
 * or fencing agent; that happens in mon_refresh_display.
 */
static gboolean
detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
{
int c;
gboolean config_mode = FALSE;
gboolean rc = G_SOURCE_CONTINUE;
/* If the attached pty device (pseudo-terminal) has been closed/deleted,
* the condition (G_IO_IN | G_IO_ERR | G_IO_HUP) occurs.
* Exit with an error, otherwise the process would persist in the
* background and significantly raise the CPU usage.
*/
if ((condition & G_IO_ERR) && (condition & G_IO_HUP)) {
rc = G_SOURCE_REMOVE;
clean_up(CRM_EX_IOERR);
}
/* The connection/fd has been closed. Refresh the screen and remove this
* event source hence ignore stdin.
*/
if (condition & (G_IO_HUP | G_IO_NVAL)) {
rc = G_SOURCE_REMOVE;
}
if ((condition & G_IO_IN) == 0) {
return rc;
}
while (1) {
/* Get user input */
c = getchar();
switch (c) {
case 'm':
interactive_fence_level++;
if (interactive_fence_level > 3) {
interactive_fence_level = 0;
}
set_fencing_options(interactive_fence_level);
break;
case 'c':
show ^= pcmk_section_tickets;
break;
case 'f':
show ^= pcmk_section_failcounts;
break;
case 'n':
show_opts ^= pcmk_show_rscs_by_node;
break;
case 'o':
show ^= pcmk_section_operations;
if (!pcmk_is_set(show, pcmk_section_operations)) {
show_opts &= ~pcmk_show_timing;
}
break;
case 'r':
show_opts ^= pcmk_show_inactive_rscs;
break;
case 'R':
show_opts ^= pcmk_show_details;
break;
case 't':
show_opts ^= pcmk_show_timing;
if (pcmk_is_set(show_opts, pcmk_show_timing)) {
show |= pcmk_section_operations;
}
break;
case 'A':
show ^= pcmk_section_attributes;
break;
case 'L':
show ^= pcmk_section_bans;
break;
case 'D':
/* If any header is shown, clear them all, otherwise set them all */
if (pcmk_any_flags_set(show, pcmk_section_summary)) {
show &= ~pcmk_section_summary;
} else {
show |= pcmk_section_summary;
}
/* Regardless, we don't show options in console mode. */
show &= ~pcmk_section_options;
break;
case 'b':
show_opts ^= pcmk_show_brief;
break;
case 'j':
show_opts ^= pcmk_show_pending;
break;
case '?':
config_mode = TRUE;
break;
default:
/* All other keys just redraw the screen. */
goto refresh;
}
if (!config_mode)
goto refresh;
clear();
refresh();
curses_formatted_printf(out, "%s", "Display option change mode\n");
print_option_help(out, 'c', pcmk_is_set(show, pcmk_section_tickets));
print_option_help(out, 'f', pcmk_is_set(show, pcmk_section_failcounts));
print_option_help(out, 'n', pcmk_is_set(show_opts, pcmk_show_rscs_by_node));
print_option_help(out, 'o', pcmk_is_set(show, pcmk_section_operations));
print_option_help(out, 'r', pcmk_is_set(show_opts, pcmk_show_inactive_rscs));
print_option_help(out, 't', pcmk_is_set(show_opts, pcmk_show_timing));
print_option_help(out, 'A', pcmk_is_set(show, pcmk_section_attributes));
print_option_help(out, 'L', pcmk_is_set(show, pcmk_section_bans));
print_option_help(out, 'D', !pcmk_is_set(show, pcmk_section_summary));
print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details));
print_option_help(out, 'b', pcmk_is_set(show_opts, pcmk_show_brief));
print_option_help(out, 'j', pcmk_is_set(show_opts, pcmk_show_pending));
curses_formatted_printf(out, "%d m: \t%s\n", interactive_fence_level, get_option_desc('m'));
curses_formatted_printf(out, "%s", "\nToggle fields via field letter, type any other key to return\n");
}
refresh:
refresh_after_event(FALSE, TRUE);
return rc;
}
#endif // CURSES_ENABLED
// Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag
static void
avoid_zombies(void)
{
struct sigaction sa;
memset(&sa, 0, sizeof(struct sigaction));
if (sigemptyset(&sa.sa_mask) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno));
return;
}
sa.sa_handler = SIG_IGN;
sa.sa_flags = SA_RESTART|SA_NOCLDWAIT;
if (sigaction(SIGCHLD, &sa, NULL) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno));
}
}
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
GOptionContext *context = NULL;
GOptionEntry extra_prog_entries[] = {
{ "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet),
"Be less descriptive in output.",
NULL },
{ NULL }
};
#if CURSES_ENABLED
const char *fmts = "console (default), html, text, xml, none";
#else
const char *fmts = "text (default), html, xml, none";
#endif // CURSES_ENABLED
const char *desc = NULL;
desc = "Notes:\n\n"
"Time Specification:\n\n"
"The TIMESPEC in any command line option can be specified in many\n"
"different formats. It can be an integer number of seconds, a\n"
"number plus units (us/usec/ms/msec/s/sec/m/min/h/hr), or an ISO\n"
"8601 period specification.\n\n"
"Output Control:\n\n"
"By default, a particular set of sections are written to the\n"
"output destination. The default varies based on the output\n"
"format: XML includes all sections by default, while other output\n"
"formats include less. This set can be modified with the --include\n"
"and --exclude command line options. Each option may be passed\n"
"multiple times, and each can specify a comma-separated list of\n"
"sections. The options are applied to the default set, in order\n"
"from left to right as they are passed on the command line. For a\n"
"list of valid sections, pass --include=list or --exclude=list.\n\n"
"Interactive Use:\n\n"
#if CURSES_ENABLED
"When run interactively, crm_mon can be told to hide and show\n"
"various sections of output. To see a help screen explaining the\n"
"options, press '?'. Any key stroke aside from those listed will\n"
"cause the screen to refresh.\n\n"
#else
"The local installation of Pacemaker was built without support for\n"
"interactive (console) mode. A curses library must be available at\n"
"build time to support interactive mode.\n\n"
#endif // CURSES_ENABLED
"Examples:\n\n"
#if CURSES_ENABLED
"Display the cluster status on the console with updates as they\n"
"occur:\n\n"
"\tcrm_mon\n\n"
#endif // CURSES_ENABLED
"Display the cluster status once and exit:\n\n"
"\tcrm_mon -1\n\n"
"Display the cluster status, group resources by node, and include\n"
"inactive resources in the list:\n\n"
"\tcrm_mon --group-by-node --inactive\n\n"
"Start crm_mon as a background daemon and have it write the\n"
"cluster status to an HTML file:\n\n"
"\tcrm_mon --daemonize --output-as html "
"--output-to /path/to/docroot/filename.html\n\n"
"Display the cluster status as XML:\n\n"
"\tcrm_mon --output-as xml\n\n";
context = pcmk__build_arg_context(args, fmts, group, NULL);
pcmk__add_main_args(context, extra_prog_entries);
g_option_context_set_description(context, desc);
pcmk__add_arg_group(context, "display", "Display Options:",
"Show display options", display_entries);
pcmk__add_arg_group(context, "additional", "Additional Options:",
"Show additional options", addl_entries);
pcmk__add_arg_group(context, "deprecated", "Deprecated Options:",
"Show deprecated options", deprecated_entries);
return context;
}
/*!
* \internal
* \brief Set output format based on \c --output-as arguments and mode arguments
*
* When the deprecated \c --as-xml argument is parsed, a callback function sets
* \c output_format. Otherwise, this function does the same based on the current
* \c --output-as arguments and the \c --one-shot and \c --daemonize arguments.
*
* \param[in,out] args Command line arguments
*/
static void
reconcile_output_format(pcmk__common_args_t *args)
{
if (output_format != mon_output_unset) {
/* The deprecated --as-xml argument was used, and we're finished. Note
* that this means the deprecated argument takes precedence.
*/
return;
}
if (pcmk__str_eq(args->output_ty, PCMK_VALUE_NONE, pcmk__str_none)) {
output_format = mon_output_none;
} else if (pcmk__str_eq(args->output_ty, "html", pcmk__str_none)) {
output_format = mon_output_html;
umask(S_IWGRP | S_IWOTH); // World-readable HTML
} else if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) {
output_format = mon_output_xml;
#if CURSES_ENABLED
} else if (pcmk__str_eq(args->output_ty, "console",
pcmk__str_null_matches)) {
/* Console is the default format if no conflicting options are given.
*
* Use text output instead if one of the following conditions is met:
* * We've requested daemonized or one-shot mode (console output is
* incompatible with modes other than mon_exec_update)
* * We requested the version, which is effectively one-shot
* * We specified a non-stdout output destination (console mode is
* compatible only with stdout)
*/
if ((options.exec_mode == mon_exec_daemonized)
|| (options.exec_mode == mon_exec_one_shot)
|| args->version
|| !pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) {
pcmk__str_update(&args->output_ty, "text");
output_format = mon_output_plain;
} else {
pcmk__str_update(&args->output_ty, "console");
output_format = mon_output_console;
crm_enable_stderr(FALSE);
}
#endif // CURSES_ENABLED
} else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches)) {
/* Text output was explicitly requested, or it's the default because
* curses is not enabled
*/
pcmk__str_update(&args->output_ty, "text");
output_format = mon_output_plain;
}
// Otherwise, invalid format. Let pcmk__output_new() throw an error.
}
/*!
* \internal
* \brief Set execution mode to the output format's default if appropriate
*
* \param[in,out] args Command line arguments
*/
static void
set_default_exec_mode(const pcmk__common_args_t *args)
{
if (output_format == mon_output_console) {
/* Update is the only valid mode for console, but set here instead of
* reconcile_output_format() for isolation and consistency
*/
options.exec_mode = mon_exec_update;
} else if (options.exec_mode == mon_exec_unset) {
// Default to one-shot mode for all other formats
options.exec_mode = mon_exec_one_shot;
} else if ((options.exec_mode == mon_exec_update)
&& pcmk__str_eq(args->output_dest, "-",
pcmk__str_null_matches)) {
// If not using console format, update mode cannot be used with stdout
options.exec_mode = mon_exec_one_shot;
}
}
static void
clean_up_on_connection_failure(int rc)
{
if (rc == ENOTCONN) {
if (pcmkd_state == pcmk_pacemakerd_state_remote) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: remote-node not connected to cluster");
} else {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");
}
} else {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc));
}
clean_up(pcmk_rc2exitc(rc));
}
static void
one_shot(void)
{
int rc = pcmk__status(out, cib, fence_history, show, show_opts,
options.only_node, options.only_rsc,
options.neg_location_prefix, 0);
if (rc == pcmk_rc_ok) {
clean_up(pcmk_rc2exitc(rc));
} else {
clean_up_on_connection_failure(rc);
}
}
static void
exit_on_invalid_cib(void)
{
if (cib != NULL) {
return;
}
// Shouldn't really be possible
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source");
clean_up(CRM_EX_ERROR);
}
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
GOptionGroup *output_group = NULL;
args = pcmk__new_common_args(SUMMARY);
context = build_arg_context(args, &output_group);
pcmk__register_formats(output_group, formats);
options.pid_file = strdup("/tmp/ClusterMon.pid");
pcmk__cli_init_logging("crm_mon", 0);
// Avoid needing to wait for subprocesses forked for -E/--external-agent
avoid_zombies();
processed_args = pcmk__cmdline_preproc(argv, "eimpxEILU");
fence_history_cb("--fence-history", "1", NULL, NULL);
/* Set an HTML title regardless of what format we will eventually use.
* Doing this here means the user can give their own title on the command
* line.
*/
if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"",
g_get_prgname())) {
return clean_up(CRM_EX_USAGE);
}
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
return clean_up(CRM_EX_USAGE);
}
for (int i = 0; i < args->verbosity; i++) {
crm_bump_log_level(argc, argv);
}
if (!args->version) {
if (args->quiet) {
include_exclude_cb("--exclude", "times", NULL, NULL);
}
if (options.watch_fencing) {
fence_history_cb("--fence-history", "0", NULL, NULL);
options.fence_connect = TRUE;
}
        /* Create the CIB object early, so that further decisions can be based
         * on the CIB source
         */
cib = cib_new();
exit_on_invalid_cib();
switch (cib->variant) {
case cib_native:
// Everything (fencer, CIB, pcmkd status) should be available
break;
case cib_file:
// Live fence history is not meaningful
fence_history_cb("--fence-history", "0", NULL, NULL);
/* Notifications are unsupported; nothing to monitor
* @COMPAT: Let setup_cib_connection() handle this by exiting?
*/
options.exec_mode = mon_exec_one_shot;
break;
case cib_remote:
// We won't receive any fencing updates
fence_history_cb("--fence-history", "0", NULL, NULL);
break;
default:
/* something is odd */
exit_on_invalid_cib();
break;
}
if ((options.exec_mode == mon_exec_daemonized)
&& !options.external_agent
&& pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--daemonize requires at least one of --output-to "
"(with value not set to '-') and --external-agent");
return clean_up(CRM_EX_USAGE);
}
}
reconcile_output_format(args);
set_default_exec_mode(args);
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
return clean_up(CRM_EX_ERROR);
}
if (output_format == mon_output_legacy_xml) {
output_format = mon_output_xml;
pcmk__output_set_legacy_xml(out);
}
/* output_format MUST NOT BE CHANGED AFTER THIS POINT. */
/* If we had a valid format for pcmk__output_new(), output_format should be
* set by now.
*/
pcmk__assert(output_format != mon_output_unset);
if (output_format == mon_output_plain) {
pcmk__output_text_set_fancy(out, true);
}
if (options.exec_mode == mon_exec_daemonized) {
if (!options.external_agent && (output_format == mon_output_none)) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--daemonize requires --external-agent if used with "
"--output-as=none");
return clean_up(CRM_EX_USAGE);
}
crm_enable_stderr(FALSE);
cib_delete(cib);
cib = NULL;
pcmk__daemonize(crm_system_name, options.pid_file);
cib = cib_new();
exit_on_invalid_cib();
}
show = default_includes(output_format);
/* Apply --include/--exclude flags we used internally. There's no error reporting
* here because this would be a programming error.
*/
apply_include_exclude(options.includes_excludes, &error);
/* And now apply any --include/--exclude flags the user gave on the command line.
* These are done in a separate pass from the internal ones because we want to
* make sure whatever the user specifies overrides whatever we do.
*/
if (!apply_include_exclude(options.user_includes_excludes, &error)) {
return clean_up(CRM_EX_USAGE);
}
/* Sync up the initial value of interactive_fence_level with whatever was set with
* --include/--exclude= options.
*/
if (pcmk_all_flags_set(show, pcmk_section_fencing_all)) {
interactive_fence_level = 3;
} else if (pcmk_is_set(show, pcmk_section_fence_worked)) {
interactive_fence_level = 2;
} else if (pcmk_any_flags_set(show, pcmk_section_fence_failed | pcmk_section_fence_pending)) {
interactive_fence_level = 1;
} else {
interactive_fence_level = 0;
}
pcmk__register_lib_messages(out);
crm_mon_register_messages(out);
pe__register_messages(out);
stonith__register_messages(out);
// Messages internal to this file, nothing curses-specific
pcmk__register_messages(out, fmt_functions);
if (args->version) {
out->version(out, false);
return clean_up(CRM_EX_OK);
}
if (output_format == mon_output_xml) {
show_opts |= pcmk_show_inactive_rscs | pcmk_show_timing;
}
if ((output_format == mon_output_html) && (out->dest != stdout)) {
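        /* Add an HTTP meta refresh header so browsers reload the generated
         * page at the reconnect interval (content is in seconds)
         */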
char *content = pcmk__itoa(options.reconnect_ms / 1000);
pcmk__html_add_header(PCMK__XE_META,
PCMK__XA_HTTP_EQUIV, PCMK__VALUE_REFRESH,
PCMK__XA_CONTENT, content,
NULL);
free(content);
}
crm_info("Starting %s", crm_system_name);
cib__set_output(cib, out);
if (options.exec_mode == mon_exec_one_shot) {
one_shot();
}
scheduler = pe_new_working_set();
pcmk__mem_assert(scheduler);
scheduler->priv->out = out;
if ((cib->variant == cib_native) && pcmk_is_set(show, pcmk_section_times)) {
// Currently used only in the times section
pcmk__query_node_name(out, 0, &(scheduler->priv->local_node_name), 0);
}
out->message(out, "crm-mon-disconnected",
"Waiting for initial connection", pcmkd_state);
do {
out->transient(out, "Connecting to cluster...");
rc = setup_api_connections();
if (rc != pcmk_rc_ok) {
if ((rc == ENOTCONN) || (rc == ECONNREFUSED)) {
out->transient(out, "Connection failed. Retrying in %ums...",
options.reconnect_ms);
}
// Give some time to view all output even if we won't retry
pcmk__sleep_ms(options.reconnect_ms);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
}
} while ((rc == ENOTCONN) || (rc == ECONNREFUSED));
if (rc != pcmk_rc_ok) {
clean_up_on_connection_failure(rc);
}
set_fencing_options(interactive_fence_level);
mon_refresh_display(NULL);
mainloop = g_main_loop_new(NULL, FALSE);
mainloop_add_signal(SIGTERM, mon_shutdown);
mainloop_add_signal(SIGINT, mon_shutdown);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize);
if (ncurses_winch_handler == SIG_DFL ||
ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR)
ncurses_winch_handler = NULL;
io_channel = g_io_channel_unix_new(STDIN_FILENO);
g_io_add_watch(io_channel, (G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL),
detect_user_input, NULL);
}
#endif
/* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display. In
* this file, that is anywhere mainloop_set_trigger is called.
*/
refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL);
g_main_loop_run(mainloop);
g_main_loop_unref(mainloop);
crm_info("Exiting %s", crm_system_name);
return clean_up(CRM_EX_OK);
}
static int
send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc,
int status, const char *desc)
{
pid_t pid;
    /* setenv() needs strings; these are ints */
char *rc_s = pcmk__itoa(rc);
char *status_s = pcmk__itoa(status);
char *target_rc_s = pcmk__itoa(target_rc);
crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent);
if(rsc) {
setenv("CRM_notify_rsc", rsc, 1);
}
if (options.external_recipient) {
setenv("CRM_notify_recipient", options.external_recipient, 1);
}
setenv("CRM_notify_node", node, 1);
setenv("CRM_notify_task", task, 1);
setenv("CRM_notify_desc", desc, 1);
setenv("CRM_notify_rc", rc_s, 1);
setenv("CRM_notify_target_rc", target_rc_s, 1);
setenv("CRM_notify_status", status_s, 1);
pid = fork();
if (pid == -1) {
out->err(out, "notification fork() failed: %s", strerror(errno));
}
if (pid == 0) {
        /* crm_debug("notification: I am the child. Executing the notification program."); */
execl(options.external_agent, options.external_agent, NULL);
crm_exit(CRM_EX_ERROR);
}
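    /* The parent doesn't wait for the child; forked agents are reaped
     * automatically via the SIGCHLD handling set up in avoid_zombies()
     */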
crm_trace("Finished running custom notification program '%s'.", options.external_agent);
free(target_rc_s);
free(status_s);
free(rc_s);
return 0;
}
static int
handle_rsc_op(xmlNode *xml, void *userdata)
{
const char *node_id = (const char *) userdata;
int rc = -1;
int status = -1;
int target_rc = -1;
gboolean notify = TRUE;
char *rsc = NULL;
char *task = NULL;
const char *desc = NULL;
const char *magic = NULL;
const char *id = NULL;
const char *node = NULL;
    xmlNode *n = xml;
    xmlNode *rsc_op = xml;
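    /* If this is not an operation history entry itself, recurse into its
     * children looking for entries
     */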
if(strcmp((const char*)xml->name, PCMK__XE_LRM_RSC_OP) != 0) {
pcmk__xe_foreach_child(xml, NULL, handle_rsc_op, (void *) node_id);
return pcmk_rc_ok;
}
id = pcmk__xe_history_key(rsc_op);
magic = crm_element_value(rsc_op, PCMK__XA_TRANSITION_MAGIC);
if (magic == NULL) {
/* non-change */
return pcmk_rc_ok;
}
if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc,
&target_rc)) {
crm_err("Invalid event %s detected for %s", magic, id);
return pcmk_rc_ok;
}
if (parse_op_key(id, &rsc, &task, NULL) == FALSE) {
crm_err("Invalid event detected for %s", id);
goto bail;
}
node = crm_element_value(rsc_op, PCMK__META_ON_NODE);
while ((n != NULL) && !pcmk__xe_is(n, PCMK__XE_NODE_STATE)) {
n = n->parent;
}
if(node == NULL && n) {
node = crm_element_value(n, PCMK_XA_UNAME);
}
if (node == NULL && n) {
node = pcmk__xe_id(n);
}
if (node == NULL) {
node = node_id;
}
if (node == NULL) {
crm_err("No node detected for event %s (%s)", magic, id);
goto bail;
}
/* look up where we expected it to be? */
desc = pcmk_rc_str(pcmk_rc_ok);
if ((status == PCMK_EXEC_DONE) && (target_rc == rc)) {
crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc);
if (rc == PCMK_OCF_NOT_RUNNING) {
notify = FALSE;
}
} else if (status == PCMK_EXEC_DONE) {
- desc = services_ocf_exitcode_str(rc);
+ desc = crm_exit_str(rc);
crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
} else {
desc = pcmk_exec_status_str(status);
crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
}
if (notify && options.external_agent) {
send_custom_trap(node, rsc, task, target_rc, rc, status, desc);
}
bail:
free(rsc);
free(task);
return pcmk_rc_ok;
}
/* This function is just a wrapper around mainloop_set_trigger() so that it can
* be used directly as a mainloop timer callback. It's simply another way of
* ensuring the screen gets redrawn.
*/
static gboolean
mon_trigger_refresh(gpointer user_data)
{
mainloop_set_trigger((crm_trigger_t *) refresh_trigger);
return FALSE;
}
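/* Process one node_state element from the CIB status section, passing its
* node name (or ID, if the name is unset) along to handle_rsc_op().
*/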
static int
handle_op_for_node(xmlNode *xml, void *userdata)
{
const char *node = crm_element_value(xml, PCMK_XA_UNAME);
if (node == NULL) {
node = pcmk__xe_id(xml);
}
handle_rsc_op(xml, (void *) node);
return pcmk_rc_ok;
}
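/* Handle a single change from a v2 CIB patchset, dispatching on what changed.
* Creations and modifications within the status section are forwarded to
* handle_rsc_op() (directly or via handle_op_for_node()); moves, deletions,
* and changes outside the status section are ignored.
*/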
static int
crm_diff_update_element(xmlNode *change, void *userdata)
{
const char *name = NULL;
const char *op = crm_element_value(change, PCMK_XA_OPERATION);
const char *xpath = crm_element_value(change, PCMK_XA_PATH);
xmlNode *match = NULL;
const char *node = NULL;
if (op == NULL) {
return pcmk_rc_ok;
} else if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
match = change->children;
} else if (pcmk__str_any_of(op, PCMK_VALUE_MOVE, PCMK_VALUE_DELETE,
NULL)) {
return pcmk_rc_ok;
} else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
match = pcmk__xe_first_child(change, PCMK_XE_CHANGE_RESULT, NULL, NULL);
if(match) {
match = match->children;
}
}
if(match) {
name = (const char *)match->name;
}
crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name);
if(xpath == NULL) {
/* Version field, ignore */
} else if(name == NULL) {
crm_debug("No result for %s operation to %s", op, xpath);
pcmk__assert(pcmk__str_any_of(op, PCMK_VALUE_MOVE, PCMK_VALUE_DELETE,
NULL));
} else if (strcmp(name, PCMK_XE_CIB) == 0) {
pcmk__xe_foreach_child(pcmk__xe_first_child(match, PCMK_XE_STATUS, NULL,
NULL),
NULL, handle_op_for_node, NULL);
} else if (strcmp(name, PCMK_XE_STATUS) == 0) {
pcmk__xe_foreach_child(match, NULL, handle_op_for_node, NULL);
} else if (strcmp(name, PCMK__XE_NODE_STATE) == 0) {
node = crm_element_value(match, PCMK_XA_UNAME);
if (node == NULL) {
node = pcmk__xe_id(match);
}
handle_rsc_op(match, (void *) node);
} else if (strcmp(name, PCMK__XE_LRM) == 0) {
node = pcmk__xe_id(match);
handle_rsc_op(match, (void *) node);
} else if (strcmp(name, PCMK__XE_LRM_RESOURCES) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM);
handle_rsc_op(match, local_node);
free(local_node);
} else if (strcmp(name, PCMK__XE_LRM_RESOURCE) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM);
handle_rsc_op(match, local_node);
free(local_node);
} else if (strcmp(name, PCMK__XE_LRM_RSC_OP) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM);
handle_rsc_op(match, local_node);
free(local_node);
} else {
crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
}
return pcmk_rc_ok;
}
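/* CIB notification callback: apply the incoming patchset to our copy of the
* CIB (re-querying the full CIB if the patch cannot be applied), forward the
* individual changes to the external agent if one is configured, and schedule
* a screen refresh.
*/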
static void
crm_diff_update(const char *event, xmlNode * msg)
{
int rc = -1;
static bool stale = FALSE;
gboolean cib_updated = FALSE;
xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT,
NULL, NULL);
xmlNode *diff = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
out->progress(out, false);
if (current_cib != NULL) {
rc = xml_apply_patchset(current_cib, diff, TRUE);
switch (rc) {
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
pcmk__xml_free(current_cib); current_cib = NULL;
break;
case pcmk_ok:
cib_updated = TRUE;
break;
default:
crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
pcmk__xml_free(current_cib); current_cib = NULL;
}
}
if (current_cib == NULL) {
crm_trace("Re-requesting the full cib");
cib->cmds->query(cib, NULL, &current_cib, cib_sync_call);
}
if (options.external_agent) {
int format = 0;
crm_element_value_int(diff, PCMK_XA_FORMAT, &format);
if (format == 2) {
pcmk__xe_foreach_child(diff, NULL, crm_diff_update_element, NULL);
} else {
crm_err("Unknown patch format: %d", format);
}
}
if (current_cib == NULL) {
if(!stale) {
out->info(out, "--- Stale data ---");
}
stale = TRUE;
return;
}
stale = FALSE;
refresh_after_event(cib_updated, FALSE);
}
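/* Redraw the cluster status display: fetch an up-to-date pacemakerd state
* when connected natively, then print the full cluster status via
* pcmk__output_cluster_status(). Returns G_SOURCE_REMOVE when the display
* should not (or cannot) be refreshed again, and G_SOURCE_CONTINUE otherwise.
*/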
static int
mon_refresh_display(gpointer user_data)
{
int rc = pcmk_rc_ok;
last_refresh = time(NULL);
if (output_format == mon_output_none) {
return G_SOURCE_REMOVE;
}
if (fence_history == pcmk__fence_history_full &&
!pcmk_all_flags_set(show, pcmk_section_fencing_all) &&
output_format != mon_output_xml) {
fence_history = pcmk__fence_history_reduced;
}
// Get an up-to-date pacemakerd status for the cluster summary
if (cib->variant == cib_native) {
pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2,
false, &pcmkd_state);
}
if (out->dest != stdout) {
out->reset(out);
}
rc = pcmk__output_cluster_status(scheduler, st, cib, current_cib,
pcmkd_state, fence_history, show,
show_opts,
options.only_node, options.only_rsc,
options.neg_location_prefix);
if (rc == pcmk_rc_schema_validation) {
clean_up(CRM_EX_CONFIG);
return G_SOURCE_REMOVE;
}
if (out->dest != stdout) {
out->finish(out, CRM_EX_OK, true, NULL);
}
return G_SOURCE_CONTINUE;
}
/* This function is called for fencing events (see setup_fencer_connection() for
* which ones) when --watch-fencing is used on the command line
*/
static void
mon_st_callback_event(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
/* disconnect cib as well and have everything reconnect */
mon_cib_connection_destroy(NULL);
} else if (options.external_agent) {
char *desc = stonith__event_description(e);
send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc);
free(desc);
}
}
/* Cause the screen to be redrawn (via mainloop_set_trigger) when various conditions are met:
*
* - If the last update occurred more than reconnect_ms ago (defaults to 5s, but
* can be changed via the -i command line option), or
* - After every 10 CIB updates, or
* - If it's been 2s since the last update
*
* This function sounds like it would be more broadly useful, but it is only
* called when a fencing event is received or a CIB diff occurs.
*/
static void
refresh_after_event(gboolean data_updated, gboolean enforce)
{
static int updates = 0;
time_t now = time(NULL);
if (data_updated) {
updates++;
}
if(refresh_timer == NULL) {
refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
}
if (reconnect_timer > 0) {
/* we will receive a refresh request after successful reconnect */
mainloop_timer_stop(refresh_timer);
return;
}
/* Because an initial failure to connect to the fencer is not treated as
* fatal, retry the connection here. We won't reach this point if a CIB
* reconnection is already under way.
*/
setup_fencer_connection();
if (enforce ||
((now - last_refresh) > (options.reconnect_ms / 1000)) ||
updates >= 10) {
mainloop_set_trigger((crm_trigger_t *) refresh_trigger);
mainloop_timer_stop(refresh_timer);
updates = 0;
} else {
mainloop_timer_start(refresh_timer);
}
}
/* This function is called for fencing events (see setup_fencer_connection() for
* which ones) when --watch-fencing is NOT used on the command line
*/
static void
mon_st_callback_display(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
/* disconnect cib as well and have everything reconnect */
mon_cib_connection_destroy(NULL);
} else {
out->progress(out, false);
refresh_after_event(TRUE, FALSE);
}
}
/*
* De-initialize ncurses, disconnect from the CIB manager, disconnect fencing,
* deallocate memory, and show the usage message if requested.
*
* We don't actually return, but nominally returning crm_exit_t allows a usage
* like "return clean_up(exit_code);" which helps static analysis understand the
* code flow.
*/
static crm_exit_t
clean_up(crm_exit_t exit_code)
{
/* Quitting crm_mon is much more complicated than it ought to be. */
/* (1) Close connections, free things, etc. */
if (io_channel != NULL) {
g_io_channel_shutdown(io_channel, TRUE, NULL);
}
cib__clean_up_connection(&cib);
stonith_api_delete(st);
free(options.neg_location_prefix);
free(options.only_node);
free(options.only_rsc);
free(options.pid_file);
g_slist_free_full(options.includes_excludes, free);
g_strfreev(processed_args);
pe_free_working_set(scheduler);
/* (2) If this is abnormal termination and we're in curses mode, shut down
* curses first. Any messages displayed to the screen before curses is shut
* down will be lost because doing the shut down will also restore the
* screen to whatever it looked like before crm_mon was started.
*/
if (((error != NULL) || (exit_code == CRM_EX_USAGE))
&& (output_format == mon_output_console)
&& (out != NULL)) {
out->finish(out, exit_code, false, NULL);
pcmk__output_free(out);
out = NULL;
}
/* (3) If this is a command line usage related failure, print the usage
* message.
*/
if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) {
char *help = g_option_context_get_help(context, TRUE, NULL);
fprintf(stderr, "%s", help);
g_free(help);
}
pcmk__free_arg_context(context);
/* (4) If this is any kind of error, print the error out and exit. Make
* sure to handle situations both before and after formatted output is
* set up. We want errors to appear formatted if at all possible.
*/
if (error != NULL) {
if (out != NULL) {
out->err(out, "%s: %s", g_get_prgname(), error->message);
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
} else {
fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message);
}
g_clear_error(&error);
crm_exit(exit_code);
}
/* (5) Print formatted output to the screen if we made it far enough in
* crm_mon to be able to do so.
*/
if (out != NULL) {
if (options.exec_mode != mon_exec_daemonized) {
out->finish(out, exit_code, true, NULL);
}
pcmk__output_free(out);
pcmk__unregister_formats();
}
crm_exit(exit_code);
}
diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c
index 078c6ea864..e37b7cbd9a 100644
--- a/tools/crm_resource_print.c
+++ b/tools/crm_resource_print.c
@@ -1,917 +1,917 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdint.h>
#include <crm_resource.h>
#include <crm/common/lists_internal.h>
#include <crm/common/output.h>
#include <crm/common/results.h>
#define cons_string(x) ((x)? (x) : "NA")
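// CTS helper: print one line per colocation constraint, ignoring constraints
// of any other type and elements without an ID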
static int
print_constraint(xmlNode *xml_obj, void *userdata)
{
pcmk_scheduler_t *scheduler = (pcmk_scheduler_t *) userdata;
pcmk__output_t *out = scheduler->priv->out;
const char *id = crm_element_value(xml_obj, PCMK_XA_ID);
if (id == NULL) {
return pcmk_rc_ok;
}
if (!pcmk__xe_is(xml_obj, PCMK_XE_RSC_COLOCATION)) {
return pcmk_rc_ok;
}
out->info(out, "Constraint %s %s %s %s %s %s %s",
xml_obj->name,
cons_string(crm_element_value(xml_obj, PCMK_XA_ID)),
cons_string(crm_element_value(xml_obj, PCMK_XA_RSC)),
cons_string(crm_element_value(xml_obj, PCMK_XA_WITH_RSC)),
cons_string(crm_element_value(xml_obj, PCMK_XA_SCORE)),
cons_string(crm_element_value(xml_obj, PCMK_XA_RSC_ROLE)),
cons_string(crm_element_value(xml_obj, PCMK_XA_WITH_RSC_ROLE)));
return pcmk_rc_ok;
}
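// Print all colocation constraints in the CIB, one line each, for CTS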
void
cli_resource_print_cts_constraints(pcmk_scheduler_t *scheduler)
{
pcmk__xe_foreach_child(pcmk_find_cib_element(scheduler->input,
PCMK_XE_CONSTRAINTS),
NULL, print_constraint, scheduler);
}
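// Recursively print a resource and its children in the format expected by
// CTS: element name, IDs, class/provider/type, current host, quorum
// requirement, and flags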
void
cli_resource_print_cts(pcmk_resource_t *rsc, pcmk__output_t *out)
{
const char *host = NULL;
bool needs_quorum = TRUE;
const char *rtype = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE);
const char *rprov = crm_element_value(rsc->priv->xml, PCMK_XA_PROVIDER);
const char *rclass = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS);
pcmk_node_t *node = pcmk__current_node(rsc);
if (pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)) {
needs_quorum = FALSE;
} else {
// @TODO check requires in resource meta-data and rsc_defaults
}
if (node != NULL) {
host = node->priv->name;
}
out->info(out, "Resource: %s %s %s %s %s %s %s %s %d %lld %#.16llx",
rsc->priv->xml->name, rsc->id,
pcmk__s(rsc->priv->history_id, rsc->id),
((rsc->priv->parent == NULL)? "NA" : rsc->priv->parent->id),
rprov? rprov : "NA", rclass, rtype, host? host : "NA", needs_quorum,
(unsigned long long) rsc->flags, (unsigned long long) rsc->flags);
g_list_foreach(rsc->priv->children, (GFunc) cli_resource_print_cts, out);
}
// \return Standard Pacemaker return code
int
cli_resource_print_operations(const char *rsc_id, const char *host_uname,
bool active, pcmk_scheduler_t *scheduler)
{
pcmk__output_t *out = scheduler->priv->out;
int rc = pcmk_rc_no_output;
GList *ops = find_operations(rsc_id, host_uname, active, scheduler);
if (!ops) {
return rc;
}
out->begin_list(out, NULL, NULL, "Resource Operations");
rc = pcmk_rc_ok;
for (GList *lpc = ops; lpc != NULL; lpc = lpc->next) {
xmlNode *xml_op = (xmlNode *) lpc->data;
out->message(out, "node-and-op", scheduler, xml_op);
}
out->end_list(out);
return rc;
}
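// Print a resource's status (via its own message formatter) followed by its
// XML configuration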
// \return Standard Pacemaker return code
int
cli_resource_print(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler,
bool expanded)
{
pcmk__output_t *out = scheduler->priv->out;
uint32_t show_opts = pcmk_show_pending;
GList *all = NULL;
all = g_list_prepend(all, (gpointer) "*");
out->begin_list(out, NULL, NULL, "Resource Config");
out->message(out, (const char *) rsc->priv->xml->name, show_opts, rsc, all,
all);
out->message(out, "resource-config", rsc, !expanded);
out->end_list(out);
g_list_free(all);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("attribute-changed", "attr_update_data_t *")
static int
attribute_changed_default(pcmk__output_t *out, va_list args)
{
attr_update_data_t *ud = va_arg(args, attr_update_data_t *);
out->info(out, "Set '%s' option: "
PCMK_XA_ID "=%s%s%s%s%s value=%s",
ud->given_rsc_id, ud->found_attr_id,
((ud->attr_set_id == NULL)? "" : " " PCMK__XA_SET "="),
pcmk__s(ud->attr_set_id, ""),
((ud->attr_name == NULL)? "" : " " PCMK_XA_NAME "="),
pcmk__s(ud->attr_name, ""), ud->attr_value);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("attribute-changed", "attr_update_data_t *")
static int
attribute_changed_xml(pcmk__output_t *out, va_list args)
{
attr_update_data_t *ud = va_arg(args, attr_update_data_t *);
pcmk__output_xml_create_parent(out,
(const char *) ud->rsc->priv->xml->name,
PCMK_XA_ID, ud->rsc->id,
NULL);
pcmk__output_xml_create_parent(out, ud->attr_set_type,
PCMK_XA_ID, ud->attr_set_id,
NULL);
pcmk__output_create_xml_node(out, PCMK_XE_NVPAIR,
PCMK_XA_ID, ud->found_attr_id,
PCMK_XA_VALUE, ud->attr_value,
PCMK_XA_NAME, ud->attr_name,
NULL);
pcmk__output_xml_pop_parent(out);
pcmk__output_xml_pop_parent(out);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("attribute-changed-list", "GList *")
static int
attribute_changed_list_default(pcmk__output_t *out, va_list args)
{
GList *results = va_arg(args, GList *);
if (results == NULL) {
return pcmk_rc_no_output;
}
for (GList *iter = results; iter != NULL; iter = iter->next) {
attr_update_data_t *ud = iter->data;
out->message(out, "attribute-changed", ud);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("attribute-changed-list", "GList *")
static int
attribute_changed_list_xml(pcmk__output_t *out, va_list args)
{
GList *results = va_arg(args, GList *);
if (results == NULL) {
return pcmk_rc_no_output;
}
pcmk__output_xml_create_parent(out, PCMK__XE_RESOURCE_SETTINGS, NULL);
for (GList *iter = results; iter != NULL; iter = iter->next) {
attr_update_data_t *ud = iter->data;
out->message(out, "attribute-changed", ud);
}
pcmk__output_xml_pop_parent(out);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("attribute-list", "pcmk_resource_t *", "const char *",
"const char *")
static int
attribute_list_default(pcmk__output_t *out, va_list args) {
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
const char *attr = va_arg(args, char *);
const char *value = va_arg(args, const char *);
if (value != NULL) {
out->begin_list(out, NULL, NULL, "Attributes");
out->list_item(out, attr, "%s", value);
out->end_list(out);
return pcmk_rc_ok;
} else {
out->err(out, "Attribute '%s' not found for '%s'", attr, rsc->id);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("agent-status", "int", "const char *", "const char *", "const char *",
"const char *", "const char *", "crm_exit_t", "const char *")
static int
agent_status_default(pcmk__output_t *out, va_list args) {
int status = va_arg(args, int);
const char *action = va_arg(args, const char *);
const char *name = va_arg(args, const char *);
const char *class = va_arg(args, const char *);
const char *provider = va_arg(args, const char *);
const char *type = va_arg(args, const char *);
crm_exit_t rc = va_arg(args, crm_exit_t);
const char *exit_reason = va_arg(args, const char *);
if (status == PCMK_EXEC_DONE) {
/* Operation <action> [for <resource>] (<class>[:<provider>]:<type>)
* returned <exit-code> (<exit-code-name>[: <exit-reason>])
*/
out->info(out, "Operation %s%s%s (%s%s%s:%s) returned %d (%s%s%s)",
action,
((name == NULL)? "" : " for "), ((name == NULL)? "" : name),
class,
((provider == NULL)? "" : ":"),
((provider == NULL)? "" : provider),
- type, (int) rc, services_ocf_exitcode_str((int) rc),
+ type, (int) rc, crm_exit_str(rc),
((exit_reason == NULL)? "" : ": "),
((exit_reason == NULL)? "" : exit_reason));
} else {
/* Operation <action> [for <resource>] (<class>[:<provider>]:<type>)
* could not be executed (<execution-status>[: <exit-reason>])
*/
out->err(out,
"Operation %s%s%s (%s%s%s:%s) could not be executed (%s%s%s)",
action,
((name == NULL)? "" : " for "), ((name == NULL)? "" : name),
class,
((provider == NULL)? "" : ":"),
((provider == NULL)? "" : provider),
type, pcmk_exec_status_str(status),
((exit_reason == NULL)? "" : ": "),
((exit_reason == NULL)? "" : exit_reason));
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("agent-status", "int", "const char *", "const char *", "const char *",
"const char *", "const char *", "crm_exit_t", "const char *")
static int
agent_status_xml(pcmk__output_t *out, va_list args) {
int status = va_arg(args, int);
const char *action G_GNUC_UNUSED = va_arg(args, const char *);
const char *name G_GNUC_UNUSED = va_arg(args, const char *);
const char *class G_GNUC_UNUSED = va_arg(args, const char *);
const char *provider G_GNUC_UNUSED = va_arg(args, const char *);
const char *type G_GNUC_UNUSED = va_arg(args, const char *);
crm_exit_t rc = va_arg(args, crm_exit_t);
const char *exit_reason = va_arg(args, const char *);
char *exit_s = pcmk__itoa(rc);
- const char *message = services_ocf_exitcode_str((int) rc);
+ const char *message = crm_exit_str(rc);
char *status_s = pcmk__itoa(status);
const char *execution_message = pcmk_exec_status_str(status);
pcmk__output_create_xml_node(out, PCMK_XE_AGENT_STATUS,
PCMK_XA_CODE, exit_s,
PCMK_XA_MESSAGE, message,
PCMK_XA_EXECUTION_CODE, status_s,
PCMK_XA_EXECUTION_MESSAGE, execution_message,
PCMK_XA_REASON, exit_reason,
NULL);
free(exit_s);
free(status_s);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("attribute-list", "pcmk_resource_t *", "const char *",
"const char *")
static int
attribute_list_text(pcmk__output_t *out, va_list args) {
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
const char *attr = va_arg(args, char *);
const char *value = va_arg(args, const char *);
if (value != NULL) {
pcmk__formatted_printf(out, "%s\n", value);
return pcmk_rc_ok;
} else {
out->err(out, "Attribute '%s' not found for '%s'", attr, rsc->id);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("override", "const char *", "const char *", "const char *")
static int
override_default(pcmk__output_t *out, va_list args) {
const char *rsc_name = va_arg(args, const char *);
const char *name = va_arg(args, const char *);
const char *value = va_arg(args, const char *);
if (rsc_name == NULL) {
out->list_item(out, NULL, "Overriding the cluster configuration with '%s' = '%s'",
name, value);
} else {
out->list_item(out, NULL, "Overriding the cluster configuration for '%s' with '%s' = '%s'",
rsc_name, name, value);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("override", "const char *", "const char *", "const char *")
static int
override_xml(pcmk__output_t *out, va_list args) {
const char *rsc_name = va_arg(args, const char *);
const char *name = va_arg(args, const char *);
const char *value = va_arg(args, const char *);
xmlNodePtr node = pcmk__output_create_xml_node(out, PCMK_XE_OVERRIDE,
PCMK_XA_NAME, name,
PCMK_XA_VALUE, value,
NULL);
if (rsc_name != NULL) {
crm_xml_add(node, PCMK_XA_RSC, rsc_name);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("property-list", "pcmk_resource_t *", "const char *")
static int
property_list_default(pcmk__output_t *out, va_list args) {
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
const char *attr = va_arg(args, char *);
const char *value = crm_element_value(rsc->priv->xml, attr);
if (value != NULL) {
out->begin_list(out, NULL, NULL, "Properties");
out->list_item(out, attr, "%s", value);
out->end_list(out);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("property-list", "pcmk_resource_t *", "const char *")
static int
property_list_text(pcmk__output_t *out, va_list args) {
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
const char *attr = va_arg(args, const char *);
const char *value = crm_element_value(rsc->priv->xml, attr);
if (value != NULL) {
pcmk__formatted_printf(out, "%s\n", value);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("resource-agent-action", "int", "const char *", "const char *",
"const char *", "const char *", "const char *", "GHashTable *",
"crm_exit_t", "int", "const char *", "const char *", "const char *")
static int
resource_agent_action_default(pcmk__output_t *out, va_list args) {
int verbose = va_arg(args, int);
const char *class = va_arg(args, const char *);
const char *provider = va_arg(args, const char *);
const char *type = va_arg(args, const char *);
const char *rsc_name = va_arg(args, const char *);
const char *action = va_arg(args, const char *);
GHashTable *overrides = va_arg(args, GHashTable *);
crm_exit_t rc = va_arg(args, crm_exit_t);
int status = va_arg(args, int);
const char *exit_reason = va_arg(args, const char *);
const char *stdout_data = va_arg(args, const char *);
const char *stderr_data = va_arg(args, const char *);
if (overrides) {
GHashTableIter iter;
const char *name = NULL;
const char *value = NULL;
out->begin_list(out, NULL, NULL, PCMK_XE_OVERRIDES);
g_hash_table_iter_init(&iter, overrides);
while (g_hash_table_iter_next(&iter, (gpointer *) &name, (gpointer *) &value)) {
out->message(out, "override", rsc_name, name, value);
}
out->end_list(out);
}
out->message(out, "agent-status", status, action, rsc_name, class, provider,
type, rc, exit_reason);
/* Hide output for validate-all unless running verbosely */
if ((verbose == 0)
&& pcmk__str_eq(action, PCMK_ACTION_VALIDATE_ALL, pcmk__str_casei)) {
return pcmk_rc_ok;
}
if (stdout_data || stderr_data) {
xmlNodePtr doc = NULL;
if (stdout_data != NULL) {
doc = pcmk__xml_parse(stdout_data);
}
if (doc != NULL) {
out->output_xml(out, PCMK_XE_COMMAND, stdout_data);
pcmk__xml_free(doc);
} else {
out->subprocess_output(out, rc, stdout_data, stderr_data);
}
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("resource-agent-action", "int", "const char *", "const char *",
"const char *", "const char *", "const char *", "GHashTable *",
"crm_exit_t", "int", "const char *", "const char *", "const char *")
static int
resource_agent_action_xml(pcmk__output_t *out, va_list args) {
int verbose G_GNUC_UNUSED = va_arg(args, int);
const char *class = va_arg(args, const char *);
const char *provider = va_arg(args, const char *);
const char *type = va_arg(args, const char *);
const char *rsc_name = va_arg(args, const char *);
const char *action = va_arg(args, const char *);
GHashTable *overrides = va_arg(args, GHashTable *);
crm_exit_t rc = va_arg(args, crm_exit_t);
int status = va_arg(args, int);
const char *exit_reason = va_arg(args, const char *);
const char *stdout_data = va_arg(args, const char *);
const char *stderr_data = va_arg(args, const char *);
xmlNodePtr node = NULL;
node = pcmk__output_xml_create_parent(out, PCMK_XE_RESOURCE_AGENT_ACTION,
PCMK_XA_ACTION, action,
PCMK_XA_CLASS, class,
PCMK_XA_TYPE, type,
NULL);
if (rsc_name) {
crm_xml_add(node, PCMK_XA_RSC, rsc_name);
}
crm_xml_add(node, PCMK_XA_PROVIDER, provider);
if (overrides) {
GHashTableIter iter;
const char *name = NULL;
const char *value = NULL;
out->begin_list(out, NULL, NULL, PCMK_XE_OVERRIDES);
g_hash_table_iter_init(&iter, overrides);
while (g_hash_table_iter_next(&iter, (gpointer *) &name, (gpointer *) &value)) {
out->message(out, "override", rsc_name, name, value);
}
out->end_list(out);
}
out->message(out, "agent-status", status, action, rsc_name, class, provider,
type, rc, exit_reason);
if (stdout_data || stderr_data) {
xmlNodePtr doc = NULL;
if (stdout_data != NULL) {
doc = pcmk__xml_parse(stdout_data);
}
if (doc != NULL) {
out->output_xml(out, PCMK_XE_COMMAND, stdout_data);
pcmk__xml_free(doc);
} else {
out->subprocess_output(out, rc, stdout_data, stderr_data);
}
}
pcmk__output_xml_pop_parent(out);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("resource-check-list", "resource_checks_t *")
static int
resource_check_list_default(pcmk__output_t *out, va_list args) {
resource_checks_t *checks = va_arg(args, resource_checks_t *);
const pcmk_resource_t *parent = pe__const_top_resource(checks->rsc, false);
const pcmk_scheduler_t *scheduler = checks->rsc->priv->scheduler;
if (checks->flags == 0) {
return pcmk_rc_no_output;
}
out->begin_list(out, NULL, NULL, "Resource Checks");
if (pcmk_is_set(checks->flags, rsc_remain_stopped)) {
out->list_item(out, "check", "Configuration specifies '%s' should remain stopped",
parent->id);
}
if (pcmk_is_set(checks->flags, rsc_unpromotable)) {
out->list_item(out, "check", "Configuration specifies '%s' should not be promoted",
parent->id);
}
if (pcmk_is_set(checks->flags, rsc_unmanaged)) {
out->list_item(out, "check", "Configuration prevents cluster from stopping or starting unmanaged '%s'",
parent->id);
}
if (pcmk_is_set(checks->flags, rsc_locked)) {
out->list_item(out, "check", "'%s' is locked to node %s due to shutdown",
parent->id, checks->lock_node);
}
if (pcmk_is_set(checks->flags, rsc_node_health)) {
out->list_item(out, "check",
"'%s' cannot run on unhealthy nodes due to "
PCMK_OPT_NODE_HEALTH_STRATEGY "='%s'",
parent->id,
pcmk__cluster_option(scheduler->priv->options,
PCMK_OPT_NODE_HEALTH_STRATEGY));
}
out->end_list(out);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("resource-check-list", "resource_checks_t *")
static int
resource_check_list_xml(pcmk__output_t *out, va_list args) {
resource_checks_t *checks = va_arg(args, resource_checks_t *);
const pcmk_resource_t *parent = pe__const_top_resource(checks->rsc, false);
xmlNodePtr node = pcmk__output_create_xml_node(out, PCMK_XE_CHECK,
PCMK_XA_ID, parent->id,
NULL);
if (pcmk_is_set(checks->flags, rsc_remain_stopped)) {
pcmk__xe_set_bool_attr(node, PCMK_XA_REMAIN_STOPPED, true);
}
if (pcmk_is_set(checks->flags, rsc_unpromotable)) {
pcmk__xe_set_bool_attr(node, PCMK_XA_PROMOTABLE, false);
}
if (pcmk_is_set(checks->flags, rsc_unmanaged)) {
pcmk__xe_set_bool_attr(node, PCMK_XA_UNMANAGED, true);
}
if (pcmk_is_set(checks->flags, rsc_locked)) {
crm_xml_add(node, PCMK_XA_LOCKED_TO_HYPHEN, checks->lock_node);
}
if (pcmk_is_set(checks->flags, rsc_node_health)) {
pcmk__xe_set_bool_attr(node, PCMK_XA_UNHEALTHY, true);
}
return pcmk_rc_ok;
}
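// Formatters for resource search results (the nodes where a resource is
// currently active)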
PCMK__OUTPUT_ARGS("resource-search-list", "GList *", "const gchar *")
static int
resource_search_list_default(pcmk__output_t *out, va_list args)
{
GList *nodes = va_arg(args, GList *);
const gchar *requested_name = va_arg(args, const gchar *);
bool printed = false;
int rc = pcmk_rc_no_output;
if (!out->is_quiet(out) && nodes == NULL) {
out->err(out, "resource %s is NOT running", requested_name);
return rc;
}
for (GList *lpc = nodes; lpc != NULL; lpc = lpc->next) {
node_info_t *ni = (node_info_t *) lpc->data;
if (!printed) {
out->begin_list(out, NULL, NULL, "Nodes");
printed = true;
rc = pcmk_rc_ok;
}
if (out->is_quiet(out)) {
out->list_item(out, "node", "%s", ni->node_name);
} else {
const char *role_text = "";
if (ni->promoted) {
role_text = " " PCMK_ROLE_PROMOTED;
}
out->list_item(out, "node", "resource %s is running on: %s%s",
requested_name, ni->node_name, role_text);
}
}
if (printed) {
out->end_list(out);
}
return rc;
}
PCMK__OUTPUT_ARGS("resource-search-list", "GList *", "const gchar *")
static int
resource_search_list_xml(pcmk__output_t *out, va_list args)
{
GList *nodes = va_arg(args, GList *);
const gchar *requested_name = va_arg(args, const gchar *);
pcmk__output_xml_create_parent(out, PCMK_XE_NODES,
PCMK_XA_RESOURCE, requested_name,
NULL);
for (GList *lpc = nodes; lpc != NULL; lpc = lpc->next) {
node_info_t *ni = (node_info_t *) lpc->data;
xmlNodePtr sub_node = pcmk__output_create_xml_text_node(out,
PCMK_XE_NODE,
ni->node_name);
if (ni->promoted) {
crm_xml_add(sub_node, PCMK_XA_STATE, "promoted");
}
}
pcmk__output_xml_pop_parent(out);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("resource-reasons-list", "GList *", "pcmk_resource_t *",
"pcmk_node_t *")
static int
resource_reasons_list_default(pcmk__output_t *out, va_list args)
{
GList *resources = va_arg(args, GList *);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
pcmk_node_t *node = va_arg(args, pcmk_node_t *);
const char *host_uname = (node == NULL)? NULL : node->priv->name;
out->begin_list(out, NULL, NULL, "Resource Reasons");
if ((rsc == NULL) && (host_uname == NULL)) {
GList *lpc = NULL;
GList *hosts = NULL;
for (lpc = resources; lpc != NULL; lpc = lpc->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) lpc->data;
rsc->priv->fns->location(rsc, &hosts, pcmk__rsc_node_current);
if (hosts == NULL) {
out->list_item(out, "reason", "Resource %s is not running", rsc->id);
} else {
out->list_item(out, "reason", "Resource %s is running", rsc->id);
}
cli_resource_check(out, rsc, NULL);
g_list_free(hosts);
hosts = NULL;
}
} else if ((rsc != NULL) && (host_uname != NULL)) {
if (resource_is_running_on(rsc, host_uname)) {
out->list_item(out, "reason", "Resource %s is running on host %s",
rsc->id, host_uname);
} else {
out->list_item(out, "reason", "Resource %s is not running on host %s",
rsc->id, host_uname);
}
cli_resource_check(out, rsc, node);
} else if ((rsc == NULL) && (host_uname != NULL)) {
GList *allResources = node->priv->assigned_resources;
GList *activeResources = node->details->running_rsc;
GList *unactiveResources = pcmk__subtract_lists(allResources, activeResources, (GCompareFunc) strcmp);
GList *lpc = NULL;
for (lpc = activeResources; lpc != NULL; lpc = lpc->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) lpc->data;
out->list_item(out, "reason", "Resource %s is running on host %s",
rsc->id, host_uname);
cli_resource_check(out, rsc, node);
}
for(lpc = unactiveResources; lpc != NULL; lpc = lpc->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) lpc->data;
out->list_item(out, "reason", "Resource %s is assigned to host %s but not running",
rsc->id, host_uname);
cli_resource_check(out, rsc, node);
}
g_list_free(allResources);
g_list_free(activeResources);
g_list_free(unactiveResources);
} else if ((rsc != NULL) && (host_uname == NULL)) {
GList *hosts = NULL;
rsc->priv->fns->location(rsc, &hosts, pcmk__rsc_node_current);
out->list_item(out, "reason", "Resource %s is %srunning",
rsc->id, (hosts? "" : "not "));
cli_resource_check(out, rsc, NULL);
g_list_free(hosts);
}
out->end_list(out);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("resource-reasons-list", "GList *", "pcmk_resource_t *",
"pcmk_node_t *")
static int
resource_reasons_list_xml(pcmk__output_t *out, va_list args)
{
GList *resources = va_arg(args, GList *);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
pcmk_node_t *node = va_arg(args, pcmk_node_t *);
const char *host_uname = (node == NULL)? NULL : node->priv->name;
xmlNodePtr xml_node = pcmk__output_xml_create_parent(out, PCMK_XE_REASON,
NULL);
if ((rsc == NULL) && (host_uname == NULL)) {
GList *lpc = NULL;
GList *hosts = NULL;
pcmk__output_xml_create_parent(out, PCMK_XE_RESOURCES, NULL);
for (lpc = resources; lpc != NULL; lpc = lpc->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) lpc->data;
const char *running = NULL;
rsc->priv->fns->location(rsc, &hosts, pcmk__rsc_node_current);
running = pcmk__btoa(hosts != NULL);
pcmk__output_xml_create_parent(out, PCMK_XE_RESOURCE,
PCMK_XA_ID, rsc->id,
PCMK_XA_RUNNING, running,
NULL);
cli_resource_check(out, rsc, NULL);
pcmk__output_xml_pop_parent(out);
g_list_free(hosts);
hosts = NULL;
}
pcmk__output_xml_pop_parent(out);
} else if ((rsc != NULL) && (host_uname != NULL)) {
if (resource_is_running_on(rsc, host_uname)) {
crm_xml_add(xml_node, PCMK_XA_RUNNING_ON, host_uname);
}
cli_resource_check(out, rsc, node);
} else if ((rsc == NULL) && (host_uname != NULL)) {
GList *allResources = node->priv->assigned_resources;
GList *activeResources = node->details->running_rsc;
GList *unactiveResources = pcmk__subtract_lists(allResources, activeResources, (GCompareFunc) strcmp);
GList *lpc = NULL;
pcmk__output_xml_create_parent(out, PCMK_XE_RESOURCES, NULL);
for (lpc = activeResources; lpc != NULL; lpc = lpc->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) lpc->data;
pcmk__output_xml_create_parent(out, PCMK_XE_RESOURCE,
PCMK_XA_ID, rsc->id,
PCMK_XA_RUNNING, PCMK_VALUE_TRUE,
PCMK_XA_HOST, host_uname,
NULL);
cli_resource_check(out, rsc, node);
pcmk__output_xml_pop_parent(out);
}
for(lpc = unactiveResources; lpc != NULL; lpc = lpc->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) lpc->data;
pcmk__output_xml_create_parent(out, PCMK_XE_RESOURCE,
PCMK_XA_ID, rsc->id,
PCMK_XA_RUNNING, PCMK_VALUE_FALSE,
PCMK_XA_HOST, host_uname,
NULL);
cli_resource_check(out, rsc, node);
pcmk__output_xml_pop_parent(out);
}
pcmk__output_xml_pop_parent(out);
g_list_free(allResources);
g_list_free(activeResources);
g_list_free(unactiveResources);
} else if ((rsc != NULL) && (host_uname == NULL)) {
GList *hosts = NULL;
rsc->priv->fns->location(rsc, &hosts, pcmk__rsc_node_current);
crm_xml_add(xml_node, PCMK_XA_RUNNING, pcmk__btoa(hosts != NULL));
cli_resource_check(out, rsc, NULL);
g_list_free(hosts);
}
pcmk__output_xml_pop_parent(out);
return pcmk_rc_ok;
}
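// Add a primitive's name to the current output list, recursing into children
// for collective resources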
static void
add_resource_name(pcmk_resource_t *rsc, pcmk__output_t *out)
{
if (rsc->priv->children == NULL) {
/* Sometimes the PCMK_XE_RESOURCE string is used as a name (PCMK_XA_NAME)
* rather than as an XML element name, depending on whether
* pcmk__output_enable_list_element() was called.
*/
out->list_item(out, PCMK_XE_RESOURCE, "%s", rsc->id);
} else {
g_list_foreach(rsc->priv->children, (GFunc) add_resource_name, out);
}
}
PCMK__OUTPUT_ARGS("resource-names-list", "GList *")
static int
resource_names(pcmk__output_t *out, va_list args) {
GList *resources = va_arg(args, GList *);
if (resources == NULL) {
out->err(out, "NO resources configured\n");
return pcmk_rc_no_output;
}
out->begin_list(out, NULL, NULL, "Resource Names");
g_list_foreach(resources, (GFunc) add_resource_name, out);
out->end_list(out);
return pcmk_rc_ok;
}
static pcmk__message_entry_t fmt_functions[] = {
{ "agent-status", "default", agent_status_default },
{ "agent-status", "xml", agent_status_xml },
{ "attribute-changed", "default", attribute_changed_default },
{ "attribute-changed", "xml", attribute_changed_xml },
{ "attribute-changed-list", "default", attribute_changed_list_default },
{ "attribute-changed-list", "xml", attribute_changed_list_xml },
{ "attribute-list", "default", attribute_list_default },
{ "attribute-list", "text", attribute_list_text },
{ "override", "default", override_default },
{ "override", "xml", override_xml },
{ "property-list", "default", property_list_default },
{ "property-list", "text", property_list_text },
{ "resource-agent-action", "default", resource_agent_action_default },
{ "resource-agent-action", "xml", resource_agent_action_xml },
{ "resource-check-list", "default", resource_check_list_default },
{ "resource-check-list", "xml", resource_check_list_xml },
{ "resource-search-list", "default", resource_search_list_default },
{ "resource-search-list", "xml", resource_search_list_xml },
{ "resource-reasons-list", "default", resource_reasons_list_default },
{ "resource-reasons-list", "xml", resource_reasons_list_xml },
{ "resource-names-list", "default", resource_names },
{ NULL, NULL, NULL }
};
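/* Register the message formatters above with a formatted output object.
*
* Usage sketch (assuming an output object created elsewhere, e.g. with
* pcmk__output_new()):
*
*   crm_resource_register_messages(out);
*   out->message(out, "resource-names-list", resources);
*/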
void
crm_resource_register_messages(pcmk__output_t *out) {
pcmk__register_messages(out, fmt_functions);
}